1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 and is set back to zero only if the
101  * initialization of the tracer is successful; that is the only
102  * place that sets it back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * from "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
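/*
 * A minimal sketch of walking one such array, given the layout described
 * above (a head item, then head.length map items, then a tail item whose
 * tail.next chains to the next array). The function name and the
 * assumption that @ptr points at the head item are illustrative only.
 */
#if 0
static void example_walk_eval_map_array(union trace_eval_map_item *ptr)
{
	unsigned long len = ptr->head.length;	/* number of saved maps */
	unsigned long i;

	for (i = 1; i <= len; i++)		/* maps follow the head item */
		pr_info("%s: %s = %lu\n", ptr[i].map.system,
			ptr[i].map.eval_string, ptr[i].map.eval_value);

	/* ptr[len + 1] is the tail item; tail.next points to the next array */
}
#endif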
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
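/*
 * For example (note the +500 rounds to the nearest microsecond before
 * do_div() truncates): ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */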
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
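/*
 * A minimal sketch of the intended calling pattern (the handler names are
 * illustrative): an open() callback checks lockdown and the disabled state
 * and takes a reference in one call, and the matching release() callback
 * drops that reference with trace_array_put().
 */
#if 0
static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int example_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
#endif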
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
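/*
 * A minimal sketch of how the fork and exit paths are expected to use this
 * helper (the hook names are illustrative): on fork, pass the parent as
 * @self so the child is only added when the parent is already filtered;
 * on exit, pass NULL so the task is removed from the list.
 */
#if 0
static void example_fork_hook(struct trace_pid_list *pid_list,
			      struct task_struct *parent,
			      struct task_struct *child)
{
	trace_filter_add_remove_task(pid_list, parent, child);
}

static void example_exit_hook(struct trace_pid_list *pid_list,
			      struct task_struct *task)
{
	trace_filter_add_remove_task(pid_list, NULL, task);
}
#endif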
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid already is +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
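/*
 * A minimal sketch of how trace_pid_start(), trace_pid_next() and
 * trace_pid_show() are meant to back a seq_file interface (the wrapper
 * and ops names are illustrative; real users fetch the pid list from
 * their own private data, typically under RCU):
 */
#if 0
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_sops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};
#endif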
513
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all-or-nothing
535          * operation: a new array is created whenever the user adds new
536          * pids, and if the operation fails, then the current list is
537          * not modified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
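/*
 * A minimal sketch of how a write() handler is expected to use
 * trace_pid_write() (names are illustrative, and the caller is assumed
 * to hold the mutex that protects the list): build the new list from
 * user input, publish it, and free the old one after readers are done.
 */
#if 0
static ssize_t example_pids_write(struct trace_pid_list **filtered_pids,
				  const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *old = *filtered_pids;
	struct trace_pid_list *pid_list;
	ssize_t ret;

	ret = trace_pid_write(old, &pid_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	rcu_assign_pointer(*filtered_pids, pid_list);	/* publish the new list */
	synchronize_rcu();				/* wait for old readers */
	if (old)
		trace_free_pid_list(old);

	return ret;
}
#endif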
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been enabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" so it can be used in fast paths such as by
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it is much appreciated not to
658  * have to wait for all that output. In any case, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
676  * The ring buffer serializes readers, but that is only low-level protection.
677  * The validity of the events (returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be rewritten
684  *      by the events producer.
685  *   B) the page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different cpu ring
689  * buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
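/*
 * A minimal sketch of the intended reader-side pattern (illustrative
 * only): take the access lock for the cpu being read (or for
 * RING_BUFFER_ALL_CPUS) around any consuming read of that buffer.
 */
#if 0
static void example_consume(struct trace_iterator *iter, int cpu)
{
	trace_access_lock(cpu);
	/* ... consume events from iter->array_buffer for @cpu ... */
	trace_access_unlock(cpu);
}
#endif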
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races where it gets disabled while we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
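/*
 * A minimal usage sketch (simplified; callers normally go through the
 * trace_puts() macro rather than calling this directly): write a string
 * into the trace buffer, tagged with the caller's instruction pointer.
 */
#if 0
static void example_mark(void)
{
	__trace_puts(_THIS_IP_, "hit the slow path\n",
		     strlen("hit the slow path\n"));
}
#endif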
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer. Instead, we resize it because
1082          * the max_tr ring buffer has some state (e.g. ring->clock) and
1083          * we want to preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
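/*
 * A minimal usage sketch (the function names are illustrative): allocate
 * the snapshot buffer once from a context that may sleep, then take
 * snapshots at the point of interest without sleeping.
 */
#if 0
static int __init example_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void example_hit_condition(void)
{
	tracing_snapshot();	/* swap the live buffer into the snapshot buffer */
}
#endif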
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
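/*
 * A minimal sketch of the conditional snapshot flow (the callback, the
 * threshold and the way cond_data is encoded are illustrative): the
 * update() callback sees the cond_data passed to tracing_snapshot_cond()
 * and decides whether the swap should actually happen.
 */
#if 0
static bool example_update(struct trace_array *tr, void *cond_data)
{
	return (unsigned long)cond_data > 1000;	/* snapshot only above a threshold */
}

static int example_enable(struct trace_array *tr)
{
	/* Associate user data (none here) and the update callback. */
	return tracing_snapshot_cond_enable(tr, NULL, example_update);
}

static void example_event(struct trace_array *tr, unsigned long latency)
{
	/* update() decides whether this call actually takes the snapshot. */
	tracing_snapshot_cond(tr, (void *)latency);
}
#endif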
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races where it gets disabled while we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning)
1303                 tracing_off();
1304 }
1305
1306 /**
1307  * tracer_tracing_is_on - show real state of ring buffer enabled
1308  * @tr: the trace array whose ring buffer state is queried
1309  *
1310  * Shows the real state of the ring buffer: whether it is enabled or not.
1311  */
1312 bool tracer_tracing_is_on(struct trace_array *tr)
1313 {
1314         if (tr->array_buffer.buffer)
1315                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1316         return !tr->buffer_disabled;
1317 }
1318
1319 /**
1320  * tracing_is_on - show state of ring buffers enabled
1321  */
1322 int tracing_is_on(void)
1323 {
1324         return tracer_tracing_is_on(&global_trace);
1325 }
1326 EXPORT_SYMBOL_GPL(tracing_is_on);
1327
1328 static int __init set_buf_size(char *str)
1329 {
1330         unsigned long buf_size;
1331
1332         if (!str)
1333                 return 0;
1334         buf_size = memparse(str, &str);
1335         /* nr_entries can not be zero */
1336         if (buf_size == 0)
1337                 return 0;
1338         trace_buf_size = buf_size;
1339         return 1;
1340 }
1341 __setup("trace_buf_size=", set_buf_size);
1342
1343 static int __init set_tracing_thresh(char *str)
1344 {
1345         unsigned long threshold;
1346         int ret;
1347
1348         if (!str)
1349                 return 0;
1350         ret = kstrtoul(str, 0, &threshold);
1351         if (ret < 0)
1352                 return 0;
1353         tracing_thresh = threshold * 1000;
1354         return 1;
1355 }
1356 __setup("tracing_thresh=", set_tracing_thresh);
1357
1358 unsigned long nsecs_to_usecs(unsigned long nsecs)
1359 {
1360         return nsecs / 1000;
1361 }
1362
1363 /*
1364  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1365  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1366  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1367  * of strings in the order that the evals (enum) were defined.
1368  */
1369 #undef C
1370 #define C(a, b) b
1371
1372 /* These must match the bit positions in trace_iterator_flags */
1373 static const char *trace_options[] = {
1374         TRACE_FLAGS
1375         NULL
1376 };
1377
1378 static struct {
1379         u64 (*func)(void);
1380         const char *name;
1381         int in_ns;              /* is this clock in nanoseconds? */
1382 } trace_clocks[] = {
1383         { trace_clock_local,            "local",        1 },
1384         { trace_clock_global,           "global",       1 },
1385         { trace_clock_counter,          "counter",      0 },
1386         { trace_clock_jiffies,          "uptime",       0 },
1387         { trace_clock,                  "perf",         1 },
1388         { ktime_get_mono_fast_ns,       "mono",         1 },
1389         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1390         { ktime_get_boot_fast_ns,       "boot",         1 },
1391         ARCH_TRACE_CLOCKS
1392 };
1393
1394 bool trace_clock_in_ns(struct trace_array *tr)
1395 {
1396         if (trace_clocks[tr->clock_id].in_ns)
1397                 return true;
1398
1399         return false;
1400 }
1401
1402 /*
1403  * trace_parser_get_init - gets the buffer for trace parser
1404  */
1405 int trace_parser_get_init(struct trace_parser *parser, int size)
1406 {
1407         memset(parser, 0, sizeof(*parser));
1408
1409         parser->buffer = kmalloc(size, GFP_KERNEL);
1410         if (!parser->buffer)
1411                 return 1;
1412
1413         parser->size = size;
1414         return 0;
1415 }
1416
1417 /*
1418  * trace_parser_put - frees the buffer for trace parser
1419  */
1420 void trace_parser_put(struct trace_parser *parser)
1421 {
1422         kfree(parser->buffer);
1423         parser->buffer = NULL;
1424 }
1425
1426 /*
1427  * trace_get_user - reads the user input string separated by space
1428  * (matched by isspace(ch))
1429  *
1430  * For each string found the 'struct trace_parser' is updated,
1431  * and the function returns.
1432  *
1433  * Returns number of bytes read.
1434  *
1435  * See kernel/trace/trace.h for 'struct trace_parser' details.
1436  */
1437 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1438         size_t cnt, loff_t *ppos)
1439 {
1440         char ch;
1441         size_t read = 0;
1442         ssize_t ret;
1443
1444         if (!*ppos)
1445                 trace_parser_clear(parser);
1446
1447         ret = get_user(ch, ubuf++);
1448         if (ret)
1449                 goto out;
1450
1451         read++;
1452         cnt--;
1453
1454         /*
1455          * If the parser is not finished with the last write,
1456          * continue reading the user input without skipping spaces.
1457          */
1458         if (!parser->cont) {
1459                 /* skip white space */
1460                 while (cnt && isspace(ch)) {
1461                         ret = get_user(ch, ubuf++);
1462                         if (ret)
1463                                 goto out;
1464                         read++;
1465                         cnt--;
1466                 }
1467
1468                 parser->idx = 0;
1469
1470                 /* only spaces were written */
1471                 if (isspace(ch) || !ch) {
1472                         *ppos += read;
1473                         ret = read;
1474                         goto out;
1475                 }
1476         }
1477
1478         /* read the non-space input */
1479         while (cnt && !isspace(ch) && ch) {
1480                 if (parser->idx < parser->size - 1)
1481                         parser->buffer[parser->idx++] = ch;
1482                 else {
1483                         ret = -EINVAL;
1484                         goto out;
1485                 }
1486                 ret = get_user(ch, ubuf++);
1487                 if (ret)
1488                         goto out;
1489                 read++;
1490                 cnt--;
1491         }
1492
1493         /* We either got finished input or we have to wait for another call. */
1494         if (isspace(ch) || !ch) {
1495                 parser->buffer[parser->idx] = 0;
1496                 parser->cont = false;
1497         } else if (parser->idx < parser->size - 1) {
1498                 parser->cont = true;
1499                 parser->buffer[parser->idx++] = ch;
1500                 /* Make sure the parsed string always terminates with '\0'. */
1501                 parser->buffer[parser->idx] = 0;
1502         } else {
1503                 ret = -EINVAL;
1504                 goto out;
1505         }
1506
1507         *ppos += read;
1508         ret = read;
1509
1510 out:
1511         return ret;
1512 }
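/*
 * A minimal sketch of the usual parser lifecycle, mirroring
 * trace_pid_write() above (the handler name and token handling are
 * illustrative): allocate the parser, pull one whitespace-separated
 * token per trace_get_user() call, and free the parser when done.
 */
#if 0
static ssize_t example_parse_words(const char __user *ubuf, size_t cnt)
{
	struct trace_parser parser;
	ssize_t read = 0;
	ssize_t ret = 0;
	loff_t pos;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	while (cnt > 0) {
		pos = 0;
		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0 || !trace_parser_loaded(&parser))
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		/* parser.buffer now holds one NUL-terminated token */
		trace_parser_clear(&parser);
	}

	trace_parser_put(&parser);
	return ret < 0 ? ret : read;
}
#endif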
1513
1514 /* TODO add a seq_buf_to_buffer() */
1515 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1516 {
1517         int len;
1518
1519         if (trace_seq_used(s) <= s->seq.readpos)
1520                 return -EBUSY;
1521
1522         len = trace_seq_used(s) - s->seq.readpos;
1523         if (cnt > len)
1524                 cnt = len;
1525         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1526
1527         s->seq.readpos += cnt;
1528         return cnt;
1529 }
1530
1531 unsigned long __read_mostly     tracing_thresh;
1532 static const struct file_operations tracing_max_lat_fops;
1533
1534 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1535         defined(CONFIG_FSNOTIFY)
1536
1537 static struct workqueue_struct *fsnotify_wq;
1538
1539 static void latency_fsnotify_workfn(struct work_struct *work)
1540 {
1541         struct trace_array *tr = container_of(work, struct trace_array,
1542                                               fsnotify_work);
1543         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1544                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1545 }
1546
1547 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1548 {
1549         struct trace_array *tr = container_of(iwork, struct trace_array,
1550                                               fsnotify_irqwork);
1551         queue_work(fsnotify_wq, &tr->fsnotify_work);
1552 }
1553
1554 static void trace_create_maxlat_file(struct trace_array *tr,
1555                                      struct dentry *d_tracer)
1556 {
1557         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1558         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1559         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1560                                               d_tracer, &tr->max_latency,
1561                                               &tracing_max_lat_fops);
1562 }
1563
1564 __init static int latency_fsnotify_init(void)
1565 {
1566         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1567                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1568         if (!fsnotify_wq) {
1569                 pr_err("Unable to allocate tr_max_lat_wq\n");
1570                 return -ENOMEM;
1571         }
1572         return 0;
1573 }
1574
1575 late_initcall_sync(latency_fsnotify_init);
1576
1577 void latency_fsnotify(struct trace_array *tr)
1578 {
1579         if (!fsnotify_wq)
1580                 return;
1581         /*
1582          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1583          * possible that we are called from __schedule() or do_idle(), which
1584          * could cause a deadlock.
1585          */
1586         irq_work_queue(&tr->fsnotify_irqwork);
1587 }
1588
1589 /*
1590  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1591  *  defined(CONFIG_FSNOTIFY)
1592  */
1593 #else
1594
1595 #define trace_create_maxlat_file(tr, d_tracer)                          \
1596         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1597                           &tr->max_latency, &tracing_max_lat_fops)
1598
1599 #endif
1600
1601 #ifdef CONFIG_TRACER_MAX_TRACE
1602 /*
1603  * Copy the new maximum trace into the separate maximum-trace
1604  * structure. (this way the maximum trace is permanently saved,
1605  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1606  */
1607 static void
1608 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1609 {
1610         struct array_buffer *trace_buf = &tr->array_buffer;
1611         struct array_buffer *max_buf = &tr->max_buffer;
1612         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1613         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1614
1615         max_buf->cpu = cpu;
1616         max_buf->time_start = data->preempt_timestamp;
1617
1618         max_data->saved_latency = tr->max_latency;
1619         max_data->critical_start = data->critical_start;
1620         max_data->critical_end = data->critical_end;
1621
1622         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1623         max_data->pid = tsk->pid;
1624         /*
1625          * If tsk == current, then use current_uid(), as that does not use
1626          * RCU. The irq tracer can be called out of RCU scope.
1627          */
1628         if (tsk == current)
1629                 max_data->uid = current_uid();
1630         else
1631                 max_data->uid = task_uid(tsk);
1632
1633         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1634         max_data->policy = tsk->policy;
1635         max_data->rt_priority = tsk->rt_priority;
1636
1637         /* record this task's comm */
1638         tracing_record_cmdline(tsk);
1639         latency_fsnotify(tr);
1640 }
1641
1642 /**
1643  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1644  * @tr: tracer
1645  * @tsk: the task with the latency
1646  * @cpu: The cpu that initiated the trace.
1647  * @cond_data: User data associated with a conditional snapshot
1648  *
1649  * Flip the buffers between the @tr and the max_tr and record information
1650  * about which task was the cause of this latency.
1651  */
1652 void
1653 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1654               void *cond_data)
1655 {
1656         if (tr->stop_count)
1657                 return;
1658
1659         WARN_ON_ONCE(!irqs_disabled());
1660
1661         if (!tr->allocated_snapshot) {
1662                 /* Only the nop tracer should hit this when disabling */
1663                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1664                 return;
1665         }
1666
1667         arch_spin_lock(&tr->max_lock);
1668
1669         /* Inherit the recordable setting from array_buffer */
1670         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1671                 ring_buffer_record_on(tr->max_buffer.buffer);
1672         else
1673                 ring_buffer_record_off(tr->max_buffer.buffer);
1674
1675 #ifdef CONFIG_TRACER_SNAPSHOT
1676         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1677                 goto out_unlock;
1678 #endif
1679         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1680
1681         __update_max_tr(tr, tsk, cpu);
1682
1683  out_unlock:
1684         arch_spin_unlock(&tr->max_lock);
1685 }
1686
1687 /**
1688  * update_max_tr_single - only copy one trace over, and reset the rest
1689  * @tr: tracer
1690  * @tsk: task with the latency
1691  * @cpu: the cpu of the buffer to copy.
1692  *
1693  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1694  */
1695 void
1696 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1697 {
1698         int ret;
1699
1700         if (tr->stop_count)
1701                 return;
1702
1703         WARN_ON_ONCE(!irqs_disabled());
1704         if (!tr->allocated_snapshot) {
1705                 /* Only the nop tracer should hit this when disabling */
1706                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1707                 return;
1708         }
1709
1710         arch_spin_lock(&tr->max_lock);
1711
1712         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1713
1714         if (ret == -EBUSY) {
1715                 /*
1716                  * We failed to swap the buffer due to a commit taking
1717                  * place on this CPU. We fail to record, but we reset
1718                  * the max trace buffer (no one writes directly to it)
1719                  * and flag that it failed.
1720                  */
1721                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1722                         "Failed to swap buffers due to commit in progress\n");
1723         }
1724
1725         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1726
1727         __update_max_tr(tr, tsk, cpu);
1728         arch_spin_unlock(&tr->max_lock);
1729 }
1730 #endif /* CONFIG_TRACER_MAX_TRACE */
1731
1732 static int wait_on_pipe(struct trace_iterator *iter, int full)
1733 {
1734         /* Iterators are static, they should be filled or empty */
1735         if (trace_buffer_iter(iter, iter->cpu_file))
1736                 return 0;
1737
1738         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1739                                 full);
1740 }
1741
1742 #ifdef CONFIG_FTRACE_STARTUP_TEST
1743 static bool selftests_can_run;
1744
1745 struct trace_selftests {
1746         struct list_head                list;
1747         struct tracer                   *type;
1748 };
1749
1750 static LIST_HEAD(postponed_selftests);
1751
1752 static int save_selftest(struct tracer *type)
1753 {
1754         struct trace_selftests *selftest;
1755
1756         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1757         if (!selftest)
1758                 return -ENOMEM;
1759
1760         selftest->type = type;
1761         list_add(&selftest->list, &postponed_selftests);
1762         return 0;
1763 }
1764
1765 static int run_tracer_selftest(struct tracer *type)
1766 {
1767         struct trace_array *tr = &global_trace;
1768         struct tracer *saved_tracer = tr->current_trace;
1769         int ret;
1770
1771         if (!type->selftest || tracing_selftest_disabled)
1772                 return 0;
1773
1774         /*
1775          * If a tracer registers early in boot up (before scheduling is
1776          * initialized and such), then do not run its selftests yet.
1777          * Instead, run it a little later in the boot process.
1778          */
1779         if (!selftests_can_run)
1780                 return save_selftest(type);
1781
1782         /*
1783          * Run a selftest on this tracer.
1784          * Here we reset the trace buffer, and set the current
1785          * tracer to be this tracer. The tracer can then run some
1786          * internal tracing to verify that everything is in order.
1787          * If we fail, we do not register this tracer.
1788          */
1789         tracing_reset_online_cpus(&tr->array_buffer);
1790
1791         tr->current_trace = type;
1792
1793 #ifdef CONFIG_TRACER_MAX_TRACE
1794         if (type->use_max_tr) {
1795                 /* If we expanded the buffers, make sure the max is expanded too */
1796                 if (ring_buffer_expanded)
1797                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1798                                            RING_BUFFER_ALL_CPUS);
1799                 tr->allocated_snapshot = true;
1800         }
1801 #endif
1802
1803         /* the test is responsible for initializing and enabling */
1804         pr_info("Testing tracer %s: ", type->name);
1805         ret = type->selftest(type, tr);
1806         /* the test is responsible for resetting too */
1807         tr->current_trace = saved_tracer;
1808         if (ret) {
1809                 printk(KERN_CONT "FAILED!\n");
1810                 /* Add the warning after printing 'FAILED' */
1811                 WARN_ON(1);
1812                 return -1;
1813         }
1814         /* Only reset on passing, to avoid touching corrupted buffers */
1815         tracing_reset_online_cpus(&tr->array_buffer);
1816
1817 #ifdef CONFIG_TRACER_MAX_TRACE
1818         if (type->use_max_tr) {
1819                 tr->allocated_snapshot = false;
1820
1821                 /* Shrink the max buffer again */
1822                 if (ring_buffer_expanded)
1823                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1824                                            RING_BUFFER_ALL_CPUS);
1825         }
1826 #endif
1827
1828         printk(KERN_CONT "PASSED\n");
1829         return 0;
1830 }
1831
1832 static __init int init_trace_selftests(void)
1833 {
1834         struct trace_selftests *p, *n;
1835         struct tracer *t, **last;
1836         int ret;
1837
1838         selftests_can_run = true;
1839
1840         mutex_lock(&trace_types_lock);
1841
1842         if (list_empty(&postponed_selftests))
1843                 goto out;
1844
1845         pr_info("Running postponed tracer tests:\n");
1846
1847         tracing_selftest_running = true;
1848         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1849                 /* This loop can take minutes when sanitizers are enabled, so
1850                  * let's make sure we allow RCU processing.
1851                  */
1852                 cond_resched();
1853                 ret = run_tracer_selftest(p->type);
1854                 /* If the test fails, then warn and remove from available_tracers */
1855                 if (ret < 0) {
1856                         WARN(1, "tracer: %s failed selftest, disabling\n",
1857                              p->type->name);
1858                         last = &trace_types;
1859                         for (t = trace_types; t; t = t->next) {
1860                                 if (t == p->type) {
1861                                         *last = t->next;
1862                                         break;
1863                                 }
1864                                 last = &t->next;
1865                         }
1866                 }
1867                 list_del(&p->list);
1868                 kfree(p);
1869         }
1870         tracing_selftest_running = false;
1871
1872  out:
1873         mutex_unlock(&trace_types_lock);
1874
1875         return 0;
1876 }
1877 core_initcall(init_trace_selftests);
1878 #else
1879 static inline int run_tracer_selftest(struct tracer *type)
1880 {
1881         return 0;
1882 }
1883 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1884
1885 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1886
1887 static void __init apply_trace_boot_options(void);
1888
1889 /**
1890  * register_tracer - register a tracer with the ftrace system.
1891  * @type: the plugin for the tracer
1892  *
1893  * Register a new plugin tracer.
1894  */
1895 int __init register_tracer(struct tracer *type)
1896 {
1897         struct tracer *t;
1898         int ret = 0;
1899
1900         if (!type->name) {
1901                 pr_info("Tracer must have a name\n");
1902                 return -1;
1903         }
1904
1905         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1906                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1907                 return -1;
1908         }
1909
1910         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1911                 pr_warn("Can not register tracer %s due to lockdown\n",
1912                            type->name);
1913                 return -EPERM;
1914         }
1915
1916         mutex_lock(&trace_types_lock);
1917
1918         tracing_selftest_running = true;
1919
1920         for (t = trace_types; t; t = t->next) {
1921                 if (strcmp(type->name, t->name) == 0) {
1922                         /* already found */
1923                         pr_info("Tracer %s already registered\n",
1924                                 type->name);
1925                         ret = -1;
1926                         goto out;
1927                 }
1928         }
1929
1930         if (!type->set_flag)
1931                 type->set_flag = &dummy_set_flag;
1932         if (!type->flags) {
1933                 /* allocate a dummy tracer_flags */
1934                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1935                 if (!type->flags) {
1936                         ret = -ENOMEM;
1937                         goto out;
1938                 }
1939                 type->flags->val = 0;
1940                 type->flags->opts = dummy_tracer_opt;
1941         } else
1942                 if (!type->flags->opts)
1943                         type->flags->opts = dummy_tracer_opt;
1944
1945         /* store the tracer for __set_tracer_option */
1946         type->flags->trace = type;
1947
1948         ret = run_tracer_selftest(type);
1949         if (ret < 0)
1950                 goto out;
1951
1952         type->next = trace_types;
1953         trace_types = type;
1954         add_tracer_options(&global_trace, type);
1955
1956  out:
1957         tracing_selftest_running = false;
1958         mutex_unlock(&trace_types_lock);
1959
1960         if (ret || !default_bootup_tracer)
1961                 goto out_unlock;
1962
1963         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1964                 goto out_unlock;
1965
1966         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1967         /* Do we want this tracer to start on bootup? */
1968         tracing_set_tracer(&global_trace, type->name);
1969         default_bootup_tracer = NULL;
1970
1971         apply_trace_boot_options();
1972
1973         /* disable other selftests, since this will break them. */
1974         tracing_selftest_disabled = true;
1975 #ifdef CONFIG_FTRACE_STARTUP_TEST
1976         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1977                type->name);
1978 #endif
1979
1980  out_unlock:
1981         return ret;
1982 }
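
/*
 * A minimal sketch of what registering a tracer plugin looks like.  The
 * "example" tracer and its callbacks below are hypothetical; real tracers
 * (e.g. trace_nop.c) fill in more of struct tracer.  Kept under #if 0.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	/* Arm whatever hooks this tracer needs. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo what example_tracer_init() set up. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif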
1983
1984 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1985 {
1986         struct trace_buffer *buffer = buf->buffer;
1987
1988         if (!buffer)
1989                 return;
1990
1991         ring_buffer_record_disable(buffer);
1992
1993         /* Make sure all commits have finished */
1994         synchronize_rcu();
1995         ring_buffer_reset_cpu(buffer, cpu);
1996
1997         ring_buffer_record_enable(buffer);
1998 }
1999
2000 void tracing_reset_online_cpus(struct array_buffer *buf)
2001 {
2002         struct trace_buffer *buffer = buf->buffer;
2003         int cpu;
2004
2005         if (!buffer)
2006                 return;
2007
2008         ring_buffer_record_disable(buffer);
2009
2010         /* Make sure all commits have finished */
2011         synchronize_rcu();
2012
2013         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2014
2015         for_each_online_cpu(cpu)
2016                 ring_buffer_reset_cpu(buffer, cpu);
2017
2018         ring_buffer_record_enable(buffer);
2019 }
2020
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2023 {
2024         struct trace_array *tr;
2025
2026         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027                 if (!tr->clear_trace)
2028                         continue;
2029                 tr->clear_trace = false;
2030                 tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032                 tracing_reset_online_cpus(&tr->max_buffer);
2033 #endif
2034         }
2035 }
2036
2037 static int *tgid_map;
2038
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044         unsigned *map_cmdline_to_pid;
2045         unsigned cmdline_num;
2046         int cmdline_idx;
2047         char *saved_cmdlines;
2048 };
2049 static struct saved_cmdlines_buffer *savedcmd;
2050
2051 /* temporary disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2053
2054 static inline char *get_saved_cmdlines(int idx)
2055 {
2056         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2057 }
2058
2059 static inline void set_cmdline(int idx, const char *cmdline)
2060 {
2061         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2062 }
2063
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065                                     struct saved_cmdlines_buffer *s)
2066 {
2067         s->map_cmdline_to_pid = kmalloc_array(val,
2068                                               sizeof(*s->map_cmdline_to_pid),
2069                                               GFP_KERNEL);
2070         if (!s->map_cmdline_to_pid)
2071                 return -ENOMEM;
2072
2073         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074         if (!s->saved_cmdlines) {
2075                 kfree(s->map_cmdline_to_pid);
2076                 return -ENOMEM;
2077         }
2078
2079         s->cmdline_idx = 0;
2080         s->cmdline_num = val;
2081         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082                sizeof(s->map_pid_to_cmdline));
2083         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084                val * sizeof(*s->map_cmdline_to_pid));
2085
2086         return 0;
2087 }
2088
2089 static int trace_create_savedcmd(void)
2090 {
2091         int ret;
2092
2093         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2094         if (!savedcmd)
2095                 return -ENOMEM;
2096
2097         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2098         if (ret < 0) {
2099                 kfree(savedcmd);
2100                 savedcmd = NULL;
2101                 return -ENOMEM;
2102         }
2103
2104         return 0;
2105 }
2106
2107 int is_tracing_stopped(void)
2108 {
2109         return global_trace.stop_count;
2110 }
2111
2112 /**
2113  * tracing_start - quick start of the tracer
2114  *
2115  * If tracing is enabled but was stopped by tracing_stop,
2116  * this will start the tracer back up.
2117  */
2118 void tracing_start(void)
2119 {
2120         struct trace_buffer *buffer;
2121         unsigned long flags;
2122
2123         if (tracing_disabled)
2124                 return;
2125
2126         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127         if (--global_trace.stop_count) {
2128                 if (global_trace.stop_count < 0) {
2129                         /* Someone screwed up their debugging */
2130                         WARN_ON_ONCE(1);
2131                         global_trace.stop_count = 0;
2132                 }
2133                 goto out;
2134         }
2135
2136         /* Prevent the buffers from switching */
2137         arch_spin_lock(&global_trace.max_lock);
2138
2139         buffer = global_trace.array_buffer.buffer;
2140         if (buffer)
2141                 ring_buffer_record_enable(buffer);
2142
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144         buffer = global_trace.max_buffer.buffer;
2145         if (buffer)
2146                 ring_buffer_record_enable(buffer);
2147 #endif
2148
2149         arch_spin_unlock(&global_trace.max_lock);
2150
2151  out:
2152         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2153 }
2154
2155 static void tracing_start_tr(struct trace_array *tr)
2156 {
2157         struct trace_buffer *buffer;
2158         unsigned long flags;
2159
2160         if (tracing_disabled)
2161                 return;
2162
2163         /* If global, we need to also start the max tracer */
2164         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165                 return tracing_start();
2166
2167         raw_spin_lock_irqsave(&tr->start_lock, flags);
2168
2169         if (--tr->stop_count) {
2170                 if (tr->stop_count < 0) {
2171                         /* Someone screwed up their debugging */
2172                         WARN_ON_ONCE(1);
2173                         tr->stop_count = 0;
2174                 }
2175                 goto out;
2176         }
2177
2178         buffer = tr->array_buffer.buffer;
2179         if (buffer)
2180                 ring_buffer_record_enable(buffer);
2181
2182  out:
2183         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2184 }
2185
2186 /**
2187  * tracing_stop - quick stop of the tracer
2188  *
2189  * Lightweight way to stop tracing. Use in conjunction with
2190  * tracing_start.
2191  */
2192 void tracing_stop(void)
2193 {
2194         struct trace_buffer *buffer;
2195         unsigned long flags;
2196
2197         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198         if (global_trace.stop_count++)
2199                 goto out;
2200
2201         /* Prevent the buffers from switching */
2202         arch_spin_lock(&global_trace.max_lock);
2203
2204         buffer = global_trace.array_buffer.buffer;
2205         if (buffer)
2206                 ring_buffer_record_disable(buffer);
2207
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209         buffer = global_trace.max_buffer.buffer;
2210         if (buffer)
2211                 ring_buffer_record_disable(buffer);
2212 #endif
2213
2214         arch_spin_unlock(&global_trace.max_lock);
2215
2216  out:
2217         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2218 }
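
/*
 * Because stop_count is a counter, tracing_stop()/tracing_start() nest.  A
 * sketch of a caller that quiesces the ring buffers around a noisy section
 * (do_something_noisy() is hypothetical; kept under #if 0):
 */
#if 0
static void example_quiesce_tracing(void)
{
	tracing_stop();
	do_something_noisy();	/* nothing is recorded in the ring buffers */
	tracing_start();	/* recording resumes once the counts balance */
}
#endif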
2219
2220 static void tracing_stop_tr(struct trace_array *tr)
2221 {
2222         struct trace_buffer *buffer;
2223         unsigned long flags;
2224
2225         /* If global, we need to also stop the max tracer */
2226         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227                 return tracing_stop();
2228
2229         raw_spin_lock_irqsave(&tr->start_lock, flags);
2230         if (tr->stop_count++)
2231                 goto out;
2232
2233         buffer = tr->array_buffer.buffer;
2234         if (buffer)
2235                 ring_buffer_record_disable(buffer);
2236
2237  out:
2238         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2239 }
2240
2241 static int trace_save_cmdline(struct task_struct *tsk)
2242 {
2243         unsigned pid, idx;
2244
2245         /* treat recording of idle task as a success */
2246         if (!tsk->pid)
2247                 return 1;
2248
2249         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2250                 return 0;
2251
2252         /*
2253          * It's not the end of the world if we don't get
2254          * the lock, but we also don't want to spin
2255          * nor do we want to disable interrupts,
2256          * so if we miss here, then better luck next time.
2257          */
2258         if (!arch_spin_trylock(&trace_cmdline_lock))
2259                 return 0;
2260
2261         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262         if (idx == NO_CMDLINE_MAP) {
2263                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2264
2265                 /*
2266                  * Check whether the cmdline buffer at idx has a pid
2267                  * mapped. We are going to overwrite that entry so we
2268                  * need to clear the map_pid_to_cmdline. Otherwise we
2269                  * would read the new comm for the old pid.
2270                  */
2271                 pid = savedcmd->map_cmdline_to_pid[idx];
2272                 if (pid != NO_CMDLINE_MAP)
2273                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2274
2275                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2277
2278                 savedcmd->cmdline_idx = idx;
2279         }
2280
2281         set_cmdline(idx, tsk->comm);
2282
2283         arch_spin_unlock(&trace_cmdline_lock);
2284
2285         return 1;
2286 }
2287
2288 static void __trace_find_cmdline(int pid, char comm[])
2289 {
2290         unsigned map;
2291
2292         if (!pid) {
2293                 strcpy(comm, "<idle>");
2294                 return;
2295         }
2296
2297         if (WARN_ON_ONCE(pid < 0)) {
2298                 strcpy(comm, "<XXX>");
2299                 return;
2300         }
2301
2302         if (pid > PID_MAX_DEFAULT) {
2303                 strcpy(comm, "<...>");
2304                 return;
2305         }
2306
2307         map = savedcmd->map_pid_to_cmdline[pid];
2308         if (map != NO_CMDLINE_MAP)
2309                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2310         else
2311                 strcpy(comm, "<...>");
2312 }
2313
2314 void trace_find_cmdline(int pid, char comm[])
2315 {
2316         preempt_disable();
2317         arch_spin_lock(&trace_cmdline_lock);
2318
2319         __trace_find_cmdline(pid, comm);
2320
2321         arch_spin_unlock(&trace_cmdline_lock);
2322         preempt_enable();
2323 }
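
/*
 * Sketch of how output code typically uses trace_find_cmdline() to turn a
 * recorded pid back into a comm (example_print_comm() is hypothetical):
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}
#endif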
2324
2325 int trace_find_tgid(int pid)
2326 {
2327         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2328                 return 0;
2329
2330         return tgid_map[pid];
2331 }
2332
2333 static int trace_save_tgid(struct task_struct *tsk)
2334 {
2335         /* treat recording of idle task as a success */
2336         if (!tsk->pid)
2337                 return 1;
2338
2339         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2340                 return 0;
2341
2342         tgid_map[tsk->pid] = tsk->tgid;
2343         return 1;
2344 }
2345
2346 static bool tracing_record_taskinfo_skip(int flags)
2347 {
2348         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2349                 return true;
2350         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2351                 return true;
2352         if (!__this_cpu_read(trace_taskinfo_save))
2353                 return true;
2354         return false;
2355 }
2356
2357 /**
2358  * tracing_record_taskinfo - record the task info of a task
2359  *
2360  * @task:  task to record
2361  * @flags: TRACE_RECORD_CMDLINE for recording comm
2362  *         TRACE_RECORD_TGID for recording tgid
2363  */
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2365 {
2366         bool done;
2367
2368         if (tracing_record_taskinfo_skip(flags))
2369                 return;
2370
2371         /*
2372          * Record as much task information as possible. If some fail, continue
2373          * to try to record the others.
2374          */
2375         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2377
2378         /* If recording any information failed, retry again soon. */
2379         if (!done)
2380                 return;
2381
2382         __this_cpu_write(trace_taskinfo_save, false);
2383 }
2384
2385 /**
2386  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2387  *
2388  * @prev: previous task during sched_switch
2389  * @next: next task during sched_switch
2390  * @flags: TRACE_RECORD_CMDLINE for recording comm
2391  *         TRACE_RECORD_TGID for recording tgid
2392  */
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394                                           struct task_struct *next, int flags)
2395 {
2396         bool done;
2397
2398         if (tracing_record_taskinfo_skip(flags))
2399                 return;
2400
2401         /*
2402          * Record as much task information as possible. If some fail, continue
2403          * to try to record the others.
2404          */
2405         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2409
2410         /* If recording any information failed, retry again soon. */
2411         if (!done)
2412                 return;
2413
2414         __this_cpu_write(trace_taskinfo_save, false);
2415 }
2416
2417 /* Helpers to record a specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2419 {
2420         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2421 }
2422
2423 void tracing_record_tgid(struct task_struct *task)
2424 {
2425         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2426 }
2427
2428 /*
2429  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431  * simplifies those functions and keeps them in sync.
2432  */
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2434 {
2435         return trace_seq_has_overflowed(s) ?
2436                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2437 }
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
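
/*
 * Sketch of a trace_event output callback built on trace_handle_return();
 * example_event_print() is hypothetical, the signature matches
 * struct trace_event_functions::trace:
 */
#if 0
static enum print_line_t example_event_print(struct trace_iterator *iter,
					     int flags, struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}
#endif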
2439
2440 void
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442                              unsigned long flags, int pc)
2443 {
2444         struct task_struct *tsk = current;
2445
2446         entry->preempt_count            = pc & 0xff;
2447         entry->pid                      = (tsk) ? tsk->pid : 0;
2448         entry->type                     = type;
2449         entry->flags =
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2452 #else
2453                 TRACE_FLAG_IRQS_NOSUPPORT |
2454 #endif
2455                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2456                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2460 }
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
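
/*
 * Callers capture the irq flags and preempt count themselves and pass them
 * in, e.g. (example_fill_entry() is a hypothetical sketch):
 */
#if 0
static void example_fill_entry(struct trace_entry *ent, unsigned short type)
{
	unsigned long irq_flags;

	local_save_flags(irq_flags);
	tracing_generic_entry_update(ent, type, irq_flags, preempt_count());
}
#endif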
2462
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2465                           int type,
2466                           unsigned long len,
2467                           unsigned long flags, int pc)
2468 {
2469         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2470 }
2471
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2475
2476 /**
2477  * trace_buffered_event_enable - enable buffering events
2478  *
2479  * When events are being filtered, it is quicker to use a temporary
2480  * buffer to write the event data into if there's a likely chance
2481  * that it will not be committed. The discard of the ring buffer
2482  * is not as fast as committing, and is much slower than copying
2483  * a commit.
2484  *
2485  * When an event is to be filtered, allocate per cpu buffers to
2486  * write the event data into, and if the event is filtered and discarded
2487  * it is simply dropped, otherwise, the entire data is to be committed
2488  * in one shot.
2489  */
2490 void trace_buffered_event_enable(void)
2491 {
2492         struct ring_buffer_event *event;
2493         struct page *page;
2494         int cpu;
2495
2496         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2497
2498         if (trace_buffered_event_ref++)
2499                 return;
2500
2501         for_each_tracing_cpu(cpu) {
2502                 page = alloc_pages_node(cpu_to_node(cpu),
2503                                         GFP_KERNEL | __GFP_NORETRY, 0);
2504                 if (!page)
2505                         goto failed;
2506
2507                 event = page_address(page);
2508                 memset(event, 0, sizeof(*event));
2509
2510                 per_cpu(trace_buffered_event, cpu) = event;
2511
2512                 preempt_disable();
2513                 if (cpu == smp_processor_id() &&
2514                     this_cpu_read(trace_buffered_event) !=
2515                     per_cpu(trace_buffered_event, cpu))
2516                         WARN_ON_ONCE(1);
2517                 preempt_enable();
2518         }
2519
2520         return;
2521  failed:
2522         trace_buffered_event_disable();
2523 }
2524
2525 static void enable_trace_buffered_event(void *data)
2526 {
2527         /* Probably not needed, but do it anyway */
2528         smp_rmb();
2529         this_cpu_dec(trace_buffered_event_cnt);
2530 }
2531
2532 static void disable_trace_buffered_event(void *data)
2533 {
2534         this_cpu_inc(trace_buffered_event_cnt);
2535 }
2536
2537 /**
2538  * trace_buffered_event_disable - disable buffering events
2539  *
2540  * When a filter is removed, it is faster to not use the buffered
2541  * events, and to commit directly into the ring buffer. Free up
2542  * the temp buffers when there are no more users. This requires
2543  * special synchronization with current events.
2544  */
2545 void trace_buffered_event_disable(void)
2546 {
2547         int cpu;
2548
2549         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2550
2551         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2552                 return;
2553
2554         if (--trace_buffered_event_ref)
2555                 return;
2556
2557         preempt_disable();
2558         /* For each CPU, set the buffer as used. */
2559         smp_call_function_many(tracing_buffer_mask,
2560                                disable_trace_buffered_event, NULL, 1);
2561         preempt_enable();
2562
2563         /* Wait for all current users to finish */
2564         synchronize_rcu();
2565
2566         for_each_tracing_cpu(cpu) {
2567                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568                 per_cpu(trace_buffered_event, cpu) = NULL;
2569         }
2570         /*
2571          * Make sure trace_buffered_event is NULL before clearing
2572          * trace_buffered_event_cnt.
2573          */
2574         smp_wmb();
2575
2576         preempt_disable();
2577         /* Do the work on each cpu */
2578         smp_call_function_many(tracing_buffer_mask,
2579                                enable_trace_buffered_event, NULL, 1);
2580         preempt_enable();
2581 }
2582
2583 static struct trace_buffer *temp_buffer;
2584
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587                           struct trace_event_file *trace_file,
2588                           int type, unsigned long len,
2589                           unsigned long flags, int pc)
2590 {
2591         struct ring_buffer_event *entry;
2592         int val;
2593
2594         *current_rb = trace_file->tr->array_buffer.buffer;
2595
2596         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598             (entry = this_cpu_read(trace_buffered_event))) {
2599                 /* Try to use the per cpu buffer first */
2600                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2601                 if (val == 1) {
2602                         trace_event_setup(entry, type, flags, pc);
2603                         entry->array[0] = len;
2604                         return entry;
2605                 }
2606                 this_cpu_dec(trace_buffered_event_cnt);
2607         }
2608
2609         entry = __trace_buffer_lock_reserve(*current_rb,
2610                                             type, len, flags, pc);
2611         /*
2612          * If tracing is off, but we have triggers enabled
2613          * we still need to look at the event data. Use the temp_buffer
2614          * to store the trace event for the trigger to use. It's recursion
2615          * safe and will not be recorded anywhere.
2616          */
2617         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618                 *current_rb = temp_buffer;
2619                 entry = __trace_buffer_lock_reserve(*current_rb,
2620                                                     type, len, flags, pc);
2621         }
2622         return entry;
2623 }
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2625
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2628
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2630 {
2631         struct trace_event_call *event_call;
2632         struct trace_event_file *file;
2633         struct trace_event *event;
2634         unsigned long flags;
2635         struct trace_iterator *iter = tracepoint_print_iter;
2636
2637         /* We should never get here if iter is NULL */
2638         if (WARN_ON_ONCE(!iter))
2639                 return;
2640
2641         event_call = fbuffer->trace_file->event_call;
2642         if (!event_call || !event_call->event.funcs ||
2643             !event_call->event.funcs->trace)
2644                 return;
2645
2646         file = fbuffer->trace_file;
2647         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649              !filter_match_preds(file->filter, fbuffer->entry)))
2650                 return;
2651
2652         event = &fbuffer->trace_file->event_call->event;
2653
2654         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655         trace_seq_init(&iter->seq);
2656         iter->ent = fbuffer->entry;
2657         event_call->event.funcs->trace(iter, 0, event);
2658         trace_seq_putc(&iter->seq, 0);
2659         printk("%s", iter->seq.buffer);
2660
2661         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2662 }
2663
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665                              void __user *buffer, size_t *lenp,
2666                              loff_t *ppos)
2667 {
2668         int save_tracepoint_printk;
2669         int ret;
2670
2671         mutex_lock(&tracepoint_printk_mutex);
2672         save_tracepoint_printk = tracepoint_printk;
2673
2674         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2675
2676         /*
2677          * This will force exiting early, as tracepoint_printk
2678          * is always zero when tracepoint_print_iter is not allocated
2679          */
2680         if (!tracepoint_print_iter)
2681                 tracepoint_printk = 0;
2682
2683         if (save_tracepoint_printk == tracepoint_printk)
2684                 goto out;
2685
2686         if (tracepoint_printk)
2687                 static_key_enable(&tracepoint_printk_key.key);
2688         else
2689                 static_key_disable(&tracepoint_printk_key.key);
2690
2691  out:
2692         mutex_unlock(&tracepoint_printk_mutex);
2693
2694         return ret;
2695 }
2696
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2698 {
2699         if (static_key_false(&tracepoint_printk_key.key))
2700                 output_printk(fbuffer);
2701
2702         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703                                     fbuffer->event, fbuffer->entry,
2704                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2705 }
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2707
2708 /*
2709  * Skip 3:
2710  *
2711  *   trace_buffer_unlock_commit_regs()
2712  *   trace_event_buffer_commit()
2713  *   trace_event_raw_event_xxx()
2714  */
2715 # define STACK_SKIP 3
2716
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718                                      struct trace_buffer *buffer,
2719                                      struct ring_buffer_event *event,
2720                                      unsigned long flags, int pc,
2721                                      struct pt_regs *regs)
2722 {
2723         __buffer_unlock_commit(buffer, event);
2724
2725         /*
2726          * If regs is not set, then skip the necessary functions.
2727          * Note, we can still get here via blktrace, wakeup tracer
2728          * and mmiotrace, but that's ok if they lose a function or
2729          * two. They are not that meaningful.
2730          */
2731         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732         ftrace_trace_userstack(buffer, flags, pc);
2733 }
2734
2735 /*
2736  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2737  */
2738 void
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740                                    struct ring_buffer_event *event)
2741 {
2742         __buffer_unlock_commit(buffer, event);
2743 }
2744
2745 static void
2746 trace_process_export(struct trace_export *export,
2747                struct ring_buffer_event *event)
2748 {
2749         struct trace_entry *entry;
2750         unsigned int size = 0;
2751
2752         entry = ring_buffer_event_data(event);
2753         size = ring_buffer_event_length(event);
2754         export->write(export, entry, size);
2755 }
2756
2757 static DEFINE_MUTEX(ftrace_export_lock);
2758
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2760
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2762
2763 static inline void ftrace_exports_enable(void)
2764 {
2765         static_branch_enable(&ftrace_exports_enabled);
2766 }
2767
2768 static inline void ftrace_exports_disable(void)
2769 {
2770         static_branch_disable(&ftrace_exports_enabled);
2771 }
2772
2773 static void ftrace_exports(struct ring_buffer_event *event)
2774 {
2775         struct trace_export *export;
2776
2777         preempt_disable_notrace();
2778
2779         export = rcu_dereference_raw_check(ftrace_exports_list);
2780         while (export) {
2781                 trace_process_export(export, event);
2782                 export = rcu_dereference_raw_check(export->next);
2783         }
2784
2785         preempt_enable_notrace();
2786 }
2787
2788 static inline void
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2790 {
2791         rcu_assign_pointer(export->next, *list);
2792         /*
2793          * We are entering export into the list but another
2794          * CPU might be walking that list. We need to make sure
2795          * the export->next pointer is valid before another CPU sees
2796          * the export pointer included into the list.
2797          */
2798         rcu_assign_pointer(*list, export);
2799 }
2800
2801 static inline int
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804         struct trace_export **p;
2805
2806         for (p = list; *p != NULL; p = &(*p)->next)
2807                 if (*p == export)
2808                         break;
2809
2810         if (*p != export)
2811                 return -1;
2812
2813         rcu_assign_pointer(*p, (*p)->next);
2814
2815         return 0;
2816 }
2817
2818 static inline void
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2820 {
2821         if (*list == NULL)
2822                 ftrace_exports_enable();
2823
2824         add_trace_export(list, export);
2825 }
2826
2827 static inline int
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2829 {
2830         int ret;
2831
2832         ret = rm_trace_export(list, export);
2833         if (*list == NULL)
2834                 ftrace_exports_disable();
2835
2836         return ret;
2837 }
2838
2839 int register_ftrace_export(struct trace_export *export)
2840 {
2841         if (WARN_ON_ONCE(!export->write))
2842                 return -1;
2843
2844         mutex_lock(&ftrace_export_lock);
2845
2846         add_ftrace_export(&ftrace_exports_list, export);
2847
2848         mutex_unlock(&ftrace_export_lock);
2849
2850         return 0;
2851 }
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
2853
2854 int unregister_ftrace_export(struct trace_export *export)
2855 {
2856         int ret;
2857
2858         mutex_lock(&ftrace_export_lock);
2859
2860         ret = rm_ftrace_export(&ftrace_exports_list, export);
2861
2862         mutex_unlock(&ftrace_export_lock);
2863
2864         return ret;
2865 }
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
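
/*
 * Sketch of a trace_export client as a module might define one; the names
 * are hypothetical and the write() callback matches what
 * trace_process_export() above passes in (a raw entry and its length):
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push the raw trace entry to some out-of-band channel. */
	pr_debug("exported a %u byte trace entry\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}
#endif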
2867
2868 void
2869 trace_function(struct trace_array *tr,
2870                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2871                int pc)
2872 {
2873         struct trace_event_call *call = &event_function;
2874         struct trace_buffer *buffer = tr->array_buffer.buffer;
2875         struct ring_buffer_event *event;
2876         struct ftrace_entry *entry;
2877
2878         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2879                                             flags, pc);
2880         if (!event)
2881                 return;
2882         entry   = ring_buffer_event_data(event);
2883         entry->ip                       = ip;
2884         entry->parent_ip                = parent_ip;
2885
2886         if (!call_filter_check_discard(call, entry, buffer, event)) {
2887                 if (static_branch_unlikely(&ftrace_exports_enabled))
2888                         ftrace_exports(event);
2889                 __buffer_unlock_commit(buffer, event);
2890         }
2891 }
2892
2893 #ifdef CONFIG_STACKTRACE
2894
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING   4
2897
2898 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2899
2900 struct ftrace_stack {
2901         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2902 };
2903
2904
2905 struct ftrace_stacks {
2906         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2907 };
2908
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2911
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913                                  unsigned long flags,
2914                                  int skip, int pc, struct pt_regs *regs)
2915 {
2916         struct trace_event_call *call = &event_kernel_stack;
2917         struct ring_buffer_event *event;
2918         unsigned int size, nr_entries;
2919         struct ftrace_stack *fstack;
2920         struct stack_entry *entry;
2921         int stackidx;
2922
2923         /*
2924          * Add one, for this function and the call to stack_trace_save().
2925          * If regs is set, then these functions will not be in the way.
2926          */
2927 #ifndef CONFIG_UNWINDER_ORC
2928         if (!regs)
2929                 skip++;
2930 #endif
2931
2932         /*
2933          * Since events can happen in NMIs there's no safe way to
2934          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2935          * or NMI comes in, it will just have to use the next slot in
2936          * the per-CPU ftrace_stacks nesting array.
2937          */
2938         preempt_disable_notrace();
2939
2940         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2941
2942         /* This should never happen. If it does, yell once and skip */
2943         if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2944                 goto out;
2945
2946         /*
2947          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2948          * interrupt will either see the value pre increment or post
2949          * increment. If the interrupt happens pre increment it will have
2950          * restored the counter when it returns.  We just need a barrier to
2951          * keep gcc from moving things around.
2952          */
2953         barrier();
2954
2955         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2956         size = ARRAY_SIZE(fstack->calls);
2957
2958         if (regs) {
2959                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2960                                                    size, skip);
2961         } else {
2962                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2963         }
2964
2965         size = nr_entries * sizeof(unsigned long);
2966         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2967                                             sizeof(*entry) + size, flags, pc);
2968         if (!event)
2969                 goto out;
2970         entry = ring_buffer_event_data(event);
2971
2972         memcpy(&entry->caller, fstack->calls, size);
2973         entry->size = nr_entries;
2974
2975         if (!call_filter_check_discard(call, entry, buffer, event))
2976                 __buffer_unlock_commit(buffer, event);
2977
2978  out:
2979         /* Again, don't let gcc optimize things here */
2980         barrier();
2981         __this_cpu_dec(ftrace_stack_reserve);
2982         preempt_enable_notrace();
2983
2984 }
2985
2986 static inline void ftrace_trace_stack(struct trace_array *tr,
2987                                       struct trace_buffer *buffer,
2988                                       unsigned long flags,
2989                                       int skip, int pc, struct pt_regs *regs)
2990 {
2991         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2992                 return;
2993
2994         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2995 }
2996
2997 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2998                    int pc)
2999 {
3000         struct trace_buffer *buffer = tr->array_buffer.buffer;
3001
3002         if (rcu_is_watching()) {
3003                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3004                 return;
3005         }
3006
3007         /*
3008          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3009          * but if the above rcu_is_watching() failed, then the NMI
3010          * triggered someplace critical, and rcu_irq_enter() should
3011          * not be called from NMI.
3012          */
3013         if (unlikely(in_nmi()))
3014                 return;
3015
3016         rcu_irq_enter_irqson();
3017         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3018         rcu_irq_exit_irqson();
3019 }
3020
3021 /**
3022  * trace_dump_stack - record a stack back trace in the trace buffer
3023  * @skip: Number of functions to skip (helper handlers)
3024  */
3025 void trace_dump_stack(int skip)
3026 {
3027         unsigned long flags;
3028
3029         if (tracing_disabled || tracing_selftest_running)
3030                 return;
3031
3032         local_save_flags(flags);
3033
3034 #ifndef CONFIG_UNWINDER_ORC
3035         /* Skip 1 to skip this function. */
3036         skip++;
3037 #endif
3038         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3039                              flags, skip, preempt_count(), NULL);
3040 }
3041 EXPORT_SYMBOL_GPL(trace_dump_stack);
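
/*
 * Sketch of a debug call site using trace_dump_stack(); passing 0 records
 * the full backtrace of the caller into the ring buffer (the surrounding
 * function is hypothetical):
 */
#if 0
static void example_report_unexpected_state(void)
{
	trace_dump_stack(0);
}
#endif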
3042
3043 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3044 static DEFINE_PER_CPU(int, user_stack_count);
3045
3046 static void
3047 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3048 {
3049         struct trace_event_call *call = &event_user_stack;
3050         struct ring_buffer_event *event;
3051         struct userstack_entry *entry;
3052
3053         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3054                 return;
3055
3056         /*
3057          * NMIs can not handle page faults, even with fix ups.
3058          * The save user stack can (and often does) fault.
3059          */
3060         if (unlikely(in_nmi()))
3061                 return;
3062
3063         /*
3064          * prevent recursion, since the user stack tracing may
3065          * trigger other kernel events.
3066          */
3067         preempt_disable();
3068         if (__this_cpu_read(user_stack_count))
3069                 goto out;
3070
3071         __this_cpu_inc(user_stack_count);
3072
3073         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3074                                             sizeof(*entry), flags, pc);
3075         if (!event)
3076                 goto out_drop_count;
3077         entry   = ring_buffer_event_data(event);
3078
3079         entry->tgid             = current->tgid;
3080         memset(&entry->caller, 0, sizeof(entry->caller));
3081
3082         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3083         if (!call_filter_check_discard(call, entry, buffer, event))
3084                 __buffer_unlock_commit(buffer, event);
3085
3086  out_drop_count:
3087         __this_cpu_dec(user_stack_count);
3088  out:
3089         preempt_enable();
3090 }
3091 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3092 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3093                                    unsigned long flags, int pc)
3094 {
3095 }
3096 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3097
3098 #endif /* CONFIG_STACKTRACE */
3099
3100 /* created for use with alloc_percpu */
3101 struct trace_buffer_struct {
3102         int nesting;
3103         char buffer[4][TRACE_BUF_SIZE];
3104 };
3105
3106 static struct trace_buffer_struct *trace_percpu_buffer;
3107
3108 /*
3109  * This allows for lockless recording.  If we're nested too deeply, then
3110  * this returns NULL.
3111  */
3112 static char *get_trace_buf(void)
3113 {
3114         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3115
3116         if (!buffer || buffer->nesting >= 4)
3117                 return NULL;
3118
3119         buffer->nesting++;
3120
3121         /* Interrupts must see nesting incremented before we use the buffer */
3122         barrier();
3123         return &buffer->buffer[buffer->nesting][0];
3124 }
3125
3126 static void put_trace_buf(void)
3127 {
3128         /* Don't let the decrement of nesting leak before this */
3129         barrier();
3130         this_cpu_dec(trace_percpu_buffer->nesting);
3131 }
3132
3133 static int alloc_percpu_trace_buffer(void)
3134 {
3135         struct trace_buffer_struct *buffers;
3136
3137         buffers = alloc_percpu(struct trace_buffer_struct);
3138         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3139                 return -ENOMEM;
3140
3141         trace_percpu_buffer = buffers;
3142         return 0;
3143 }
3144
3145 static int buffers_allocated;
3146
3147 void trace_printk_init_buffers(void)
3148 {
3149         if (buffers_allocated)
3150                 return;
3151
3152         if (alloc_percpu_trace_buffer())
3153                 return;
3154
3155         /* trace_printk() is for debug use only. Don't use it in production. */
3156
3157         pr_warn("\n");
3158         pr_warn("**********************************************************\n");
3159         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3160         pr_warn("**                                                      **\n");
3161         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3162         pr_warn("**                                                      **\n");
3163         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3164         pr_warn("** unsafe for production use.                           **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("** If you see this message and you are not debugging    **\n");
3167         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3168         pr_warn("**                                                      **\n");
3169         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3170         pr_warn("**********************************************************\n");
3171
3172         /* Expand the buffers to set size */
3173         tracing_update_buffers();
3174
3175         buffers_allocated = 1;
3176
3177         /*
3178          * trace_printk_init_buffers() can be called by modules.
3179          * If that happens, then we need to start cmdline recording
3180          * directly here. If the global_trace.buffer is already
3181          * allocated here, then this was called by module code.
3182          */
3183         if (global_trace.array_buffer.buffer)
3184                 tracing_start_cmdline_record();
3185 }
3186 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
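
/*
 * The banner above fires for any trace_printk() user.  A debug-only sketch
 * of such a user (the handler below is hypothetical):
 */
#if 0
static irqreturn_t example_irq_handler(int irq, void *data)
{
	/* Lands in the ring buffer; read it back via the "trace" file. */
	trace_printk("irq %d fired, data=%p\n", irq, data);
	return IRQ_HANDLED;
}
#endif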
3187
3188 void trace_printk_start_comm(void)
3189 {
3190         /* Start tracing comms if trace printk is set */
3191         if (!buffers_allocated)
3192                 return;
3193         tracing_start_cmdline_record();
3194 }
3195
3196 static void trace_printk_start_stop_comm(int enabled)
3197 {
3198         if (!buffers_allocated)
3199                 return;
3200
3201         if (enabled)
3202                 tracing_start_cmdline_record();
3203         else
3204                 tracing_stop_cmdline_record();
3205 }
3206
3207 /**
3208  * trace_vbprintk - write binary msg to tracing buffer
3209  * @ip:    The address of the caller
3210  * @fmt:   The string format to write to the buffer
3211  * @args:  Arguments for @fmt
3212  */
3213 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3214 {
3215         struct trace_event_call *call = &event_bprint;
3216         struct ring_buffer_event *event;
3217         struct trace_buffer *buffer;
3218         struct trace_array *tr = &global_trace;
3219         struct bprint_entry *entry;
3220         unsigned long flags;
3221         char *tbuffer;
3222         int len = 0, size, pc;
3223
3224         if (unlikely(tracing_selftest_running || tracing_disabled))
3225                 return 0;
3226
3227         /* Don't pollute graph traces with trace_vprintk internals */
3228         pause_graph_tracing();
3229
3230         pc = preempt_count();
3231         preempt_disable_notrace();
3232
3233         tbuffer = get_trace_buf();
3234         if (!tbuffer) {
3235                 len = 0;
3236                 goto out_nobuffer;
3237         }
3238
3239         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3240
3241         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3242                 goto out_put;
3243
3244         local_save_flags(flags);
3245         size = sizeof(*entry) + sizeof(u32) * len;
3246         buffer = tr->array_buffer.buffer;
3247         ring_buffer_nest_start(buffer);
3248         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3249                                             flags, pc);
3250         if (!event)
3251                 goto out;
3252         entry = ring_buffer_event_data(event);
3253         entry->ip                       = ip;
3254         entry->fmt                      = fmt;
3255
3256         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3257         if (!call_filter_check_discard(call, entry, buffer, event)) {
3258                 __buffer_unlock_commit(buffer, event);
3259                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3260         }
3261
3262 out:
3263         ring_buffer_nest_end(buffer);
3264 out_put:
3265         put_trace_buf();
3266
3267 out_nobuffer:
3268         preempt_enable_notrace();
3269         unpause_graph_tracing();
3270
3271         return len;
3272 }
3273 EXPORT_SYMBOL_GPL(trace_vbprintk);
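
/*
 * Example (illustrative sketch): trace_vbprintk() is normally reached
 * through the trace_printk() machinery rather than called directly.  A
 * hypothetical va_list wrapper would look roughly like this, with the
 * caveat that only the format pointer is stored in entry->fmt above,
 * so @fmt must stay resident for as long as the trace data may be read:
 *
 *	static __printf(1, 2) void my_trace(const char *fmt, ...)
 *	{
 *		va_list ap;
 *
 *		va_start(ap, fmt);
 *		trace_vbprintk(_THIS_IP_, fmt, ap);
 *		va_end(ap);
 *	}
 */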
3274
3275 __printf(3, 0)
3276 static int
3277 __trace_array_vprintk(struct trace_buffer *buffer,
3278                       unsigned long ip, const char *fmt, va_list args)
3279 {
3280         struct trace_event_call *call = &event_print;
3281         struct ring_buffer_event *event;
3282         int len = 0, size, pc;
3283         struct print_entry *entry;
3284         unsigned long flags;
3285         char *tbuffer;
3286
3287         if (tracing_disabled || tracing_selftest_running)
3288                 return 0;
3289
3290         /* Don't pollute graph traces with trace_vprintk internals */
3291         pause_graph_tracing();
3292
3293         pc = preempt_count();
3294         preempt_disable_notrace();
3295
3297         tbuffer = get_trace_buf();
3298         if (!tbuffer) {
3299                 len = 0;
3300                 goto out_nobuffer;
3301         }
3302
3303         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3304
3305         local_save_flags(flags);
3306         size = sizeof(*entry) + len + 1;
3307         ring_buffer_nest_start(buffer);
3308         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3309                                             flags, pc);
3310         if (!event)
3311                 goto out;
3312         entry = ring_buffer_event_data(event);
3313         entry->ip = ip;
3314
3315         memcpy(&entry->buf, tbuffer, len + 1);
3316         if (!call_filter_check_discard(call, entry, buffer, event)) {
3317                 __buffer_unlock_commit(buffer, event);
3318                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3319         }
3320
3321 out:
3322         ring_buffer_nest_end(buffer);
3323         put_trace_buf();
3324
3325 out_nobuffer:
3326         preempt_enable_notrace();
3327         unpause_graph_tracing();
3328
3329         return len;
3330 }
3331
3332 __printf(3, 0)
3333 int trace_array_vprintk(struct trace_array *tr,
3334                         unsigned long ip, const char *fmt, va_list args)
3335 {
3336         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3337 }
3338
3339 __printf(3, 0)
3340 int trace_array_printk(struct trace_array *tr,
3341                        unsigned long ip, const char *fmt, ...)
3342 {
3343         int ret;
3344         va_list ap;
3345
3346         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3347                 return 0;
3348
3349         if (!tr)
3350                 return -ENOENT;
3351
3352         va_start(ap, fmt);
3353         ret = trace_array_vprintk(tr, ip, fmt, ap);
3354         va_end(ap);
3355         return ret;
3356 }
3357 EXPORT_SYMBOL_GPL(trace_array_printk);
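
/*
 * Example (illustrative sketch): writing into a private tracing
 * instance instead of the global buffer.  "my_subsys" and "err" are
 * made-up names; trace_array_get_by_name() creates the instance if it
 * does not exist yet and takes a reference on it:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr)
 *		trace_array_printk(tr, _THIS_IP_, "reset failed: %d\n", err);
 */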
3358
3359 __printf(3, 4)
3360 int trace_array_printk_buf(struct trace_buffer *buffer,
3361                            unsigned long ip, const char *fmt, ...)
3362 {
3363         int ret;
3364         va_list ap;
3365
3366         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3367                 return 0;
3368
3369         va_start(ap, fmt);
3370         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3371         va_end(ap);
3372         return ret;
3373 }
3374
3375 __printf(2, 0)
3376 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3377 {
3378         return trace_array_vprintk(&global_trace, ip, fmt, args);
3379 }
3380 EXPORT_SYMBOL_GPL(trace_vprintk);
3381
3382 static void trace_iterator_increment(struct trace_iterator *iter)
3383 {
3384         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3385
3386         iter->idx++;
3387         if (buf_iter)
3388                 ring_buffer_iter_advance(buf_iter);
3389 }
3390
3391 static struct trace_entry *
3392 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3393                 unsigned long *lost_events)
3394 {
3395         struct ring_buffer_event *event;
3396         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3397
3398         if (buf_iter) {
3399                 event = ring_buffer_iter_peek(buf_iter, ts);
3400                 if (lost_events)
3401                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3402                                 (unsigned long)-1 : 0;
3403         } else {
3404                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3405                                          lost_events);
3406         }
3407
3408         if (event) {
3409                 iter->ent_size = ring_buffer_event_length(event);
3410                 return ring_buffer_event_data(event);
3411         }
3412         iter->ent_size = 0;
3413         return NULL;
3414 }
3415
3416 static struct trace_entry *
3417 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3418                   unsigned long *missing_events, u64 *ent_ts)
3419 {
3420         struct trace_buffer *buffer = iter->array_buffer->buffer;
3421         struct trace_entry *ent, *next = NULL;
3422         unsigned long lost_events = 0, next_lost = 0;
3423         int cpu_file = iter->cpu_file;
3424         u64 next_ts = 0, ts;
3425         int next_cpu = -1;
3426         int next_size = 0;
3427         int cpu;
3428
3429         /*
3430          * If we are in a per_cpu trace file, don't bother iterating over
3431          * all CPUs; just peek at that CPU directly.
3432          */
3433         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3434                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3435                         return NULL;
3436                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3437                 if (ent_cpu)
3438                         *ent_cpu = cpu_file;
3439
3440                 return ent;
3441         }
3442
3443         for_each_tracing_cpu(cpu) {
3444
3445                 if (ring_buffer_empty_cpu(buffer, cpu))
3446                         continue;
3447
3448                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3449
3450                 /*
3451                  * Pick the entry with the smallest timestamp:
3452                  */
3453                 if (ent && (!next || ts < next_ts)) {
3454                         next = ent;
3455                         next_cpu = cpu;
3456                         next_ts = ts;
3457                         next_lost = lost_events;
3458                         next_size = iter->ent_size;
3459                 }
3460         }
3461
3462         iter->ent_size = next_size;
3463
3464         if (ent_cpu)
3465                 *ent_cpu = next_cpu;
3466
3467         if (ent_ts)
3468                 *ent_ts = next_ts;
3469
3470         if (missing_events)
3471                 *missing_events = next_lost;
3472
3473         return next;
3474 }
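
/*
 * Example: if the next pending events on CPUs 0, 1 and 2 carry
 * timestamps 105, 100 and 130, the loop above picks the CPU 1 event
 * (ts == 100) and records its lost-event count and size; repeated
 * calls therefore merge the per-CPU buffers into a single stream
 * ordered by timestamp.
 */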
3475
3476 #define STATIC_TEMP_BUF_SIZE    128
3477 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3478
3479 /* Find the next real entry, without updating the iterator itself */
3480 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3481                                           int *ent_cpu, u64 *ent_ts)
3482 {
3483         /* __find_next_entry will reset ent_size */
3484         int ent_size = iter->ent_size;
3485         struct trace_entry *entry;
3486
3487         /*
3488          * If called from ftrace_dump(), then the iter->temp buffer
3489          * will be the static_temp_buf and not created from kmalloc.
3490          * If the entry size is greater than the buffer, we cannot
3491          * save it. Just return NULL in that case. This is only
3492          * used to add markers when two consecutive events' time
3493          * stamps have a large delta. See trace_print_lat_context().
3494          */
3495         if (iter->temp == static_temp_buf &&
3496             STATIC_TEMP_BUF_SIZE < ent_size)
3497                 return NULL;
3498
3499         /*
3500          * __find_next_entry() may call peek_next_entry(), which may
3501          * call ring_buffer_peek(), which can make the contents of
3502          * iter->ent undefined. Need to copy iter->ent now.
3503          */
3504         if (iter->ent && iter->ent != iter->temp) {
3505                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3506                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3507                         kfree(iter->temp);
3508                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3509                         if (!iter->temp)
3510                                 return NULL;
3511                 }
3512                 memcpy(iter->temp, iter->ent, iter->ent_size);
3513                 iter->temp_size = iter->ent_size;
3514                 iter->ent = iter->temp;
3515         }
3516         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3517         /* Put back the original ent_size */
3518         iter->ent_size = ent_size;
3519
3520         return entry;
3521 }
3522
3523 /* Find the next real entry, and increment the iterator to the next entry */
3524 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3525 {
3526         iter->ent = __find_next_entry(iter, &iter->cpu,
3527                                       &iter->lost_events, &iter->ts);
3528
3529         if (iter->ent)
3530                 trace_iterator_increment(iter);
3531
3532         return iter->ent ? iter : NULL;
3533 }
3534
3535 static void trace_consume(struct trace_iterator *iter)
3536 {
3537         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3538                             &iter->lost_events);
3539 }
3540
3541 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3542 {
3543         struct trace_iterator *iter = m->private;
3544         int i = (int)*pos;
3545         void *ent;
3546
3547         WARN_ON_ONCE(iter->leftover);
3548
3549         (*pos)++;
3550
3551         /* can't go backwards */
3552         if (iter->idx > i)
3553                 return NULL;
3554
3555         if (iter->idx < 0)
3556                 ent = trace_find_next_entry_inc(iter);
3557         else
3558                 ent = iter;
3559
3560         while (ent && iter->idx < i)
3561                 ent = trace_find_next_entry_inc(iter);
3562
3563         iter->pos = *pos;
3564
3565         return ent;
3566 }
3567
3568 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3569 {
3570         struct ring_buffer_event *event;
3571         struct ring_buffer_iter *buf_iter;
3572         unsigned long entries = 0;
3573         u64 ts;
3574
3575         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3576
3577         buf_iter = trace_buffer_iter(iter, cpu);
3578         if (!buf_iter)
3579                 return;
3580
3581         ring_buffer_iter_reset(buf_iter);
3582
3583         /*
3584          * With the max latency tracers, it is possible that a reset
3585          * never took place on a CPU. This is evident from timestamps
3586          * that are before the start of the buffer.
3587          */
3588         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3589                 if (ts >= iter->array_buffer->time_start)
3590                         break;
3591                 entries++;
3592                 ring_buffer_iter_advance(buf_iter);
3593         }
3594
3595         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3596 }
3597
3598 /*
3599  * The current tracer is copied to avoid taking a global lock
3600  * all around.
3601  */
3602 static void *s_start(struct seq_file *m, loff_t *pos)
3603 {
3604         struct trace_iterator *iter = m->private;
3605         struct trace_array *tr = iter->tr;
3606         int cpu_file = iter->cpu_file;
3607         void *p = NULL;
3608         loff_t l = 0;
3609         int cpu;
3610
3611         /*
3612          * Copy the tracer to avoid using a global lock all around.
3613          * iter->trace is a copy of current_trace; the name pointer may
3614          * be used instead of a strcmp(), as iter->trace->name will
3615          * point to the same string as current_trace->name.
3616          */
3617         mutex_lock(&trace_types_lock);
3618         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3619                 *iter->trace = *tr->current_trace;
3620         mutex_unlock(&trace_types_lock);
3621
3622 #ifdef CONFIG_TRACER_MAX_TRACE
3623         if (iter->snapshot && iter->trace->use_max_tr)
3624                 return ERR_PTR(-EBUSY);
3625 #endif
3626
3627         if (!iter->snapshot)
3628                 atomic_inc(&trace_record_taskinfo_disabled);
3629
3630         if (*pos != iter->pos) {
3631                 iter->ent = NULL;
3632                 iter->cpu = 0;
3633                 iter->idx = -1;
3634
3635                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3636                         for_each_tracing_cpu(cpu)
3637                                 tracing_iter_reset(iter, cpu);
3638                 } else
3639                         tracing_iter_reset(iter, cpu_file);
3640
3641                 iter->leftover = 0;
3642                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3643                         ;
3644
3645         } else {
3646                 /*
3647                  * If we overflowed the seq_file before, then we want
3648                  * to just reuse the trace_seq buffer again.
3649                  */
3650                 if (iter->leftover)
3651                         p = iter;
3652                 else {
3653                         l = *pos - 1;
3654                         p = s_next(m, p, &l);
3655                 }
3656         }
3657
3658         trace_event_read_lock();
3659         trace_access_lock(cpu_file);
3660         return p;
3661 }
3662
3663 static void s_stop(struct seq_file *m, void *p)
3664 {
3665         struct trace_iterator *iter = m->private;
3666
3667 #ifdef CONFIG_TRACER_MAX_TRACE
3668         if (iter->snapshot && iter->trace->use_max_tr)
3669                 return;
3670 #endif
3671
3672         if (!iter->snapshot)
3673                 atomic_dec(&trace_record_taskinfo_disabled);
3674
3675         trace_access_unlock(iter->cpu_file);
3676         trace_event_read_unlock();
3677 }
3678
3679 static void
3680 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3681                       unsigned long *entries, int cpu)
3682 {
3683         unsigned long count;
3684
3685         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3686         /*
3687          * If this buffer has skipped entries, then we hold all
3688          * entries for the trace and we need to ignore the
3689          * ones before the time stamp.
3690          */
3691         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3692                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3693                 /* total is the same as the entries */
3694                 *total = count;
3695         } else
3696                 *total = count +
3697                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3698         *entries = count;
3699 }
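
/*
 * Example: a CPU buffer that currently holds 900 entries and has an
 * overrun count of 100 (events overwritten since the last reset) is
 * reported as *entries = 900 and *total = 1000, which is what ends up
 * in the "entries-in-buffer/entries-written: 900/1000" header line.
 */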
3700
3701 static void
3702 get_total_entries(struct array_buffer *buf,
3703                   unsigned long *total, unsigned long *entries)
3704 {
3705         unsigned long t, e;
3706         int cpu;
3707
3708         *total = 0;
3709         *entries = 0;
3710
3711         for_each_tracing_cpu(cpu) {
3712                 get_total_entries_cpu(buf, &t, &e, cpu);
3713                 *total += t;
3714                 *entries += e;
3715         }
3716 }
3717
3718 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3719 {
3720         unsigned long total, entries;
3721
3722         if (!tr)
3723                 tr = &global_trace;
3724
3725         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3726
3727         return entries;
3728 }
3729
3730 unsigned long trace_total_entries(struct trace_array *tr)
3731 {
3732         unsigned long total, entries;
3733
3734         if (!tr)
3735                 tr = &global_trace;
3736
3737         get_total_entries(&tr->array_buffer, &total, &entries);
3738
3739         return entries;
3740 }
3741
3742 static void print_lat_help_header(struct seq_file *m)
3743 {
3744         seq_puts(m, "#                  _------=> CPU#            \n"
3745                     "#                 / _-----=> irqs-off        \n"
3746                     "#                | / _----=> need-resched    \n"
3747                     "#                || / _---=> hardirq/softirq \n"
3748                     "#                ||| / _--=> preempt-depth   \n"
3749                     "#                |||| /     delay            \n"
3750                     "#  cmd     pid   ||||| time  |   caller      \n"
3751                     "#     \\   /      |||||  \\    |   /         \n");
3752 }
3753
3754 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3755 {
3756         unsigned long total;
3757         unsigned long entries;
3758
3759         get_total_entries(buf, &total, &entries);
3760         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3761                    entries, total, num_online_cpus());
3762         seq_puts(m, "#\n");
3763 }
3764
3765 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3766                                    unsigned int flags)
3767 {
3768         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3769
3770         print_event_info(buf, m);
3771
3772         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3773         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3774 }
3775
3776 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3777                                        unsigned int flags)
3778 {
3779         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3780         const char *space = "          ";
3781         int prec = tgid ? 10 : 2;
3782
3783         print_event_info(buf, m);
3784
3785         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3786         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3787         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3788         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3789         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3790         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3791         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3792 }
3793
3794 void
3795 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3796 {
3797         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3798         struct array_buffer *buf = iter->array_buffer;
3799         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3800         struct tracer *type = iter->trace;
3801         unsigned long entries;
3802         unsigned long total;
3803         const char *name = type->name;
3806
3807         get_total_entries(buf, &total, &entries);
3808
3809         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3810                    name, UTS_RELEASE);
3811         seq_puts(m, "# -----------------------------------"
3812                  "---------------------------------\n");
3813         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3814                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3815                    nsecs_to_usecs(data->saved_latency),
3816                    entries,
3817                    total,
3818                    buf->cpu,
3819 #if defined(CONFIG_PREEMPT_NONE)
3820                    "server",
3821 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3822                    "desktop",
3823 #elif defined(CONFIG_PREEMPT)
3824                    "preempt",
3825 #elif defined(CONFIG_PREEMPT_RT)
3826                    "preempt_rt",
3827 #else
3828                    "unknown",
3829 #endif
3830                    /* These are reserved for later use */
3831                    0, 0, 0, 0);
3832 #ifdef CONFIG_SMP
3833         seq_printf(m, " #P:%d)\n", num_online_cpus());
3834 #else
3835         seq_puts(m, ")\n");
3836 #endif
3837         seq_puts(m, "#    -----------------\n");
3838         seq_printf(m, "#    | task: %.16s-%d "
3839                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3840                    data->comm, data->pid,
3841                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3842                    data->policy, data->rt_priority);
3843         seq_puts(m, "#    -----------------\n");
3844
3845         if (data->critical_start) {
3846                 seq_puts(m, "#  => started at: ");
3847                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3848                 trace_print_seq(m, &iter->seq);
3849                 seq_puts(m, "\n#  => ended at:   ");
3850                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3851                 trace_print_seq(m, &iter->seq);
3852                 seq_puts(m, "\n#\n");
3853         }
3854
3855         seq_puts(m, "#\n");
3856 }
3857
3858 static void test_cpu_buff_start(struct trace_iterator *iter)
3859 {
3860         struct trace_seq *s = &iter->seq;
3861         struct trace_array *tr = iter->tr;
3862
3863         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3864                 return;
3865
3866         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3867                 return;
3868
3869         if (cpumask_available(iter->started) &&
3870             cpumask_test_cpu(iter->cpu, iter->started))
3871                 return;
3872
3873         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3874                 return;
3875
3876         if (cpumask_available(iter->started))
3877                 cpumask_set_cpu(iter->cpu, iter->started);
3878
3879         /* Don't print started cpu buffer for the first entry of the trace */
3880         if (iter->idx > 1)
3881                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3882                                 iter->cpu);
3883 }
3884
3885 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3886 {
3887         struct trace_array *tr = iter->tr;
3888         struct trace_seq *s = &iter->seq;
3889         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3890         struct trace_entry *entry;
3891         struct trace_event *event;
3892
3893         entry = iter->ent;
3894
3895         test_cpu_buff_start(iter);
3896
3897         event = ftrace_find_event(entry->type);
3898
3899         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3900                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3901                         trace_print_lat_context(iter);
3902                 else
3903                         trace_print_context(iter);
3904         }
3905
3906         if (trace_seq_has_overflowed(s))
3907                 return TRACE_TYPE_PARTIAL_LINE;
3908
3909         if (event)
3910                 return event->funcs->trace(iter, sym_flags, event);
3911
3912         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3913
3914         return trace_handle_return(s);
3915 }
3916
3917 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3918 {
3919         struct trace_array *tr = iter->tr;
3920         struct trace_seq *s = &iter->seq;
3921         struct trace_entry *entry;
3922         struct trace_event *event;
3923
3924         entry = iter->ent;
3925
3926         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3927                 trace_seq_printf(s, "%d %d %llu ",
3928                                  entry->pid, iter->cpu, iter->ts);
3929
3930         if (trace_seq_has_overflowed(s))
3931                 return TRACE_TYPE_PARTIAL_LINE;
3932
3933         event = ftrace_find_event(entry->type);
3934         if (event)
3935                 return event->funcs->raw(iter, 0, event);
3936
3937         trace_seq_printf(s, "%d ?\n", entry->type);
3938
3939         return trace_handle_return(s);
3940 }
3941
3942 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3943 {
3944         struct trace_array *tr = iter->tr;
3945         struct trace_seq *s = &iter->seq;
3946         unsigned char newline = '\n';
3947         struct trace_entry *entry;
3948         struct trace_event *event;
3949
3950         entry = iter->ent;
3951
3952         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3953                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3954                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3955                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3956                 if (trace_seq_has_overflowed(s))
3957                         return TRACE_TYPE_PARTIAL_LINE;
3958         }
3959
3960         event = ftrace_find_event(entry->type);
3961         if (event) {
3962                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3963                 if (ret != TRACE_TYPE_HANDLED)
3964                         return ret;
3965         }
3966
3967         SEQ_PUT_FIELD(s, newline);
3968
3969         return trace_handle_return(s);
3970 }
3971
3972 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3973 {
3974         struct trace_array *tr = iter->tr;
3975         struct trace_seq *s = &iter->seq;
3976         struct trace_entry *entry;
3977         struct trace_event *event;
3978
3979         entry = iter->ent;
3980
3981         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3982                 SEQ_PUT_FIELD(s, entry->pid);
3983                 SEQ_PUT_FIELD(s, iter->cpu);
3984                 SEQ_PUT_FIELD(s, iter->ts);
3985                 if (trace_seq_has_overflowed(s))
3986                         return TRACE_TYPE_PARTIAL_LINE;
3987         }
3988
3989         event = ftrace_find_event(entry->type);
3990         return event ? event->funcs->binary(iter, 0, event) :
3991                 TRACE_TYPE_HANDLED;
3992 }
3993
3994 int trace_empty(struct trace_iterator *iter)
3995 {
3996         struct ring_buffer_iter *buf_iter;
3997         int cpu;
3998
3999         /* If we are looking at one CPU buffer, only check that one */
4000         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4001                 cpu = iter->cpu_file;
4002                 buf_iter = trace_buffer_iter(iter, cpu);
4003                 if (buf_iter) {
4004                         if (!ring_buffer_iter_empty(buf_iter))
4005                                 return 0;
4006                 } else {
4007                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4008                                 return 0;
4009                 }
4010                 return 1;
4011         }
4012
4013         for_each_tracing_cpu(cpu) {
4014                 buf_iter = trace_buffer_iter(iter, cpu);
4015                 if (buf_iter) {
4016                         if (!ring_buffer_iter_empty(buf_iter))
4017                                 return 0;
4018                 } else {
4019                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4020                                 return 0;
4021                 }
4022         }
4023
4024         return 1;
4025 }
4026
4027 /*  Called with trace_event_read_lock() held. */
4028 enum print_line_t print_trace_line(struct trace_iterator *iter)
4029 {
4030         struct trace_array *tr = iter->tr;
4031         unsigned long trace_flags = tr->trace_flags;
4032         enum print_line_t ret;
4033
4034         if (iter->lost_events) {
4035                 if (iter->lost_events == (unsigned long)-1)
4036                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4037                                          iter->cpu);
4038                 else
4039                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4040                                          iter->cpu, iter->lost_events);
4041                 if (trace_seq_has_overflowed(&iter->seq))
4042                         return TRACE_TYPE_PARTIAL_LINE;
4043         }
4044
4045         if (iter->trace && iter->trace->print_line) {
4046                 ret = iter->trace->print_line(iter);
4047                 if (ret != TRACE_TYPE_UNHANDLED)
4048                         return ret;
4049         }
4050
4051         if (iter->ent->type == TRACE_BPUTS &&
4052                         trace_flags & TRACE_ITER_PRINTK &&
4053                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4054                 return trace_print_bputs_msg_only(iter);
4055
4056         if (iter->ent->type == TRACE_BPRINT &&
4057                         trace_flags & TRACE_ITER_PRINTK &&
4058                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4059                 return trace_print_bprintk_msg_only(iter);
4060
4061         if (iter->ent->type == TRACE_PRINT &&
4062                         trace_flags & TRACE_ITER_PRINTK &&
4063                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4064                 return trace_print_printk_msg_only(iter);
4065
4066         if (trace_flags & TRACE_ITER_BIN)
4067                 return print_bin_fmt(iter);
4068
4069         if (trace_flags & TRACE_ITER_HEX)
4070                 return print_hex_fmt(iter);
4071
4072         if (trace_flags & TRACE_ITER_RAW)
4073                 return print_raw_fmt(iter);
4074
4075         return print_trace_fmt(iter);
4076 }
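
/*
 * Example: with both the "trace_printk" and "printk-msg-only" options
 * set, a TRACE_PRINT event is emitted as just its message via
 * trace_print_printk_msg_only(); with "printk-msg-only" cleared it
 * falls through to the generic output, where bin, hex and raw take
 * precedence (in that order) over the default text format.
 */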
4077
4078 void trace_latency_header(struct seq_file *m)
4079 {
4080         struct trace_iterator *iter = m->private;
4081         struct trace_array *tr = iter->tr;
4082
4083         /* print nothing if the buffers are empty */
4084         if (trace_empty(iter))
4085                 return;
4086
4087         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4088                 print_trace_header(m, iter);
4089
4090         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4091                 print_lat_help_header(m);
4092 }
4093
4094 void trace_default_header(struct seq_file *m)
4095 {
4096         struct trace_iterator *iter = m->private;
4097         struct trace_array *tr = iter->tr;
4098         unsigned long trace_flags = tr->trace_flags;
4099
4100         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4101                 return;
4102
4103         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4104                 /* print nothing if the buffers are empty */
4105                 if (trace_empty(iter))
4106                         return;
4107                 print_trace_header(m, iter);
4108                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4109                         print_lat_help_header(m);
4110         } else {
4111                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4112                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4113                                 print_func_help_header_irq(iter->array_buffer,
4114                                                            m, trace_flags);
4115                         else
4116                                 print_func_help_header(iter->array_buffer, m,
4117                                                        trace_flags);
4118                 }
4119         }
4120 }
4121
4122 static void test_ftrace_alive(struct seq_file *m)
4123 {
4124         if (!ftrace_is_dead())
4125                 return;
4126         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4127                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4128 }
4129
4130 #ifdef CONFIG_TRACER_MAX_TRACE
4131 static void show_snapshot_main_help(struct seq_file *m)
4132 {
4133         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4134                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4135                     "#                      Takes a snapshot of the main buffer.\n"
4136                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4137                     "#                      (Doesn't have to be '2'; works with any number that\n"
4138                     "#                       is not a '0' or '1')\n");
4139 }
4140
4141 static void show_snapshot_percpu_help(struct seq_file *m)
4142 {
4143         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4144 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4145         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4146                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4147 #else
4148         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4149                     "#                     Must use main snapshot file to allocate.\n");
4150 #endif
4151         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4152                     "#                      (Doesn't have to be '2'; works with any number that\n"
4153                     "#                       is not a '0' or '1')\n");
4154 }
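
/*
 * Example: kernel code can do the equivalent of "echo 1 > snapshot"
 * with tracing_snapshot_alloc(), which allocates the snapshot buffer
 * if needed and then swaps it with the main buffer, while
 * tracing_snapshot() only takes the snapshot and expects the buffer to
 * be allocated already.
 */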
4155
4156 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4157 {
4158         if (iter->tr->allocated_snapshot)
4159                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4160         else
4161                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4162
4163         seq_puts(m, "# Snapshot commands:\n");
4164         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4165                 show_snapshot_main_help(m);
4166         else
4167                 show_snapshot_percpu_help(m);
4168 }
4169 #else
4170 /* Should never be called */
4171 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4172 #endif
4173
4174 static int s_show(struct seq_file *m, void *v)
4175 {
4176         struct trace_iterator *iter = v;
4177         int ret;
4178
4179         if (iter->ent == NULL) {
4180                 if (iter->tr) {
4181                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4182                         seq_puts(m, "#\n");
4183                         test_ftrace_alive(m);
4184                 }
4185                 if (iter->snapshot && trace_empty(iter))
4186                         print_snapshot_help(m, iter);
4187                 else if (iter->trace && iter->trace->print_header)
4188                         iter->trace->print_header(m);
4189                 else
4190                         trace_default_header(m);
4191
4192         } else if (iter->leftover) {
4193                 /*
4194                  * If we filled the seq_file buffer earlier, we
4195                  * want to just show it now.
4196                  */
4197                 ret = trace_print_seq(m, &iter->seq);
4198
4199                 /* ret should this time be zero, but you never know */
4200                 iter->leftover = ret;
4201
4202         } else {
4203                 print_trace_line(iter);
4204                 ret = trace_print_seq(m, &iter->seq);
4205                 /*
4206                  * If we overflow the seq_file buffer, then it will
4207                  * ask us for this data again at start up.
4208                  * Use that instead.
4209                  *  ret is 0 if seq_file write succeeded.
4210                  *        -1 otherwise.
4211                  */
4212                 iter->leftover = ret;
4213         }
4214
4215         return 0;
4216 }
4217
4218 /*
4219  * Should be used after trace_array_get(), trace_types_lock
4220  * ensures that i_cdev was already initialized.
4221  */
4222 static inline int tracing_get_cpu(struct inode *inode)
4223 {
4224         if (inode->i_cdev) /* See trace_create_cpu_file() */
4225                 return (long)inode->i_cdev - 1;
4226         return RING_BUFFER_ALL_CPUS;
4227 }
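
/*
 * Example: trace_create_cpu_file() stores cpu + 1 in i_cdev, so the
 * inode behind per_cpu/cpu2/trace carries (void *)3 and this returns 2,
 * while files for the whole buffer leave i_cdev NULL and get
 * RING_BUFFER_ALL_CPUS.
 */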
4228
4229 static const struct seq_operations tracer_seq_ops = {
4230         .start          = s_start,
4231         .next           = s_next,
4232         .stop           = s_stop,
4233         .show           = s_show,
4234 };
4235
4236 static struct trace_iterator *
4237 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4238 {
4239         struct trace_array *tr = inode->i_private;
4240         struct trace_iterator *iter;
4241         int cpu;
4242
4243         if (tracing_disabled)
4244                 return ERR_PTR(-ENODEV);
4245
4246         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4247         if (!iter)
4248                 return ERR_PTR(-ENOMEM);
4249
4250         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4251                                     GFP_KERNEL);
4252         if (!iter->buffer_iter)
4253                 goto release;
4254
4255         /*
4256          * trace_find_next_entry() may need to save off iter->ent.
4257          * It will place it into the iter->temp buffer. As most
4258          * events are less than 128, allocate a buffer of that size.
4259          * If one is greater, then trace_find_next_entry() will
4260          * allocate a new buffer to adjust for the bigger iter->ent.
4261          * It's not critical if it fails to get allocated here.
4262          */
4263         iter->temp = kmalloc(128, GFP_KERNEL);
4264         if (iter->temp)
4265                 iter->temp_size = 128;
4266
4267         /*
4268          * We make a copy of the current tracer to avoid concurrent
4269          * changes on it while we are reading.
4270          */
4271         mutex_lock(&trace_types_lock);
4272         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4273         if (!iter->trace)
4274                 goto fail;
4275
4276         *iter->trace = *tr->current_trace;
4277
4278         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4279                 goto fail;
4280
4281         iter->tr = tr;
4282
4283 #ifdef CONFIG_TRACER_MAX_TRACE
4284         /* Currently only the top directory has a snapshot */
4285         if (tr->current_trace->print_max || snapshot)
4286                 iter->array_buffer = &tr->max_buffer;
4287         else
4288 #endif
4289                 iter->array_buffer = &tr->array_buffer;
4290         iter->snapshot = snapshot;
4291         iter->pos = -1;
4292         iter->cpu_file = tracing_get_cpu(inode);
4293         mutex_init(&iter->mutex);
4294
4295         /* Notify the tracer early; before we stop tracing. */
4296         if (iter->trace->open)
4297                 iter->trace->open(iter);
4298
4299         /* Annotate start of buffers if we had overruns */
4300         if (ring_buffer_overruns(iter->array_buffer->buffer))
4301                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4302
4303         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4304         if (trace_clocks[tr->clock_id].in_ns)
4305                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4306
4307         /*
4308          * If pause-on-trace is enabled, then stop the trace while
4309          * dumping, unless this is the "snapshot" file
4310          */
4311         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4312                 tracing_stop_tr(tr);
4313
4314         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4315                 for_each_tracing_cpu(cpu) {
4316                         iter->buffer_iter[cpu] =
4317                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4318                                                          cpu, GFP_KERNEL);
4319                 }
4320                 ring_buffer_read_prepare_sync();
4321                 for_each_tracing_cpu(cpu) {
4322                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4323                         tracing_iter_reset(iter, cpu);
4324                 }
4325         } else {
4326                 cpu = iter->cpu_file;
4327                 iter->buffer_iter[cpu] =
4328                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4329                                                  cpu, GFP_KERNEL);
4330                 ring_buffer_read_prepare_sync();
4331                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4332                 tracing_iter_reset(iter, cpu);
4333         }
4334
4335         mutex_unlock(&trace_types_lock);
4336
4337         return iter;
4338
4339  fail:
4340         mutex_unlock(&trace_types_lock);
4341         kfree(iter->trace);
4342         kfree(iter->temp);
4343         kfree(iter->buffer_iter);
4344 release:
4345         seq_release_private(inode, file);
4346         return ERR_PTR(-ENOMEM);
4347 }
4348
4349 int tracing_open_generic(struct inode *inode, struct file *filp)
4350 {
4351         int ret;
4352
4353         ret = tracing_check_open_get_tr(NULL);
4354         if (ret)
4355                 return ret;
4356
4357         filp->private_data = inode->i_private;
4358         return 0;
4359 }
4360
4361 bool tracing_is_disabled(void)
4362 {
4363         return tracing_disabled ? true : false;
4364 }
4365
4366 /*
4367  * Open and update trace_array ref count.
4368  * Must have the current trace_array passed to it.
4369  */
4370 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4371 {
4372         struct trace_array *tr = inode->i_private;
4373         int ret;
4374
4375         ret = tracing_check_open_get_tr(tr);
4376         if (ret)
4377                 return ret;
4378
4379         filp->private_data = inode->i_private;
4380
4381         return 0;
4382 }
4383
4384 static int tracing_release(struct inode *inode, struct file *file)
4385 {
4386         struct trace_array *tr = inode->i_private;
4387         struct seq_file *m = file->private_data;
4388         struct trace_iterator *iter;
4389         int cpu;
4390
4391         if (!(file->f_mode & FMODE_READ)) {
4392                 trace_array_put(tr);
4393                 return 0;
4394         }
4395
4396         /* Writes do not use seq_file */
4397         iter = m->private;
4398         mutex_lock(&trace_types_lock);
4399
4400         for_each_tracing_cpu(cpu) {
4401                 if (iter->buffer_iter[cpu])
4402                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4403         }
4404
4405         if (iter->trace && iter->trace->close)
4406                 iter->trace->close(iter);
4407
4408         if (!iter->snapshot && tr->stop_count)
4409                 /* reenable tracing if it was previously enabled */
4410                 tracing_start_tr(tr);
4411
4412         __trace_array_put(tr);
4413
4414         mutex_unlock(&trace_types_lock);
4415
4416         mutex_destroy(&iter->mutex);
4417         free_cpumask_var(iter->started);
4418         kfree(iter->temp);
4419         kfree(iter->trace);
4420         kfree(iter->buffer_iter);
4421         seq_release_private(inode, file);
4422
4423         return 0;
4424 }
4425
4426 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4427 {
4428         struct trace_array *tr = inode->i_private;
4429
4430         trace_array_put(tr);
4431         return 0;
4432 }
4433
4434 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4435 {
4436         struct trace_array *tr = inode->i_private;
4437
4438         trace_array_put(tr);
4439
4440         return single_release(inode, file);
4441 }
4442
4443 static int tracing_open(struct inode *inode, struct file *file)
4444 {
4445         struct trace_array *tr = inode->i_private;
4446         struct trace_iterator *iter;
4447         int ret;
4448
4449         ret = tracing_check_open_get_tr(tr);
4450         if (ret)
4451                 return ret;
4452
4453         /* If this file was open for write, then erase contents */
4454         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4455                 int cpu = tracing_get_cpu(inode);
4456                 struct array_buffer *trace_buf = &tr->array_buffer;
4457
4458 #ifdef CONFIG_TRACER_MAX_TRACE
4459                 if (tr->current_trace->print_max)
4460                         trace_buf = &tr->max_buffer;
4461 #endif
4462
4463                 if (cpu == RING_BUFFER_ALL_CPUS)
4464                         tracing_reset_online_cpus(trace_buf);
4465                 else
4466                         tracing_reset_cpu(trace_buf, cpu);
4467         }
4468
4469         if (file->f_mode & FMODE_READ) {
4470                 iter = __tracing_open(inode, file, false);
4471                 if (IS_ERR(iter))
4472                         ret = PTR_ERR(iter);
4473                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4474                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4475         }
4476
4477         if (ret < 0)
4478                 trace_array_put(tr);
4479
4480         return ret;
4481 }
4482
4483 /*
4484  * Some tracers are not suitable for instance buffers.
4485  * A tracer is always available for the global array (toplevel)
4486  * or if it explicitly states that it is.
4487  */
4488 static bool
4489 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4490 {
4491         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4492 }
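
/*
 * Example (sketch): a tracer opts in to instance buffers by setting
 * allow_instances in its struct tracer definition ("my_tracer" is a
 * made-up name):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		 = "my_tracer",
 *		.allow_instances = true,
 *	};
 *
 * Without that flag, the tracer is only offered on the top-level array.
 */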
4493
4494 /* Find the next tracer that this trace array may use */
4495 static struct tracer *
4496 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4497 {
4498         while (t && !trace_ok_for_array(t, tr))
4499                 t = t->next;
4500
4501         return t;
4502 }
4503
4504 static void *
4505 t_next(struct seq_file *m, void *v, loff_t *pos)
4506 {
4507         struct trace_array *tr = m->private;
4508         struct tracer *t = v;
4509
4510         (*pos)++;
4511
4512         if (t)
4513                 t = get_tracer_for_array(tr, t->next);
4514
4515         return t;
4516 }
4517
4518 static void *t_start(struct seq_file *m, loff_t *pos)
4519 {
4520         struct trace_array *tr = m->private;
4521         struct tracer *t;
4522         loff_t l = 0;
4523
4524         mutex_lock(&trace_types_lock);
4525
4526         t = get_tracer_for_array(tr, trace_types);
4527         for (; t && l < *pos; t = t_next(m, t, &l))
4528                         ;
4529
4530         return t;
4531 }
4532
4533 static void t_stop(struct seq_file *m, void *p)
4534 {
4535         mutex_unlock(&trace_types_lock);
4536 }
4537
4538 static int t_show(struct seq_file *m, void *v)
4539 {
4540         struct tracer *t = v;
4541
4542         if (!t)
4543                 return 0;
4544
4545         seq_puts(m, t->name);
4546         if (t->next)
4547                 seq_putc(m, ' ');
4548         else
4549                 seq_putc(m, '\n');
4550
4551         return 0;
4552 }
4553
4554 static const struct seq_operations show_traces_seq_ops = {
4555         .start          = t_start,
4556         .next           = t_next,
4557         .stop           = t_stop,
4558         .show           = t_show,
4559 };
4560
4561 static int show_traces_open(struct inode *inode, struct file *file)
4562 {
4563         struct trace_array *tr = inode->i_private;
4564         struct seq_file *m;
4565         int ret;
4566
4567         ret = tracing_check_open_get_tr(tr);
4568         if (ret)
4569                 return ret;
4570
4571         ret = seq_open(file, &show_traces_seq_ops);
4572         if (ret) {
4573                 trace_array_put(tr);
4574                 return ret;
4575         }
4576
4577         m = file->private_data;
4578         m->private = tr;
4579
4580         return 0;
4581 }
4582
4583 static int show_traces_release(struct inode *inode, struct file *file)
4584 {
4585         struct trace_array *tr = inode->i_private;
4586
4587         trace_array_put(tr);
4588         return seq_release(inode, file);
4589 }
4590
4591 static ssize_t
4592 tracing_write_stub(struct file *filp, const char __user *ubuf,
4593                    size_t count, loff_t *ppos)
4594 {
4595         return count;
4596 }
4597
4598 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4599 {
4600         int ret;
4601
4602         if (file->f_mode & FMODE_READ)
4603                 ret = seq_lseek(file, offset, whence);
4604         else
4605                 file->f_pos = ret = 0;
4606
4607         return ret;
4608 }
4609
4610 static const struct file_operations tracing_fops = {
4611         .open           = tracing_open,
4612         .read           = seq_read,
4613         .write          = tracing_write_stub,
4614         .llseek         = tracing_lseek,
4615         .release        = tracing_release,
4616 };
4617
4618 static const struct file_operations show_traces_fops = {
4619         .open           = show_traces_open,
4620         .read           = seq_read,
4621         .llseek         = seq_lseek,
4622         .release        = show_traces_release,
4623 };
4624
4625 static ssize_t
4626 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4627                      size_t count, loff_t *ppos)
4628 {
4629         struct trace_array *tr = file_inode(filp)->i_private;
4630         char *mask_str;
4631         int len;
4632
4633         len = snprintf(NULL, 0, "%*pb\n",
4634                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4635         mask_str = kmalloc(len, GFP_KERNEL);
4636         if (!mask_str)
4637                 return -ENOMEM;
4638
4639         len = snprintf(mask_str, len, "%*pb\n",
4640                        cpumask_pr_args(tr->tracing_cpumask));
4641         if (len >= count) {
4642                 count = -EINVAL;
4643                 goto out_err;
4644         }
4645         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4646
4647 out_err:
4648         kfree(mask_str);
4649
4650         return count;
4651 }
4652
4653 int tracing_set_cpumask(struct trace_array *tr,
4654                         cpumask_var_t tracing_cpumask_new)
4655 {
4656         int cpu;
4657
4658         if (!tr)
4659                 return -EINVAL;
4660
4661         local_irq_disable();
4662         arch_spin_lock(&tr->max_lock);
4663         for_each_tracing_cpu(cpu) {
4664                 /*
4665                  * Increase/decrease the disabled counter if we are
4666                  * about to flip a bit in the cpumask:
4667                  */
4668                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4669                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4670                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4671                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4672                 }
4673                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4674                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4675                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4676                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4677                 }
4678         }
4679         arch_spin_unlock(&tr->max_lock);
4680         local_irq_enable();
4681
4682         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4683
4684         return 0;
4685 }
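
/*
 * Example (sketch): an in-kernel caller restricting the top-level
 * buffer to CPU 0.  The new mask is copied into tr->tracing_cpumask, so
 * it can be freed afterwards:
 *
 *	cpumask_var_t mask;
 *
 *	if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
 *		cpumask_clear(mask);
 *		cpumask_set_cpu(0, mask);
 *		tracing_set_cpumask(&global_trace, mask);
 *		free_cpumask_var(mask);
 *	}
 *
 * tracing_cpumask_write() below does essentially this on behalf of the
 * tracing_cpumask file.
 */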
4686
4687 static ssize_t
4688 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4689                       size_t count, loff_t *ppos)
4690 {
4691         struct trace_array *tr = file_inode(filp)->i_private;
4692         cpumask_var_t tracing_cpumask_new;
4693         int err;
4694
4695         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4696                 return -ENOMEM;
4697
4698         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4699         if (err)
4700                 goto err_free;
4701
4702         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4703         if (err)
4704                 goto err_free;
4705
4706         free_cpumask_var(tracing_cpumask_new);
4707
4708         return count;
4709
4710 err_free:
4711         free_cpumask_var(tracing_cpumask_new);
4712
4713         return err;
4714 }
4715
4716 static const struct file_operations tracing_cpumask_fops = {
4717         .open           = tracing_open_generic_tr,
4718         .read           = tracing_cpumask_read,
4719         .write          = tracing_cpumask_write,
4720         .release        = tracing_release_generic_tr,
4721         .llseek         = generic_file_llseek,
4722 };
4723
4724 static int tracing_trace_options_show(struct seq_file *m, void *v)
4725 {
4726         struct tracer_opt *trace_opts;
4727         struct trace_array *tr = m->private;
4728         u32 tracer_flags;
4729         int i;
4730
4731         mutex_lock(&trace_types_lock);
4732         tracer_flags = tr->current_trace->flags->val;
4733         trace_opts = tr->current_trace->flags->opts;
4734
4735         for (i = 0; trace_options[i]; i++) {
4736                 if (tr->trace_flags & (1 << i))
4737                         seq_printf(m, "%s\n", trace_options[i]);
4738                 else
4739                         seq_printf(m, "no%s\n", trace_options[i]);
4740         }
4741
4742         for (i = 0; trace_opts[i].name; i++) {
4743                 if (tracer_flags & trace_opts[i].bit)
4744                         seq_printf(m, "%s\n", trace_opts[i].name);
4745                 else
4746                         seq_printf(m, "no%s\n", trace_opts[i].name);
4747         }
4748         mutex_unlock(&trace_types_lock);
4749
4750         return 0;
4751 }
4752
4753 static int __set_tracer_option(struct trace_array *tr,
4754                                struct tracer_flags *tracer_flags,
4755                                struct tracer_opt *opts, int neg)
4756 {
4757         struct tracer *trace = tracer_flags->trace;
4758         int ret;
4759
4760         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4761         if (ret)
4762                 return ret;
4763
4764         if (neg)
4765                 tracer_flags->val &= ~opts->bit;
4766         else
4767                 tracer_flags->val |= opts->bit;
4768         return 0;
4769 }
4770
4771 /* Try to assign a tracer specific option */
4772 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4773 {
4774         struct tracer *trace = tr->current_trace;
4775         struct tracer_flags *tracer_flags = trace->flags;
4776         struct tracer_opt *opts = NULL;
4777         int i;
4778
4779         for (i = 0; tracer_flags->opts[i].name; i++) {
4780                 opts = &tracer_flags->opts[i];
4781
4782                 if (strcmp(cmp, opts->name) == 0)
4783                         return __set_tracer_option(tr, trace->flags, opts, neg);
4784         }
4785
4786         return -EINVAL;
4787 }
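
/*
 * Example (sketch): the tracer-specific options matched above come from
 * a tracer_flags/tracer_opt pair that the tracer hangs off its ->flags
 * pointer, along the lines of ("my-option" is a made-up name):
 *
 *	static struct tracer_opt my_opts[] = {
 *		{ TRACER_OPT(my-option, 0x1) },
 *		{ }
 *	};
 *
 *	static struct tracer_flags my_flags = {
 *		.val  = 0,
 *		.opts = my_opts,
 *	};
 *
 * so that "echo my-option > trace_options" ends up in
 * __set_tracer_option(), which calls the tracer's set_flag() callback.
 */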
4788
4789 /* Some tracers require overwrite to stay enabled */
4790 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4791 {
4792         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4793                 return -1;
4794
4795         return 0;
4796 }
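
/*
 * Example (sketch): a latency-style tracer typically calls this from
 * its flag_changed callback so that clearing the "overwrite" option
 * while the tracer is active is refused by set_tracer_flag() below:
 *
 *	static int my_flag_changed(struct trace_array *tr, u32 mask,
 *				   int set)
 *	{
 *		return trace_keep_overwrite(tr->current_trace, mask, set);
 *	}
 */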
4797
4798 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4799 {
4800         if ((mask == TRACE_ITER_RECORD_TGID) ||
4801             (mask == TRACE_ITER_RECORD_CMD))
4802                 lockdep_assert_held(&event_mutex);
4803
4804         /* do nothing if flag is already set */
4805         if (!!(tr->trace_flags & mask) == !!enabled)
4806                 return 0;
4807
4808         /* Give the tracer a chance to approve the change */
4809         if (tr->current_trace->flag_changed)
4810                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4811                         return -EINVAL;
4812
4813         if (enabled)
4814                 tr->trace_flags |= mask;
4815         else
4816                 tr->trace_flags &= ~mask;
4817
4818         if (mask == TRACE_ITER_RECORD_CMD)
4819                 trace_event_enable_cmd_record(enabled);
4820
4821         if (mask == TRACE_ITER_RECORD_TGID) {
4822                 if (!tgid_map)
4823                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4824                                            sizeof(*tgid_map),
4825                                            GFP_KERNEL);
4826                 if (!tgid_map) {
4827                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4828                         return -ENOMEM;
4829                 }
4830
4831                 trace_event_enable_tgid_record(enabled);
4832         }
4833
4834         if (mask == TRACE_ITER_EVENT_FORK)
4835                 trace_event_follow_fork(tr, enabled);
4836
4837         if (mask == TRACE_ITER_FUNC_FORK)
4838                 ftrace_pid_follow_fork(tr, enabled);
4839
4840         if (mask == TRACE_ITER_OVERWRITE) {
4841                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4842 #ifdef CONFIG_TRACER_MAX_TRACE
4843                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4844 #endif
4845         }
4846
4847         if (mask == TRACE_ITER_PRINTK) {
4848                 trace_printk_start_stop_comm(enabled);
4849                 trace_printk_control(enabled);
4850         }
4851
4852         return 0;
4853 }
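
/*
 * A sketch of the flow above, assuming the usual tracefs mount point:
 *
 *   echo record-tgid > /sys/kernel/tracing/trace_options
 *
 * reaches set_tracer_flag() with mask == TRACE_ITER_RECORD_TGID, which
 * allocates tgid_map on first use and then enables TGID recording in
 * the event code via trace_event_enable_tgid_record().
 */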
4854
4855 int trace_set_options(struct trace_array *tr, char *option)
4856 {
4857         char *cmp;
4858         int neg = 0;
4859         int ret;
4860         size_t orig_len = strlen(option);
4861         int len;
4862
4863         cmp = strstrip(option);
4864
4865         len = str_has_prefix(cmp, "no");
4866         if (len)
4867                 neg = 1;
4868
4869         cmp += len;
4870
4871         mutex_lock(&event_mutex);
4872         mutex_lock(&trace_types_lock);
4873
4874         ret = match_string(trace_options, -1, cmp);
4875         /* If no core option matched, try the tracer-specific options */
4876         if (ret < 0)
4877                 ret = set_tracer_option(tr, cmp, neg);
4878         else
4879                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4880
4881         mutex_unlock(&trace_types_lock);
4882         mutex_unlock(&event_mutex);
4883
4884         /*
4885          * If the first trailing whitespace is replaced with '\0' by strstrip,
4886          * turn it back into a space.
4887          */
4888         if (orig_len > strlen(option))
4889                 option[strlen(option)] = ' ';
4890
4891         return ret;
4892 }
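
/*
 * Illustrative examples of the strings handled above (paths relative to
 * the tracefs mount point): a "no" prefix selects the negative form,
 * and anything that is not a core flag is handed to the current tracer
 * via set_tracer_option():
 *
 *   echo sym-offset     > trace_options   (core flag, set)
 *   echo nosym-offset   > trace_options   (core flag, cleared)
 *   echo funcgraph-irqs > trace_options   (tracer-specific, when
 *                                          function_graph is current)
 */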
4893
4894 static void __init apply_trace_boot_options(void)
4895 {
4896         char *buf = trace_boot_options_buf;
4897         char *option;
4898
4899         while (true) {
4900                 option = strsep(&buf, ",");
4901
4902                 if (!option)
4903                         break;
4904
4905                 if (*option)
4906                         trace_set_options(&global_trace, option);
4907
4908                 /* Put back the comma to allow this to be called again */
4909                 if (buf)
4910                         *(buf - 1) = ',';
4911         }
4912 }
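
/*
 * For example (illustrative), booting with
 *
 *   trace_options=sym-addr,stacktrace
 *
 * leaves that string in trace_boot_options_buf, and the loop above
 * applies each comma-separated option in turn during early boot.
 */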
4913
4914 static ssize_t
4915 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4916                         size_t cnt, loff_t *ppos)
4917 {
4918         struct seq_file *m = filp->private_data;
4919         struct trace_array *tr = m->private;
4920         char buf[64];
4921         int ret;
4922
4923         if (cnt >= sizeof(buf))
4924                 return -EINVAL;
4925
4926         if (copy_from_user(buf, ubuf, cnt))
4927                 return -EFAULT;
4928
4929         buf[cnt] = 0;
4930
4931         ret = trace_set_options(tr, buf);
4932         if (ret < 0)
4933                 return ret;
4934
4935         *ppos += cnt;
4936
4937         return cnt;
4938 }
4939
4940 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4941 {
4942         struct trace_array *tr = inode->i_private;
4943         int ret;
4944
4945         ret = tracing_check_open_get_tr(tr);
4946         if (ret)
4947                 return ret;
4948
4949         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4950         if (ret < 0)
4951                 trace_array_put(tr);
4952
4953         return ret;
4954 }
4955
4956 static const struct file_operations tracing_iter_fops = {
4957         .open           = tracing_trace_options_open,
4958         .read           = seq_read,
4959         .llseek         = seq_lseek,
4960         .release        = tracing_single_release_tr,
4961         .write          = tracing_trace_options_write,
4962 };
4963
4964 static const char readme_msg[] =
4965         "tracing mini-HOWTO:\n\n"
4966         "# echo 0 > tracing_on : quick way to disable tracing\n"
4967         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4968         " Important files:\n"
4969         "  trace\t\t\t- The static contents of the buffer\n"
4970         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4971         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4972         "  current_tracer\t- function and latency tracers\n"
4973         "  available_tracers\t- list of configured tracers for current_tracer\n"
4974         "  error_log\t- error log for failed commands (that support it)\n"
4975         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4976         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4977         "  trace_clock\t\t-change the clock used to order events\n"
4978         "       local:   Per cpu clock but may not be synced across CPUs\n"
4979         "      global:   Synced across CPUs but slows tracing down.\n"
4980         "     counter:   Not a clock, but just an increment\n"
4981         "      uptime:   Jiffy counter from time of boot\n"
4982         "        perf:   Same clock that perf events use\n"
4983 #ifdef CONFIG_X86_64
4984         "     x86-tsc:   TSC cycle counter\n"
4985 #endif
4986         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4987         "       delta:   Delta difference against a buffer-wide timestamp\n"
4988         "    absolute:   Absolute (standalone) timestamp\n"
4989         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4990         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4991         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4992         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4993         "\t\t\t  Remove sub-buffer with rmdir\n"
4994         "  trace_options\t\t- Set format or modify how tracing happens\n"
4995         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4996         "\t\t\t  option name\n"
4997         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4998 #ifdef CONFIG_DYNAMIC_FTRACE
4999         "\n  available_filter_functions - list of functions that can be filtered on\n"
5000         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5001         "\t\t\t  functions\n"
5002         "\t     accepts: func_full_name or glob-matching-pattern\n"
5003         "\t     modules: Can select a group via module\n"
5004         "\t      Format: :mod:<module-name>\n"
5005         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5006         "\t    triggers: a command to perform when function is hit\n"
5007         "\t      Format: <function>:<trigger>[:count]\n"
5008         "\t     trigger: traceon, traceoff\n"
5009         "\t\t      enable_event:<system>:<event>\n"
5010         "\t\t      disable_event:<system>:<event>\n"
5011 #ifdef CONFIG_STACKTRACE
5012         "\t\t      stacktrace\n"
5013 #endif
5014 #ifdef CONFIG_TRACER_SNAPSHOT
5015         "\t\t      snapshot\n"
5016 #endif
5017         "\t\t      dump\n"
5018         "\t\t      cpudump\n"
5019         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5020         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5021         "\t     The first one will disable tracing every time do_fault is hit\n"
5022         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5023         "\t       The first time do trap is hit and it disables tracing, the\n"
5024         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5025         "\t       the counter will not decrement. It only decrements when the\n"
5026         "\t       trigger did work\n"
5027         "\t     To remove trigger without count:\n"
5028         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5029         "\t     To remove trigger with a count:\n"
5030         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5031         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5032         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5033         "\t    modules: Can select a group via module command :mod:\n"
5034         "\t    Does not accept triggers\n"
5035 #endif /* CONFIG_DYNAMIC_FTRACE */
5036 #ifdef CONFIG_FUNCTION_TRACER
5037         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5038         "\t\t    (function)\n"
5039         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5040         "\t\t    (function)\n"
5041 #endif
5042 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5043         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5044         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5045         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5046 #endif
5047 #ifdef CONFIG_TRACER_SNAPSHOT
5048         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5049         "\t\t\t  snapshot buffer. Read the contents for more\n"
5050         "\t\t\t  information\n"
5051 #endif
5052 #ifdef CONFIG_STACK_TRACER
5053         "  stack_trace\t\t- Shows the max stack trace when active\n"
5054         "  stack_max_size\t- Shows current max stack size that was traced\n"
5055         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5056         "\t\t\t  new trace)\n"
5057 #ifdef CONFIG_DYNAMIC_FTRACE
5058         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5059         "\t\t\t  traces\n"
5060 #endif
5061 #endif /* CONFIG_STACK_TRACER */
5062 #ifdef CONFIG_DYNAMIC_EVENTS
5063         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5064         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5065 #endif
5066 #ifdef CONFIG_KPROBE_EVENTS
5067         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5068         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5069 #endif
5070 #ifdef CONFIG_UPROBE_EVENTS
5071         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5072         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5073 #endif
5074 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5075         "\t  accepts: event-definitions (one definition per line)\n"
5076         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5077         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5078 #ifdef CONFIG_HIST_TRIGGERS
5079         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5080 #endif
5081         "\t           -:[<group>/]<event>\n"
5082 #ifdef CONFIG_KPROBE_EVENTS
5083         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5084   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5085 #endif
5086 #ifdef CONFIG_UPROBE_EVENTS
5087   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5088 #endif
5089         "\t     args: <name>=fetcharg[:type]\n"
5090         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5091 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5092         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5093 #else
5094         "\t           $stack<index>, $stack, $retval, $comm,\n"
5095 #endif
5096         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5097         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5098         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5099         "\t           <type>\\[<array-size>\\]\n"
5100 #ifdef CONFIG_HIST_TRIGGERS
5101         "\t    field: <stype> <name>;\n"
5102         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5103         "\t           [unsigned] char/int/long\n"
5104 #endif
5105 #endif
5106         "  events/\t\t- Directory containing all trace event subsystems:\n"
5107         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5108         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5109         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5110         "\t\t\t  events\n"
5111         "      filter\t\t- If set, only events passing filter are traced\n"
5112         "  events/<system>/<event>/\t- Directory containing control files for\n"
5113         "\t\t\t  <event>:\n"
5114         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5115         "      filter\t\t- If set, only events passing filter are traced\n"
5116         "      trigger\t\t- If set, a command to perform when event is hit\n"
5117         "\t    Format: <trigger>[:count][if <filter>]\n"
5118         "\t   trigger: traceon, traceoff\n"
5119         "\t            enable_event:<system>:<event>\n"
5120         "\t            disable_event:<system>:<event>\n"
5121 #ifdef CONFIG_HIST_TRIGGERS
5122         "\t            enable_hist:<system>:<event>\n"
5123         "\t            disable_hist:<system>:<event>\n"
5124 #endif
5125 #ifdef CONFIG_STACKTRACE
5126         "\t\t    stacktrace\n"
5127 #endif
5128 #ifdef CONFIG_TRACER_SNAPSHOT
5129         "\t\t    snapshot\n"
5130 #endif
5131 #ifdef CONFIG_HIST_TRIGGERS
5132         "\t\t    hist (see below)\n"
5133 #endif
5134         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5135         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5136         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5137         "\t                  events/block/block_unplug/trigger\n"
5138         "\t   The first disables tracing every time block_unplug is hit.\n"
5139         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5140         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5141         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5142         "\t   Like function triggers, the counter is only decremented if it\n"
5143         "\t    enabled or disabled tracing.\n"
5144         "\t   To remove a trigger without a count:\n"
5145         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5146         "\t   To remove a trigger with a count:\n"
5147         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5148         "\t   Filters can be ignored when removing a trigger.\n"
5149 #ifdef CONFIG_HIST_TRIGGERS
5150         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5151         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5152         "\t            [:values=<field1[,field2,...]>]\n"
5153         "\t            [:sort=<field1[,field2,...]>]\n"
5154         "\t            [:size=#entries]\n"
5155         "\t            [:pause][:continue][:clear]\n"
5156         "\t            [:name=histname1]\n"
5157         "\t            [:<handler>.<action>]\n"
5158         "\t            [if <filter>]\n\n"
5159         "\t    When a matching event is hit, an entry is added to a hash\n"
5160         "\t    table using the key(s) and value(s) named, and the value of a\n"
5161         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5162         "\t    correspond to fields in the event's format description.  Keys\n"
5163         "\t    can be any field, or the special string 'stacktrace'.\n"
5164         "\t    Compound keys consisting of up to two fields can be specified\n"
5165         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5166         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5167         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5168         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5169         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5170         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5171         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5172         "\t    its histogram data will be shared with other triggers of the\n"
5173         "\t    same name, and trigger hits will update this common data.\n\n"
5174         "\t    Reading the 'hist' file for the event will dump the hash\n"
5175         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5176         "\t    triggers attached to an event, there will be a table for each\n"
5177         "\t    trigger in the output.  The table displayed for a named\n"
5178         "\t    trigger will be the same as any other instance having the\n"
5179         "\t    same name.  The default format used to display a given field\n"
5180         "\t    can be modified by appending any of the following modifiers\n"
5181         "\t    to the field name, as applicable:\n\n"
5182         "\t            .hex        display a number as a hex value\n"
5183         "\t            .sym        display an address as a symbol\n"
5184         "\t            .sym-offset display an address as a symbol and offset\n"
5185         "\t            .execname   display a common_pid as a program name\n"
5186         "\t            .syscall    display a syscall id as a syscall name\n"
5187         "\t            .log2       display log2 value rather than raw number\n"
5188         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5189         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5190         "\t    trigger or to start a hist trigger but not log any events\n"
5191         "\t    until told to do so.  'continue' can be used to start or\n"
5192         "\t    restart a paused hist trigger.\n\n"
5193         "\t    The 'clear' parameter will clear the contents of a running\n"
5194         "\t    hist trigger and leave its current paused/active state\n"
5195         "\t    unchanged.\n\n"
5196         "\t    The enable_hist and disable_hist triggers can be used to\n"
5197         "\t    have one event conditionally start and stop another event's\n"
5198         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5199         "\t    the enable_event and disable_event triggers.\n\n"
5200         "\t    Hist trigger handlers and actions are executed whenever a\n"
5201         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5202         "\t        <handler>.<action>\n\n"
5203         "\t    The available handlers are:\n\n"
5204         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5205         "\t        onmax(var)               - invoke if var exceeds current max\n"
5206         "\t        onchange(var)            - invoke action if var changes\n\n"
5207         "\t    The available actions are:\n\n"
5208         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5209         "\t        save(field,...)                      - save current event fields\n"
5210 #ifdef CONFIG_TRACER_SNAPSHOT
5211         "\t        snapshot()                           - snapshot the trace buffer\n"
5212 #endif
5213 #endif
5214 ;
5215
5216 static ssize_t
5217 tracing_readme_read(struct file *filp, char __user *ubuf,
5218                        size_t cnt, loff_t *ppos)
5219 {
5220         return simple_read_from_buffer(ubuf, cnt, ppos,
5221                                         readme_msg, strlen(readme_msg));
5222 }
5223
5224 static const struct file_operations tracing_readme_fops = {
5225         .open           = tracing_open_generic,
5226         .read           = tracing_readme_read,
5227         .llseek         = generic_file_llseek,
5228 };
5229
5230 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5231 {
5232         int *ptr = v;
5233
5234         if (*pos || m->count)
5235                 ptr++;
5236
5237         (*pos)++;
5238
5239         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5240                 if (trace_find_tgid(*ptr))
5241                         return ptr;
5242         }
5243
5244         return NULL;
5245 }
5246
5247 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5248 {
5249         void *v;
5250         loff_t l = 0;
5251
5252         if (!tgid_map)
5253                 return NULL;
5254
5255         v = &tgid_map[0];
5256         while (l <= *pos) {
5257                 v = saved_tgids_next(m, v, &l);
5258                 if (!v)
5259                         return NULL;
5260         }
5261
5262         return v;
5263 }
5264
5265 static void saved_tgids_stop(struct seq_file *m, void *v)
5266 {
5267 }
5268
5269 static int saved_tgids_show(struct seq_file *m, void *v)
5270 {
5271         int pid = (int *)v - tgid_map;
5272
5273         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5274         return 0;
5275 }
5276
5277 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5278         .start          = saved_tgids_start,
5279         .stop           = saved_tgids_stop,
5280         .next           = saved_tgids_next,
5281         .show           = saved_tgids_show,
5282 };
5283
5284 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5285 {
5286         int ret;
5287
5288         ret = tracing_check_open_get_tr(NULL);
5289         if (ret)
5290                 return ret;
5291
5292         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5293 }
5294
5295
5296 static const struct file_operations tracing_saved_tgids_fops = {
5297         .open           = tracing_saved_tgids_open,
5298         .read           = seq_read,
5299         .llseek         = seq_lseek,
5300         .release        = seq_release,
5301 };
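
/*
 * Rough sketch of the resulting interface (values are illustrative and
 * assume the record-tgid option has populated tgid_map):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1 1
 *   527 527
 *   530 527
 *
 * i.e. one "<pid> <tgid>" pair per line, as printed by
 * saved_tgids_show().
 */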
5302
5303 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5304 {
5305         unsigned int *ptr = v;
5306
5307         if (*pos || m->count)
5308                 ptr++;
5309
5310         (*pos)++;
5311
5312         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5313              ptr++) {
5314                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5315                         continue;
5316
5317                 return ptr;
5318         }
5319
5320         return NULL;
5321 }
5322
5323 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5324 {
5325         void *v;
5326         loff_t l = 0;
5327
5328         preempt_disable();
5329         arch_spin_lock(&trace_cmdline_lock);
5330
5331         v = &savedcmd->map_cmdline_to_pid[0];
5332         while (l <= *pos) {
5333                 v = saved_cmdlines_next(m, v, &l);
5334                 if (!v)
5335                         return NULL;
5336         }
5337
5338         return v;
5339 }
5340
5341 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5342 {
5343         arch_spin_unlock(&trace_cmdline_lock);
5344         preempt_enable();
5345 }
5346
5347 static int saved_cmdlines_show(struct seq_file *m, void *v)
5348 {
5349         char buf[TASK_COMM_LEN];
5350         unsigned int *pid = v;
5351
5352         __trace_find_cmdline(*pid, buf);
5353         seq_printf(m, "%d %s\n", *pid, buf);
5354         return 0;
5355 }
5356
5357 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5358         .start          = saved_cmdlines_start,
5359         .next           = saved_cmdlines_next,
5360         .stop           = saved_cmdlines_stop,
5361         .show           = saved_cmdlines_show,
5362 };
5363
5364 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5365 {
5366         int ret;
5367
5368         ret = tracing_check_open_get_tr(NULL);
5369         if (ret)
5370                 return ret;
5371
5372         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5373 }
5374
5375 static const struct file_operations tracing_saved_cmdlines_fops = {
5376         .open           = tracing_saved_cmdlines_open,
5377         .read           = seq_read,
5378         .llseek         = seq_lseek,
5379         .release        = seq_release,
5380 };
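
/*
 * Example output (values are illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   815 sshd
 *   1021 bash
 *
 * Each line is "<pid> <comm>" as printed by saved_cmdlines_show().
 */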
5381
5382 static ssize_t
5383 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5384                                  size_t cnt, loff_t *ppos)
5385 {
5386         char buf[64];
5387         int r;
5388
5389         arch_spin_lock(&trace_cmdline_lock);
5390         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5391         arch_spin_unlock(&trace_cmdline_lock);
5392
5393         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5394 }
5395
5396 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5397 {
5398         kfree(s->saved_cmdlines);
5399         kfree(s->map_cmdline_to_pid);
5400         kfree(s);
5401 }
5402
5403 static int tracing_resize_saved_cmdlines(unsigned int val)
5404 {
5405         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5406
5407         s = kmalloc(sizeof(*s), GFP_KERNEL);
5408         if (!s)
5409                 return -ENOMEM;
5410
5411         if (allocate_cmdlines_buffer(val, s) < 0) {
5412                 kfree(s);
5413                 return -ENOMEM;
5414         }
5415
5416         arch_spin_lock(&trace_cmdline_lock);
5417         savedcmd_temp = savedcmd;
5418         savedcmd = s;
5419         arch_spin_unlock(&trace_cmdline_lock);
5420         free_saved_cmdlines_buffer(savedcmd_temp);
5421
5422         return 0;
5423 }
5424
5425 static ssize_t
5426 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5427                                   size_t cnt, loff_t *ppos)
5428 {
5429         unsigned long val;
5430         int ret;
5431
5432         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5433         if (ret)
5434                 return ret;
5435
5436         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5437         if (!val || val > PID_MAX_DEFAULT)
5438                 return -EINVAL;
5439
5440         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5441         if (ret < 0)
5442                 return ret;
5443
5444         *ppos += cnt;
5445
5446         return cnt;
5447 }
5448
5449 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5450         .open           = tracing_open_generic,
5451         .read           = tracing_saved_cmdlines_size_read,
5452         .write          = tracing_saved_cmdlines_size_write,
5453 };
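
/*
 * The size file above can be used, for instance, to grow the cmdline
 * cache (the value must be between 1 and PID_MAX_DEFAULT):
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   1024
 */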
5454
5455 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5456 static union trace_eval_map_item *
5457 update_eval_map(union trace_eval_map_item *ptr)
5458 {
5459         if (!ptr->map.eval_string) {
5460                 if (ptr->tail.next) {
5461                         ptr = ptr->tail.next;
5462                         /* Set ptr to the next real item (skip head) */
5463                         ptr++;
5464                 } else
5465                         return NULL;
5466         }
5467         return ptr;
5468 }
5469
5470 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5471 {
5472         union trace_eval_map_item *ptr = v;
5473
5474         /*
5475          * Paranoid! If ptr points to end, we don't want to increment past it.
5476          * This really should never happen.
5477          */
5478         (*pos)++;
5479         ptr = update_eval_map(ptr);
5480         if (WARN_ON_ONCE(!ptr))
5481                 return NULL;
5482
5483         ptr++;
5484         ptr = update_eval_map(ptr);
5485
5486         return ptr;
5487 }
5488
5489 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5490 {
5491         union trace_eval_map_item *v;
5492         loff_t l = 0;
5493
5494         mutex_lock(&trace_eval_mutex);
5495
5496         v = trace_eval_maps;
5497         if (v)
5498                 v++;
5499
5500         while (v && l < *pos) {
5501                 v = eval_map_next(m, v, &l);
5502         }
5503
5504         return v;
5505 }
5506
5507 static void eval_map_stop(struct seq_file *m, void *v)
5508 {
5509         mutex_unlock(&trace_eval_mutex);
5510 }
5511
5512 static int eval_map_show(struct seq_file *m, void *v)
5513 {
5514         union trace_eval_map_item *ptr = v;
5515
5516         seq_printf(m, "%s %ld (%s)\n",
5517                    ptr->map.eval_string, ptr->map.eval_value,
5518                    ptr->map.system);
5519
5520         return 0;
5521 }
5522
5523 static const struct seq_operations tracing_eval_map_seq_ops = {
5524         .start          = eval_map_start,
5525         .next           = eval_map_next,
5526         .stop           = eval_map_stop,
5527         .show           = eval_map_show,
5528 };
5529
5530 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5531 {
5532         int ret;
5533
5534         ret = tracing_check_open_get_tr(NULL);
5535         if (ret)
5536                 return ret;
5537
5538         return seq_open(filp, &tracing_eval_map_seq_ops);
5539 }
5540
5541 static const struct file_operations tracing_eval_map_fops = {
5542         .open           = tracing_eval_map_open,
5543         .read           = seq_read,
5544         .llseek         = seq_lseek,
5545         .release        = seq_release,
5546 };
5547
5548 static inline union trace_eval_map_item *
5549 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5550 {
5551         /* Return tail of array given the head */
5552         return ptr + ptr->head.length + 1;
5553 }
5554
5555 static void
5556 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5557                            int len)
5558 {
5559         struct trace_eval_map **stop;
5560         struct trace_eval_map **map;
5561         union trace_eval_map_item *map_array;
5562         union trace_eval_map_item *ptr;
5563
5564         stop = start + len;
5565
5566         /*
5567          * The trace_eval_maps contains the map plus a head and tail item,
5568          * where the head holds the module and the length of the array, and the
5569          * tail holds a pointer to the next list.
5570          */
5571         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5572         if (!map_array) {
5573                 pr_warn("Unable to allocate trace eval mapping\n");
5574                 return;
5575         }
5576
5577         mutex_lock(&trace_eval_mutex);
5578
5579         if (!trace_eval_maps)
5580                 trace_eval_maps = map_array;
5581         else {
5582                 ptr = trace_eval_maps;
5583                 for (;;) {
5584                         ptr = trace_eval_jmp_to_tail(ptr);
5585                         if (!ptr->tail.next)
5586                                 break;
5587                         ptr = ptr->tail.next;
5588
5589                 }
5590                 ptr->tail.next = map_array;
5591         }
5592         map_array->head.mod = mod;
5593         map_array->head.length = len;
5594         map_array++;
5595
5596         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5597                 map_array->map = **map;
5598                 map_array++;
5599         }
5600         memset(map_array, 0, sizeof(*map_array));
5601
5602         mutex_unlock(&trace_eval_mutex);
5603 }
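
/*
 * The array built above can be pictured as follows (len == 3 shown for
 * illustration):
 *
 *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() skips from the head over 'length' map
 * entries to reach the tail, whose ->tail.next chains in the next
 * module's array.
 */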
5604
5605 static void trace_create_eval_file(struct dentry *d_tracer)
5606 {
5607         trace_create_file("eval_map", 0444, d_tracer,
5608                           NULL, &tracing_eval_map_fops);
5609 }
5610
5611 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5612 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5613 static inline void trace_insert_eval_map_file(struct module *mod,
5614                               struct trace_eval_map **start, int len) { }
5615 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5616
5617 static void trace_insert_eval_map(struct module *mod,
5618                                   struct trace_eval_map **start, int len)
5619 {
5620         struct trace_eval_map **map;
5621
5622         if (len <= 0)
5623                 return;
5624
5625         map = start;
5626
5627         trace_event_eval_update(map, len);
5628
5629         trace_insert_eval_map_file(mod, start, len);
5630 }
5631
5632 static ssize_t
5633 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5634                        size_t cnt, loff_t *ppos)
5635 {
5636         struct trace_array *tr = filp->private_data;
5637         char buf[MAX_TRACER_SIZE+2];
5638         int r;
5639
5640         mutex_lock(&trace_types_lock);
5641         r = sprintf(buf, "%s\n", tr->current_trace->name);
5642         mutex_unlock(&trace_types_lock);
5643
5644         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5645 }
5646
5647 int tracer_init(struct tracer *t, struct trace_array *tr)
5648 {
5649         tracing_reset_online_cpus(&tr->array_buffer);
5650         return t->init(tr);
5651 }
5652
5653 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5654 {
5655         int cpu;
5656
5657         for_each_tracing_cpu(cpu)
5658                 per_cpu_ptr(buf->data, cpu)->entries = val;
5659 }
5660
5661 #ifdef CONFIG_TRACER_MAX_TRACE
5662 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5663 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5664                                         struct array_buffer *size_buf, int cpu_id)
5665 {
5666         int cpu, ret = 0;
5667
5668         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5669                 for_each_tracing_cpu(cpu) {
5670                         ret = ring_buffer_resize(trace_buf->buffer,
5671                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5672                         if (ret < 0)
5673                                 break;
5674                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5675                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5676                 }
5677         } else {
5678                 ret = ring_buffer_resize(trace_buf->buffer,
5679                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5680                 if (ret == 0)
5681                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5682                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5683         }
5684
5685         return ret;
5686 }
5687 #endif /* CONFIG_TRACER_MAX_TRACE */
5688
5689 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5690                                         unsigned long size, int cpu)
5691 {
5692         int ret;
5693
5694         /*
5695          * If the kernel or the user changes the size of the ring buffer,
5696          * we use the size that was given and can forget about expanding
5697          * it later.
5698          */
5699         ring_buffer_expanded = true;
5700
5701         /* May be called before buffers are initialized */
5702         if (!tr->array_buffer.buffer)
5703                 return 0;
5704
5705         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5706         if (ret < 0)
5707                 return ret;
5708
5709 #ifdef CONFIG_TRACER_MAX_TRACE
5710         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5711             !tr->current_trace->use_max_tr)
5712                 goto out;
5713
5714         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5715         if (ret < 0) {
5716                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5717                                                      &tr->array_buffer, cpu);
5718                 if (r < 0) {
5719                         /*
5720                          * AARGH! We are left with different
5721                          * size max buffer!!!!
5722                          * The max buffer is our "snapshot" buffer.
5723                          * When a tracer needs a snapshot (one of the
5724                          * latency tracers), it swaps the max buffer
5725                          * with the saved snapshot. We succeeded in
5726                          * updating the size of the main buffer, but failed to
5727                          * update the size of the max buffer. But when we tried
5728                          * to reset the main buffer to the original size, we
5729                          * failed there too. This is very unlikely to
5730                          * happen, but if it does, warn and kill all
5731                          * tracing.
5732                          */
5733                         WARN_ON(1);
5734                         tracing_disabled = 1;
5735                 }
5736                 return ret;
5737         }
5738
5739         if (cpu == RING_BUFFER_ALL_CPUS)
5740                 set_buffer_entries(&tr->max_buffer, size);
5741         else
5742                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5743
5744  out:
5745 #endif /* CONFIG_TRACER_MAX_TRACE */
5746
5747         if (cpu == RING_BUFFER_ALL_CPUS)
5748                 set_buffer_entries(&tr->array_buffer, size);
5749         else
5750                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5751
5752         return ret;
5753 }
5754
5755 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5756                                   unsigned long size, int cpu_id)
5757 {
5758         int ret = size;
5759
5760         mutex_lock(&trace_types_lock);
5761
5762         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5763                 /* make sure this CPU is enabled in the mask */
5764                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5765                         ret = -EINVAL;
5766                         goto out;
5767                 }
5768         }
5769
5770         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5771         if (ret < 0)
5772                 ret = -ENOMEM;
5773
5774 out:
5775         mutex_unlock(&trace_types_lock);
5776
5777         return ret;
5778 }
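
/*
 * From user space this resize path is reached through the
 * buffer_size_kb files, e.g. (assuming the usual tracefs mount point):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * corresponding to cpu_id == RING_BUFFER_ALL_CPUS and cpu_id == 1
 * respectively.
 */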
5779
5780
5781 /**
5782  * tracing_update_buffers - used by tracing facility to expand ring buffers
5783  *
5784  * To save memory when tracing is never used on a system that has it
5785  * configured in, the ring buffers are set to a minimum size. Once a
5786  * user starts to use the tracing facility, they need to grow to their
5787  * default size.
5788  *
5789  * This function is to be called when a tracer is about to be used.
5790  */
5791 int tracing_update_buffers(void)
5792 {
5793         int ret = 0;
5794
5795         mutex_lock(&trace_types_lock);
5796         if (!ring_buffer_expanded)
5797                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5798                                                 RING_BUFFER_ALL_CPUS);
5799         mutex_unlock(&trace_types_lock);
5800
5801         return ret;
5802 }
5803
5804 struct trace_option_dentry;
5805
5806 static void
5807 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5808
5809 /*
5810  * Used to clear out the tracer before deletion of an instance.
5811  * Must have trace_types_lock held.
5812  */
5813 static void tracing_set_nop(struct trace_array *tr)
5814 {
5815         if (tr->current_trace == &nop_trace)
5816                 return;
5817
5818         tr->current_trace->enabled--;
5819
5820         if (tr->current_trace->reset)
5821                 tr->current_trace->reset(tr);
5822
5823         tr->current_trace = &nop_trace;
5824 }
5825
5826 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5827 {
5828         /* Only enable if the directory has been created already. */
5829         if (!tr->dir)
5830                 return;
5831
5832         create_trace_option_files(tr, t);
5833 }
5834
5835 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5836 {
5837         struct tracer *t;
5838 #ifdef CONFIG_TRACER_MAX_TRACE
5839         bool had_max_tr;
5840 #endif
5841         int ret = 0;
5842
5843         mutex_lock(&trace_types_lock);
5844
5845         if (!ring_buffer_expanded) {
5846                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5847                                                 RING_BUFFER_ALL_CPUS);
5848                 if (ret < 0)
5849                         goto out;
5850                 ret = 0;
5851         }
5852
5853         for (t = trace_types; t; t = t->next) {
5854                 if (strcmp(t->name, buf) == 0)
5855                         break;
5856         }
5857         if (!t) {
5858                 ret = -EINVAL;
5859                 goto out;
5860         }
5861         if (t == tr->current_trace)
5862                 goto out;
5863
5864 #ifdef CONFIG_TRACER_SNAPSHOT
5865         if (t->use_max_tr) {
5866                 arch_spin_lock(&tr->max_lock);
5867                 if (tr->cond_snapshot)
5868                         ret = -EBUSY;
5869                 arch_spin_unlock(&tr->max_lock);
5870                 if (ret)
5871                         goto out;
5872         }
5873 #endif
5874         /* Some tracers won't work on kernel command line */
5875         if (system_state < SYSTEM_RUNNING && t->noboot) {
5876                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5877                         t->name);
5878                 goto out;
5879         }
5880
5881         /* Some tracers are only allowed for the top level buffer */
5882         if (!trace_ok_for_array(t, tr)) {
5883                 ret = -EINVAL;
5884                 goto out;
5885         }
5886
5887         /* If trace pipe files are being read, we can't change the tracer */
5888         if (tr->current_trace->ref) {
5889                 ret = -EBUSY;
5890                 goto out;
5891         }
5892
5893         trace_branch_disable();
5894
5895         tr->current_trace->enabled--;
5896
5897         if (tr->current_trace->reset)
5898                 tr->current_trace->reset(tr);
5899
5900         /* Current trace needs to be nop_trace before synchronize_rcu */
5901         tr->current_trace = &nop_trace;
5902
5903 #ifdef CONFIG_TRACER_MAX_TRACE
5904         had_max_tr = tr->allocated_snapshot;
5905
5906         if (had_max_tr && !t->use_max_tr) {
5907                 /*
5908                  * We need to make sure that the update_max_tr sees that
5909                  * current_trace changed to nop_trace to keep it from
5910                  * swapping the buffers after we resize it.
5911                  * The update_max_tr is called with interrupts disabled,
5912                  * so a synchronize_rcu() is sufficient.
5913                  */
5914                 synchronize_rcu();
5915                 free_snapshot(tr);
5916         }
5917 #endif
5918
5919 #ifdef CONFIG_TRACER_MAX_TRACE
5920         if (t->use_max_tr && !had_max_tr) {
5921                 ret = tracing_alloc_snapshot_instance(tr);
5922                 if (ret < 0)
5923                         goto out;
5924         }
5925 #endif
5926
5927         if (t->init) {
5928                 ret = tracer_init(t, tr);
5929                 if (ret)
5930                         goto out;
5931         }
5932
5933         tr->current_trace = t;
5934         tr->current_trace->enabled++;
5935         trace_branch_enable(tr);
5936  out:
5937         mutex_unlock(&trace_types_lock);
5938
5939         return ret;
5940 }
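
/*
 * For example (assuming the standard tracefs mount point):
 *
 *   echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * walks the trace_types list above, allocates the snapshot buffer if
 * the new tracer sets ->use_max_tr (the latency tracers do), and
 * finally calls the tracer's ->init() via tracer_init().
 */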
5941
5942 static ssize_t
5943 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5944                         size_t cnt, loff_t *ppos)
5945 {
5946         struct trace_array *tr = filp->private_data;
5947         char buf[MAX_TRACER_SIZE+1];
5948         int i;
5949         size_t ret;
5950         int err;
5951
5952         ret = cnt;
5953
5954         if (cnt > MAX_TRACER_SIZE)
5955                 cnt = MAX_TRACER_SIZE;
5956
5957         if (copy_from_user(buf, ubuf, cnt))
5958                 return -EFAULT;
5959
5960         buf[cnt] = 0;
5961
5962         /* strip trailing whitespace. */
5963         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5964                 buf[i] = 0;
5965
5966         err = tracing_set_tracer(tr, buf);
5967         if (err)
5968                 return err;
5969
5970         *ppos += ret;
5971
5972         return ret;
5973 }
5974
5975 static ssize_t
5976 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5977                    size_t cnt, loff_t *ppos)
5978 {
5979         char buf[64];
5980         int r;
5981
5982         r = snprintf(buf, sizeof(buf), "%ld\n",
5983                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5984         if (r > sizeof(buf))
5985                 r = sizeof(buf);
5986         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5987 }
5988
5989 static ssize_t
5990 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5991                     size_t cnt, loff_t *ppos)
5992 {
5993         unsigned long val;
5994         int ret;
5995
5996         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5997         if (ret)
5998                 return ret;
5999
6000         *ptr = val * 1000;
6001
6002         return cnt;
6003 }
6004
6005 static ssize_t
6006 tracing_thresh_read(struct file *filp, char __user *ubuf,
6007                     size_t cnt, loff_t *ppos)
6008 {
6009         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6010 }
6011
6012 static ssize_t
6013 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6014                      size_t cnt, loff_t *ppos)
6015 {
6016         struct trace_array *tr = filp->private_data;
6017         int ret;
6018
6019         mutex_lock(&trace_types_lock);
6020         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6021         if (ret < 0)
6022                 goto out;
6023
6024         if (tr->current_trace->update_thresh) {
6025                 ret = tr->current_trace->update_thresh(tr);
6026                 if (ret < 0)
6027                         goto out;
6028         }
6029
6030         ret = cnt;
6031 out:
6032         mutex_unlock(&trace_types_lock);
6033
6034         return ret;
6035 }
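
/*
 * tracing_thresh is written in microseconds and stored in nanoseconds
 * by tracing_nsecs_write(), so for example:
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * stores 100000 ns and gives the current tracer a chance to react via
 * ->update_thresh(), if it implements it.
 */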
6036
6037 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6038
6039 static ssize_t
6040 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6041                      size_t cnt, loff_t *ppos)
6042 {
6043         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6044 }
6045
6046 static ssize_t
6047 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6048                       size_t cnt, loff_t *ppos)
6049 {
6050         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6051 }
6052
6053 #endif
6054
6055 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6056 {
6057         struct trace_array *tr = inode->i_private;
6058         struct trace_iterator *iter;
6059         int ret;
6060
6061         ret = tracing_check_open_get_tr(tr);
6062         if (ret)
6063                 return ret;
6064
6065         mutex_lock(&trace_types_lock);
6066
6067         /* create a buffer to store the information to pass to userspace */
6068         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6069         if (!iter) {
6070                 ret = -ENOMEM;
6071                 __trace_array_put(tr);
6072                 goto out;
6073         }
6074
6075         trace_seq_init(&iter->seq);
6076         iter->trace = tr->current_trace;
6077
6078         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6079                 ret = -ENOMEM;
6080                 goto fail;
6081         }
6082
6083         /* trace pipe does not show start of buffer */
6084         cpumask_setall(iter->started);
6085
6086         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6087                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6088
6089         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6090         if (trace_clocks[tr->clock_id].in_ns)
6091                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6092
6093         iter->tr = tr;
6094         iter->array_buffer = &tr->array_buffer;
6095         iter->cpu_file = tracing_get_cpu(inode);
6096         mutex_init(&iter->mutex);
6097         filp->private_data = iter;
6098
6099         if (iter->trace->pipe_open)
6100                 iter->trace->pipe_open(iter);
6101
6102         nonseekable_open(inode, filp);
6103
6104         tr->current_trace->ref++;
6105 out:
6106         mutex_unlock(&trace_types_lock);
6107         return ret;
6108
6109 fail:
6110         kfree(iter);
6111         __trace_array_put(tr);
6112         mutex_unlock(&trace_types_lock);
6113         return ret;
6114 }
6115
6116 static int tracing_release_pipe(struct inode *inode, struct file *file)
6117 {
6118         struct trace_iterator *iter = file->private_data;
6119         struct trace_array *tr = inode->i_private;
6120
6121         mutex_lock(&trace_types_lock);
6122
6123         tr->current_trace->ref--;
6124
6125         if (iter->trace->pipe_close)
6126                 iter->trace->pipe_close(iter);
6127
6128         mutex_unlock(&trace_types_lock);
6129
6130         free_cpumask_var(iter->started);
6131         mutex_destroy(&iter->mutex);
6132         kfree(iter);
6133
6134         trace_array_put(tr);
6135
6136         return 0;
6137 }
6138
6139 static __poll_t
6140 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6141 {
6142         struct trace_array *tr = iter->tr;
6143
6144         /* Iterators are static, they should be filled or empty */
6145         if (trace_buffer_iter(iter, iter->cpu_file))
6146                 return EPOLLIN | EPOLLRDNORM;
6147
6148         if (tr->trace_flags & TRACE_ITER_BLOCK)
6149                 /*
6150                  * Always select as readable when in blocking mode
6151                  */
6152                 return EPOLLIN | EPOLLRDNORM;
6153         else
6154                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6155                                              filp, poll_table);
6156 }
6157
6158 static __poll_t
6159 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6160 {
6161         struct trace_iterator *iter = filp->private_data;
6162
6163         return trace_poll(iter, filp, poll_table);
6164 }
6165
6166 /* Must be called with iter->mutex held. */
6167 static int tracing_wait_pipe(struct file *filp)
6168 {
6169         struct trace_iterator *iter = filp->private_data;
6170         int ret;
6171
6172         while (trace_empty(iter)) {
6173
6174                 if ((filp->f_flags & O_NONBLOCK)) {
6175                         return -EAGAIN;
6176                 }
6177
6178                 /*
6179                  * We keep blocking until we have read something and tracing
6180                  * has been disabled. If tracing is disabled but nothing has
6181                  * been read yet, we still block; this allows a user to cat
6182                  * this file and then enable tracing. But after we have read
6183                  * something, we give an EOF when tracing is disabled again.
6184                  *
6185                  * iter->pos will be 0 if we haven't read anything.
6186                  */
6187                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6188                         break;
6189
6190                 mutex_unlock(&iter->mutex);
6191
6192                 ret = wait_on_pipe(iter, 0);
6193
6194                 mutex_lock(&iter->mutex);
6195
6196                 if (ret)
6197                         return ret;
6198         }
6199
6200         return 1;
6201 }
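
/*
 * In practice this is the behaviour seen with a consuming read such as
 *
 *   cat /sys/kernel/tracing/trace_pipe
 *
 * which blocks while the buffer is empty (or returns -EAGAIN if the
 * file was opened O_NONBLOCK) and gives an EOF only after something
 * has been read and tracing has been disabled again.
 */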
6202
6203 /*
6204  * Consumer reader.
6205  */
6206 static ssize_t
6207 tracing_read_pipe(struct file *filp, char __user *ubuf,
6208                   size_t cnt, loff_t *ppos)
6209 {
6210         struct trace_iterator *iter = filp->private_data;
6211         ssize_t sret;
6212
6213         /*
6214          * Avoid more than one consumer on a single file descriptor.
6215          * This is just a matter of trace coherency; the ring buffer itself
6216          * is protected.
6217          */
6218         mutex_lock(&iter->mutex);
6219
6220         /* return any leftover data */
6221         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6222         if (sret != -EBUSY)
6223                 goto out;
6224
6225         trace_seq_init(&iter->seq);
6226
6227         if (iter->trace->read) {
6228                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6229                 if (sret)
6230                         goto out;
6231         }
6232
6233 waitagain:
6234         sret = tracing_wait_pipe(filp);
6235         if (sret <= 0)
6236                 goto out;
6237
6238         /* stop when tracing is finished */
6239         if (trace_empty(iter)) {
6240                 sret = 0;
6241                 goto out;
6242         }
6243
6244         if (cnt >= PAGE_SIZE)
6245                 cnt = PAGE_SIZE - 1;
6246
6247         /* reset all but tr, trace, and overruns */
6248         memset(&iter->seq, 0,
6249                sizeof(struct trace_iterator) -
6250                offsetof(struct trace_iterator, seq));
6251         cpumask_clear(iter->started);
6252         trace_seq_init(&iter->seq);
6253         iter->pos = -1;
6254
6255         trace_event_read_lock();
6256         trace_access_lock(iter->cpu_file);
6257         while (trace_find_next_entry_inc(iter) != NULL) {
6258                 enum print_line_t ret;
6259                 int save_len = iter->seq.seq.len;
6260
6261                 ret = print_trace_line(iter);
6262                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6263                         /* don't print partial lines */
6264                         iter->seq.seq.len = save_len;
6265                         break;
6266                 }
6267                 if (ret != TRACE_TYPE_NO_CONSUME)
6268                         trace_consume(iter);
6269
6270                 if (trace_seq_used(&iter->seq) >= cnt)
6271                         break;
6272
6273                 /*
6274                  * Setting the full flag means we reached the trace_seq buffer
6275                  * size and we should have left via the partial-line check above.
6276                  * One of the trace_seq_* functions is not used properly.
6277                  */
6278                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6279                           iter->ent->type);
6280         }
6281         trace_access_unlock(iter->cpu_file);
6282         trace_event_read_unlock();
6283
6284         /* Now copy what we have to the user */
6285         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6286         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6287                 trace_seq_init(&iter->seq);
6288
6289         /*
6290          * If there was nothing to send to user, in spite of consuming trace
6291          * entries, go back to wait for more entries.
6292          */
6293         if (sret == -EBUSY)
6294                 goto waitagain;
6295
6296 out:
6297         mutex_unlock(&iter->mutex);
6298
6299         return sret;
6300 }
6301
6302 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6303                                      unsigned int idx)
6304 {
6305         __free_page(spd->pages[idx]);
6306 }
6307
6308 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6309         .confirm                = generic_pipe_buf_confirm,
6310         .release                = generic_pipe_buf_release,
6311         .steal                  = generic_pipe_buf_steal,
6312         .get                    = generic_pipe_buf_get,
6313 };
6314
6315 static size_t
6316 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6317 {
6318         size_t count;
6319         int save_len;
6320         int ret;
6321
6322         /* Seq buffer is page-sized, exactly what we need. */
6323         for (;;) {
6324                 save_len = iter->seq.seq.len;
6325                 ret = print_trace_line(iter);
6326
6327                 if (trace_seq_has_overflowed(&iter->seq)) {
6328                         iter->seq.seq.len = save_len;
6329                         break;
6330                 }
6331
6332                 /*
6333                  * This should not be hit, because it should only
6334                  * be set if the iter->seq overflowed. But check it
6335                  * anyway to be safe.
6336                  */
6337                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6338                         iter->seq.seq.len = save_len;
6339                         break;
6340                 }
6341
6342                 count = trace_seq_used(&iter->seq) - save_len;
6343                 if (rem < count) {
6344                         rem = 0;
6345                         iter->seq.seq.len = save_len;
6346                         break;
6347                 }
6348
6349                 if (ret != TRACE_TYPE_NO_CONSUME)
6350                         trace_consume(iter);
6351                 rem -= count;
6352                 if (!trace_find_next_entry_inc(iter))   {
6353                         rem = 0;
6354                         iter->ent = NULL;
6355                         break;
6356                 }
6357         }
6358
6359         return rem;
6360 }
6361
6362 static ssize_t tracing_splice_read_pipe(struct file *filp,
6363                                         loff_t *ppos,
6364                                         struct pipe_inode_info *pipe,
6365                                         size_t len,
6366                                         unsigned int flags)
6367 {
6368         struct page *pages_def[PIPE_DEF_BUFFERS];
6369         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6370         struct trace_iterator *iter = filp->private_data;
6371         struct splice_pipe_desc spd = {
6372                 .pages          = pages_def,
6373                 .partial        = partial_def,
6374                 .nr_pages       = 0, /* This gets updated below. */
6375                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6376                 .ops            = &tracing_pipe_buf_ops,
6377                 .spd_release    = tracing_spd_release_pipe,
6378         };
6379         ssize_t ret;
6380         size_t rem;
6381         unsigned int i;
6382
6383         if (splice_grow_spd(pipe, &spd))
6384                 return -ENOMEM;
6385
6386         mutex_lock(&iter->mutex);
6387
6388         if (iter->trace->splice_read) {
6389                 ret = iter->trace->splice_read(iter, filp,
6390                                                ppos, pipe, len, flags);
6391                 if (ret)
6392                         goto out_err;
6393         }
6394
6395         ret = tracing_wait_pipe(filp);
6396         if (ret <= 0)
6397                 goto out_err;
6398
6399         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6400                 ret = -EFAULT;
6401                 goto out_err;
6402         }
6403
6404         trace_event_read_lock();
6405         trace_access_lock(iter->cpu_file);
6406
6407         /* Fill as many pages as possible. */
6408         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6409                 spd.pages[i] = alloc_page(GFP_KERNEL);
6410                 if (!spd.pages[i])
6411                         break;
6412
6413                 rem = tracing_fill_pipe_page(rem, iter);
6414
6415                 /* Copy the data into the page, so we can start over. */
6416                 ret = trace_seq_to_buffer(&iter->seq,
6417                                           page_address(spd.pages[i]),
6418                                           trace_seq_used(&iter->seq));
6419                 if (ret < 0) {
6420                         __free_page(spd.pages[i]);
6421                         break;
6422                 }
6423                 spd.partial[i].offset = 0;
6424                 spd.partial[i].len = trace_seq_used(&iter->seq);
6425
6426                 trace_seq_init(&iter->seq);
6427         }
6428
6429         trace_access_unlock(iter->cpu_file);
6430         trace_event_read_unlock();
6431         mutex_unlock(&iter->mutex);
6432
6433         spd.nr_pages = i;
6434
6435         if (i)
6436                 ret = splice_to_pipe(pipe, &spd);
6437         else
6438                 ret = 0;
6439 out:
6440         splice_shrink_spd(&spd);
6441         return ret;
6442
6443 out_err:
6444         mutex_unlock(&iter->mutex);
6445         goto out;
6446 }
6447
6448 static ssize_t
6449 tracing_entries_read(struct file *filp, char __user *ubuf,
6450                      size_t cnt, loff_t *ppos)
6451 {
6452         struct inode *inode = file_inode(filp);
6453         struct trace_array *tr = inode->i_private;
6454         int cpu = tracing_get_cpu(inode);
6455         char buf[64];
6456         int r = 0;
6457         ssize_t ret;
6458
6459         mutex_lock(&trace_types_lock);
6460
6461         if (cpu == RING_BUFFER_ALL_CPUS) {
6462                 int cpu, buf_size_same;
6463                 unsigned long size;
6464
6465                 size = 0;
6466                 buf_size_same = 1;
6467                 /* check if all CPU buffer sizes are the same */
6468                 for_each_tracing_cpu(cpu) {
6469                         /* fill in the size from the first enabled CPU */
6470                         if (size == 0)
6471                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6472                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6473                                 buf_size_same = 0;
6474                                 break;
6475                         }
6476                 }
6477
6478                 if (buf_size_same) {
6479                         if (!ring_buffer_expanded)
6480                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6481                                             size >> 10,
6482                                             trace_buf_size >> 10);
6483                         else
6484                                 r = sprintf(buf, "%lu\n", size >> 10);
6485                 } else
6486                         r = sprintf(buf, "X\n");
6487         } else
6488                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6489
6490         mutex_unlock(&trace_types_lock);
6491
6492         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6493         return ret;
6494 }
6495
6496 static ssize_t
6497 tracing_entries_write(struct file *filp, const char __user *ubuf,
6498                       size_t cnt, loff_t *ppos)
6499 {
6500         struct inode *inode = file_inode(filp);
6501         struct trace_array *tr = inode->i_private;
6502         unsigned long val;
6503         int ret;
6504
6505         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6506         if (ret)
6507                 return ret;
6508
6509         /* must have at least 1 entry */
6510         if (!val)
6511                 return -EINVAL;
6512
6513         /* value is in KB */
6514         val <<= 10;
6515         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6516         if (ret < 0)
6517                 return ret;
6518
6519         *ppos += cnt;
6520
6521         return cnt;
6522 }
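
/*
 * Illustrative usage sketch (not part of the original file): the handler
 * above backs the "buffer_size_kb" tracefs file (global and per_cpu/cpuN
 * variants), so the ring buffer is resized from user space with a plain
 * write of a kilobyte count, e.g.:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	echo 512  > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * The mount point may also be /sys/kernel/debug/tracing depending on how
 * tracefs is mounted; the paths here are examples.
 */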
6523
6524 static ssize_t
6525 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6526                                 size_t cnt, loff_t *ppos)
6527 {
6528         struct trace_array *tr = filp->private_data;
6529         char buf[64];
6530         int r, cpu;
6531         unsigned long size = 0, expanded_size = 0;
6532
6533         mutex_lock(&trace_types_lock);
6534         for_each_tracing_cpu(cpu) {
6535                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6536                 if (!ring_buffer_expanded)
6537                         expanded_size += trace_buf_size >> 10;
6538         }
6539         if (ring_buffer_expanded)
6540                 r = sprintf(buf, "%lu\n", size);
6541         else
6542                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6543         mutex_unlock(&trace_types_lock);
6544
6545         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6546 }
6547
6548 static ssize_t
6549 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6550                           size_t cnt, loff_t *ppos)
6551 {
6552         /*
6553          * There is no need to read what the user has written; this function
6554          * exists only so that using "echo" on this file does not return an error.
6555          */
6556
6557         *ppos += cnt;
6558
6559         return cnt;
6560 }
6561
6562 static int
6563 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6564 {
6565         struct trace_array *tr = inode->i_private;
6566
6567         /* disable tracing? */
6568         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6569                 tracer_tracing_off(tr);
6570         /* resize the ring buffer to 0 */
6571         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6572
6573         trace_array_put(tr);
6574
6575         return 0;
6576 }
6577
6578 static ssize_t
6579 tracing_mark_write(struct file *filp, const char __user *ubuf,
6580                                         size_t cnt, loff_t *fpos)
6581 {
6582         struct trace_array *tr = filp->private_data;
6583         struct ring_buffer_event *event;
6584         enum event_trigger_type tt = ETT_NONE;
6585         struct trace_buffer *buffer;
6586         struct print_entry *entry;
6587         unsigned long irq_flags;
6588         ssize_t written;
6589         int size;
6590         int len;
6591
6592 /* Used in tracing_mark_raw_write() as well */
6593 #define FAULTED_STR "<faulted>"
6594 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6595
6596         if (tracing_disabled)
6597                 return -EINVAL;
6598
6599         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6600                 return -EINVAL;
6601
6602         if (cnt > TRACE_BUF_SIZE)
6603                 cnt = TRACE_BUF_SIZE;
6604
6605         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6606
6607         local_save_flags(irq_flags);
6608         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6609
6610         /* If less than "<faulted>", then make sure we can still add that */
6611         if (cnt < FAULTED_SIZE)
6612                 size += FAULTED_SIZE - cnt;
6613
6614         buffer = tr->array_buffer.buffer;
6615         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6616                                             irq_flags, preempt_count());
6617         if (unlikely(!event))
6618                 /* Ring buffer disabled, return as if not open for write */
6619                 return -EBADF;
6620
6621         entry = ring_buffer_event_data(event);
6622         entry->ip = _THIS_IP_;
6623
6624         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6625         if (len) {
6626                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6627                 cnt = FAULTED_SIZE;
6628                 written = -EFAULT;
6629         } else
6630                 written = cnt;
6631         len = cnt;
6632
6633         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6634                 /* do not add \n before testing triggers, but add \0 */
6635                 entry->buf[cnt] = '\0';
6636                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6637         }
6638
6639         if (entry->buf[cnt - 1] != '\n') {
6640                 entry->buf[cnt] = '\n';
6641                 entry->buf[cnt + 1] = '\0';
6642         } else
6643                 entry->buf[cnt] = '\0';
6644
6645         __buffer_unlock_commit(buffer, event);
6646
6647         if (tt)
6648                 event_triggers_post_call(tr->trace_marker_file, tt);
6649
6650         if (written > 0)
6651                 *fpos += written;
6652
6653         return written;
6654 }
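
/*
 * Illustrative usage sketch (not part of the original file): this handler
 * backs the "trace_marker" tracefs file, so user space can annotate the
 * trace with an ordinary write, e.g.:
 *
 *	echo "hit interesting condition" > /sys/kernel/tracing/trace_marker
 *
 * The string appears in the trace output as a print event at the point of
 * the write (a '\n' is appended if the write did not include one).
 */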
6655
6656 /* Limit it for now to 3K (including tag) */
6657 #define RAW_DATA_MAX_SIZE (1024*3)
6658
6659 static ssize_t
6660 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6661                                         size_t cnt, loff_t *fpos)
6662 {
6663         struct trace_array *tr = filp->private_data;
6664         struct ring_buffer_event *event;
6665         struct trace_buffer *buffer;
6666         struct raw_data_entry *entry;
6667         unsigned long irq_flags;
6668         ssize_t written;
6669         int size;
6670         int len;
6671
6672 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6673
6674         if (tracing_disabled)
6675                 return -EINVAL;
6676
6677         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6678                 return -EINVAL;
6679
6680         /* The marker must at least have a tag id */
6681         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6682                 return -EINVAL;
6683
6684         if (cnt > TRACE_BUF_SIZE)
6685                 cnt = TRACE_BUF_SIZE;
6686
6687         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6688
6689         local_save_flags(irq_flags);
6690         size = sizeof(*entry) + cnt;
6691         if (cnt < FAULT_SIZE_ID)
6692                 size += FAULT_SIZE_ID - cnt;
6693
6694         buffer = tr->array_buffer.buffer;
6695         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6696                                             irq_flags, preempt_count());
6697         if (!event)
6698                 /* Ring buffer disabled, return as if not open for write */
6699                 return -EBADF;
6700
6701         entry = ring_buffer_event_data(event);
6702
6703         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6704         if (len) {
6705                 entry->id = -1;
6706                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6707                 written = -EFAULT;
6708         } else
6709                 written = cnt;
6710
6711         __buffer_unlock_commit(buffer, event);
6712
6713         if (written > 0)
6714                 *fpos += written;
6715
6716         return written;
6717 }
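
/*
 * Illustrative usage sketch (not part of the original file): the raw
 * variant above backs "trace_marker_raw" and expects a binary payload
 * whose first 4 bytes are the tag id, e.g. from user space:
 *
 *	struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *	write(fd, &rec, sizeof(rec));	// fd opened on trace_marker_raw
 *
 * The id value and payload layout are arbitrary examples; only the
 * leading id is interpreted by the kernel.
 */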
6718
6719 static int tracing_clock_show(struct seq_file *m, void *v)
6720 {
6721         struct trace_array *tr = m->private;
6722         int i;
6723
6724         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6725                 seq_printf(m,
6726                         "%s%s%s%s", i ? " " : "",
6727                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6728                         i == tr->clock_id ? "]" : "");
6729         seq_putc(m, '\n');
6730
6731         return 0;
6732 }
6733
6734 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6735 {
6736         int i;
6737
6738         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6739                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6740                         break;
6741         }
6742         if (i == ARRAY_SIZE(trace_clocks))
6743                 return -EINVAL;
6744
6745         mutex_lock(&trace_types_lock);
6746
6747         tr->clock_id = i;
6748
6749         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6750
6751         /*
6752          * New clock may not be consistent with the previous clock.
6753          * Reset the buffer so that it doesn't have incomparable timestamps.
6754          */
6755         tracing_reset_online_cpus(&tr->array_buffer);
6756
6757 #ifdef CONFIG_TRACER_MAX_TRACE
6758         if (tr->max_buffer.buffer)
6759                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6760         tracing_reset_online_cpus(&tr->max_buffer);
6761 #endif
6762
6763         mutex_unlock(&trace_types_lock);
6764
6765         return 0;
6766 }
6767
6768 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6769                                    size_t cnt, loff_t *fpos)
6770 {
6771         struct seq_file *m = filp->private_data;
6772         struct trace_array *tr = m->private;
6773         char buf[64];
6774         const char *clockstr;
6775         int ret;
6776
6777         if (cnt >= sizeof(buf))
6778                 return -EINVAL;
6779
6780         if (copy_from_user(buf, ubuf, cnt))
6781                 return -EFAULT;
6782
6783         buf[cnt] = 0;
6784
6785         clockstr = strstrip(buf);
6786
6787         ret = tracing_set_clock(tr, clockstr);
6788         if (ret)
6789                 return ret;
6790
6791         *fpos += cnt;
6792
6793         return cnt;
6794 }
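
/*
 * Illustrative usage sketch (not part of the original file): the
 * "trace_clock" tracefs file lists the available clocks with the current
 * one in brackets, and writing one of the listed names switches clocks
 * (resetting the buffer, see tracing_set_clock() above), e.g.:
 *
 *	cat  /sys/kernel/tracing/trace_clock
 *	echo global > /sys/kernel/tracing/trace_clock
 */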
6795
6796 static int tracing_clock_open(struct inode *inode, struct file *file)
6797 {
6798         struct trace_array *tr = inode->i_private;
6799         int ret;
6800
6801         ret = tracing_check_open_get_tr(tr);
6802         if (ret)
6803                 return ret;
6804
6805         ret = single_open(file, tracing_clock_show, inode->i_private);
6806         if (ret < 0)
6807                 trace_array_put(tr);
6808
6809         return ret;
6810 }
6811
6812 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6813 {
6814         struct trace_array *tr = m->private;
6815
6816         mutex_lock(&trace_types_lock);
6817
6818         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6819                 seq_puts(m, "delta [absolute]\n");
6820         else
6821                 seq_puts(m, "[delta] absolute\n");
6822
6823         mutex_unlock(&trace_types_lock);
6824
6825         return 0;
6826 }
6827
6828 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6829 {
6830         struct trace_array *tr = inode->i_private;
6831         int ret;
6832
6833         ret = tracing_check_open_get_tr(tr);
6834         if (ret)
6835                 return ret;
6836
6837         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6838         if (ret < 0)
6839                 trace_array_put(tr);
6840
6841         return ret;
6842 }
6843
6844 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6845 {
6846         int ret = 0;
6847
6848         mutex_lock(&trace_types_lock);
6849
6850         if (abs && tr->time_stamp_abs_ref++)
6851                 goto out;
6852
6853         if (!abs) {
6854                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6855                         ret = -EINVAL;
6856                         goto out;
6857                 }
6858
6859                 if (--tr->time_stamp_abs_ref)
6860                         goto out;
6861         }
6862
6863         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6864
6865 #ifdef CONFIG_TRACER_MAX_TRACE
6866         if (tr->max_buffer.buffer)
6867                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6868 #endif
6869  out:
6870         mutex_unlock(&trace_types_lock);
6871
6872         return ret;
6873 }
6874
6875 struct ftrace_buffer_info {
6876         struct trace_iterator   iter;
6877         void                    *spare;
6878         unsigned int            spare_cpu;
6879         unsigned int            read;
6880 };
6881
6882 #ifdef CONFIG_TRACER_SNAPSHOT
6883 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6884 {
6885         struct trace_array *tr = inode->i_private;
6886         struct trace_iterator *iter;
6887         struct seq_file *m;
6888         int ret;
6889
6890         ret = tracing_check_open_get_tr(tr);
6891         if (ret)
6892                 return ret;
6893
6894         if (file->f_mode & FMODE_READ) {
6895                 iter = __tracing_open(inode, file, true);
6896                 if (IS_ERR(iter))
6897                         ret = PTR_ERR(iter);
6898         } else {
6899                 /* Writes still need the seq_file to hold the private data */
6900                 ret = -ENOMEM;
6901                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6902                 if (!m)
6903                         goto out;
6904                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6905                 if (!iter) {
6906                         kfree(m);
6907                         goto out;
6908                 }
6909                 ret = 0;
6910
6911                 iter->tr = tr;
6912                 iter->array_buffer = &tr->max_buffer;
6913                 iter->cpu_file = tracing_get_cpu(inode);
6914                 m->private = iter;
6915                 file->private_data = m;
6916         }
6917 out:
6918         if (ret < 0)
6919                 trace_array_put(tr);
6920
6921         return ret;
6922 }
6923
6924 static ssize_t
6925 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6926                        loff_t *ppos)
6927 {
6928         struct seq_file *m = filp->private_data;
6929         struct trace_iterator *iter = m->private;
6930         struct trace_array *tr = iter->tr;
6931         unsigned long val;
6932         int ret;
6933
6934         ret = tracing_update_buffers();
6935         if (ret < 0)
6936                 return ret;
6937
6938         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6939         if (ret)
6940                 return ret;
6941
6942         mutex_lock(&trace_types_lock);
6943
6944         if (tr->current_trace->use_max_tr) {
6945                 ret = -EBUSY;
6946                 goto out;
6947         }
6948
6949         arch_spin_lock(&tr->max_lock);
6950         if (tr->cond_snapshot)
6951                 ret = -EBUSY;
6952         arch_spin_unlock(&tr->max_lock);
6953         if (ret)
6954                 goto out;
6955
6956         switch (val) {
6957         case 0:
6958                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6959                         ret = -EINVAL;
6960                         break;
6961                 }
6962                 if (tr->allocated_snapshot)
6963                         free_snapshot(tr);
6964                 break;
6965         case 1:
6966 /* Only allow per-cpu swap if the ring buffer supports it */
6967 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6968                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6969                         ret = -EINVAL;
6970                         break;
6971                 }
6972 #endif
6973                 if (tr->allocated_snapshot)
6974                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6975                                         &tr->array_buffer, iter->cpu_file);
6976                 else
6977                         ret = tracing_alloc_snapshot_instance(tr);
6978                 if (ret < 0)
6979                         break;
6980                 local_irq_disable();
6981                 /* Now, we're going to swap */
6982                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6983                         update_max_tr(tr, current, smp_processor_id(), NULL);
6984                 else
6985                         update_max_tr_single(tr, current, iter->cpu_file);
6986                 local_irq_enable();
6987                 break;
6988         default:
6989                 if (tr->allocated_snapshot) {
6990                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6991                                 tracing_reset_online_cpus(&tr->max_buffer);
6992                         else
6993                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6994                 }
6995                 break;
6996         }
6997
6998         if (ret >= 0) {
6999                 *ppos += cnt;
7000                 ret = cnt;
7001         }
7002 out:
7003         mutex_unlock(&trace_types_lock);
7004         return ret;
7005 }
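
/*
 * Illustrative usage sketch (not part of the original file), matching the
 * switch above: with CONFIG_TRACER_SNAPSHOT, the "snapshot" file accepts
 *
 *	echo 1 > snapshot	# allocate (if needed) and take a snapshot
 *	echo 0 > snapshot	# free the snapshot buffer
 *	echo 2 > snapshot	# any other value just clears the snapshot
 *
 * relative to the instance's tracefs directory.
 */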
7006
7007 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7008 {
7009         struct seq_file *m = file->private_data;
7010         int ret;
7011
7012         ret = tracing_release(inode, file);
7013
7014         if (file->f_mode & FMODE_READ)
7015                 return ret;
7016
7017         /* If write only, the seq_file is just a stub */
7018         if (m)
7019                 kfree(m->private);
7020         kfree(m);
7021
7022         return 0;
7023 }
7024
7025 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7026 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7027                                     size_t count, loff_t *ppos);
7028 static int tracing_buffers_release(struct inode *inode, struct file *file);
7029 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7030                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7031
7032 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7033 {
7034         struct ftrace_buffer_info *info;
7035         int ret;
7036
7037         /* The following checks for tracefs lockdown */
7038         ret = tracing_buffers_open(inode, filp);
7039         if (ret < 0)
7040                 return ret;
7041
7042         info = filp->private_data;
7043
7044         if (info->iter.trace->use_max_tr) {
7045                 tracing_buffers_release(inode, filp);
7046                 return -EBUSY;
7047         }
7048
7049         info->iter.snapshot = true;
7050         info->iter.array_buffer = &info->iter.tr->max_buffer;
7051
7052         return ret;
7053 }
7054
7055 #endif /* CONFIG_TRACER_SNAPSHOT */
7056
7057
7058 static const struct file_operations tracing_thresh_fops = {
7059         .open           = tracing_open_generic,
7060         .read           = tracing_thresh_read,
7061         .write          = tracing_thresh_write,
7062         .llseek         = generic_file_llseek,
7063 };
7064
7065 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7066 static const struct file_operations tracing_max_lat_fops = {
7067         .open           = tracing_open_generic,
7068         .read           = tracing_max_lat_read,
7069         .write          = tracing_max_lat_write,
7070         .llseek         = generic_file_llseek,
7071 };
7072 #endif
7073
7074 static const struct file_operations set_tracer_fops = {
7075         .open           = tracing_open_generic,
7076         .read           = tracing_set_trace_read,
7077         .write          = tracing_set_trace_write,
7078         .llseek         = generic_file_llseek,
7079 };
7080
7081 static const struct file_operations tracing_pipe_fops = {
7082         .open           = tracing_open_pipe,
7083         .poll           = tracing_poll_pipe,
7084         .read           = tracing_read_pipe,
7085         .splice_read    = tracing_splice_read_pipe,
7086         .release        = tracing_release_pipe,
7087         .llseek         = no_llseek,
7088 };
7089
7090 static const struct file_operations tracing_entries_fops = {
7091         .open           = tracing_open_generic_tr,
7092         .read           = tracing_entries_read,
7093         .write          = tracing_entries_write,
7094         .llseek         = generic_file_llseek,
7095         .release        = tracing_release_generic_tr,
7096 };
7097
7098 static const struct file_operations tracing_total_entries_fops = {
7099         .open           = tracing_open_generic_tr,
7100         .read           = tracing_total_entries_read,
7101         .llseek         = generic_file_llseek,
7102         .release        = tracing_release_generic_tr,
7103 };
7104
7105 static const struct file_operations tracing_free_buffer_fops = {
7106         .open           = tracing_open_generic_tr,
7107         .write          = tracing_free_buffer_write,
7108         .release        = tracing_free_buffer_release,
7109 };
7110
7111 static const struct file_operations tracing_mark_fops = {
7112         .open           = tracing_open_generic_tr,
7113         .write          = tracing_mark_write,
7114         .llseek         = generic_file_llseek,
7115         .release        = tracing_release_generic_tr,
7116 };
7117
7118 static const struct file_operations tracing_mark_raw_fops = {
7119         .open           = tracing_open_generic_tr,
7120         .write          = tracing_mark_raw_write,
7121         .llseek         = generic_file_llseek,
7122         .release        = tracing_release_generic_tr,
7123 };
7124
7125 static const struct file_operations trace_clock_fops = {
7126         .open           = tracing_clock_open,
7127         .read           = seq_read,
7128         .llseek         = seq_lseek,
7129         .release        = tracing_single_release_tr,
7130         .write          = tracing_clock_write,
7131 };
7132
7133 static const struct file_operations trace_time_stamp_mode_fops = {
7134         .open           = tracing_time_stamp_mode_open,
7135         .read           = seq_read,
7136         .llseek         = seq_lseek,
7137         .release        = tracing_single_release_tr,
7138 };
7139
7140 #ifdef CONFIG_TRACER_SNAPSHOT
7141 static const struct file_operations snapshot_fops = {
7142         .open           = tracing_snapshot_open,
7143         .read           = seq_read,
7144         .write          = tracing_snapshot_write,
7145         .llseek         = tracing_lseek,
7146         .release        = tracing_snapshot_release,
7147 };
7148
7149 static const struct file_operations snapshot_raw_fops = {
7150         .open           = snapshot_raw_open,
7151         .read           = tracing_buffers_read,
7152         .release        = tracing_buffers_release,
7153         .splice_read    = tracing_buffers_splice_read,
7154         .llseek         = no_llseek,
7155 };
7156
7157 #endif /* CONFIG_TRACER_SNAPSHOT */
7158
7159 #define TRACING_LOG_ERRS_MAX    8
7160 #define TRACING_LOG_LOC_MAX     128
7161
7162 #define CMD_PREFIX "  Command: "
7163
7164 struct err_info {
7165         const char      **errs; /* ptr to loc-specific array of err strings */
7166         u8              type;   /* index into errs -> specific err string */
7167         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7168         u64             ts;
7169 };
7170
7171 struct tracing_log_err {
7172         struct list_head        list;
7173         struct err_info         info;
7174         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7175         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7176 };
7177
7178 static DEFINE_MUTEX(tracing_err_log_lock);
7179
7180 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7181 {
7182         struct tracing_log_err *err;
7183
7184         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7185                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7186                 if (!err)
7187                         err = ERR_PTR(-ENOMEM);
7188                 tr->n_err_log_entries++;
7189
7190                 return err;
7191         }
7192
7193         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7194         list_del(&err->list);
7195
7196         return err;
7197 }
7198
7199 /**
7200  * err_pos - find the position of a string within a command for error careting
7201  * @cmd: The tracing command that caused the error
7202  * @str: The string to position the caret at within @cmd
7203  *
7204  * Finds the position of the first occurrence of @str within @cmd.  The
7205  * return value can be passed to tracing_log_err() for caret placement
7206  * within @cmd.
7207  *
7208  * Returns the index within @cmd of the first occurrence of @str or 0
7209  * if @str was not found.
7210  */
7211 unsigned int err_pos(char *cmd, const char *str)
7212 {
7213         char *found;
7214
7215         if (WARN_ON(!strlen(cmd)))
7216                 return 0;
7217
7218         found = strstr(cmd, str);
7219         if (found)
7220                 return found - cmd;
7221
7222         return 0;
7223 }
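
/*
 * Illustrative sketch (not part of the original file): for a hypothetical
 * command string, err_pos() simply reports the offset of the offending
 * token, which is then used as the caret position:
 *
 *	char cmd[] = "hist:keys=pid:vals=bogus_field";
 *	unsigned int pos = err_pos(cmd, "bogus_field");	// pos == 19
 *
 * A return of 0 means @str either starts @cmd or was not found; both
 * place the caret under the first character.
 */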
7224
7225 /**
7226  * tracing_log_err - write an error to the tracing error log
7227  * @tr: The associated trace array for the error (NULL for top level array)
7228  * @loc: A string describing where the error occurred
7229  * @cmd: The tracing command that caused the error
7230  * @errs: The array of loc-specific static error strings
7231  * @type: The index into errs[], which produces the specific static err string
7232  * @pos: The position the caret should be placed in the cmd
7233  *
7234  * Writes an error into tracing/error_log of the form:
7235  *
7236  * <loc>: error: <text>
7237  *   Command: <cmd>
7238  *              ^
7239  *
7240  * tracing/error_log is a small log file containing the last
7241  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7242  * unless there has been a tracing error, and the error log can be
7243  * cleared and have its memory freed by writing the empty string in
7244  * truncation mode to it, i.e. echo > tracing/error_log.
7245  *
7246  * NOTE: the @errs array together with the @type param is used to
7247  * produce a static error string - this string is not copied and saved
7248  * when the error is logged - only a pointer to it is saved.  See
7249  * existing callers for examples of how static strings are typically
7250  * defined for use with tracing_log_err().
7251  */
7252 void tracing_log_err(struct trace_array *tr,
7253                      const char *loc, const char *cmd,
7254                      const char **errs, u8 type, u8 pos)
7255 {
7256         struct tracing_log_err *err;
7257
7258         if (!tr)
7259                 tr = &global_trace;
7260
7261         mutex_lock(&tracing_err_log_lock);
7262         err = get_tracing_log_err(tr);
7263         if (PTR_ERR(err) == -ENOMEM) {
7264                 mutex_unlock(&tracing_err_log_lock);
7265                 return;
7266         }
7267
7268         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7269         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7270
7271         err->info.errs = errs;
7272         err->info.type = type;
7273         err->info.pos = pos;
7274         err->info.ts = local_clock();
7275
7276         list_add_tail(&err->list, &tr->err_log);
7277         mutex_unlock(&tracing_err_log_lock);
7278 }
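
/*
 * Illustrative sketch (not part of the original file): per the NOTE above,
 * a hypothetical caller keeps a static array of error strings and logs by
 * index, e.g.:
 *
 *	static const char *foo_errs[] = {
 *		"Duplicate field name",
 *		"Missing argument",
 *	};
 *
 *	tracing_log_err(tr, "foo", cmd, foo_errs, 0, err_pos(cmd, dup_name));
 *
 * Here "foo_errs", "cmd" and "dup_name" are stand-ins; only a pointer into
 * the array is stored, so the strings must have static storage duration.
 */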
7279
7280 static void clear_tracing_err_log(struct trace_array *tr)
7281 {
7282         struct tracing_log_err *err, *next;
7283
7284         mutex_lock(&tracing_err_log_lock);
7285         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7286                 list_del(&err->list);
7287                 kfree(err);
7288         }
7289
7290         tr->n_err_log_entries = 0;
7291         mutex_unlock(&tracing_err_log_lock);
7292 }
7293
7294 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7295 {
7296         struct trace_array *tr = m->private;
7297
7298         mutex_lock(&tracing_err_log_lock);
7299
7300         return seq_list_start(&tr->err_log, *pos);
7301 }
7302
7303 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7304 {
7305         struct trace_array *tr = m->private;
7306
7307         return seq_list_next(v, &tr->err_log, pos);
7308 }
7309
7310 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7311 {
7312         mutex_unlock(&tracing_err_log_lock);
7313 }
7314
7315 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7316 {
7317         u8 i;
7318
7319         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7320                 seq_putc(m, ' ');
7321         for (i = 0; i < pos; i++)
7322                 seq_putc(m, ' ');
7323         seq_puts(m, "^\n");
7324 }
7325
7326 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7327 {
7328         struct tracing_log_err *err = v;
7329
7330         if (err) {
7331                 const char *err_text = err->info.errs[err->info.type];
7332                 u64 sec = err->info.ts;
7333                 u32 nsec;
7334
7335                 nsec = do_div(sec, NSEC_PER_SEC);
7336                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7337                            err->loc, err_text);
7338                 seq_printf(m, "%s", err->cmd);
7339                 tracing_err_log_show_pos(m, err->info.pos);
7340         }
7341
7342         return 0;
7343 }
7344
7345 static const struct seq_operations tracing_err_log_seq_ops = {
7346         .start  = tracing_err_log_seq_start,
7347         .next   = tracing_err_log_seq_next,
7348         .stop   = tracing_err_log_seq_stop,
7349         .show   = tracing_err_log_seq_show
7350 };
7351
7352 static int tracing_err_log_open(struct inode *inode, struct file *file)
7353 {
7354         struct trace_array *tr = inode->i_private;
7355         int ret = 0;
7356
7357         ret = tracing_check_open_get_tr(tr);
7358         if (ret)
7359                 return ret;
7360
7361         /* If this file was opened for write, then erase contents */
7362         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7363                 clear_tracing_err_log(tr);
7364
7365         if (file->f_mode & FMODE_READ) {
7366                 ret = seq_open(file, &tracing_err_log_seq_ops);
7367                 if (!ret) {
7368                         struct seq_file *m = file->private_data;
7369                         m->private = tr;
7370                 } else {
7371                         trace_array_put(tr);
7372                 }
7373         }
7374         return ret;
7375 }
7376
7377 static ssize_t tracing_err_log_write(struct file *file,
7378                                      const char __user *buffer,
7379                                      size_t count, loff_t *ppos)
7380 {
7381         return count;
7382 }
7383
7384 static int tracing_err_log_release(struct inode *inode, struct file *file)
7385 {
7386         struct trace_array *tr = inode->i_private;
7387
7388         trace_array_put(tr);
7389
7390         if (file->f_mode & FMODE_READ)
7391                 seq_release(inode, file);
7392
7393         return 0;
7394 }
7395
7396 static const struct file_operations tracing_err_log_fops = {
7397         .open           = tracing_err_log_open,
7398         .write          = tracing_err_log_write,
7399         .read           = seq_read,
7400         .llseek         = seq_lseek,
7401         .release        = tracing_err_log_release,
7402 };
7403
7404 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7405 {
7406         struct trace_array *tr = inode->i_private;
7407         struct ftrace_buffer_info *info;
7408         int ret;
7409
7410         ret = tracing_check_open_get_tr(tr);
7411         if (ret)
7412                 return ret;
7413
7414         info = kzalloc(sizeof(*info), GFP_KERNEL);
7415         if (!info) {
7416                 trace_array_put(tr);
7417                 return -ENOMEM;
7418         }
7419
7420         mutex_lock(&trace_types_lock);
7421
7422         info->iter.tr           = tr;
7423         info->iter.cpu_file     = tracing_get_cpu(inode);
7424         info->iter.trace        = tr->current_trace;
7425         info->iter.array_buffer = &tr->array_buffer;
7426         info->spare             = NULL;
7427         /* Force reading ring buffer for first read */
7428         info->read              = (unsigned int)-1;
7429
7430         filp->private_data = info;
7431
7432         tr->current_trace->ref++;
7433
7434         mutex_unlock(&trace_types_lock);
7435
7436         ret = nonseekable_open(inode, filp);
7437         if (ret < 0)
7438                 trace_array_put(tr);
7439
7440         return ret;
7441 }
7442
7443 static __poll_t
7444 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7445 {
7446         struct ftrace_buffer_info *info = filp->private_data;
7447         struct trace_iterator *iter = &info->iter;
7448
7449         return trace_poll(iter, filp, poll_table);
7450 }
7451
7452 static ssize_t
7453 tracing_buffers_read(struct file *filp, char __user *ubuf,
7454                      size_t count, loff_t *ppos)
7455 {
7456         struct ftrace_buffer_info *info = filp->private_data;
7457         struct trace_iterator *iter = &info->iter;
7458         ssize_t ret = 0;
7459         ssize_t size;
7460
7461         if (!count)
7462                 return 0;
7463
7464 #ifdef CONFIG_TRACER_MAX_TRACE
7465         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7466                 return -EBUSY;
7467 #endif
7468
7469         if (!info->spare) {
7470                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7471                                                           iter->cpu_file);
7472                 if (IS_ERR(info->spare)) {
7473                         ret = PTR_ERR(info->spare);
7474                         info->spare = NULL;
7475                 } else {
7476                         info->spare_cpu = iter->cpu_file;
7477                 }
7478         }
7479         if (!info->spare)
7480                 return ret;
7481
7482         /* Do we have previous read data to read? */
7483         if (info->read < PAGE_SIZE)
7484                 goto read;
7485
7486  again:
7487         trace_access_lock(iter->cpu_file);
7488         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7489                                     &info->spare,
7490                                     count,
7491                                     iter->cpu_file, 0);
7492         trace_access_unlock(iter->cpu_file);
7493
7494         if (ret < 0) {
7495                 if (trace_empty(iter)) {
7496                         if ((filp->f_flags & O_NONBLOCK))
7497                                 return -EAGAIN;
7498
7499                         ret = wait_on_pipe(iter, 0);
7500                         if (ret)
7501                                 return ret;
7502
7503                         goto again;
7504                 }
7505                 return 0;
7506         }
7507
7508         info->read = 0;
7509  read:
7510         size = PAGE_SIZE - info->read;
7511         if (size > count)
7512                 size = count;
7513
7514         ret = copy_to_user(ubuf, info->spare + info->read, size);
7515         if (ret == size)
7516                 return -EFAULT;
7517
7518         size -= ret;
7519
7520         *ppos += size;
7521         info->read += size;
7522
7523         return size;
7524 }
7525
7526 static int tracing_buffers_release(struct inode *inode, struct file *file)
7527 {
7528         struct ftrace_buffer_info *info = file->private_data;
7529         struct trace_iterator *iter = &info->iter;
7530
7531         mutex_lock(&trace_types_lock);
7532
7533         iter->tr->current_trace->ref--;
7534
7535         __trace_array_put(iter->tr);
7536
7537         if (info->spare)
7538                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7539                                            info->spare_cpu, info->spare);
7540         kfree(info);
7541
7542         mutex_unlock(&trace_types_lock);
7543
7544         return 0;
7545 }
7546
7547 struct buffer_ref {
7548         struct trace_buffer     *buffer;
7549         void                    *page;
7550         int                     cpu;
7551         refcount_t              refcount;
7552 };
7553
7554 static void buffer_ref_release(struct buffer_ref *ref)
7555 {
7556         if (!refcount_dec_and_test(&ref->refcount))
7557                 return;
7558         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7559         kfree(ref);
7560 }
7561
7562 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7563                                     struct pipe_buffer *buf)
7564 {
7565         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7566
7567         buffer_ref_release(ref);
7568         buf->private = 0;
7569 }
7570
7571 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7572                                 struct pipe_buffer *buf)
7573 {
7574         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7575
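	/*
	 * Refuse to take another reference once the count is already
	 * implausibly large, so the refcount cannot be driven to overflow.
	 */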
7576         if (refcount_read(&ref->refcount) > INT_MAX/2)
7577                 return false;
7578
7579         refcount_inc(&ref->refcount);
7580         return true;
7581 }
7582
7583 /* Pipe buffer operations for a buffer. */
7584 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7585         .confirm                = generic_pipe_buf_confirm,
7586         .release                = buffer_pipe_buf_release,
7587         .steal                  = generic_pipe_buf_nosteal,
7588         .get                    = buffer_pipe_buf_get,
7589 };
7590
7591 /*
7592  * Callback from splice_to_pipe(); used to release pages at the end of
7593  * the spd in case we errored out while filling the pipe.
7594  */
7595 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7596 {
7597         struct buffer_ref *ref =
7598                 (struct buffer_ref *)spd->partial[i].private;
7599
7600         buffer_ref_release(ref);
7601         spd->partial[i].private = 0;
7602 }
7603
7604 static ssize_t
7605 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7606                             struct pipe_inode_info *pipe, size_t len,
7607                             unsigned int flags)
7608 {
7609         struct ftrace_buffer_info *info = file->private_data;
7610         struct trace_iterator *iter = &info->iter;
7611         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7612         struct page *pages_def[PIPE_DEF_BUFFERS];
7613         struct splice_pipe_desc spd = {
7614                 .pages          = pages_def,
7615                 .partial        = partial_def,
7616                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7617                 .ops            = &buffer_pipe_buf_ops,
7618                 .spd_release    = buffer_spd_release,
7619         };
7620         struct buffer_ref *ref;
7621         int entries, i;
7622         ssize_t ret = 0;
7623
7624 #ifdef CONFIG_TRACER_MAX_TRACE
7625         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7626                 return -EBUSY;
7627 #endif
7628
7629         if (*ppos & (PAGE_SIZE - 1))
7630                 return -EINVAL;
7631
7632         if (len & (PAGE_SIZE - 1)) {
7633                 if (len < PAGE_SIZE)
7634                         return -EINVAL;
7635                 len &= PAGE_MASK;
7636         }
7637
7638         if (splice_grow_spd(pipe, &spd))
7639                 return -ENOMEM;
7640
7641  again:
7642         trace_access_lock(iter->cpu_file);
7643         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7644
7645         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7646                 struct page *page;
7647                 int r;
7648
7649                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7650                 if (!ref) {
7651                         ret = -ENOMEM;
7652                         break;
7653                 }
7654
7655                 refcount_set(&ref->refcount, 1);
7656                 ref->buffer = iter->array_buffer->buffer;
7657                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7658                 if (IS_ERR(ref->page)) {
7659                         ret = PTR_ERR(ref->page);
7660                         ref->page = NULL;
7661                         kfree(ref);
7662                         break;
7663                 }
7664                 ref->cpu = iter->cpu_file;
7665
7666                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7667                                           len, iter->cpu_file, 1);
7668                 if (r < 0) {
7669                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7670                                                    ref->page);
7671                         kfree(ref);
7672                         break;
7673                 }
7674
7675                 page = virt_to_page(ref->page);
7676
7677                 spd.pages[i] = page;
7678                 spd.partial[i].len = PAGE_SIZE;
7679                 spd.partial[i].offset = 0;
7680                 spd.partial[i].private = (unsigned long)ref;
7681                 spd.nr_pages++;
7682                 *ppos += PAGE_SIZE;
7683
7684                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7685         }
7686
7687         trace_access_unlock(iter->cpu_file);
7688         spd.nr_pages = i;
7689
7690         /* did we read anything? */
7691         if (!spd.nr_pages) {
7692                 if (ret)
7693                         goto out;
7694
7695                 ret = -EAGAIN;
7696                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7697                         goto out;
7698
7699                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7700                 if (ret)
7701                         goto out;
7702
7703                 goto again;
7704         }
7705
7706         ret = splice_to_pipe(pipe, &spd);
7707 out:
7708         splice_shrink_spd(&spd);
7709
7710         return ret;
7711 }
7712
7713 static const struct file_operations tracing_buffers_fops = {
7714         .open           = tracing_buffers_open,
7715         .read           = tracing_buffers_read,
7716         .poll           = tracing_buffers_poll,
7717         .release        = tracing_buffers_release,
7718         .splice_read    = tracing_buffers_splice_read,
7719         .llseek         = no_llseek,
7720 };
7721
7722 static ssize_t
7723 tracing_stats_read(struct file *filp, char __user *ubuf,
7724                    size_t count, loff_t *ppos)
7725 {
7726         struct inode *inode = file_inode(filp);
7727         struct trace_array *tr = inode->i_private;
7728         struct array_buffer *trace_buf = &tr->array_buffer;
7729         int cpu = tracing_get_cpu(inode);
7730         struct trace_seq *s;
7731         unsigned long cnt;
7732         unsigned long long t;
7733         unsigned long usec_rem;
7734
7735         s = kmalloc(sizeof(*s), GFP_KERNEL);
7736         if (!s)
7737                 return -ENOMEM;
7738
7739         trace_seq_init(s);
7740
7741         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7742         trace_seq_printf(s, "entries: %ld\n", cnt);
7743
7744         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7745         trace_seq_printf(s, "overrun: %ld\n", cnt);
7746
7747         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7748         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7749
7750         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7751         trace_seq_printf(s, "bytes: %ld\n", cnt);
7752
7753         if (trace_clocks[tr->clock_id].in_ns) {
7754                 /* local or global for trace_clock */
7755                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7756                 usec_rem = do_div(t, USEC_PER_SEC);
7757                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7758                                                                 t, usec_rem);
7759
7760                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7761                 usec_rem = do_div(t, USEC_PER_SEC);
7762                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7763         } else {
7764                 /* counter or tsc mode for trace_clock */
7765                 trace_seq_printf(s, "oldest event ts: %llu\n",
7766                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7767
7768                 trace_seq_printf(s, "now ts: %llu\n",
7769                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7770         }
7771
7772         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7773         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7774
7775         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7776         trace_seq_printf(s, "read events: %ld\n", cnt);
7777
7778         count = simple_read_from_buffer(ubuf, count, ppos,
7779                                         s->buffer, trace_seq_used(s));
7780
7781         kfree(s);
7782
7783         return count;
7784 }
7785
7786 static const struct file_operations tracing_stats_fops = {
7787         .open           = tracing_open_generic_tr,
7788         .read           = tracing_stats_read,
7789         .llseek         = generic_file_llseek,
7790         .release        = tracing_release_generic_tr,
7791 };
7792
7793 #ifdef CONFIG_DYNAMIC_FTRACE
7794
7795 static ssize_t
7796 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7797                   size_t cnt, loff_t *ppos)
7798 {
7799         ssize_t ret;
7800         char *buf;
7801         int r;
7802
7803         /* 256 should be plenty to hold the amount needed */
7804         buf = kmalloc(256, GFP_KERNEL);
7805         if (!buf)
7806                 return -ENOMEM;
7807
7808         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7809                       ftrace_update_tot_cnt,
7810                       ftrace_number_of_pages,
7811                       ftrace_number_of_groups);
7812
7813         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7814         kfree(buf);
7815         return ret;
7816 }
7817
7818 static const struct file_operations tracing_dyn_info_fops = {
7819         .open           = tracing_open_generic,
7820         .read           = tracing_read_dyn_info,
7821         .llseek         = generic_file_llseek,
7822 };
7823 #endif /* CONFIG_DYNAMIC_FTRACE */
7824
7825 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7826 static void
7827 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7828                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7829                 void *data)
7830 {
7831         tracing_snapshot_instance(tr);
7832 }
7833
7834 static void
7835 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7836                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7837                       void *data)
7838 {
7839         struct ftrace_func_mapper *mapper = data;
7840         long *count = NULL;
7841
7842         if (mapper)
7843                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7844
7845         if (count) {
7846
7847                 if (*count <= 0)
7848                         return;
7849
7850                 (*count)--;
7851         }
7852
7853         tracing_snapshot_instance(tr);
7854 }
7855
7856 static int
7857 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7858                       struct ftrace_probe_ops *ops, void *data)
7859 {
7860         struct ftrace_func_mapper *mapper = data;
7861         long *count = NULL;
7862
7863         seq_printf(m, "%ps:", (void *)ip);
7864
7865         seq_puts(m, "snapshot");
7866
7867         if (mapper)
7868                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7869
7870         if (count)
7871                 seq_printf(m, ":count=%ld\n", *count);
7872         else
7873                 seq_puts(m, ":unlimited\n");
7874
7875         return 0;
7876 }
7877
7878 static int
7879 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7880                      unsigned long ip, void *init_data, void **data)
7881 {
7882         struct ftrace_func_mapper *mapper = *data;
7883
7884         if (!mapper) {
7885                 mapper = allocate_ftrace_func_mapper();
7886                 if (!mapper)
7887                         return -ENOMEM;
7888                 *data = mapper;
7889         }
7890
7891         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7892 }
7893
7894 static void
7895 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7896                      unsigned long ip, void *data)
7897 {
7898         struct ftrace_func_mapper *mapper = data;
7899
7900         if (!ip) {
7901                 if (!mapper)
7902                         return;
7903                 free_ftrace_func_mapper(mapper, NULL);
7904                 return;
7905         }
7906
7907         ftrace_func_mapper_remove_ip(mapper, ip);
7908 }
7909
7910 static struct ftrace_probe_ops snapshot_probe_ops = {
7911         .func                   = ftrace_snapshot,
7912         .print                  = ftrace_snapshot_print,
7913 };
7914
7915 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7916         .func                   = ftrace_count_snapshot,
7917         .print                  = ftrace_snapshot_print,
7918         .init                   = ftrace_snapshot_init,
7919         .free                   = ftrace_snapshot_free,
7920 };
7921
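/*
 * Parser for the "snapshot" command that can be written to the
 * set_ftrace_filter file. A sketch of the expected usage, with
 * some_function standing in for any traceable function (see
 * Documentation/trace/ftrace.rst for the authoritative description):
 *
 *   echo 'some_function:snapshot' > set_ftrace_filter
 *   echo 'some_function:snapshot:3' > set_ftrace_filter
 *   echo '!some_function:snapshot' > set_ftrace_filter
 *
 * The optional count limits how many snapshots will be taken; the
 * '!' form removes a previously registered probe.
 */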
7922 static int
7923 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7924                                char *glob, char *cmd, char *param, int enable)
7925 {
7926         struct ftrace_probe_ops *ops;
7927         void *count = (void *)-1;
7928         char *number;
7929         int ret;
7930
7931         if (!tr)
7932                 return -ENODEV;
7933
7934         /* hash funcs only work with set_ftrace_filter */
7935         if (!enable)
7936                 return -EINVAL;
7937
7938         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7939
7940         if (glob[0] == '!')
7941                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7942
7943         if (!param)
7944                 goto out_reg;
7945
7946         number = strsep(&param, ":");
7947
7948         if (!strlen(number))
7949                 goto out_reg;
7950
7951         /*
7952          * We use the callback data field (which is a pointer)
7953          * as our counter.
7954          */
7955         ret = kstrtoul(number, 0, (unsigned long *)&count);
7956         if (ret)
7957                 return ret;
7958
7959  out_reg:
7960         ret = tracing_alloc_snapshot_instance(tr);
7961         if (ret < 0)
7962                 goto out;
7963
7964         ret = register_ftrace_function_probe(glob, tr, ops, count);
7965
7966  out:
7967         return ret < 0 ? ret : 0;
7968 }
7969
7970 static struct ftrace_func_command ftrace_snapshot_cmd = {
7971         .name                   = "snapshot",
7972         .func                   = ftrace_trace_snapshot_callback,
7973 };
7974
7975 static __init int register_snapshot_cmd(void)
7976 {
7977         return register_ftrace_command(&ftrace_snapshot_cmd);
7978 }
7979 #else
7980 static inline __init int register_snapshot_cmd(void) { return 0; }
7981 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7982
7983 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7984 {
7985         if (WARN_ON(!tr->dir))
7986                 return ERR_PTR(-ENODEV);
7987
7988         /* Top directory uses NULL as the parent */
7989         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7990                 return NULL;
7991
7992         /* All sub buffers have a descriptor */
7993         return tr->dir;
7994 }
7995
7996 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7997 {
7998         struct dentry *d_tracer;
7999
8000         if (tr->percpu_dir)
8001                 return tr->percpu_dir;
8002
8003         d_tracer = tracing_get_dentry(tr);
8004         if (IS_ERR(d_tracer))
8005                 return NULL;
8006
8007         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8008
8009         MEM_FAIL(!tr->percpu_dir,
8010                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8011
8012         return tr->percpu_dir;
8013 }
8014
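/*
 * Like trace_create_file(), but also stash (cpu + 1) in the new inode's
 * i_cdev field so that tracing_get_cpu() can later recover which CPU a
 * per-CPU file refers to.
 */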
8015 static struct dentry *
8016 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8017                       void *data, long cpu, const struct file_operations *fops)
8018 {
8019         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8020
8021         if (ret) /* See tracing_get_cpu() */
8022                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8023         return ret;
8024 }
8025
8026 static void
8027 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8028 {
8029         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8030         struct dentry *d_cpu;
8031         char cpu_dir[30]; /* 30 characters should be more than enough */
8032
8033         if (!d_percpu)
8034                 return;
8035
8036         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8037         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8038         if (!d_cpu) {
8039                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8040                 return;
8041         }
8042
8043         /* per cpu trace_pipe */
8044         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8045                                 tr, cpu, &tracing_pipe_fops);
8046
8047         /* per cpu trace */
8048         trace_create_cpu_file("trace", 0644, d_cpu,
8049                                 tr, cpu, &tracing_fops);
8050
8051         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8052                                 tr, cpu, &tracing_buffers_fops);
8053
8054         trace_create_cpu_file("stats", 0444, d_cpu,
8055                                 tr, cpu, &tracing_stats_fops);
8056
8057         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8058                                 tr, cpu, &tracing_entries_fops);
8059
8060 #ifdef CONFIG_TRACER_SNAPSHOT
8061         trace_create_cpu_file("snapshot", 0644, d_cpu,
8062                                 tr, cpu, &snapshot_fops);
8063
8064         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8065                                 tr, cpu, &snapshot_raw_fops);
8066 #endif
8067 }
8068
8069 #ifdef CONFIG_FTRACE_SELFTEST
8070 /* Let selftest have access to static functions in this file */
8071 #include "trace_selftest.c"
8072 #endif
8073
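/*
 * trace_options_read()/trace_options_write() back the files under the
 * per-instance options/ directory that represent tracer-specific flags
 * (struct tracer_opt). The *_core_* variants further below handle the
 * global trace_flags bits instead.
 */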
8074 static ssize_t
8075 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8076                         loff_t *ppos)
8077 {
8078         struct trace_option_dentry *topt = filp->private_data;
8079         char *buf;
8080
8081         if (topt->flags->val & topt->opt->bit)
8082                 buf = "1\n";
8083         else
8084                 buf = "0\n";
8085
8086         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8087 }
8088
8089 static ssize_t
8090 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8091                          loff_t *ppos)
8092 {
8093         struct trace_option_dentry *topt = filp->private_data;
8094         unsigned long val;
8095         int ret;
8096
8097         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8098         if (ret)
8099                 return ret;
8100
8101         if (val != 0 && val != 1)
8102                 return -EINVAL;
8103
8104         if (!!(topt->flags->val & topt->opt->bit) != val) {
8105                 mutex_lock(&trace_types_lock);
8106                 ret = __set_tracer_option(topt->tr, topt->flags,
8107                                           topt->opt, !val);
8108                 mutex_unlock(&trace_types_lock);
8109                 if (ret)
8110                         return ret;
8111         }
8112
8113         *ppos += cnt;
8114
8115         return cnt;
8116 }
8117
8118
8119 static const struct file_operations trace_options_fops = {
8120         .open = tracing_open_generic,
8121         .read = trace_options_read,
8122         .write = trace_options_write,
8123         .llseek = generic_file_llseek,
8124 };
8125
8126 /*
8127  * In order to pass in both the trace_array descriptor as well as the index
8128  * to the flag that the trace option file represents, the trace_array
8129  * has a character array of trace_flags_index[], which holds the index
8130  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8131  * The address of this character array is passed to the flag option file
8132  * read/write callbacks.
8133  *
8134  * In order to extract both the index and the trace_array descriptor,
8135  * get_tr_index() uses the following algorithm.
8136  *
8137  *   idx = *ptr;
8138  *
8139  * As the pointer passed in is the address of an entry in that array, and
8140  * each entry holds its own index (remember index[1] == 1), dereferencing
8141  * it yields the index directly.
8142  *
8143  * Then, to get the trace_array descriptor, subtracting that index from
8144  * the pointer gets us back to the start of the array:
8145  *   ptr - idx == &index[0]
8146  *
8147  * Then a simple container_of() from that pointer gets us to the
8148  * trace_array descriptor.
8149  */
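/*
 * A worked example with hypothetical values: if this option file was
 * created with data == &tr->trace_flags_index[5], then *data == 5 and
 * data - 5 == tr->trace_flags_index, from which container_of() recovers
 * the trace_array.
 */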
8150 static void get_tr_index(void *data, struct trace_array **ptr,
8151                          unsigned int *pindex)
8152 {
8153         *pindex = *(unsigned char *)data;
8154
8155         *ptr = container_of(data - *pindex, struct trace_array,
8156                             trace_flags_index);
8157 }
8158
8159 static ssize_t
8160 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8161                         loff_t *ppos)
8162 {
8163         void *tr_index = filp->private_data;
8164         struct trace_array *tr;
8165         unsigned int index;
8166         char *buf;
8167
8168         get_tr_index(tr_index, &tr, &index);
8169
8170         if (tr->trace_flags & (1 << index))
8171                 buf = "1\n";
8172         else
8173                 buf = "0\n";
8174
8175         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8176 }
8177
8178 static ssize_t
8179 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8180                          loff_t *ppos)
8181 {
8182         void *tr_index = filp->private_data;
8183         struct trace_array *tr;
8184         unsigned int index;
8185         unsigned long val;
8186         int ret;
8187
8188         get_tr_index(tr_index, &tr, &index);
8189
8190         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8191         if (ret)
8192                 return ret;
8193
8194         if (val != 0 && val != 1)
8195                 return -EINVAL;
8196
8197         mutex_lock(&event_mutex);
8198         mutex_lock(&trace_types_lock);
8199         ret = set_tracer_flag(tr, 1 << index, val);
8200         mutex_unlock(&trace_types_lock);
8201         mutex_unlock(&event_mutex);
8202
8203         if (ret < 0)
8204                 return ret;
8205
8206         *ppos += cnt;
8207
8208         return cnt;
8209 }
8210
8211 static const struct file_operations trace_options_core_fops = {
8212         .open = tracing_open_generic,
8213         .read = trace_options_core_read,
8214         .write = trace_options_core_write,
8215         .llseek = generic_file_llseek,
8216 };
8217
8218 struct dentry *trace_create_file(const char *name,
8219                                  umode_t mode,
8220                                  struct dentry *parent,
8221                                  void *data,
8222                                  const struct file_operations *fops)
8223 {
8224         struct dentry *ret;
8225
8226         ret = tracefs_create_file(name, mode, parent, data, fops);
8227         if (!ret)
8228                 pr_warn("Could not create tracefs '%s' entry\n", name);
8229
8230         return ret;
8231 }
8232
8233
8234 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8235 {
8236         struct dentry *d_tracer;
8237
8238         if (tr->options)
8239                 return tr->options;
8240
8241         d_tracer = tracing_get_dentry(tr);
8242         if (IS_ERR(d_tracer))
8243                 return NULL;
8244
8245         tr->options = tracefs_create_dir("options", d_tracer);
8246         if (!tr->options) {
8247                 pr_warn("Could not create tracefs directory 'options'\n");
8248                 return NULL;
8249         }
8250
8251         return tr->options;
8252 }
8253
8254 static void
8255 create_trace_option_file(struct trace_array *tr,
8256                          struct trace_option_dentry *topt,
8257                          struct tracer_flags *flags,
8258                          struct tracer_opt *opt)
8259 {
8260         struct dentry *t_options;
8261
8262         t_options = trace_options_init_dentry(tr);
8263         if (!t_options)
8264                 return;
8265
8266         topt->flags = flags;
8267         topt->opt = opt;
8268         topt->tr = tr;
8269
8270         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8271                                     &trace_options_fops);
8272
8273 }
8274
8275 static void
8276 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8277 {
8278         struct trace_option_dentry *topts;
8279         struct trace_options *tr_topts;
8280         struct tracer_flags *flags;
8281         struct tracer_opt *opts;
8282         int cnt;
8283         int i;
8284
8285         if (!tracer)
8286                 return;
8287
8288         flags = tracer->flags;
8289
8290         if (!flags || !flags->opts)
8291                 return;
8292
8293         /*
8294          * If this is an instance, only create flags for tracers
8295          * the instance may have.
8296          */
8297         if (!trace_ok_for_array(tracer, tr))
8298                 return;
8299
8300         for (i = 0; i < tr->nr_topts; i++) {
8301                 /* Make sure there are no duplicate flags. */
8302                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8303                         return;
8304         }
8305
8306         opts = flags->opts;
8307
8308         for (cnt = 0; opts[cnt].name; cnt++)
8309                 ;
8310
8311         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8312         if (!topts)
8313                 return;
8314
8315         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8316                             GFP_KERNEL);
8317         if (!tr_topts) {
8318                 kfree(topts);
8319                 return;
8320         }
8321
8322         tr->topts = tr_topts;
8323         tr->topts[tr->nr_topts].tracer = tracer;
8324         tr->topts[tr->nr_topts].topts = topts;
8325         tr->nr_topts++;
8326
8327         for (cnt = 0; opts[cnt].name; cnt++) {
8328                 create_trace_option_file(tr, &topts[cnt], flags,
8329                                          &opts[cnt]);
8330                 MEM_FAIL(topts[cnt].entry == NULL,
8331                           "Failed to create trace option: %s",
8332                           opts[cnt].name);
8333         }
8334 }
8335
8336 static struct dentry *
8337 create_trace_option_core_file(struct trace_array *tr,
8338                               const char *option, long index)
8339 {
8340         struct dentry *t_options;
8341
8342         t_options = trace_options_init_dentry(tr);
8343         if (!t_options)
8344                 return NULL;
8345
8346         return trace_create_file(option, 0644, t_options,
8347                                  (void *)&tr->trace_flags_index[index],
8348                                  &trace_options_core_fops);
8349 }
8350
8351 static void create_trace_options_dir(struct trace_array *tr)
8352 {
8353         struct dentry *t_options;
8354         bool top_level = tr == &global_trace;
8355         int i;
8356
8357         t_options = trace_options_init_dentry(tr);
8358         if (!t_options)
8359                 return;
8360
8361         for (i = 0; trace_options[i]; i++) {
8362                 if (top_level ||
8363                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8364                         create_trace_option_core_file(tr, trace_options[i], i);
8365         }
8366 }
8367
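/*
 * Backing implementation of the per-instance "tracing_on" file: reading
 * reports whether the instance is currently recording, and writing 0 or 1
 * turns recording off or on, calling the current tracer's stop()/start()
 * callbacks when the state actually changes.
 */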
8368 static ssize_t
8369 rb_simple_read(struct file *filp, char __user *ubuf,
8370                size_t cnt, loff_t *ppos)
8371 {
8372         struct trace_array *tr = filp->private_data;
8373         char buf[64];
8374         int r;
8375
8376         r = tracer_tracing_is_on(tr);
8377         r = sprintf(buf, "%d\n", r);
8378
8379         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8380 }
8381
8382 static ssize_t
8383 rb_simple_write(struct file *filp, const char __user *ubuf,
8384                 size_t cnt, loff_t *ppos)
8385 {
8386         struct trace_array *tr = filp->private_data;
8387         struct trace_buffer *buffer = tr->array_buffer.buffer;
8388         unsigned long val;
8389         int ret;
8390
8391         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8392         if (ret)
8393                 return ret;
8394
8395         if (buffer) {
8396                 mutex_lock(&trace_types_lock);
8397                 if (!!val == tracer_tracing_is_on(tr)) {
8398                         val = 0; /* do nothing */
8399                 } else if (val) {
8400                         tracer_tracing_on(tr);
8401                         if (tr->current_trace->start)
8402                                 tr->current_trace->start(tr);
8403                 } else {
8404                         tracer_tracing_off(tr);
8405                         if (tr->current_trace->stop)
8406                                 tr->current_trace->stop(tr);
8407                 }
8408                 mutex_unlock(&trace_types_lock);
8409         }
8410
8411         (*ppos)++;
8412
8413         return cnt;
8414 }
8415
8416 static const struct file_operations rb_simple_fops = {
8417         .open           = tracing_open_generic_tr,
8418         .read           = rb_simple_read,
8419         .write          = rb_simple_write,
8420         .release        = tracing_release_generic_tr,
8421         .llseek         = default_llseek,
8422 };
8423
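/*
 * Backing implementation of the per-instance "buffer_percent" file. The
 * value is how full the ring buffer should be before a blocked reader is
 * woken; writes above 100 are rejected and 0 is bumped up to 1. The
 * default, set in init_tracer_tracefs(), is 50.
 */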
8424 static ssize_t
8425 buffer_percent_read(struct file *filp, char __user *ubuf,
8426                     size_t cnt, loff_t *ppos)
8427 {
8428         struct trace_array *tr = filp->private_data;
8429         char buf[64];
8430         int r;
8431
8432         r = tr->buffer_percent;
8433         r = sprintf(buf, "%d\n", r);
8434
8435         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8436 }
8437
8438 static ssize_t
8439 buffer_percent_write(struct file *filp, const char __user *ubuf,
8440                      size_t cnt, loff_t *ppos)
8441 {
8442         struct trace_array *tr = filp->private_data;
8443         unsigned long val;
8444         int ret;
8445
8446         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8447         if (ret)
8448                 return ret;
8449
8450         if (val > 100)
8451                 return -EINVAL;
8452
8453         if (!val)
8454                 val = 1;
8455
8456         tr->buffer_percent = val;
8457
8458         (*ppos)++;
8459
8460         return cnt;
8461 }
8462
8463 static const struct file_operations buffer_percent_fops = {
8464         .open           = tracing_open_generic_tr,
8465         .read           = buffer_percent_read,
8466         .write          = buffer_percent_write,
8467         .release        = tracing_release_generic_tr,
8468         .llseek         = default_llseek,
8469 };
8470
8471 static struct dentry *trace_instance_dir;
8472
8473 static void
8474 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8475
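/*
 * Allocate one array_buffer for @tr: the ring buffer itself plus the
 * per-CPU trace_array_cpu data. Everything allocated here is freed again
 * on failure and -ENOMEM is returned.
 */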
8476 static int
8477 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8478 {
8479         enum ring_buffer_flags rb_flags;
8480
8481         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8482
8483         buf->tr = tr;
8484
8485         buf->buffer = ring_buffer_alloc(size, rb_flags);
8486         if (!buf->buffer)
8487                 return -ENOMEM;
8488
8489         buf->data = alloc_percpu(struct trace_array_cpu);
8490         if (!buf->data) {
8491                 ring_buffer_free(buf->buffer);
8492                 buf->buffer = NULL;
8493                 return -ENOMEM;
8494         }
8495
8496         /* Allocate the first page for all buffers */
8497         set_buffer_entries(&tr->array_buffer,
8498                            ring_buffer_size(tr->array_buffer.buffer, 0));
8499
8500         return 0;
8501 }
8502
8503 static int allocate_trace_buffers(struct trace_array *tr, int size)
8504 {
8505         int ret;
8506
8507         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8508         if (ret)
8509                 return ret;
8510
8511 #ifdef CONFIG_TRACER_MAX_TRACE
8512         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8513                                     allocate_snapshot ? size : 1);
8514         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8515                 ring_buffer_free(tr->array_buffer.buffer);
8516                 tr->array_buffer.buffer = NULL;
8517                 free_percpu(tr->array_buffer.data);
8518                 tr->array_buffer.data = NULL;
8519                 return -ENOMEM;
8520         }
8521         tr->allocated_snapshot = allocate_snapshot;
8522
8523         /*
8524          * Only the top level trace array gets its snapshot allocated
8525          * from the kernel command line.
8526          */
8527         allocate_snapshot = false;
8528 #endif
8529
8530         /*
8531          * Because of the way alloc_percpu() works on x86_64, we need to
8532          * synchronize the pgd of all the page tables; otherwise a trace
8533          * event that fires inside an x86_64 page fault handler may itself
8534          * fault when it touches alloc_percpu()'d memory whose mapping has
8535          * not yet been synchronized into the current page tables. We also
8536          * need to audit all other alloc_percpu() and vmalloc() calls in
8537          * tracing, because something might get triggered within a page
8538          * fault trace event!
8539          */
8540         vmalloc_sync_mappings();
8541
8542         return 0;
8543 }
8544
8545 static void free_trace_buffer(struct array_buffer *buf)
8546 {
8547         if (buf->buffer) {
8548                 ring_buffer_free(buf->buffer);
8549                 buf->buffer = NULL;
8550                 free_percpu(buf->data);
8551                 buf->data = NULL;
8552         }
8553 }
8554
8555 static void free_trace_buffers(struct trace_array *tr)
8556 {
8557         if (!tr)
8558                 return;
8559
8560         free_trace_buffer(&tr->array_buffer);
8561
8562 #ifdef CONFIG_TRACER_MAX_TRACE
8563         free_trace_buffer(&tr->max_buffer);
8564 #endif
8565 }
8566
8567 static void init_trace_flags_index(struct trace_array *tr)
8568 {
8569         int i;
8570
8571         /* Used by the trace options files */
8572         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8573                 tr->trace_flags_index[i] = i;
8574 }
8575
8576 static void __update_tracer_options(struct trace_array *tr)
8577 {
8578         struct tracer *t;
8579
8580         for (t = trace_types; t; t = t->next)
8581                 add_tracer_options(tr, t);
8582 }
8583
8584 static void update_tracer_options(struct trace_array *tr)
8585 {
8586         mutex_lock(&trace_types_lock);
8587         __update_tracer_options(tr);
8588         mutex_unlock(&trace_types_lock);
8589 }
8590
8591 /* Must have trace_types_lock held */
8592 struct trace_array *trace_array_find(const char *instance)
8593 {
8594         struct trace_array *tr, *found = NULL;
8595
8596         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8597                 if (tr->name && strcmp(tr->name, instance) == 0) {
8598                         found = tr;
8599                         break;
8600                 }
8601         }
8602
8603         return found;
8604 }
8605
8606 struct trace_array *trace_array_find_get(const char *instance)
8607 {
8608         struct trace_array *tr;
8609
8610         mutex_lock(&trace_types_lock);
8611         tr = trace_array_find(instance);
8612         if (tr)
8613                 tr->ref++;
8614         mutex_unlock(&trace_types_lock);
8615
8616         return tr;
8617 }
8618
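/*
 * Allocate and wire up a new trace_array instance: buffers, tracefs
 * directory, event files, option files and flags. Both callers hold
 * event_mutex and trace_types_lock. Returns an ERR_PTR() on failure.
 */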
8619 static struct trace_array *trace_array_create(const char *name)
8620 {
8621         struct trace_array *tr;
8622         int ret;
8623
8624         ret = -ENOMEM;
8625         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8626         if (!tr)
8627                 return ERR_PTR(ret);
8628
8629         tr->name = kstrdup(name, GFP_KERNEL);
8630         if (!tr->name)
8631                 goto out_free_tr;
8632
8633         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8634                 goto out_free_tr;
8635
8636         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8637
8638         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8639
8640         raw_spin_lock_init(&tr->start_lock);
8641
8642         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8643
8644         tr->current_trace = &nop_trace;
8645
8646         INIT_LIST_HEAD(&tr->systems);
8647         INIT_LIST_HEAD(&tr->events);
8648         INIT_LIST_HEAD(&tr->hist_vars);
8649         INIT_LIST_HEAD(&tr->err_log);
8650
8651         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8652                 goto out_free_tr;
8653
8654         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8655         if (!tr->dir)
8656                 goto out_free_tr;
8657
8658         ret = event_trace_add_tracer(tr->dir, tr);
8659         if (ret) {
8660                 tracefs_remove(tr->dir);
8661                 goto out_free_tr;
8662         }
8663
8664         ftrace_init_trace_array(tr);
8665
8666         init_tracer_tracefs(tr, tr->dir);
8667         init_trace_flags_index(tr);
8668         __update_tracer_options(tr);
8669
8670         list_add(&tr->list, &ftrace_trace_arrays);
8671
8672         tr->ref++;
8673
8674
8675         return tr;
8676
8677  out_free_tr:
8678         free_trace_buffers(tr);
8679         free_cpumask_var(tr->tracing_cpumask);
8680         kfree(tr->name);
8681         kfree(tr);
8682
8683         return ERR_PTR(ret);
8684 }
8685
8686 static int instance_mkdir(const char *name)
8687 {
8688         struct trace_array *tr;
8689         int ret;
8690
8691         mutex_lock(&event_mutex);
8692         mutex_lock(&trace_types_lock);
8693
8694         ret = -EEXIST;
8695         if (trace_array_find(name))
8696                 goto out_unlock;
8697
8698         tr = trace_array_create(name);
8699
8700         ret = PTR_ERR_OR_ZERO(tr);
8701
8702 out_unlock:
8703         mutex_unlock(&trace_types_lock);
8704         mutex_unlock(&event_mutex);
8705         return ret;
8706 }
8707
8708 /**
8709  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8710  * @name: The name of the trace array to be looked up/created.
8711  *
8712  * Returns a pointer to the trace array with the given name, or NULL
8713  * if it cannot be found or created.
8714  *
8715  * NOTE: This function increments the reference counter associated with the
8716  * trace array returned. This makes sure it cannot be freed while in use.
8717  * Use trace_array_put() once the trace array is no longer needed.
8718  * If the trace_array is to be freed, trace_array_destroy() needs to
8719  * be called after the trace_array_put(), or simply let user space delete
8720  * it from the tracefs instances directory. But until the
8721  * trace_array_put() is called, user space cannot delete it.
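 *
 * A minimal usage sketch (hypothetical module code; "my_instance" and
 * do_something_with() are just placeholders):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		do_something_with(tr);
 *		trace_array_put(tr);
 *	}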
8722  *
8723  */
8724 struct trace_array *trace_array_get_by_name(const char *name)
8725 {
8726         struct trace_array *tr;
8727
8728         mutex_lock(&event_mutex);
8729         mutex_lock(&trace_types_lock);
8730
8731         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8732                 if (tr->name && strcmp(tr->name, name) == 0)
8733                         goto out_unlock;
8734         }
8735
8736         tr = trace_array_create(name);
8737
8738         if (IS_ERR(tr))
8739                 tr = NULL;
8740 out_unlock:
8741         if (tr)
8742                 tr->ref++;
8743
8744         mutex_unlock(&trace_types_lock);
8745         mutex_unlock(&event_mutex);
8746         return tr;
8747 }
8748 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8749
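/*
 * Tear down one instance. Called with event_mutex and trace_types_lock
 * held; fails with -EBUSY while something still holds a reference to the
 * instance or the current tracer is in use.
 */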
8750 static int __remove_instance(struct trace_array *tr)
8751 {
8752         int i;
8753
8754         /* Reference counter for a newly created trace array = 1. */
8755         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8756                 return -EBUSY;
8757
8758         list_del(&tr->list);
8759
8760         /* Disable all the flags that were enabled coming in */
8761         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8762                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8763                         set_tracer_flag(tr, 1 << i, 0);
8764         }
8765
8766         tracing_set_nop(tr);
8767         clear_ftrace_function_probes(tr);
8768         event_trace_del_tracer(tr);
8769         ftrace_clear_pids(tr);
8770         ftrace_destroy_function_files(tr);
8771         tracefs_remove(tr->dir);
8772         free_trace_buffers(tr);
8773
8774         for (i = 0; i < tr->nr_topts; i++) {
8775                 kfree(tr->topts[i].topts);
8776         }
8777         kfree(tr->topts);
8778
8779         free_cpumask_var(tr->tracing_cpumask);
8780         kfree(tr->name);
8781         kfree(tr);
8782         tr = NULL;
8783
8784         return 0;
8785 }
8786
8787 int trace_array_destroy(struct trace_array *this_tr)
8788 {
8789         struct trace_array *tr;
8790         int ret;
8791
8792         if (!this_tr)
8793                 return -EINVAL;
8794
8795         mutex_lock(&event_mutex);
8796         mutex_lock(&trace_types_lock);
8797
8798         ret = -ENODEV;
8799
8800         /* Make sure the trace array exists before destroying it. */
8801         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8802                 if (tr == this_tr) {
8803                         ret = __remove_instance(tr);
8804                         break;
8805                 }
8806         }
8807
8808         mutex_unlock(&trace_types_lock);
8809         mutex_unlock(&event_mutex);
8810
8811         return ret;
8812 }
8813 EXPORT_SYMBOL_GPL(trace_array_destroy);
8814
8815 static int instance_rmdir(const char *name)
8816 {
8817         struct trace_array *tr;
8818         int ret;
8819
8820         mutex_lock(&event_mutex);
8821         mutex_lock(&trace_types_lock);
8822
8823         ret = -ENODEV;
8824         tr = trace_array_find(name);
8825         if (tr)
8826                 ret = __remove_instance(tr);
8827
8828         mutex_unlock(&trace_types_lock);
8829         mutex_unlock(&event_mutex);
8830
8831         return ret;
8832 }
8833
8834 static __init void create_trace_instances(struct dentry *d_tracer)
8835 {
8836         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8837                                                          instance_mkdir,
8838                                                          instance_rmdir);
8839         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8840                 return;
8841 }
8842
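/*
 * Create the standard set of tracefs control files for one trace_array
 * under @d_tracer (trace, trace_pipe, buffer_size_kb, tracing_on, the
 * options/ directory, the per_cpu/ sub-directories, and so on).
 */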
8843 static void
8844 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8845 {
8846         struct trace_event_file *file;
8847         int cpu;
8848
8849         trace_create_file("available_tracers", 0444, d_tracer,
8850                         tr, &show_traces_fops);
8851
8852         trace_create_file("current_tracer", 0644, d_tracer,
8853                         tr, &set_tracer_fops);
8854
8855         trace_create_file("tracing_cpumask", 0644, d_tracer,
8856                           tr, &tracing_cpumask_fops);
8857
8858         trace_create_file("trace_options", 0644, d_tracer,
8859                           tr, &tracing_iter_fops);
8860
8861         trace_create_file("trace", 0644, d_tracer,
8862                           tr, &tracing_fops);
8863
8864         trace_create_file("trace_pipe", 0444, d_tracer,
8865                           tr, &tracing_pipe_fops);
8866
8867         trace_create_file("buffer_size_kb", 0644, d_tracer,
8868                           tr, &tracing_entries_fops);
8869
8870         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8871                           tr, &tracing_total_entries_fops);
8872
8873         trace_create_file("free_buffer", 0200, d_tracer,
8874                           tr, &tracing_free_buffer_fops);
8875
8876         trace_create_file("trace_marker", 0220, d_tracer,
8877                           tr, &tracing_mark_fops);
8878
8879         file = __find_event_file(tr, "ftrace", "print");
8880         if (file && file->dir)
8881                 trace_create_file("trigger", 0644, file->dir, file,
8882                                   &event_trigger_fops);
8883         tr->trace_marker_file = file;
8884
8885         trace_create_file("trace_marker_raw", 0220, d_tracer,
8886                           tr, &tracing_mark_raw_fops);
8887
8888         trace_create_file("trace_clock", 0644, d_tracer, tr,
8889                           &trace_clock_fops);
8890
8891         trace_create_file("tracing_on", 0644, d_tracer,
8892                           tr, &rb_simple_fops);
8893
8894         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8895                           &trace_time_stamp_mode_fops);
8896
8897         tr->buffer_percent = 50;
8898
8899         trace_create_file("buffer_percent", 0444, d_tracer,
8900                         tr, &buffer_percent_fops);
8901
8902         create_trace_options_dir(tr);
8903
8904 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8905         trace_create_maxlat_file(tr, d_tracer);
8906 #endif
8907
8908         if (ftrace_create_function_files(tr, d_tracer))
8909                 MEM_FAIL(1, "Could not allocate function filter files");
8910
8911 #ifdef CONFIG_TRACER_SNAPSHOT
8912         trace_create_file("snapshot", 0644, d_tracer,
8913                           tr, &snapshot_fops);
8914 #endif
8915
8916         trace_create_file("error_log", 0644, d_tracer,
8917                           tr, &tracing_err_log_fops);
8918
8919         for_each_tracing_cpu(cpu)
8920                 tracing_init_tracefs_percpu(tr, cpu);
8921
8922         ftrace_init_tracefs(tr, d_tracer);
8923 }
8924
8925 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8926 {
8927         struct vfsmount *mnt;
8928         struct file_system_type *type;
8929
8930         /*
8931          * To maintain backward compatibility for tools that mount
8932          * debugfs to get to the tracing facility, tracefs is automatically
8933          * mounted to the debugfs/tracing directory.
8934          */
8935         type = get_fs_type("tracefs");
8936         if (!type)
8937                 return NULL;
8938         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8939         put_filesystem(type);
8940         if (IS_ERR(mnt))
8941                 return NULL;
8942         mntget(mnt);
8943
8944         return mnt;
8945 }
8946
8947 /**
8948  * tracing_init_dentry - initialize top level trace array
8949  *
8950  * This is called when creating files or directories in the tracing
8951  * directory. It is called via fs_initcall() by any of the boot up code
8952  * and expects to return the dentry of the top level tracing directory.
8953  */
8954 struct dentry *tracing_init_dentry(void)
8955 {
8956         struct trace_array *tr = &global_trace;
8957
8958         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8959                 pr_warn("Tracing disabled due to lockdown\n");
8960                 return ERR_PTR(-EPERM);
8961         }
8962
8963         /* The top level trace array uses NULL as parent */
8964         if (tr->dir)
8965                 return NULL;
8966
8967         if (WARN_ON(!tracefs_initialized()) ||
8968                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8969                  WARN_ON(!debugfs_initialized())))
8970                 return ERR_PTR(-ENODEV);
8971
8972         /*
8973          * As there may still be users that expect the tracing
8974          * files to exist in debugfs/tracing, we must automount
8975          * the tracefs file system there, so older tools still
8976          * work with the newer kernel.
8977          */
8978         tr->dir = debugfs_create_automount("tracing", NULL,
8979                                            trace_automount, NULL);
8980
8981         return NULL;
8982 }
8983
8984 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8985 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8986
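/*
 * Register the eval maps that were built into the kernel image (the
 * section delimited by __start/__stop_ftrace_eval_maps). Maps provided
 * by modules are added later by the module notifier below.
 */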
8987 static void __init trace_eval_init(void)
8988 {
8989         int len;
8990
8991         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8992         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8993 }
8994
8995 #ifdef CONFIG_MODULES
8996 static void trace_module_add_evals(struct module *mod)
8997 {
8998         if (!mod->num_trace_evals)
8999                 return;
9000
9001         /*
9002          * Modules with bad taint do not have events created, do
9003          * not bother with enums either.
9004          */
9005         if (trace_module_has_bad_taint(mod))
9006                 return;
9007
9008         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9009 }
9010
9011 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9012 static void trace_module_remove_evals(struct module *mod)
9013 {
9014         union trace_eval_map_item *map;
9015         union trace_eval_map_item **last = &trace_eval_maps;
9016
9017         if (!mod->num_trace_evals)
9018                 return;
9019
9020         mutex_lock(&trace_eval_mutex);
9021
9022         map = trace_eval_maps;
9023
9024         while (map) {
9025                 if (map->head.mod == mod)
9026                         break;
9027                 map = trace_eval_jmp_to_tail(map);
9028                 last = &map->tail.next;
9029                 map = map->tail.next;
9030         }
9031         if (!map)
9032                 goto out;
9033
9034         *last = trace_eval_jmp_to_tail(map)->tail.next;
9035         kfree(map);
9036  out:
9037         mutex_unlock(&trace_eval_mutex);
9038 }
9039 #else
9040 static inline void trace_module_remove_evals(struct module *mod) { }
9041 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9042
9043 static int trace_module_notify(struct notifier_block *self,
9044                                unsigned long val, void *data)
9045 {
9046         struct module *mod = data;
9047
9048         switch (val) {
9049         case MODULE_STATE_COMING:
9050                 trace_module_add_evals(mod);
9051                 break;
9052         case MODULE_STATE_GOING:
9053                 trace_module_remove_evals(mod);
9054                 break;
9055         }
9056
9057         return 0;
9058 }
9059
9060 static struct notifier_block trace_module_nb = {
9061         .notifier_call = trace_module_notify,
9062         .priority = 0,
9063 };
9064 #endif /* CONFIG_MODULES */
9065
9066 static __init int tracer_init_tracefs(void)
9067 {
9068         struct dentry *d_tracer;
9069
9070         trace_access_lock_init();
9071
9072         d_tracer = tracing_init_dentry();
9073         if (IS_ERR(d_tracer))
9074                 return 0;
9075
9076         event_trace_init();
9077
9078         init_tracer_tracefs(&global_trace, d_tracer);
9079         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9080
9081         trace_create_file("tracing_thresh", 0644, d_tracer,
9082                         &global_trace, &tracing_thresh_fops);
9083
9084         trace_create_file("README", 0444, d_tracer,
9085                         NULL, &tracing_readme_fops);
9086
9087         trace_create_file("saved_cmdlines", 0444, d_tracer,
9088                         NULL, &tracing_saved_cmdlines_fops);
9089
9090         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9091                           NULL, &tracing_saved_cmdlines_size_fops);
9092
9093         trace_create_file("saved_tgids", 0444, d_tracer,
9094                         NULL, &tracing_saved_tgids_fops);
9095
9096         trace_eval_init();
9097
9098         trace_create_eval_file(d_tracer);
9099
9100 #ifdef CONFIG_MODULES
9101         register_module_notifier(&trace_module_nb);
9102 #endif
9103
9104 #ifdef CONFIG_DYNAMIC_FTRACE
9105         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9106                         NULL, &tracing_dyn_info_fops);
9107 #endif
9108
9109         create_trace_instances(d_tracer);
9110
9111         update_tracer_options(&global_trace);
9112
9113         return 0;
9114 }
9115
9116 static int trace_panic_handler(struct notifier_block *this,
9117                                unsigned long event, void *unused)
9118 {
9119         if (ftrace_dump_on_oops)
9120                 ftrace_dump(ftrace_dump_on_oops);
9121         return NOTIFY_OK;
9122 }
9123
9124 static struct notifier_block trace_panic_notifier = {
9125         .notifier_call  = trace_panic_handler,
9126         .next           = NULL,
9127         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9128 };
9129
9130 static int trace_die_handler(struct notifier_block *self,
9131                              unsigned long val,
9132                              void *data)
9133 {
9134         switch (val) {
9135         case DIE_OOPS:
9136                 if (ftrace_dump_on_oops)
9137                         ftrace_dump(ftrace_dump_on_oops);
9138                 break;
9139         default:
9140                 break;
9141         }
9142         return NOTIFY_OK;
9143 }
9144
9145 static struct notifier_block trace_die_notifier = {
9146         .notifier_call = trace_die_handler,
9147         .priority = 200
9148 };
9149
9150 /*
9151  * The printk buffer is limited to 1024 bytes; we really don't need it
9152  * that big. Nothing should be printing 1000 characters anyway.
9153  */
9154 #define TRACE_MAX_PRINT         1000
9155
9156 /*
9157  * Define here KERN_TRACE so that we have one place to modify
9158  * it if we decide to change what log level the ftrace dump
9159  * should be at.
9160  */
9161 #define KERN_TRACE              KERN_EMERG
9162
9163 void
9164 trace_printk_seq(struct trace_seq *s)
9165 {
9166         /* Probably should print a warning here. */
9167         if (s->seq.len >= TRACE_MAX_PRINT)
9168                 s->seq.len = TRACE_MAX_PRINT;
9169
9170         /*
9171          * More paranoid code. Although the buffer size is set to
9172          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9173          * an extra layer of protection.
9174          */
9175         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9176                 s->seq.len = s->seq.size - 1;
9177
9178         /* Should already be NUL-terminated, but we are paranoid. */
9179         s->buffer[s->seq.len] = 0;
9180
9181         printk(KERN_TRACE "%s", s->buffer);
9182
9183         trace_seq_init(s);
9184 }
9185
9186 void trace_init_global_iter(struct trace_iterator *iter)
9187 {
9188         iter->tr = &global_trace;
9189         iter->trace = iter->tr->current_trace;
9190         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9191         iter->array_buffer = &global_trace.array_buffer;
9192
9193         if (iter->trace && iter->trace->open)
9194                 iter->trace->open(iter);
9195
9196         /* Annotate start of buffers if we had overruns */
9197         if (ring_buffer_overruns(iter->array_buffer->buffer))
9198                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9199
9200         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9201         if (trace_clocks[iter->tr->clock_id].in_ns)
9202                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9203 }
9204
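/*
 * Dump the ftrace ring buffer(s) to the console. This is normally reached
 * from the panic/die notifiers above when ftrace_dump_on_oops is set, or
 * via sysrq-z. Tracing is turned off first so that the dump itself does
 * not generate more events.
 */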
9205 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9206 {
9207         /* use static because iter can be a bit big for the stack */
9208         static struct trace_iterator iter;
9209         static atomic_t dump_running;
9210         struct trace_array *tr = &global_trace;
9211         unsigned int old_userobj;
9212         unsigned long flags;
9213         int cnt = 0, cpu;
9214
9215         /* Only allow one dump user at a time. */
9216         if (atomic_inc_return(&dump_running) != 1) {
9217                 atomic_dec(&dump_running);
9218                 return;
9219         }
9220
9221         /*
9222          * Always turn off tracing when we dump.
9223          * We don't need to show trace output of what happens
9224          * between multiple crashes.
9225          *
9226          * If the user does a sysrq-z, then they can re-enable
9227          * tracing with echo 1 > tracing_on.
9228          */
9229         tracing_off();
9230
9231         local_irq_save(flags);
9232         printk_nmi_direct_enter();
9233
9234         /* Simulate the iterator */
9235         trace_init_global_iter(&iter);
9236         /* Can not use kmalloc for iter.temp */
9237         iter.temp = static_temp_buf;
9238         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9239
9240         for_each_tracing_cpu(cpu) {
9241                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9242         }
9243
9244         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9245
9246         /* don't look at user memory in panic mode */
9247         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9248
9249         switch (oops_dump_mode) {
9250         case DUMP_ALL:
9251                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9252                 break;
9253         case DUMP_ORIG:
9254                 iter.cpu_file = raw_smp_processor_id();
9255                 break;
9256         case DUMP_NONE:
9257                 goto out_enable;
9258         default:
9259                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9260                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9261         }
9262
9263         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9264
9265         /* Did function tracer already get disabled? */
9266         if (ftrace_is_dead()) {
9267                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9268                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9269         }
9270
9271         /*
9272          * We need to stop all tracing on all CPUs to read
9273          * the next buffer. This is a bit expensive, but is
9274          * not done often. We fill in all that we can read,
9275          * and then release the locks again.
9276          */
9277
9278         while (!trace_empty(&iter)) {
9279
9280                 if (!cnt)
9281                         printk(KERN_TRACE "---------------------------------\n");
9282
9283                 cnt++;
9284
9285                 trace_iterator_reset(&iter);
9286                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9287
9288                 if (trace_find_next_entry_inc(&iter) != NULL) {
9289                         int ret;
9290
9291                         ret = print_trace_line(&iter);
9292                         if (ret != TRACE_TYPE_NO_CONSUME)
9293                                 trace_consume(&iter);
9294                 }
9295                 touch_nmi_watchdog();
9296
9297                 trace_printk_seq(&iter.seq);
9298         }
9299
9300         if (!cnt)
9301                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9302         else
9303                 printk(KERN_TRACE "---------------------------------\n");
9304
9305  out_enable:
9306         tr->trace_flags |= old_userobj;
9307
9308         for_each_tracing_cpu(cpu) {
9309                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9310         }
9311         atomic_dec(&dump_running);
9312         printk_nmi_direct_exit();
9313         local_irq_restore(flags);
9314 }
9315 EXPORT_SYMBOL_GPL(ftrace_dump);
9316
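/*
 * Split @buf into an argc/argv pair with argv_split() and hand it to
 * @createfn. An empty or whitespace-only command is silently accepted.
 */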
9317 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9318 {
9319         char **argv;
9320         int argc, ret;
9321
9322         argc = 0;
9323         ret = 0;
9324         argv = argv_split(GFP_KERNEL, buf, &argc);
9325         if (!argv)
9326                 return -ENOMEM;
9327
9328         if (argc)
9329                 ret = createfn(argc, argv);
9330
9331         argv_free(argv);
9332
9333         return ret;
9334 }
9335
9336 #define WRITE_BUFSIZE  4096
9337
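/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split it
 * on newlines, strip '#' comments, and run each resulting line through
 * trace_run_command()/@createfn. Used by interfaces that accept multi-line
 * input, e.g. the kprobe_events and uprobe_events files.
 */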
9338 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9339                                 size_t count, loff_t *ppos,
9340                                 int (*createfn)(int, char **))
9341 {
9342         char *kbuf, *buf, *tmp;
9343         int ret = 0;
9344         size_t done = 0;
9345         size_t size;
9346
9347         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9348         if (!kbuf)
9349                 return -ENOMEM;
9350
9351         while (done < count) {
9352                 size = count - done;
9353
9354                 if (size >= WRITE_BUFSIZE)
9355                         size = WRITE_BUFSIZE - 1;
9356
9357                 if (copy_from_user(kbuf, buffer + done, size)) {
9358                         ret = -EFAULT;
9359                         goto out;
9360                 }
9361                 kbuf[size] = '\0';
9362                 buf = kbuf;
9363                 do {
9364                         tmp = strchr(buf, '\n');
9365                         if (tmp) {
9366                                 *tmp = '\0';
9367                                 size = tmp - buf + 1;
9368                         } else {
9369                                 size = strlen(buf);
9370                                 if (done + size < count) {
9371                                         if (buf != kbuf)
9372                                                 break;
9373                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9374                                         pr_warn("Line length is too long: Should be less than %d\n",
9375                                                 WRITE_BUFSIZE - 2);
9376                                         ret = -EINVAL;
9377                                         goto out;
9378                                 }
9379                         }
9380                         done += size;
9381
9382                         /* Remove comments */
9383                         tmp = strchr(buf, '#');
9384
9385                         if (tmp)
9386                                 *tmp = '\0';
9387
9388                         ret = trace_run_command(buf, createfn);
9389                         if (ret)
9390                                 goto out;
9391                         buf += size;
9392
9393                 } while (done < count);
9394         }
9395         ret = done;
9396
9397 out:
9398         kfree(kbuf);
9399
9400         return ret;
9401 }
9402
9403 __init static int tracer_alloc_buffers(void)
9404 {
9405         int ring_buf_size;
9406         int ret = -ENOMEM;
9407
9408
9409         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9410                 pr_warn("Tracing disabled due to lockdown\n");
9411                 return -EPERM;
9412         }
9413
9414         /*
9415          * Make sure we don't accidentally add more trace options
9416          * than we have bits for.
9417          */
9418         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9419
9420         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9421                 goto out;
9422
9423         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9424                 goto out_free_buffer_mask;
9425
9426         /* Only allocate trace_printk buffers if a trace_printk exists */
9427         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9428                 /* Must be called before global_trace.buffer is allocated */
9429                 trace_printk_init_buffers();
9430
9431         /* To save memory, keep the ring buffer size to its minimum */
9432         if (ring_buffer_expanded)
9433                 ring_buf_size = trace_buf_size;
9434         else
9435                 ring_buf_size = 1;
9436
9437         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9438         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9439
9440         raw_spin_lock_init(&global_trace.start_lock);
9441
9442         /*
9443          * The prepare callback allocates some memory for the ring buffer. We
9444          * don't free the buffer if the CPU goes down. If we were to free
9445          * the buffer, then the user would lose any trace that was in the
9446          * buffer. The memory will be removed once the "instance" is removed.
9447          */
9448         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9449                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9450                                       NULL);
9451         if (ret < 0)
9452                 goto out_free_cpumask;
9453         /* Used for event triggers */
9454         ret = -ENOMEM;
9455         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9456         if (!temp_buffer)
9457                 goto out_rm_hp_state;
9458
9459         if (trace_create_savedcmd() < 0)
9460                 goto out_free_temp_buffer;
9461
9462         /* TODO: make the number of buffers hot pluggable with CPUS */
9463         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9464                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9465                 goto out_free_savedcmd;
9466         }
9467
9468         if (global_trace.buffer_disabled)
9469                 tracing_off();
9470
9471         if (trace_boot_clock) {
9472                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9473                 if (ret < 0)
9474                         pr_warn("Trace clock %s not defined, going back to default\n",
9475                                 trace_boot_clock);
9476         }
9477
9478         /*
9479          * register_tracer() might reference current_trace, so it
9480          * needs to be set before we register anything. This is
9481          * just a bootstrap of current_trace anyway.
9482          */
9483         global_trace.current_trace = &nop_trace;
9484
9485         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9486
9487         ftrace_init_global_array_ops(&global_trace);
9488
9489         init_trace_flags_index(&global_trace);
9490
9491         register_tracer(&nop_trace);
9492
9493         /* Function tracing may start here (via kernel command line) */
9494         init_function_trace();
9495
9496         /* All seems OK, enable tracing */
9497         tracing_disabled = 0;
9498
9499         atomic_notifier_chain_register(&panic_notifier_list,
9500                                        &trace_panic_notifier);
9501
9502         register_die_notifier(&trace_die_notifier);
9503
9504         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9505
9506         INIT_LIST_HEAD(&global_trace.systems);
9507         INIT_LIST_HEAD(&global_trace.events);
9508         INIT_LIST_HEAD(&global_trace.hist_vars);
9509         INIT_LIST_HEAD(&global_trace.err_log);
9510         list_add(&global_trace.list, &ftrace_trace_arrays);
9511
9512         apply_trace_boot_options();
9513
9514         register_snapshot_cmd();
9515
9516         return 0;
9517
9518 out_free_savedcmd:
9519         free_saved_cmdlines_buffer(savedcmd);
9520 out_free_temp_buffer:
9521         ring_buffer_free(temp_buffer);
9522 out_rm_hp_state:
9523         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9524 out_free_cpumask:
9525         free_cpumask_var(global_trace.tracing_cpumask);
9526 out_free_buffer_mask:
9527         free_cpumask_var(tracing_buffer_mask);
9528 out:
9529         return ret;
9530 }
9531
9532 void __init early_trace_init(void)
9533 {
9534         if (tracepoint_printk) {
9535                 tracepoint_print_iter =
9536                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9537                 if (MEM_FAIL(!tracepoint_print_iter,
9538                              "Failed to allocate trace iterator\n"))
9539                         tracepoint_printk = 0;
9540                 else
9541                         static_key_enable(&tracepoint_printk_key.key);
9542         }
9543         tracer_alloc_buffers();
9544 }
9545
9546 void __init trace_init(void)
9547 {
9548         trace_event_init();
9549 }
9550
9551 __init static int clear_boot_tracer(void)
9552 {
9553         /*
9554          * The default bootup tracer name is stored in an init section
9555          * that is freed after boot. This function is called at
9556          * late_initcall time; if the boot tracer was never registered,
9557          * clear the pointer out to prevent a later registration from
9558          * accessing memory that is about to be freed.
9559          */
9560         if (!default_bootup_tracer)
9561                 return 0;
9562
9563         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9564                default_bootup_tracer);
9565         default_bootup_tracer = NULL;
9566
9567         return 0;
9568 }
9569
9570 fs_initcall(tracer_init_tracefs);
9571 late_initcall_sync(clear_boot_tracer);
9572
9573 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9574 __init static int tracing_set_default_clock(void)
9575 {
9576         /* sched_clock_stable() is determined in late_initcall */
9577         if (!trace_boot_clock && !sched_clock_stable()) {
9578                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9579                         pr_warn("Can not set tracing clock due to lockdown\n");
9580                         return -EPERM;
9581                 }
9582
9583                 printk(KERN_WARNING
9584                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9585                        "If you want to keep using the local clock, then add:\n"
9586                        "  \"trace_clock=local\"\n"
9587                        "on the kernel command line\n");
9588                 tracing_set_clock(&global_trace, "global");
9589         }
9590
9591         return 0;
9592 }
9593 late_initcall_sync(tracing_set_default_clock);
9594 #endif