kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk(), could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will be set to zero if the initialization
95  * of the tracer is successful. That is the only place that sets
96  * it back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155
156 static union trace_eval_map_item *trace_eval_maps;
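
/*
 * Illustrative layout of one saved array, inferred from the comment above
 * (a sketch, not upstream documentation), for N saved maps:
 *
 *	item[0]      head:  length = N, mod = owning module (or NULL if built in)
 *	item[1..N]   map:   the trace_eval_map entries themselves
 *	item[N+1]    tail:  next = pointer to the next saved array, or NULL
 */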
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
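
/*
 * For example (sketch): ns2usecs(1499) == 1 and ns2usecs(1500) == 2;
 * the "+ 500" above rounds to the nearest microsecond before dividing.
 */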
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
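
/*
 * Concrete example of the rules above (a sketch, not upstream text):
 * if pid 100 is in @pid_list and forks child 101, the fork hook calls this
 * with self == task(100) and task == task(101), so bit 101 gets set; when
 * 101 exits, the exit hook calls this with self == NULL and bit 101 is
 * cleared.
 */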
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid already is +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
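
/*
 * Example of the "+1" encoding (a sketch): pid 0 is returned to seq_file as
 * (void *)1 so it cannot be confused with the NULL end-of-iteration value;
 * trace_pid_show() below undoes this by printing (unsigned long)v - 1.
 */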
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate a new array. The write is an all or nothing
492          * operation. Always create a new array when adding new pids by
493          * the user. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 ret = -EINVAL;
534                 if (kstrtoul(parser.buffer, 0, &val))
535                         break;
536                 if (val >= pid_list->pid_max)
537                         break;
538
539                 pid = (pid_t)val;
540
541                 set_bit(pid, pid_list->pids);
542                 nr_pids++;
543
544                 trace_parser_clear(&parser);
545                 ret = 0;
546         }
547         trace_parser_put(&parser);
548
549         if (ret < 0) {
550                 trace_free_pid_list(pid_list);
551                 return ret;
552         }
553
554         if (!nr_pids) {
555                 /* Cleared the list of pids */
556                 trace_free_pid_list(pid_list);
557                 read = ret;
558                 pid_list = NULL;
559         }
560
561         *new_pid_list = pid_list;
562
563         return read;
564 }
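
/*
 * Rough walk-through of a write of "100 200\n" (a sketch based only on the
 * code above): a new bitmap sized to pid_max is allocated, any pids already
 * in @filtered_pids are copied over, bits 100 and 200 are set from the
 * parsed input, and the new list is returned through @new_pid_list;
 * swapping it in and freeing the old list is left to the caller.
 */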
565
566 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
567 {
568         u64 ts;
569
570         /* Early boot up does not have a buffer yet */
571         if (!buf->buffer)
572                 return trace_clock_local();
573
574         ts = ring_buffer_time_stamp(buf->buffer, cpu);
575         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
576
577         return ts;
578 }
579
580 u64 ftrace_now(int cpu)
581 {
582         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
583 }
584
585 /**
586  * tracing_is_enabled - Show if global_trace has been disabled
587  *
588  * Shows if the global trace has been enabled or not. It uses the
589  * mirror flag "buffer_disabled" so it can be used in fast paths such as
590  * the irqsoff tracer. But it may be inaccurate due to races. If you
591  * need to know the accurate state, use tracing_is_on() which is a little
592  * slower, but accurate.
593  */
594 int tracing_is_enabled(void)
595 {
596         /*
597          * For quick access (irqsoff uses this in fast path), just
598          * return the mirror variable of the state of the ring buffer.
599          * It's a little racy, but we don't really care.
600          */
601         smp_rmb();
602         return !global_trace.buffer_disabled;
603 }
604
605 /*
606  * trace_buf_size is the size in bytes that is allocated
607  * for a buffer. Note, the number of bytes is always rounded
608  * to page size.
609  *
610  * This number is purposely set to a low number of 16384.
611  * If the dump on oops happens, it will be much appreciated
612  * not to have to wait for all that output. Anyway, this is
613  * configurable at both boot time and run time.
614  */
615 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
616
617 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
618
619 /* trace_types holds a link list of available tracers. */
620 static struct tracer            *trace_types __read_mostly;
621
622 /*
623  * trace_types_lock is used to protect the trace_types list.
624  */
625 DEFINE_MUTEX(trace_types_lock);
626
627 /*
628  * serialize the access of the ring buffer
629  *
630  * The ring buffer serializes readers, but that is only low-level protection.
631  * The validity of the events (returned by ring_buffer_peek(), etc.)
632  * is not protected by the ring buffer.
633  *
634  * The content of events may become garbage if we allow another process to
635  * consume these events concurrently:
636  *   A) the page of the consumed events may become a normal page
637  *      (not a reader page) in the ring buffer, and this page will be
638  *      rewritten by the event producer.
639  *   B) The page of the consumed events may become a page for splice_read,
640  *      and this page will be returned to the system.
641  *
642  * These primitives allow multiple processes to access different per-CPU
643  * ring buffers concurrently.
644  *
645  * These primitives don't distinguish between read-only and read-consume
646  * access. Multiple read-only accesses are also serialized.
647  */
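
/*
 * Typical reader pattern implied by the above (an illustrative sketch, not a
 * specific caller in this file):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	... use the event only while the lock is held ...
 *	trace_access_unlock(cpu);
 */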
648
649 #ifdef CONFIG_SMP
650 static DECLARE_RWSEM(all_cpu_access_lock);
651 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
652
653 static inline void trace_access_lock(int cpu)
654 {
655         if (cpu == RING_BUFFER_ALL_CPUS) {
656                 /* gain it for accessing the whole ring buffer. */
657                 down_write(&all_cpu_access_lock);
658         } else {
659                 /* gain it for accessing a cpu ring buffer. */
660
661                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
662                 down_read(&all_cpu_access_lock);
663
664                 /* Secondly block other access to this @cpu ring buffer. */
665                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
666         }
667 }
668
669 static inline void trace_access_unlock(int cpu)
670 {
671         if (cpu == RING_BUFFER_ALL_CPUS) {
672                 up_write(&all_cpu_access_lock);
673         } else {
674                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
675                 up_read(&all_cpu_access_lock);
676         }
677 }
678
679 static inline void trace_access_lock_init(void)
680 {
681         int cpu;
682
683         for_each_possible_cpu(cpu)
684                 mutex_init(&per_cpu(cpu_access_lock, cpu));
685 }
686
687 #else
688
689 static DEFINE_MUTEX(access_lock);
690
691 static inline void trace_access_lock(int cpu)
692 {
693         (void)cpu;
694         mutex_lock(&access_lock);
695 }
696
697 static inline void trace_access_unlock(int cpu)
698 {
699         (void)cpu;
700         mutex_unlock(&access_lock);
701 }
702
703 static inline void trace_access_lock_init(void)
704 {
705 }
706
707 #endif
708
709 #ifdef CONFIG_STACKTRACE
710 static void __ftrace_trace_stack(struct ring_buffer *buffer,
711                                  unsigned long flags,
712                                  int skip, int pc, struct pt_regs *regs);
713 static inline void ftrace_trace_stack(struct trace_array *tr,
714                                       struct ring_buffer *buffer,
715                                       unsigned long flags,
716                                       int skip, int pc, struct pt_regs *regs);
717
718 #else
719 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
720                                         unsigned long flags,
721                                         int skip, int pc, struct pt_regs *regs)
722 {
723 }
724 static inline void ftrace_trace_stack(struct trace_array *tr,
725                                       struct ring_buffer *buffer,
726                                       unsigned long flags,
727                                       int skip, int pc, struct pt_regs *regs)
728 {
729 }
730
731 #endif
732
733 static __always_inline void
734 trace_event_setup(struct ring_buffer_event *event,
735                   int type, unsigned long flags, int pc)
736 {
737         struct trace_entry *ent = ring_buffer_event_data(event);
738
739         tracing_generic_entry_update(ent, flags, pc);
740         ent->type = type;
741 }
742
743 static __always_inline struct ring_buffer_event *
744 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
745                           int type,
746                           unsigned long len,
747                           unsigned long flags, int pc)
748 {
749         struct ring_buffer_event *event;
750
751         event = ring_buffer_lock_reserve(buffer, len);
752         if (event != NULL)
753                 trace_event_setup(event, type, flags, pc);
754
755         return event;
756 }
757
758 void tracer_tracing_on(struct trace_array *tr)
759 {
760         if (tr->trace_buffer.buffer)
761                 ring_buffer_record_on(tr->trace_buffer.buffer);
762         /*
763          * This flag is looked at when buffers haven't been allocated
764          * yet, or by some tracers (like irqsoff) that just want to
765          * know if the ring buffer has been disabled, but can handle
766          * races where it gets disabled while we still do a record.
767          * As the check is in the fast path of the tracers, it is more
768          * important to be fast than accurate.
769          */
770         tr->buffer_disabled = 0;
771         /* Make the flag seen by readers */
772         smp_wmb();
773 }
774
775 /**
776  * tracing_on - enable tracing buffers
777  *
778  * This function enables tracing buffers that may have been
779  * disabled with tracing_off.
780  */
781 void tracing_on(void)
782 {
783         tracer_tracing_on(&global_trace);
784 }
785 EXPORT_SYMBOL_GPL(tracing_on);
786
787
788 static __always_inline void
789 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
790 {
791         __this_cpu_write(trace_taskinfo_save, true);
792
793         /* If this is the temp buffer, we need to commit fully */
794         if (this_cpu_read(trace_buffered_event) == event) {
795                 /* Length is in event->array[0] */
796                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
797                 /* Release the temp buffer */
798                 this_cpu_dec(trace_buffered_event_cnt);
799         } else
800                 ring_buffer_unlock_commit(buffer, event);
801 }
802
803 /**
804  * __trace_puts - write a constant string into the trace buffer.
805  * @ip:    The address of the caller
806  * @str:   The constant string to write
807  * @size:  The size of the string.
808  */
809 int __trace_puts(unsigned long ip, const char *str, int size)
810 {
811         struct ring_buffer_event *event;
812         struct ring_buffer *buffer;
813         struct print_entry *entry;
814         unsigned long irq_flags;
815         int alloc;
816         int pc;
817
818         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
819                 return 0;
820
821         pc = preempt_count();
822
823         if (unlikely(tracing_selftest_running || tracing_disabled))
824                 return 0;
825
826         alloc = sizeof(*entry) + size + 2; /* possible \n added */
827
828         local_save_flags(irq_flags);
829         buffer = global_trace.trace_buffer.buffer;
830         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
831                                             irq_flags, pc);
832         if (!event)
833                 return 0;
834
835         entry = ring_buffer_event_data(event);
836         entry->ip = ip;
837
838         memcpy(&entry->buf, str, size);
839
840         /* Add a newline if necessary */
841         if (entry->buf[size - 1] != '\n') {
842                 entry->buf[size] = '\n';
843                 entry->buf[size + 1] = '\0';
844         } else
845                 entry->buf[size] = '\0';
846
847         __buffer_unlock_commit(buffer, event);
848         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
849
850         return size;
851 }
852 EXPORT_SYMBOL_GPL(__trace_puts);
853
854 /**
855  * __trace_bputs - write the pointer to a constant string into trace buffer
856  * @ip:    The address of the caller
857  * @str:   The constant string to write into the buffer
858  */
859 int __trace_bputs(unsigned long ip, const char *str)
860 {
861         struct ring_buffer_event *event;
862         struct ring_buffer *buffer;
863         struct bputs_entry *entry;
864         unsigned long irq_flags;
865         int size = sizeof(struct bputs_entry);
866         int pc;
867
868         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
869                 return 0;
870
871         pc = preempt_count();
872
873         if (unlikely(tracing_selftest_running || tracing_disabled))
874                 return 0;
875
876         local_save_flags(irq_flags);
877         buffer = global_trace.trace_buffer.buffer;
878         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
879                                             irq_flags, pc);
880         if (!event)
881                 return 0;
882
883         entry = ring_buffer_event_data(event);
884         entry->ip                       = ip;
885         entry->str                      = str;
886
887         __buffer_unlock_commit(buffer, event);
888         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
889
890         return 1;
891 }
892 EXPORT_SYMBOL_GPL(__trace_bputs);
893
894 #ifdef CONFIG_TRACER_SNAPSHOT
895 static void tracing_snapshot_instance(struct trace_array *tr)
896 {
897         struct tracer *tracer = tr->current_trace;
898         unsigned long flags;
899
900         if (in_nmi()) {
901                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
902                 internal_trace_puts("*** snapshot is being ignored        ***\n");
903                 return;
904         }
905
906         if (!tr->allocated_snapshot) {
907                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
908                 internal_trace_puts("*** stopping trace here!   ***\n");
909                 tracing_off();
910                 return;
911         }
912
913         /* Note, snapshot can not be used when the tracer uses it */
914         if (tracer->use_max_tr) {
915                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
916                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
917                 return;
918         }
919
920         local_irq_save(flags);
921         update_max_tr(tr, current, smp_processor_id());
922         local_irq_restore(flags);
923 }
924
925 /**
926  * tracing_snapshot - take a snapshot of the current buffer.
927  *
928  * This causes a swap between the snapshot buffer and the current live
929  * tracing buffer. You can use this to take snapshots of the live
930  * trace when some condition is triggered, but continue to trace.
931  *
932  * Note, make sure to allocate the snapshot with either
933  * a tracing_snapshot_alloc(), or by doing it manually
934  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
935  *
936  * If the snapshot buffer is not allocated, it will stop tracing.
937  * Basically making a permanent snapshot.
938  */
939 void tracing_snapshot(void)
940 {
941         struct trace_array *tr = &global_trace;
942
943         tracing_snapshot_instance(tr);
944 }
945 EXPORT_SYMBOL_GPL(tracing_snapshot);
946
947 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
948                                         struct trace_buffer *size_buf, int cpu_id);
949 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
950
951 static int alloc_snapshot(struct trace_array *tr)
952 {
953         int ret;
954
955         if (!tr->allocated_snapshot) {
956
957                 /* allocate spare buffer */
958                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
959                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
960                 if (ret < 0)
961                         return ret;
962
963                 tr->allocated_snapshot = true;
964         }
965
966         return 0;
967 }
968
969 static void free_snapshot(struct trace_array *tr)
970 {
971         /*
972          * We don't free the ring buffer; instead, we resize it because
973          * the max_tr ring buffer has some state (e.g. ring->clock) and
974          * we want to preserve it.
975          */
976         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
977         set_buffer_entries(&tr->max_buffer, 1);
978         tracing_reset_online_cpus(&tr->max_buffer);
979         tr->allocated_snapshot = false;
980 }
981
982 /**
983  * tracing_alloc_snapshot - allocate snapshot buffer.
984  *
985  * This only allocates the snapshot buffer if it isn't already
986  * allocated - it doesn't also take a snapshot.
987  *
988  * This is meant to be used in cases where the snapshot buffer needs
989  * to be set up for events that can't sleep but need to be able to
990  * trigger a snapshot.
991  */
992 int tracing_alloc_snapshot(void)
993 {
994         struct trace_array *tr = &global_trace;
995         int ret;
996
997         ret = alloc_snapshot(tr);
998         WARN_ON(ret < 0);
999
1000         return ret;
1001 }
1002 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1003
1004 /**
1005  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1006  *
1007  * This is similar to tracing_snapshot(), but it will allocate the
1008  * snapshot buffer if it isn't already allocated. Use this only
1009  * where it is safe to sleep, as the allocation may sleep.
1010  *
1011  * This causes a swap between the snapshot buffer and the current live
1012  * tracing buffer. You can use this to take snapshots of the live
1013  * trace when some condition is triggered, but continue to trace.
1014  */
1015 void tracing_snapshot_alloc(void)
1016 {
1017         int ret;
1018
1019         ret = tracing_alloc_snapshot();
1020         if (ret < 0)
1021                 return;
1022
1023         tracing_snapshot();
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1026 #else
1027 void tracing_snapshot(void)
1028 {
1029         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1030 }
1031 EXPORT_SYMBOL_GPL(tracing_snapshot);
1032 int tracing_alloc_snapshot(void)
1033 {
1034         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1035         return -ENODEV;
1036 }
1037 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1038 void tracing_snapshot_alloc(void)
1039 {
1040         /* Give warning */
1041         tracing_snapshot();
1042 }
1043 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1044 #endif /* CONFIG_TRACER_SNAPSHOT */
1045
1046 void tracer_tracing_off(struct trace_array *tr)
1047 {
1048         if (tr->trace_buffer.buffer)
1049                 ring_buffer_record_off(tr->trace_buffer.buffer);
1050         /*
1051          * This flag is looked at when buffers haven't been allocated
1052          * yet, or by some tracers (like irqsoff) that just want to
1053          * know if the ring buffer has been disabled, but can handle
1054          * races where it gets disabled while we still do a record.
1055          * As the check is in the fast path of the tracers, it is more
1056          * important to be fast than accurate.
1057          */
1058         tr->buffer_disabled = 1;
1059         /* Make the flag seen by readers */
1060         smp_wmb();
1061 }
1062
1063 /**
1064  * tracing_off - turn off tracing buffers
1065  *
1066  * This function stops the tracing buffers from recording data.
1067  * It does not disable any overhead the tracers themselves may
1068  * be causing. This function simply causes all recording to
1069  * the ring buffers to fail.
1070  */
1071 void tracing_off(void)
1072 {
1073         tracer_tracing_off(&global_trace);
1074 }
1075 EXPORT_SYMBOL_GPL(tracing_off);
1076
1077 void disable_trace_on_warning(void)
1078 {
1079         if (__disable_trace_on_warning)
1080                 tracing_off();
1081 }
1082
1083 /**
1084  * tracer_tracing_is_on - show the real state of the ring buffer
1085  * @tr: the trace array whose ring buffer state to check
1086  *
1087  * Shows the real state of the ring buffer: whether it is enabled or not.
1088  */
1089 int tracer_tracing_is_on(struct trace_array *tr)
1090 {
1091         if (tr->trace_buffer.buffer)
1092                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1093         return !tr->buffer_disabled;
1094 }
1095
1096 /**
1097  * tracing_is_on - show state of ring buffers enabled
1098  */
1099 int tracing_is_on(void)
1100 {
1101         return tracer_tracing_is_on(&global_trace);
1102 }
1103 EXPORT_SYMBOL_GPL(tracing_is_on);
1104
1105 static int __init set_buf_size(char *str)
1106 {
1107         unsigned long buf_size;
1108
1109         if (!str)
1110                 return 0;
1111         buf_size = memparse(str, &str);
1112         /* nr_entries can not be zero */
1113         if (buf_size == 0)
1114                 return 0;
1115         trace_buf_size = buf_size;
1116         return 1;
1117 }
1118 __setup("trace_buf_size=", set_buf_size);
1119
1120 static int __init set_tracing_thresh(char *str)
1121 {
1122         unsigned long threshold;
1123         int ret;
1124
1125         if (!str)
1126                 return 0;
1127         ret = kstrtoul(str, 0, &threshold);
1128         if (ret < 0)
1129                 return 0;
1130         tracing_thresh = threshold * 1000;
1131         return 1;
1132 }
1133 __setup("tracing_thresh=", set_tracing_thresh);
1134
1135 unsigned long nsecs_to_usecs(unsigned long nsecs)
1136 {
1137         return nsecs / 1000;
1138 }
1139
1140 /*
1141  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1142  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1143  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1144  * of strings in the order that the evals (enum) were defined.
1145  */
1146 #undef C
1147 #define C(a, b) b
1148
1149 /* These must match the bit positions in trace_iterator_flags */
1150 static const char *trace_options[] = {
1151         TRACE_FLAGS
1152         NULL
1153 };
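
/*
 * Sketch of how the C() trick expands (a hypothetical TRACE_FLAGS fragment,
 * not the real list):
 *
 *	#define TRACE_FLAGS C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * With "#define C(a, b) b" the array above becomes
 *	{ "print-parent", "sym-offset", NULL }
 * while trace.h redefines C() differently to build the matching
 * TRACE_ITER_* bit definitions from the same list.
 */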
1154
1155 static struct {
1156         u64 (*func)(void);
1157         const char *name;
1158         int in_ns;              /* is this clock in nanoseconds? */
1159 } trace_clocks[] = {
1160         { trace_clock_local,            "local",        1 },
1161         { trace_clock_global,           "global",       1 },
1162         { trace_clock_counter,          "counter",      0 },
1163         { trace_clock_jiffies,          "uptime",       0 },
1164         { trace_clock,                  "perf",         1 },
1165         { ktime_get_mono_fast_ns,       "mono",         1 },
1166         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1167         { ktime_get_boot_fast_ns,       "boot",         1 },
1168         ARCH_TRACE_CLOCKS
1169 };
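
/*
 * The "name" strings above are what user space selects, e.g. (assumed usage,
 * mirroring the trace_clock= boot parameter handled earlier):
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 */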
1170
1171 /*
1172  * trace_parser_get_init - gets the buffer for trace parser
1173  */
1174 int trace_parser_get_init(struct trace_parser *parser, int size)
1175 {
1176         memset(parser, 0, sizeof(*parser));
1177
1178         parser->buffer = kmalloc(size, GFP_KERNEL);
1179         if (!parser->buffer)
1180                 return 1;
1181
1182         parser->size = size;
1183         return 0;
1184 }
1185
1186 /*
1187  * trace_parser_put - frees the buffer for trace parser
1188  */
1189 void trace_parser_put(struct trace_parser *parser)
1190 {
1191         kfree(parser->buffer);
1192         parser->buffer = NULL;
1193 }
1194
1195 /*
1196  * trace_get_user - reads the user input string separated by space
1197  * (matched by isspace(ch))
1198  *
1199  * For each string found the 'struct trace_parser' is updated,
1200  * and the function returns.
1201  *
1202  * Returns number of bytes read.
1203  *
1204  * See kernel/trace/trace.h for 'struct trace_parser' details.
1205  */
1206 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1207         size_t cnt, loff_t *ppos)
1208 {
1209         char ch;
1210         size_t read = 0;
1211         ssize_t ret;
1212
1213         if (!*ppos)
1214                 trace_parser_clear(parser);
1215
1216         ret = get_user(ch, ubuf++);
1217         if (ret)
1218                 goto out;
1219
1220         read++;
1221         cnt--;
1222
1223         /*
1224          * The parser is not finished with the last write,
1225          * continue reading the user input without skipping spaces.
1226          */
1227         if (!parser->cont) {
1228                 /* skip white space */
1229                 while (cnt && isspace(ch)) {
1230                         ret = get_user(ch, ubuf++);
1231                         if (ret)
1232                                 goto out;
1233                         read++;
1234                         cnt--;
1235                 }
1236
1237                 parser->idx = 0;
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch) || !ch) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245         }
1246
1247         /* read the non-space input */
1248         while (cnt && !isspace(ch) && ch) {
1249                 if (parser->idx < parser->size - 1)
1250                         parser->buffer[parser->idx++] = ch;
1251                 else {
1252                         ret = -EINVAL;
1253                         goto out;
1254                 }
1255                 ret = get_user(ch, ubuf++);
1256                 if (ret)
1257                         goto out;
1258                 read++;
1259                 cnt--;
1260         }
1261
1262         /* We either got finished input or we have to wait for another call. */
1263         if (isspace(ch) || !ch) {
1264                 parser->buffer[parser->idx] = 0;
1265                 parser->cont = false;
1266         } else if (parser->idx < parser->size - 1) {
1267                 parser->cont = true;
1268                 parser->buffer[parser->idx++] = ch;
1269                 /* Make sure the parsed string always terminates with '\0'. */
1270                 parser->buffer[parser->idx] = 0;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
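
/*
 * Example (a sketch): a single write of "sched_switch sched_wakeup\n" is
 * consumed one token per call from a caller that loops; the first call fills
 * parser->buffer with "sched_switch" and returns the bytes consumed, the
 * next call skips the separating space and returns "sched_wakeup".
 */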
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type - the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
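
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * built-in tracer plugin hooks into register_tracer(). The example_*
 * names are hypothetical; real tracers do the same from their own
 * __init code.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init register_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(register_example_tracer);
 */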
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 if (!tr->clear_trace)
1706                         continue;
1707                 tr->clear_trace = false;
1708                 tracing_reset_online_cpus(&tr->trace_buffer);
1709 #ifdef CONFIG_TRACER_MAX_TRACE
1710                 tracing_reset_online_cpus(&tr->max_buffer);
1711 #endif
1712         }
1713 }
1714
1715 static int *tgid_map;
1716
1717 #define SAVED_CMDLINES_DEFAULT 128
1718 #define NO_CMDLINE_MAP UINT_MAX
1719 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1720 struct saved_cmdlines_buffer {
1721         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1722         unsigned *map_cmdline_to_pid;
1723         unsigned cmdline_num;
1724         int cmdline_idx;
1725         char *saved_cmdlines;
1726 };
1727 static struct saved_cmdlines_buffer *savedcmd;
1728
1729 /* temporary disable recording */
1730 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1731
1732 static inline char *get_saved_cmdlines(int idx)
1733 {
1734         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1735 }
1736
1737 static inline void set_cmdline(int idx, const char *cmdline)
1738 {
1739         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1740 }
1741
1742 static int allocate_cmdlines_buffer(unsigned int val,
1743                                     struct saved_cmdlines_buffer *s)
1744 {
1745         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1746                                         GFP_KERNEL);
1747         if (!s->map_cmdline_to_pid)
1748                 return -ENOMEM;
1749
1750         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1751         if (!s->saved_cmdlines) {
1752                 kfree(s->map_cmdline_to_pid);
1753                 return -ENOMEM;
1754         }
1755
1756         s->cmdline_idx = 0;
1757         s->cmdline_num = val;
1758         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1759                sizeof(s->map_pid_to_cmdline));
1760         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1761                val * sizeof(*s->map_cmdline_to_pid));
1762
1763         return 0;
1764 }
1765
1766 static int trace_create_savedcmd(void)
1767 {
1768         int ret;
1769
1770         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1771         if (!savedcmd)
1772                 return -ENOMEM;
1773
1774         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1775         if (ret < 0) {
1776                 kfree(savedcmd);
1777                 savedcmd = NULL;
1778                 return -ENOMEM;
1779         }
1780
1781         return 0;
1782 }
1783
1784 int is_tracing_stopped(void)
1785 {
1786         return global_trace.stop_count;
1787 }
1788
1789 /**
1790  * tracing_start - quick start of the tracer
1791  *
1792  * If tracing is enabled but was stopped by tracing_stop,
1793  * this will start the tracer back up.
1794  */
1795 void tracing_start(void)
1796 {
1797         struct ring_buffer *buffer;
1798         unsigned long flags;
1799
1800         if (tracing_disabled)
1801                 return;
1802
1803         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1804         if (--global_trace.stop_count) {
1805                 if (global_trace.stop_count < 0) {
1806                         /* Someone screwed up their debugging */
1807                         WARN_ON_ONCE(1);
1808                         global_trace.stop_count = 0;
1809                 }
1810                 goto out;
1811         }
1812
1813         /* Prevent the buffers from switching */
1814         arch_spin_lock(&global_trace.max_lock);
1815
1816         buffer = global_trace.trace_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819
1820 #ifdef CONFIG_TRACER_MAX_TRACE
1821         buffer = global_trace.max_buffer.buffer;
1822         if (buffer)
1823                 ring_buffer_record_enable(buffer);
1824 #endif
1825
1826         arch_spin_unlock(&global_trace.max_lock);
1827
1828  out:
1829         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1830 }
1831
1832 static void tracing_start_tr(struct trace_array *tr)
1833 {
1834         struct ring_buffer *buffer;
1835         unsigned long flags;
1836
1837         if (tracing_disabled)
1838                 return;
1839
1840         /* If global, we need to also start the max tracer */
1841         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1842                 return tracing_start();
1843
1844         raw_spin_lock_irqsave(&tr->start_lock, flags);
1845
1846         if (--tr->stop_count) {
1847                 if (tr->stop_count < 0) {
1848                         /* Someone screwed up their debugging */
1849                         WARN_ON_ONCE(1);
1850                         tr->stop_count = 0;
1851                 }
1852                 goto out;
1853         }
1854
1855         buffer = tr->trace_buffer.buffer;
1856         if (buffer)
1857                 ring_buffer_record_enable(buffer);
1858
1859  out:
1860         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1861 }
1862
1863 /**
1864  * tracing_stop - quick stop of the tracer
1865  *
1866  * Light weight way to stop tracing. Use in conjunction with
1867  * tracing_start.
1868  */
1869 void tracing_stop(void)
1870 {
1871         struct ring_buffer *buffer;
1872         unsigned long flags;
1873
1874         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1875         if (global_trace.stop_count++)
1876                 goto out;
1877
1878         /* Prevent the buffers from switching */
1879         arch_spin_lock(&global_trace.max_lock);
1880
1881         buffer = global_trace.trace_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884
1885 #ifdef CONFIG_TRACER_MAX_TRACE
1886         buffer = global_trace.max_buffer.buffer;
1887         if (buffer)
1888                 ring_buffer_record_disable(buffer);
1889 #endif
1890
1891         arch_spin_unlock(&global_trace.max_lock);
1892
1893  out:
1894         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1895 }
1896
1897 static void tracing_stop_tr(struct trace_array *tr)
1898 {
1899         struct ring_buffer *buffer;
1900         unsigned long flags;
1901
1902         /* If global, we need to also stop the max tracer */
1903         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1904                 return tracing_stop();
1905
1906         raw_spin_lock_irqsave(&tr->start_lock, flags);
1907         if (tr->stop_count++)
1908                 goto out;
1909
1910         buffer = tr->trace_buffer.buffer;
1911         if (buffer)
1912                 ring_buffer_record_disable(buffer);
1913
1914  out:
1915         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1916 }
1917
1918 static int trace_save_cmdline(struct task_struct *tsk)
1919 {
1920         unsigned pid, idx;
1921
1922         /* treat recording of idle task as a success */
1923         if (!tsk->pid)
1924                 return 1;
1925
1926         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1927                 return 0;
1928
1929         /*
1930          * It's not the end of the world if we don't get
1931          * the lock, but we also don't want to spin
1932          * nor do we want to disable interrupts,
1933          * so if we miss here, then better luck next time.
1934          */
1935         if (!arch_spin_trylock(&trace_cmdline_lock))
1936                 return 0;
1937
1938         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1939         if (idx == NO_CMDLINE_MAP) {
1940                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1941
1942                 /*
1943                  * Check whether the cmdline buffer at idx has a pid
1944                  * mapped. We are going to overwrite that entry so we
1945                  * need to clear the map_pid_to_cmdline. Otherwise we
1946                  * would read the new comm for the old pid.
1947                  */
1948                 pid = savedcmd->map_cmdline_to_pid[idx];
1949                 if (pid != NO_CMDLINE_MAP)
1950                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1951
1952                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1953                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1954
1955                 savedcmd->cmdline_idx = idx;
1956         }
1957
1958         set_cmdline(idx, tsk->comm);
1959
1960         arch_spin_unlock(&trace_cmdline_lock);
1961
1962         return 1;
1963 }
1964
1965 static void __trace_find_cmdline(int pid, char comm[])
1966 {
1967         unsigned map;
1968
1969         if (!pid) {
1970                 strcpy(comm, "<idle>");
1971                 return;
1972         }
1973
1974         if (WARN_ON_ONCE(pid < 0)) {
1975                 strcpy(comm, "<XXX>");
1976                 return;
1977         }
1978
1979         if (pid > PID_MAX_DEFAULT) {
1980                 strcpy(comm, "<...>");
1981                 return;
1982         }
1983
1984         map = savedcmd->map_pid_to_cmdline[pid];
1985         if (map != NO_CMDLINE_MAP)
1986                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1987         else
1988                 strcpy(comm, "<...>");
1989 }
1990
1991 void trace_find_cmdline(int pid, char comm[])
1992 {
1993         preempt_disable();
1994         arch_spin_lock(&trace_cmdline_lock);
1995
1996         __trace_find_cmdline(pid, comm);
1997
1998         arch_spin_unlock(&trace_cmdline_lock);
1999         preempt_enable();
2000 }
2001
2002 int trace_find_tgid(int pid)
2003 {
2004         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2005                 return 0;
2006
2007         return tgid_map[pid];
2008 }
2009
2010 static int trace_save_tgid(struct task_struct *tsk)
2011 {
2012         /* treat recording of idle task as a success */
2013         if (!tsk->pid)
2014                 return 1;
2015
2016         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2017                 return 0;
2018
2019         tgid_map[tsk->pid] = tsk->tgid;
2020         return 1;
2021 }
2022
2023 static bool tracing_record_taskinfo_skip(int flags)
2024 {
2025         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2026                 return true;
2027         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2028                 return true;
2029         if (!__this_cpu_read(trace_taskinfo_save))
2030                 return true;
2031         return false;
2032 }
2033
2034 /**
2035  * tracing_record_taskinfo - record the task info of a task
2036  *
2037  * @task:  task to record
2038  * @flags: TRACE_RECORD_CMDLINE for recording comm
2039  *         TRACE_RECORD_TGID for recording tgid
2040  */
2041 void tracing_record_taskinfo(struct task_struct *task, int flags)
2042 {
2043         bool done;
2044
2045         if (tracing_record_taskinfo_skip(flags))
2046                 return;
2047
2048         /*
2049          * Record as much task information as possible. If some fail, continue
2050          * to try to record the others.
2051          */
2052         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2053         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2054
2055         /* If recording any information failed, try again soon. */
2056         if (!done)
2057                 return;
2058
2059         __this_cpu_write(trace_taskinfo_save, false);
2060 }
2061
2062 /**
2063  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2064  *
2065  * @prev:  previous task during sched_switch
2066  * @next:  next task during sched_switch
2067  * @flags: TRACE_RECORD_CMDLINE for recording comm
2068  *         TRACE_RECORD_TGID for recording tgid
2069  */
2070 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2071                                           struct task_struct *next, int flags)
2072 {
2073         bool done;
2074
2075         if (tracing_record_taskinfo_skip(flags))
2076                 return;
2077
2078         /*
2079          * Record as much task information as possible. If some fail, continue
2080          * to try to record the others.
2081          */
2082         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2083         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2084         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2085         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2086
2087         /* If recording any information failed, try again soon. */
2088         if (!done)
2089                 return;
2090
2091         __this_cpu_write(trace_taskinfo_save, false);
2092 }
2093
2094 /* Helpers to record a specific task information */
2095 void tracing_record_cmdline(struct task_struct *task)
2096 {
2097         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2098 }
2099
2100 void tracing_record_tgid(struct task_struct *task)
2101 {
2102         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2103 }
2104
2105 /*
2106  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2107  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2108  * simplifies those functions and keeps them in sync.
2109  */
2110 enum print_line_t trace_handle_return(struct trace_seq *s)
2111 {
2112         return trace_seq_has_overflowed(s) ?
2113                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2114 }
2115 EXPORT_SYMBOL_GPL(trace_handle_return);
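
/*
 * Illustrative sketch (not part of the original file): a trace_event
 * output callback typically builds its line in iter->seq and returns
 * through trace_handle_return(), so a full trace_seq is reported as
 * TRACE_TYPE_PARTIAL_LINE. The example_* name is hypothetical.
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: cpu=%d\n", iter->cpu);
 *
 *		return trace_handle_return(&iter->seq);
 *	}
 */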
2116
2117 void
2118 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2119                              int pc)
2120 {
2121         struct task_struct *tsk = current;
2122
2123         entry->preempt_count            = pc & 0xff;
2124         entry->pid                      = (tsk) ? tsk->pid : 0;
2125         entry->flags =
2126 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2127                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2128 #else
2129                 TRACE_FLAG_IRQS_NOSUPPORT |
2130 #endif
2131                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2132                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2133                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2134                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2135                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2136 }
2137 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2138
2139 struct ring_buffer_event *
2140 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2141                           int type,
2142                           unsigned long len,
2143                           unsigned long flags, int pc)
2144 {
2145         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2146 }
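
/*
 * Illustrative sketch (not part of the original file): the reserve/fill/
 * commit pattern this wrapper supports. trace_function() further below
 * does the equivalent with the internal __trace_buffer_lock_reserve()
 * plus filter checks; this minimal variant commits without a stack dump.
 *
 *	struct ring_buffer_event *event;
 *	struct ftrace_entry *entry;
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip	 = ip;
 *	entry->parent_ip = parent_ip;
 *	trace_buffer_unlock_commit_nostack(buffer, event);
 */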
2147
2148 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2149 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2150 static int trace_buffered_event_ref;
2151
2152 /**
2153  * trace_buffered_event_enable - enable buffering events
2154  *
2155  * When events are being filtered, it is quicker to use a temporary
2156  * buffer to write the event data into if there's a likely chance
2157  * that it will not be committed. Discarding an event already reserved
2158  * in the ring buffer is not as fast as committing it, and is much
2159  * slower than copying the data in and committing that.
2160  *
2161  * When an event is to be filtered, allocate per cpu buffers to
2162  * write the event data into. If the event is then filtered and
2163  * discarded, it is simply dropped; otherwise the entire data is
2164  * committed in one shot.
2165  */
2166 void trace_buffered_event_enable(void)
2167 {
2168         struct ring_buffer_event *event;
2169         struct page *page;
2170         int cpu;
2171
2172         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2173
2174         if (trace_buffered_event_ref++)
2175                 return;
2176
2177         for_each_tracing_cpu(cpu) {
2178                 page = alloc_pages_node(cpu_to_node(cpu),
2179                                         GFP_KERNEL | __GFP_NORETRY, 0);
2180                 if (!page)
2181                         goto failed;
2182
2183                 event = page_address(page);
2184                 memset(event, 0, sizeof(*event));
2185
2186                 per_cpu(trace_buffered_event, cpu) = event;
2187
2188                 preempt_disable();
2189                 if (cpu == smp_processor_id() &&
2190                     this_cpu_read(trace_buffered_event) !=
2191                     per_cpu(trace_buffered_event, cpu))
2192                         WARN_ON_ONCE(1);
2193                 preempt_enable();
2194         }
2195
2196         return;
2197  failed:
2198         trace_buffered_event_disable();
2199 }
2200
2201 static void enable_trace_buffered_event(void *data)
2202 {
2203         /* Probably not needed, but do it anyway */
2204         smp_rmb();
2205         this_cpu_dec(trace_buffered_event_cnt);
2206 }
2207
2208 static void disable_trace_buffered_event(void *data)
2209 {
2210         this_cpu_inc(trace_buffered_event_cnt);
2211 }
2212
2213 /**
2214  * trace_buffered_event_disable - disable buffering events
2215  *
2216  * When a filter is removed, it is faster to not use the buffered
2217  * events, and to commit directly into the ring buffer. Free up
2218  * the temp buffers when there are no more users. This requires
2219  * special synchronization with current events.
2220  */
2221 void trace_buffered_event_disable(void)
2222 {
2223         int cpu;
2224
2225         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2226
2227         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2228                 return;
2229
2230         if (--trace_buffered_event_ref)
2231                 return;
2232
2233         preempt_disable();
2234         /* For each CPU, set the buffer as used. */
2235         smp_call_function_many(tracing_buffer_mask,
2236                                disable_trace_buffered_event, NULL, 1);
2237         preempt_enable();
2238
2239         /* Wait for all current users to finish */
2240         synchronize_sched();
2241
2242         for_each_tracing_cpu(cpu) {
2243                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2244                 per_cpu(trace_buffered_event, cpu) = NULL;
2245         }
2246         /*
2247          * Make sure trace_buffered_event is NULL before clearing
2248          * trace_buffered_event_cnt.
2249          */
2250         smp_wmb();
2251
2252         preempt_disable();
2253         /* Do the work on each cpu */
2254         smp_call_function_many(tracing_buffer_mask,
2255                                enable_trace_buffered_event, NULL, 1);
2256         preempt_enable();
2257 }
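
/*
 * Illustrative sketch (not part of the original file): callers pair the
 * two functions above under event_mutex, for example around attaching
 * and later removing an event filter, so the per-cpu buffers exist only
 * while at least one user needs them.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	mutex_lock(&event_mutex);
 *	... remove the filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */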
2258
2259 static struct ring_buffer *temp_buffer;
2260
2261 struct ring_buffer_event *
2262 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2263                           struct trace_event_file *trace_file,
2264                           int type, unsigned long len,
2265                           unsigned long flags, int pc)
2266 {
2267         struct ring_buffer_event *entry;
2268         int val;
2269
2270         *current_rb = trace_file->tr->trace_buffer.buffer;
2271
2272         if ((trace_file->flags &
2273              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2274             (entry = this_cpu_read(trace_buffered_event))) {
2275                 /* Try to use the per cpu buffer first */
2276                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2277                 if (val == 1) {
2278                         trace_event_setup(entry, type, flags, pc);
2279                         entry->array[0] = len;
2280                         return entry;
2281                 }
2282                 this_cpu_dec(trace_buffered_event_cnt);
2283         }
2284
2285         entry = __trace_buffer_lock_reserve(*current_rb,
2286                                             type, len, flags, pc);
2287         /*
2288          * If tracing is off, but we have triggers enabled,
2289          * we still need to look at the event data. Use the temp_buffer
2290          * to store the trace event for the trigger to use. It's recursion
2291          * safe and will not be recorded anywhere.
2292          */
2293         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2294                 *current_rb = temp_buffer;
2295                 entry = __trace_buffer_lock_reserve(*current_rb,
2296                                                     type, len, flags, pc);
2297         }
2298         return entry;
2299 }
2300 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2301
2302 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2303 static DEFINE_MUTEX(tracepoint_printk_mutex);
2304
2305 static void output_printk(struct trace_event_buffer *fbuffer)
2306 {
2307         struct trace_event_call *event_call;
2308         struct trace_event *event;
2309         unsigned long flags;
2310         struct trace_iterator *iter = tracepoint_print_iter;
2311
2312         /* We should never get here with a NULL iter */
2313         if (WARN_ON_ONCE(!iter))
2314                 return;
2315
2316         event_call = fbuffer->trace_file->event_call;
2317         if (!event_call || !event_call->event.funcs ||
2318             !event_call->event.funcs->trace)
2319                 return;
2320
2321         event = &fbuffer->trace_file->event_call->event;
2322
2323         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2324         trace_seq_init(&iter->seq);
2325         iter->ent = fbuffer->entry;
2326         event_call->event.funcs->trace(iter, 0, event);
2327         trace_seq_putc(&iter->seq, 0);
2328         printk("%s", iter->seq.buffer);
2329
2330         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2331 }
2332
2333 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2334                              void __user *buffer, size_t *lenp,
2335                              loff_t *ppos)
2336 {
2337         int save_tracepoint_printk;
2338         int ret;
2339
2340         mutex_lock(&tracepoint_printk_mutex);
2341         save_tracepoint_printk = tracepoint_printk;
2342
2343         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2344
2345         /*
2346          * This will force exiting early, as tracepoint_printk
2347          * is always zero when tracepoint_print_iter is not allocated.
2348          */
2349         if (!tracepoint_print_iter)
2350                 tracepoint_printk = 0;
2351
2352         if (save_tracepoint_printk == tracepoint_printk)
2353                 goto out;
2354
2355         if (tracepoint_printk)
2356                 static_key_enable(&tracepoint_printk_key.key);
2357         else
2358                 static_key_disable(&tracepoint_printk_key.key);
2359
2360  out:
2361         mutex_unlock(&tracepoint_printk_mutex);
2362
2363         return ret;
2364 }
2365
2366 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2367 {
2368         if (static_key_false(&tracepoint_printk_key.key))
2369                 output_printk(fbuffer);
2370
2371         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2372                                     fbuffer->event, fbuffer->entry,
2373                                     fbuffer->flags, fbuffer->pc);
2374 }
2375 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2376
2377 /*
2378  * Skip 3:
2379  *
2380  *   trace_buffer_unlock_commit_regs()
2381  *   trace_event_buffer_commit()
2382  *   trace_event_raw_event_xxx()
2383 */
2384 # define STACK_SKIP 3
2385
2386 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2387                                      struct ring_buffer *buffer,
2388                                      struct ring_buffer_event *event,
2389                                      unsigned long flags, int pc,
2390                                      struct pt_regs *regs)
2391 {
2392         __buffer_unlock_commit(buffer, event);
2393
2394         /*
2395          * If regs is not set, then skip the necessary functions.
2396          * Note, we can still get here via blktrace, wakeup tracer
2397          * and mmiotrace, but that's ok if they lose a function or
2398          * two. They are not that meaningful.
2399          */
2400         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2401         ftrace_trace_userstack(buffer, flags, pc);
2402 }
2403
2404 /*
2405  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2406  */
2407 void
2408 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2409                                    struct ring_buffer_event *event)
2410 {
2411         __buffer_unlock_commit(buffer, event);
2412 }
2413
2414 static void
2415 trace_process_export(struct trace_export *export,
2416                struct ring_buffer_event *event)
2417 {
2418         struct trace_entry *entry;
2419         unsigned int size = 0;
2420
2421         entry = ring_buffer_event_data(event);
2422         size = ring_buffer_event_length(event);
2423         export->write(export, entry, size);
2424 }
2425
2426 static DEFINE_MUTEX(ftrace_export_lock);
2427
2428 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2429
2430 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2431
2432 static inline void ftrace_exports_enable(void)
2433 {
2434         static_branch_enable(&ftrace_exports_enabled);
2435 }
2436
2437 static inline void ftrace_exports_disable(void)
2438 {
2439         static_branch_disable(&ftrace_exports_enabled);
2440 }
2441
2442 void ftrace_exports(struct ring_buffer_event *event)
2443 {
2444         struct trace_export *export;
2445
2446         preempt_disable_notrace();
2447
2448         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2449         while (export) {
2450                 trace_process_export(export, event);
2451                 export = rcu_dereference_raw_notrace(export->next);
2452         }
2453
2454         preempt_enable_notrace();
2455 }
2456
2457 static inline void
2458 add_trace_export(struct trace_export **list, struct trace_export *export)
2459 {
2460         rcu_assign_pointer(export->next, *list);
2461         /*
2462          * We are adding export to the list, but another
2463          * CPU might be walking that list. We need to make sure
2464          * the export->next pointer is valid before another CPU sees
2465          * the export pointer included in the list.
2466          */
2467         rcu_assign_pointer(*list, export);
2468 }
2469
2470 static inline int
2471 rm_trace_export(struct trace_export **list, struct trace_export *export)
2472 {
2473         struct trace_export **p;
2474
2475         for (p = list; *p != NULL; p = &(*p)->next)
2476                 if (*p == export)
2477                         break;
2478
2479         if (*p != export)
2480                 return -1;
2481
2482         rcu_assign_pointer(*p, (*p)->next);
2483
2484         return 0;
2485 }
2486
2487 static inline void
2488 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2489 {
2490         if (*list == NULL)
2491                 ftrace_exports_enable();
2492
2493         add_trace_export(list, export);
2494 }
2495
2496 static inline int
2497 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2498 {
2499         int ret;
2500
2501         ret = rm_trace_export(list, export);
2502         if (*list == NULL)
2503                 ftrace_exports_disable();
2504
2505         return ret;
2506 }
2507
2508 int register_ftrace_export(struct trace_export *export)
2509 {
2510         if (WARN_ON_ONCE(!export->write))
2511                 return -1;
2512
2513         mutex_lock(&ftrace_export_lock);
2514
2515         add_ftrace_export(&ftrace_exports_list, export);
2516
2517         mutex_unlock(&ftrace_export_lock);
2518
2519         return 0;
2520 }
2521 EXPORT_SYMBOL_GPL(register_ftrace_export);
2522
2523 int unregister_ftrace_export(struct trace_export *export)
2524 {
2525         int ret;
2526
2527         mutex_lock(&ftrace_export_lock);
2528
2529         ret = rm_ftrace_export(&ftrace_exports_list, export);
2530
2531         mutex_unlock(&ftrace_export_lock);
2532
2533         return ret;
2534 }
2535 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
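
/*
 * Illustrative sketch (not part of the original file): a minimal ftrace
 * export client. The example_* names are hypothetical; the only required
 * callback is ->write(), which receives the raw binary trace entry.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		... push the binary entry to an out-of-band channel ...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */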
2536
2537 void
2538 trace_function(struct trace_array *tr,
2539                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2540                int pc)
2541 {
2542         struct trace_event_call *call = &event_function;
2543         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2544         struct ring_buffer_event *event;
2545         struct ftrace_entry *entry;
2546
2547         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2548                                             flags, pc);
2549         if (!event)
2550                 return;
2551         entry   = ring_buffer_event_data(event);
2552         entry->ip                       = ip;
2553         entry->parent_ip                = parent_ip;
2554
2555         if (!call_filter_check_discard(call, entry, buffer, event)) {
2556                 if (static_branch_unlikely(&ftrace_exports_enabled))
2557                         ftrace_exports(event);
2558                 __buffer_unlock_commit(buffer, event);
2559         }
2560 }
2561
2562 #ifdef CONFIG_STACKTRACE
2563
2564 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2565 struct ftrace_stack {
2566         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2567 };
2568
2569 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2570 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2571
2572 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2573                                  unsigned long flags,
2574                                  int skip, int pc, struct pt_regs *regs)
2575 {
2576         struct trace_event_call *call = &event_kernel_stack;
2577         struct ring_buffer_event *event;
2578         struct stack_entry *entry;
2579         struct stack_trace trace;
2580         int use_stack;
2581         int size = FTRACE_STACK_ENTRIES;
2582
2583         trace.nr_entries        = 0;
2584         trace.skip              = skip;
2585
2586         /*
2587          * Add one, for this function and the call to save_stack_trace().
2588          * If regs is set, then these functions will not be in the way.
2589          */
2590 #ifndef CONFIG_UNWINDER_ORC
2591         if (!regs)
2592                 trace.skip++;
2593 #endif
2594
2595         /*
2596          * Since events can happen in NMIs there's no safe way to
2597          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2598          * or NMI comes in, it will just have to use the default
2599          * FTRACE_STACK_ENTRIES.
2600          */
2601         preempt_disable_notrace();
2602
2603         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2604         /*
2605          * We don't need any atomic variables, just a barrier.
2606          * If an interrupt comes in, we don't care, because it would
2607          * have exited and put the counter back to what we want.
2608          * We just need a barrier to keep gcc from moving things
2609          * around.
2610          */
2611         barrier();
2612         if (use_stack == 1) {
2613                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2614                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2615
2616                 if (regs)
2617                         save_stack_trace_regs(regs, &trace);
2618                 else
2619                         save_stack_trace(&trace);
2620
2621                 if (trace.nr_entries > size)
2622                         size = trace.nr_entries;
2623         } else
2624                 /* From now on, use_stack is a boolean */
2625                 use_stack = 0;
2626
2627         size *= sizeof(unsigned long);
2628
2629         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2630                                             sizeof(*entry) + size, flags, pc);
2631         if (!event)
2632                 goto out;
2633         entry = ring_buffer_event_data(event);
2634
2635         memset(&entry->caller, 0, size);
2636
2637         if (use_stack)
2638                 memcpy(&entry->caller, trace.entries,
2639                        trace.nr_entries * sizeof(unsigned long));
2640         else {
2641                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2642                 trace.entries           = entry->caller;
2643                 if (regs)
2644                         save_stack_trace_regs(regs, &trace);
2645                 else
2646                         save_stack_trace(&trace);
2647         }
2648
2649         entry->size = trace.nr_entries;
2650
2651         if (!call_filter_check_discard(call, entry, buffer, event))
2652                 __buffer_unlock_commit(buffer, event);
2653
2654  out:
2655         /* Again, don't let gcc optimize things here */
2656         barrier();
2657         __this_cpu_dec(ftrace_stack_reserve);
2658         preempt_enable_notrace();
2659
2660 }
2661
2662 static inline void ftrace_trace_stack(struct trace_array *tr,
2663                                       struct ring_buffer *buffer,
2664                                       unsigned long flags,
2665                                       int skip, int pc, struct pt_regs *regs)
2666 {
2667         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2668                 return;
2669
2670         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2671 }
2672
2673 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2674                    int pc)
2675 {
2676         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2677
2678         if (rcu_is_watching()) {
2679                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2680                 return;
2681         }
2682
2683         /*
2684          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2685          * but if the above rcu_is_watching() failed, then the NMI
2686          * triggered someplace critical, and rcu_irq_enter() should
2687          * not be called from NMI.
2688          */
2689         if (unlikely(in_nmi()))
2690                 return;
2691
2692         rcu_irq_enter_irqson();
2693         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2694         rcu_irq_exit_irqson();
2695 }
2696
2697 /**
2698  * trace_dump_stack - record a stack back trace in the trace buffer
2699  * @skip: Number of functions to skip (helper handlers)
2700  */
2701 void trace_dump_stack(int skip)
2702 {
2703         unsigned long flags;
2704
2705         if (tracing_disabled || tracing_selftest_running)
2706                 return;
2707
2708         local_save_flags(flags);
2709
2710 #ifndef CONFIG_UNWINDER_ORC
2711         /* Skip 1 to skip this function. */
2712         skip++;
2713 #endif
2714         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2715                              flags, skip, preempt_count(), NULL);
2716 }
2717
2718 static DEFINE_PER_CPU(int, user_stack_count);
2719
2720 void
2721 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2722 {
2723         struct trace_event_call *call = &event_user_stack;
2724         struct ring_buffer_event *event;
2725         struct userstack_entry *entry;
2726         struct stack_trace trace;
2727
2728         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2729                 return;
2730
2731         /*
2732          * NMIs can not handle page faults, even with fixups.
2733          * Saving the user stack can (and often does) fault.
2734          */
2735         if (unlikely(in_nmi()))
2736                 return;
2737
2738         /*
2739          * prevent recursion, since the user stack tracing may
2740          * trigger other kernel events.
2741          */
2742         preempt_disable();
2743         if (__this_cpu_read(user_stack_count))
2744                 goto out;
2745
2746         __this_cpu_inc(user_stack_count);
2747
2748         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2749                                             sizeof(*entry), flags, pc);
2750         if (!event)
2751                 goto out_drop_count;
2752         entry   = ring_buffer_event_data(event);
2753
2754         entry->tgid             = current->tgid;
2755         memset(&entry->caller, 0, sizeof(entry->caller));
2756
2757         trace.nr_entries        = 0;
2758         trace.max_entries       = FTRACE_STACK_ENTRIES;
2759         trace.skip              = 0;
2760         trace.entries           = entry->caller;
2761
2762         save_stack_trace_user(&trace);
2763         if (!call_filter_check_discard(call, entry, buffer, event))
2764                 __buffer_unlock_commit(buffer, event);
2765
2766  out_drop_count:
2767         __this_cpu_dec(user_stack_count);
2768  out:
2769         preempt_enable();
2770 }
2771
2772 #ifdef UNUSED
2773 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2774 {
2775         ftrace_trace_userstack(tr, flags, preempt_count());
2776 }
2777 #endif /* UNUSED */
2778
2779 #endif /* CONFIG_STACKTRACE */
2780
2781 /* created for use with alloc_percpu */
2782 struct trace_buffer_struct {
2783         int nesting;
2784         char buffer[4][TRACE_BUF_SIZE];
2785 };
2786
2787 static struct trace_buffer_struct *trace_percpu_buffer;
2788
2789 /*
2790  * This allows for lockless recording.  If we're nested too deeply, then
2791  * this returns NULL.
2792  */
2793 static char *get_trace_buf(void)
2794 {
2795         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2796
2797         if (!buffer || buffer->nesting >= 4)
2798                 return NULL;
2799
2800         buffer->nesting++;
2801
2802         /* Interrupts must see nesting incremented before we use the buffer */
2803         barrier();
2804         return &buffer->buffer[buffer->nesting][0];
2805 }
2806
2807 static void put_trace_buf(void)
2808 {
2809         /* Don't let the decrement of nesting leak before this */
2810         barrier();
2811         this_cpu_dec(trace_percpu_buffer->nesting);
2812 }
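
/*
 * Illustrative sketch (not part of the original file): the pattern the
 * trace_printk() paths below follow. Preemption must stay disabled so
 * that the per-cpu buffer and its nesting count belong to one CPU.
 *
 *	char *tbuf;
 *
 *	preempt_disable_notrace();
 *	tbuf = get_trace_buf();
 *	if (tbuf) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */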
2813
2814 static int alloc_percpu_trace_buffer(void)
2815 {
2816         struct trace_buffer_struct *buffers;
2817
2818         buffers = alloc_percpu(struct trace_buffer_struct);
2819         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2820                 return -ENOMEM;
2821
2822         trace_percpu_buffer = buffers;
2823         return 0;
2824 }
2825
2826 static int buffers_allocated;
2827
2828 void trace_printk_init_buffers(void)
2829 {
2830         if (buffers_allocated)
2831                 return;
2832
2833         if (alloc_percpu_trace_buffer())
2834                 return;
2835
2836         /* trace_printk() is for debug use only. Don't use it in production. */
2837
2838         pr_warn("\n");
2839         pr_warn("**********************************************************\n");
2840         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2841         pr_warn("**                                                      **\n");
2842         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2843         pr_warn("**                                                      **\n");
2844         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2845         pr_warn("** unsafe for production use.                           **\n");
2846         pr_warn("**                                                      **\n");
2847         pr_warn("** If you see this message and you are not debugging    **\n");
2848         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2849         pr_warn("**                                                      **\n");
2850         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2851         pr_warn("**********************************************************\n");
2852
2853         /* Expand the buffers to set size */
2854         tracing_update_buffers();
2855
2856         buffers_allocated = 1;
2857
2858         /*
2859          * trace_printk_init_buffers() can be called by modules.
2860          * If that happens, then we need to start cmdline recording
2861          * directly here. If global_trace.trace_buffer.buffer is already
2862          * allocated here, then this was called by module code.
2863          */
2864         if (global_trace.trace_buffer.buffer)
2865                 tracing_start_cmdline_record();
2866 }
2867
2868 void trace_printk_start_comm(void)
2869 {
2870         /* Start tracing comms if trace printk is set */
2871         if (!buffers_allocated)
2872                 return;
2873         tracing_start_cmdline_record();
2874 }
2875
2876 static void trace_printk_start_stop_comm(int enabled)
2877 {
2878         if (!buffers_allocated)
2879                 return;
2880
2881         if (enabled)
2882                 tracing_start_cmdline_record();
2883         else
2884                 tracing_stop_cmdline_record();
2885 }
2886
2887 /**
2888  * trace_vbprintk - write binary msg to tracing buffer
2889  *
2890  */
2891 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2892 {
2893         struct trace_event_call *call = &event_bprint;
2894         struct ring_buffer_event *event;
2895         struct ring_buffer *buffer;
2896         struct trace_array *tr = &global_trace;
2897         struct bprint_entry *entry;
2898         unsigned long flags;
2899         char *tbuffer;
2900         int len = 0, size, pc;
2901
2902         if (unlikely(tracing_selftest_running || tracing_disabled))
2903                 return 0;
2904
2905         /* Don't pollute graph traces with trace_vprintk internals */
2906         pause_graph_tracing();
2907
2908         pc = preempt_count();
2909         preempt_disable_notrace();
2910
2911         tbuffer = get_trace_buf();
2912         if (!tbuffer) {
2913                 len = 0;
2914                 goto out_nobuffer;
2915         }
2916
2917         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2918
2919         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2920                 goto out;
2921
2922         local_save_flags(flags);
2923         size = sizeof(*entry) + sizeof(u32) * len;
2924         buffer = tr->trace_buffer.buffer;
2925         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2926                                             flags, pc);
2927         if (!event)
2928                 goto out;
2929         entry = ring_buffer_event_data(event);
2930         entry->ip                       = ip;
2931         entry->fmt                      = fmt;
2932
2933         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2934         if (!call_filter_check_discard(call, entry, buffer, event)) {
2935                 __buffer_unlock_commit(buffer, event);
2936                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2937         }
2938
2939 out:
2940         put_trace_buf();
2941
2942 out_nobuffer:
2943         preempt_enable_notrace();
2944         unpause_graph_tracing();
2945
2946         return len;
2947 }
2948 EXPORT_SYMBOL_GPL(trace_vbprintk);
2949
2950 static int
2951 __trace_array_vprintk(struct ring_buffer *buffer,
2952                       unsigned long ip, const char *fmt, va_list args)
2953 {
2954         struct trace_event_call *call = &event_print;
2955         struct ring_buffer_event *event;
2956         int len = 0, size, pc;
2957         struct print_entry *entry;
2958         unsigned long flags;
2959         char *tbuffer;
2960
2961         if (tracing_disabled || tracing_selftest_running)
2962                 return 0;
2963
2964         /* Don't pollute graph traces with trace_vprintk internals */
2965         pause_graph_tracing();
2966
2967         pc = preempt_count();
2968         preempt_disable_notrace();
2969
2970
2971         tbuffer = get_trace_buf();
2972         if (!tbuffer) {
2973                 len = 0;
2974                 goto out_nobuffer;
2975         }
2976
2977         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2978
2979         local_save_flags(flags);
2980         size = sizeof(*entry) + len + 1;
2981         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2982                                             flags, pc);
2983         if (!event)
2984                 goto out;
2985         entry = ring_buffer_event_data(event);
2986         entry->ip = ip;
2987
2988         memcpy(&entry->buf, tbuffer, len + 1);
2989         if (!call_filter_check_discard(call, entry, buffer, event)) {
2990                 __buffer_unlock_commit(buffer, event);
2991                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2992         }
2993
2994 out:
2995         put_trace_buf();
2996
2997 out_nobuffer:
2998         preempt_enable_notrace();
2999         unpause_graph_tracing();
3000
3001         return len;
3002 }
3003
3004 int trace_array_vprintk(struct trace_array *tr,
3005                         unsigned long ip, const char *fmt, va_list args)
3006 {
3007         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3008 }
3009
3010 int trace_array_printk(struct trace_array *tr,
3011                        unsigned long ip, const char *fmt, ...)
3012 {
3013         int ret;
3014         va_list ap;
3015
3016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3017                 return 0;
3018
3019         va_start(ap, fmt);
3020         ret = trace_array_vprintk(tr, ip, fmt, ap);
3021         va_end(ap);
3022         return ret;
3023 }
3024
3025 int trace_array_printk_buf(struct ring_buffer *buffer,
3026                            unsigned long ip, const char *fmt, ...)
3027 {
3028         int ret;
3029         va_list ap;
3030
3031         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3032                 return 0;
3033
3034         va_start(ap, fmt);
3035         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3036         va_end(ap);
3037         return ret;
3038 }
3039
3040 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3041 {
3042         return trace_array_vprintk(&global_trace, ip, fmt, args);
3043 }
3044 EXPORT_SYMBOL_GPL(trace_vprintk);
3045
3046 static void trace_iterator_increment(struct trace_iterator *iter)
3047 {
3048         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3049
3050         iter->idx++;
3051         if (buf_iter)
3052                 ring_buffer_read(buf_iter, NULL);
3053 }
3054
3055 static struct trace_entry *
3056 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3057                 unsigned long *lost_events)
3058 {
3059         struct ring_buffer_event *event;
3060         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3061
3062         if (buf_iter)
3063                 event = ring_buffer_iter_peek(buf_iter, ts);
3064         else
3065                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3066                                          lost_events);
3067
3068         if (event) {
3069                 iter->ent_size = ring_buffer_event_length(event);
3070                 return ring_buffer_event_data(event);
3071         }
3072         iter->ent_size = 0;
3073         return NULL;
3074 }
3075
3076 static struct trace_entry *
3077 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3078                   unsigned long *missing_events, u64 *ent_ts)
3079 {
3080         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3081         struct trace_entry *ent, *next = NULL;
3082         unsigned long lost_events = 0, next_lost = 0;
3083         int cpu_file = iter->cpu_file;
3084         u64 next_ts = 0, ts;
3085         int next_cpu = -1;
3086         int next_size = 0;
3087         int cpu;
3088
3089         /*
3090          * If we are in a per_cpu trace file, don't bother iterating over
3091          * all cpus; just peek at that cpu directly.
3092          */
3093         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3094                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3095                         return NULL;
3096                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3097                 if (ent_cpu)
3098                         *ent_cpu = cpu_file;
3099
3100                 return ent;
3101         }
3102
3103         for_each_tracing_cpu(cpu) {
3104
3105                 if (ring_buffer_empty_cpu(buffer, cpu))
3106                         continue;
3107
3108                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3109
3110                 /*
3111                  * Pick the entry with the smallest timestamp:
3112                  */
3113                 if (ent && (!next || ts < next_ts)) {
3114                         next = ent;
3115                         next_cpu = cpu;
3116                         next_ts = ts;
3117                         next_lost = lost_events;
3118                         next_size = iter->ent_size;
3119                 }
3120         }
3121
3122         iter->ent_size = next_size;
3123
3124         if (ent_cpu)
3125                 *ent_cpu = next_cpu;
3126
3127         if (ent_ts)
3128                 *ent_ts = next_ts;
3129
3130         if (missing_events)
3131                 *missing_events = next_lost;
3132
3133         return next;
3134 }
3135
3136 /* Find the next real entry, without updating the iterator itself */
3137 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3138                                           int *ent_cpu, u64 *ent_ts)
3139 {
3140         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3141 }
3142
3143 /* Find the next real entry, and increment the iterator to the next entry */
3144 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3145 {
3146         iter->ent = __find_next_entry(iter, &iter->cpu,
3147                                       &iter->lost_events, &iter->ts);
3148
3149         if (iter->ent)
3150                 trace_iterator_increment(iter);
3151
3152         return iter->ent ? iter : NULL;
3153 }
3154
3155 static void trace_consume(struct trace_iterator *iter)
3156 {
3157         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3158                             &iter->lost_events);
3159 }
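
/*
 * Illustrative sketch (not part of the original file): the consuming
 * read pattern built on the helpers above, roughly what the trace_pipe
 * read path does once it has an initialized iterator.
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 */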
3160
3161 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3162 {
3163         struct trace_iterator *iter = m->private;
3164         int i = (int)*pos;
3165         void *ent;
3166
3167         WARN_ON_ONCE(iter->leftover);
3168
3169         (*pos)++;
3170
3171         /* can't go backwards */
3172         if (iter->idx > i)
3173                 return NULL;
3174
3175         if (iter->idx < 0)
3176                 ent = trace_find_next_entry_inc(iter);
3177         else
3178                 ent = iter;
3179
3180         while (ent && iter->idx < i)
3181                 ent = trace_find_next_entry_inc(iter);
3182
3183         iter->pos = *pos;
3184
3185         return ent;
3186 }
3187
3188 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3189 {
3190         struct ring_buffer_event *event;
3191         struct ring_buffer_iter *buf_iter;
3192         unsigned long entries = 0;
3193         u64 ts;
3194
3195         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3196
3197         buf_iter = trace_buffer_iter(iter, cpu);
3198         if (!buf_iter)
3199                 return;
3200
3201         ring_buffer_iter_reset(buf_iter);
3202
3203         /*
3204          * With the max latency tracers, it is possible that a reset
3205          * never took place on a cpu. This is made evident by the
3206          * timestamp being before the start of the buffer.
3207          */
3208         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3209                 if (ts >= iter->trace_buffer->time_start)
3210                         break;
3211                 entries++;
3212                 ring_buffer_read(buf_iter, NULL);
3213         }
3214
3215         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3216 }
3217
3218 /*
3219  * The current tracer is copied to avoid taking a global lock
3220  * all around.
3221  */
3222 static void *s_start(struct seq_file *m, loff_t *pos)
3223 {
3224         struct trace_iterator *iter = m->private;
3225         struct trace_array *tr = iter->tr;
3226         int cpu_file = iter->cpu_file;
3227         void *p = NULL;
3228         loff_t l = 0;
3229         int cpu;
3230
3231         /*
3232          * copy the tracer to avoid using a global lock all around.
3233          * iter->trace is a copy of current_trace, the pointer to the
3234          * name may be used instead of a strcmp(), as iter->trace->name
3235          * will point to the same string as current_trace->name.
3236          */
3237         mutex_lock(&trace_types_lock);
3238         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3239                 *iter->trace = *tr->current_trace;
3240         mutex_unlock(&trace_types_lock);
3241
3242 #ifdef CONFIG_TRACER_MAX_TRACE
3243         if (iter->snapshot && iter->trace->use_max_tr)
3244                 return ERR_PTR(-EBUSY);
3245 #endif
3246
3247         if (!iter->snapshot)
3248                 atomic_inc(&trace_record_taskinfo_disabled);
3249
3250         if (*pos != iter->pos) {
3251                 iter->ent = NULL;
3252                 iter->cpu = 0;
3253                 iter->idx = -1;
3254
3255                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3256                         for_each_tracing_cpu(cpu)
3257                                 tracing_iter_reset(iter, cpu);
3258                 } else
3259                         tracing_iter_reset(iter, cpu_file);
3260
3261                 iter->leftover = 0;
3262                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3263                         ;
3264
3265         } else {
3266                 /*
3267                  * If we overflowed the seq_file before, then we want
3268                  * to just reuse the trace_seq buffer again.
3269                  */
3270                 if (iter->leftover)
3271                         p = iter;
3272                 else {
3273                         l = *pos - 1;
3274                         p = s_next(m, p, &l);
3275                 }
3276         }
3277
3278         trace_event_read_lock();
3279         trace_access_lock(cpu_file);
3280         return p;
3281 }
3282
3283 static void s_stop(struct seq_file *m, void *p)
3284 {
3285         struct trace_iterator *iter = m->private;
3286
3287 #ifdef CONFIG_TRACER_MAX_TRACE
3288         if (iter->snapshot && iter->trace->use_max_tr)
3289                 return;
3290 #endif
3291
3292         if (!iter->snapshot)
3293                 atomic_dec(&trace_record_taskinfo_disabled);
3294
3295         trace_access_unlock(iter->cpu_file);
3296         trace_event_read_unlock();
3297 }
3298
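/*
 * Sum the per-CPU entry counts of @buf.  @entries returns the entries
 * still present in the ring buffer; @total additionally accounts for
 * entries lost to overruns, except on CPUs where skipped_entries is
 * set, where the skipped entries are subtracted from both counts.
 */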
3299 static void
3300 get_total_entries(struct trace_buffer *buf,
3301                   unsigned long *total, unsigned long *entries)
3302 {
3303         unsigned long count;
3304         int cpu;
3305
3306         *total = 0;
3307         *entries = 0;
3308
3309         for_each_tracing_cpu(cpu) {
3310                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3311                 /*
3312                  * If this buffer has skipped entries, then we hold all
3313                  * entries for the trace and we need to ignore the
3314                  * ones before the time stamp.
3315                  */
3316                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3317                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3318                         /* total is the same as the entries */
3319                         *total += count;
3320                 } else
3321                         *total += count +
3322                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3323                 *entries += count;
3324         }
3325 }
3326
3327 static void print_lat_help_header(struct seq_file *m)
3328 {
3329         seq_puts(m, "#                  _------=> CPU#            \n"
3330                     "#                 / _-----=> irqs-off        \n"
3331                     "#                | / _----=> need-resched    \n"
3332                     "#                || / _---=> hardirq/softirq \n"
3333                     "#                ||| / _--=> preempt-depth   \n"
3334                     "#                |||| /     delay            \n"
3335                     "#  cmd     pid   ||||| time  |   caller      \n"
3336                     "#     \\   /      |||||  \\    |   /         \n");
3337 }
3338
3339 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3340 {
3341         unsigned long total;
3342         unsigned long entries;
3343
3344         get_total_entries(buf, &total, &entries);
3345         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3346                    entries, total, num_online_cpus());
3347         seq_puts(m, "#\n");
3348 }
3349
3350 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3351                                    unsigned int flags)
3352 {
3353         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3354
3355         print_event_info(buf, m);
3356
3357         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3358         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3359 }
3360
3361 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3362                                        unsigned int flags)
3363 {
3364         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3365         const char tgid_space[] = "          ";
3366         const char space[] = "  ";
3367
3368         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3369                    tgid ? tgid_space : space);
3370         seq_printf(m, "#                          %s / _----=> need-resched\n",
3371                    tgid ? tgid_space : space);
3372         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3373                    tgid ? tgid_space : space);
3374         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3375                    tgid ? tgid_space : space);
3376         seq_printf(m, "#                          %s||| /     delay\n",
3377                    tgid ? tgid_space : space);
3378         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3379                    tgid ? "   TGID   " : space);
3380         seq_printf(m, "#              | |       | %s||||       |         |\n",
3381                    tgid ? "     |    " : space);
3382 }
3383
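/*
 * Print the latency-format banner: tracer name and kernel release,
 * entry counts, the preemption model, the task that produced the
 * recorded latency and, if known, where its critical section started
 * and ended.
 */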
3384 void
3385 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3386 {
3387         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3388         struct trace_buffer *buf = iter->trace_buffer;
3389         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3390         struct tracer *type = iter->trace;
3391         unsigned long entries;
3392         unsigned long total;
3393         const char *name;
3394
3395         name = type->name;
3396
3397         get_total_entries(buf, &total, &entries);
3398
3399         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3400                    name, UTS_RELEASE);
3401         seq_puts(m, "# -----------------------------------"
3402                  "---------------------------------\n");
3403         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3404                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3405                    nsecs_to_usecs(data->saved_latency),
3406                    entries,
3407                    total,
3408                    buf->cpu,
3409 #if defined(CONFIG_PREEMPT_NONE)
3410                    "server",
3411 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3412                    "desktop",
3413 #elif defined(CONFIG_PREEMPT)
3414                    "preempt",
3415 #else
3416                    "unknown",
3417 #endif
3418                    /* These are reserved for later use */
3419                    0, 0, 0, 0);
3420 #ifdef CONFIG_SMP
3421         seq_printf(m, " #P:%d)\n", num_online_cpus());
3422 #else
3423         seq_puts(m, ")\n");
3424 #endif
3425         seq_puts(m, "#    -----------------\n");
3426         seq_printf(m, "#    | task: %.16s-%d "
3427                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3428                    data->comm, data->pid,
3429                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3430                    data->policy, data->rt_priority);
3431         seq_puts(m, "#    -----------------\n");
3432
3433         if (data->critical_start) {
3434                 seq_puts(m, "#  => started at: ");
3435                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3436                 trace_print_seq(m, &iter->seq);
3437                 seq_puts(m, "\n#  => ended at:   ");
3438                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3439                 trace_print_seq(m, &iter->seq);
3440                 seq_puts(m, "\n#\n");
3441         }
3442
3443         seq_puts(m, "#\n");
3444 }
3445
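/*
 * If the "annotate" option is set and this iterator saw ring buffer
 * overruns, emit a one-line marker the first time output switches to a
 * given CPU buffer, so the reader knows earlier events on that CPU were
 * dropped.  CPUs whose leading entries were skipped by
 * tracing_iter_reset() are not annotated.
 */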
3446 static void test_cpu_buff_start(struct trace_iterator *iter)
3447 {
3448         struct trace_seq *s = &iter->seq;
3449         struct trace_array *tr = iter->tr;
3450
3451         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3452                 return;
3453
3454         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3455                 return;
3456
3457         if (cpumask_available(iter->started) &&
3458             cpumask_test_cpu(iter->cpu, iter->started))
3459                 return;
3460
3461         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3462                 return;
3463
3464         if (cpumask_available(iter->started))
3465                 cpumask_set_cpu(iter->cpu, iter->started);
3466
3467         /* Don't print started cpu buffer for the first entry of the trace */
3468         if (iter->idx > 1)
3469                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3470                                 iter->cpu);
3471 }
3472
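/*
 * Default human-readable output path: print the context columns and
 * then hand the entry to the trace_event registered for its type.
 * Entries with no registered event fall back to an "Unknown type"
 * line.
 */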
3473 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3474 {
3475         struct trace_array *tr = iter->tr;
3476         struct trace_seq *s = &iter->seq;
3477         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3478         struct trace_entry *entry;
3479         struct trace_event *event;
3480
3481         entry = iter->ent;
3482
3483         test_cpu_buff_start(iter);
3484
3485         event = ftrace_find_event(entry->type);
3486
3487         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3488                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3489                         trace_print_lat_context(iter);
3490                 else
3491                         trace_print_context(iter);
3492         }
3493
3494         if (trace_seq_has_overflowed(s))
3495                 return TRACE_TYPE_PARTIAL_LINE;
3496
3497         if (event)
3498                 return event->funcs->trace(iter, sym_flags, event);
3499
3500         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3501
3502         return trace_handle_return(s);
3503 }
3504
3505 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3506 {
3507         struct trace_array *tr = iter->tr;
3508         struct trace_seq *s = &iter->seq;
3509         struct trace_entry *entry;
3510         struct trace_event *event;
3511
3512         entry = iter->ent;
3513
3514         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3515                 trace_seq_printf(s, "%d %d %llu ",
3516                                  entry->pid, iter->cpu, iter->ts);
3517
3518         if (trace_seq_has_overflowed(s))
3519                 return TRACE_TYPE_PARTIAL_LINE;
3520
3521         event = ftrace_find_event(entry->type);
3522         if (event)
3523                 return event->funcs->raw(iter, 0, event);
3524
3525         trace_seq_printf(s, "%d ?\n", entry->type);
3526
3527         return trace_handle_return(s);
3528 }
3529
3530 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3531 {
3532         struct trace_array *tr = iter->tr;
3533         struct trace_seq *s = &iter->seq;
3534         unsigned char newline = '\n';
3535         struct trace_entry *entry;
3536         struct trace_event *event;
3537
3538         entry = iter->ent;
3539
3540         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3541                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3542                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3543                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3544                 if (trace_seq_has_overflowed(s))
3545                         return TRACE_TYPE_PARTIAL_LINE;
3546         }
3547
3548         event = ftrace_find_event(entry->type);
3549         if (event) {
3550                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3551                 if (ret != TRACE_TYPE_HANDLED)
3552                         return ret;
3553         }
3554
3555         SEQ_PUT_FIELD(s, newline);
3556
3557         return trace_handle_return(s);
3558 }
3559
3560 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3561 {
3562         struct trace_array *tr = iter->tr;
3563         struct trace_seq *s = &iter->seq;
3564         struct trace_entry *entry;
3565         struct trace_event *event;
3566
3567         entry = iter->ent;
3568
3569         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3570                 SEQ_PUT_FIELD(s, entry->pid);
3571                 SEQ_PUT_FIELD(s, iter->cpu);
3572                 SEQ_PUT_FIELD(s, iter->ts);
3573                 if (trace_seq_has_overflowed(s))
3574                         return TRACE_TYPE_PARTIAL_LINE;
3575         }
3576
3577         event = ftrace_find_event(entry->type);
3578         return event ? event->funcs->binary(iter, 0, event) :
3579                 TRACE_TYPE_HANDLED;
3580 }
3581
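/*
 * Return 1 if this iterator has nothing left to read, either on the
 * single CPU selected by cpu_file or across all tracing CPUs.  When no
 * per-CPU iterator was set up, the ring buffer is queried directly.
 */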
3582 int trace_empty(struct trace_iterator *iter)
3583 {
3584         struct ring_buffer_iter *buf_iter;
3585         int cpu;
3586
3587         /* If we are looking at one CPU buffer, only check that one */
3588         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3589                 cpu = iter->cpu_file;
3590                 buf_iter = trace_buffer_iter(iter, cpu);
3591                 if (buf_iter) {
3592                         if (!ring_buffer_iter_empty(buf_iter))
3593                                 return 0;
3594                 } else {
3595                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3596                                 return 0;
3597                 }
3598                 return 1;
3599         }
3600
3601         for_each_tracing_cpu(cpu) {
3602                 buf_iter = trace_buffer_iter(iter, cpu);
3603                 if (buf_iter) {
3604                         if (!ring_buffer_iter_empty(buf_iter))
3605                                 return 0;
3606                 } else {
3607                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3608                                 return 0;
3609                 }
3610         }
3611
3612         return 1;
3613 }
3614
3615 /*  Called with trace_event_read_lock() held. */
3616 enum print_line_t print_trace_line(struct trace_iterator *iter)
3617 {
3618         struct trace_array *tr = iter->tr;
3619         unsigned long trace_flags = tr->trace_flags;
3620         enum print_line_t ret;
3621
3622         if (iter->lost_events) {
3623                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3624                                  iter->cpu, iter->lost_events);
3625                 if (trace_seq_has_overflowed(&iter->seq))
3626                         return TRACE_TYPE_PARTIAL_LINE;
3627         }
3628
3629         if (iter->trace && iter->trace->print_line) {
3630                 ret = iter->trace->print_line(iter);
3631                 if (ret != TRACE_TYPE_UNHANDLED)
3632                         return ret;
3633         }
3634
3635         if (iter->ent->type == TRACE_BPUTS &&
3636                         trace_flags & TRACE_ITER_PRINTK &&
3637                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3638                 return trace_print_bputs_msg_only(iter);
3639
3640         if (iter->ent->type == TRACE_BPRINT &&
3641                         trace_flags & TRACE_ITER_PRINTK &&
3642                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3643                 return trace_print_bprintk_msg_only(iter);
3644
3645         if (iter->ent->type == TRACE_PRINT &&
3646                         trace_flags & TRACE_ITER_PRINTK &&
3647                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3648                 return trace_print_printk_msg_only(iter);
3649
3650         if (trace_flags & TRACE_ITER_BIN)
3651                 return print_bin_fmt(iter);
3652
3653         if (trace_flags & TRACE_ITER_HEX)
3654                 return print_hex_fmt(iter);
3655
3656         if (trace_flags & TRACE_ITER_RAW)
3657                 return print_raw_fmt(iter);
3658
3659         return print_trace_fmt(iter);
3660 }
3661
3662 void trace_latency_header(struct seq_file *m)
3663 {
3664         struct trace_iterator *iter = m->private;
3665         struct trace_array *tr = iter->tr;
3666
3667         /* print nothing if the buffers are empty */
3668         if (trace_empty(iter))
3669                 return;
3670
3671         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3672                 print_trace_header(m, iter);
3673
3674         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3675                 print_lat_help_header(m);
3676 }
3677
3678 void trace_default_header(struct seq_file *m)
3679 {
3680         struct trace_iterator *iter = m->private;
3681         struct trace_array *tr = iter->tr;
3682         unsigned long trace_flags = tr->trace_flags;
3683
3684         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3685                 return;
3686
3687         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3688                 /* print nothing if the buffers are empty */
3689                 if (trace_empty(iter))
3690                         return;
3691                 print_trace_header(m, iter);
3692                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3693                         print_lat_help_header(m);
3694         } else {
3695                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3696                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3697                                 print_func_help_header_irq(iter->trace_buffer,
3698                                                            m, trace_flags);
3699                         else
3700                                 print_func_help_header(iter->trace_buffer, m,
3701                                                        trace_flags);
3702                 }
3703         }
3704 }
3705
3706 static void test_ftrace_alive(struct seq_file *m)
3707 {
3708         if (!ftrace_is_dead())
3709                 return;
3710         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3711                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3712 }
3713
3714 #ifdef CONFIG_TRACER_MAX_TRACE
3715 static void show_snapshot_main_help(struct seq_file *m)
3716 {
3717         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3718                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3719                     "#                      Takes a snapshot of the main buffer.\n"
3720                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3721                     "#                      (Doesn't have to be '2'; works with any number that\n"
3722                     "#                       is not a '0' or '1')\n");
3723 }
3724
3725 static void show_snapshot_percpu_help(struct seq_file *m)
3726 {
3727         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3728 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3729         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3730                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3731 #else
3732         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3733                     "#                     Must use main snapshot file to allocate.\n");
3734 #endif
3735         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3736                     "#                      (Doesn't have to be '2'; works with any number that\n"
3737                     "#                       is not a '0' or '1')\n");
3738 }
3739
3740 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3741 {
3742         if (iter->tr->allocated_snapshot)
3743                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3744         else
3745                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3746
3747         seq_puts(m, "# Snapshot commands:\n");
3748         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3749                 show_snapshot_main_help(m);
3750         else
3751                 show_snapshot_percpu_help(m);
3752 }
3753 #else
3754 /* Should never be called */
3755 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3756 #endif
3757
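/*
 * seq_file ->show() for the "trace" file.  With no current entry it
 * emits the header (or the snapshot help text); with leftover data it
 * re-emits the trace_seq that previously overflowed the seq_file;
 * otherwise it formats the current entry via print_trace_line().
 */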
3758 static int s_show(struct seq_file *m, void *v)
3759 {
3760         struct trace_iterator *iter = v;
3761         int ret;
3762
3763         if (iter->ent == NULL) {
3764                 if (iter->tr) {
3765                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3766                         seq_puts(m, "#\n");
3767                         test_ftrace_alive(m);
3768                 }
3769                 if (iter->snapshot && trace_empty(iter))
3770                         print_snapshot_help(m, iter);
3771                 else if (iter->trace && iter->trace->print_header)
3772                         iter->trace->print_header(m);
3773                 else
3774                         trace_default_header(m);
3775
3776         } else if (iter->leftover) {
3777                 /*
3778                  * If we filled the seq_file buffer earlier, we
3779                  * want to just show it now.
3780                  */
3781                 ret = trace_print_seq(m, &iter->seq);
3782
3783                 /* ret should this time be zero, but you never know */
3784                 iter->leftover = ret;
3785
3786         } else {
3787                 print_trace_line(iter);
3788                 ret = trace_print_seq(m, &iter->seq);
3789                 /*
3790                  * If we overflow the seq_file buffer, then it will
3791                  * ask us for this data again at start up.
3792                  * Use that instead.
3793                  *  ret is 0 if seq_file write succeeded.
3794                  *        -1 otherwise.
3795                  */
3796                 iter->leftover = ret;
3797         }
3798
3799         return 0;
3800 }
3801
3802 /*
3803  * Should be used after trace_array_get(), trace_types_lock
3804  * ensures that i_cdev was already initialized.
3805  */
3806 static inline int tracing_get_cpu(struct inode *inode)
3807 {
3808         if (inode->i_cdev) /* See trace_create_cpu_file() */
3809                 return (long)inode->i_cdev - 1;
3810         return RING_BUFFER_ALL_CPUS;
3811 }
3812
3813 static const struct seq_operations tracer_seq_ops = {
3814         .start          = s_start,
3815         .next           = s_next,
3816         .stop           = s_stop,
3817         .show           = s_show,
3818 };
3819
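/*
 * Set up the trace_iterator used for reads of the "trace" (or
 * "snapshot") file: copy the current tracer so it cannot change under
 * us, create a ring buffer iterator for each CPU of interest, and stop
 * tracing for the duration unless the snapshot file is being opened.
 */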
3820 static struct trace_iterator *
3821 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3822 {
3823         struct trace_array *tr = inode->i_private;
3824         struct trace_iterator *iter;
3825         int cpu;
3826
3827         if (tracing_disabled)
3828                 return ERR_PTR(-ENODEV);
3829
3830         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3831         if (!iter)
3832                 return ERR_PTR(-ENOMEM);
3833
3834         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3835                                     GFP_KERNEL);
3836         if (!iter->buffer_iter)
3837                 goto release;
3838
3839         /*
3840          * We make a copy of the current tracer to avoid concurrent
3841          * changes on it while we are reading.
3842          */
3843         mutex_lock(&trace_types_lock);
3844         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3845         if (!iter->trace)
3846                 goto fail;
3847
3848         *iter->trace = *tr->current_trace;
3849
3850         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3851                 goto fail;
3852
3853         iter->tr = tr;
3854
3855 #ifdef CONFIG_TRACER_MAX_TRACE
3856         /* Currently only the top directory has a snapshot */
3857         if (tr->current_trace->print_max || snapshot)
3858                 iter->trace_buffer = &tr->max_buffer;
3859         else
3860 #endif
3861                 iter->trace_buffer = &tr->trace_buffer;
3862         iter->snapshot = snapshot;
3863         iter->pos = -1;
3864         iter->cpu_file = tracing_get_cpu(inode);
3865         mutex_init(&iter->mutex);
3866
3867         /* Notify the tracer early; before we stop tracing. */
3868         if (iter->trace && iter->trace->open)
3869                 iter->trace->open(iter);
3870
3871         /* Annotate start of buffers if we had overruns */
3872         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3873                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3874
3875         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3876         if (trace_clocks[tr->clock_id].in_ns)
3877                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3878
3879         /* stop the trace while dumping if we are not opening "snapshot" */
3880         if (!iter->snapshot)
3881                 tracing_stop_tr(tr);
3882
3883         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3884                 for_each_tracing_cpu(cpu) {
3885                         iter->buffer_iter[cpu] =
3886                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3887                 }
3888                 ring_buffer_read_prepare_sync();
3889                 for_each_tracing_cpu(cpu) {
3890                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3891                         tracing_iter_reset(iter, cpu);
3892                 }
3893         } else {
3894                 cpu = iter->cpu_file;
3895                 iter->buffer_iter[cpu] =
3896                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3897                 ring_buffer_read_prepare_sync();
3898                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3899                 tracing_iter_reset(iter, cpu);
3900         }
3901
3902         mutex_unlock(&trace_types_lock);
3903
3904         return iter;
3905
3906  fail:
3907         mutex_unlock(&trace_types_lock);
3908         kfree(iter->trace);
3909         kfree(iter->buffer_iter);
3910 release:
3911         seq_release_private(inode, file);
3912         return ERR_PTR(-ENOMEM);
3913 }
3914
3915 int tracing_open_generic(struct inode *inode, struct file *filp)
3916 {
3917         if (tracing_disabled)
3918                 return -ENODEV;
3919
3920         filp->private_data = inode->i_private;
3921         return 0;
3922 }
3923
3924 bool tracing_is_disabled(void)
3925 {
3926         return tracing_disabled;
3927 }
3928
3929 /*
3930  * Open and update trace_array ref count.
3931  * Must have the current trace_array passed to it.
3932  */
3933 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3934 {
3935         struct trace_array *tr = inode->i_private;
3936
3937         if (tracing_disabled)
3938                 return -ENODEV;
3939
3940         if (trace_array_get(tr) < 0)
3941                 return -ENODEV;
3942
3943         filp->private_data = inode->i_private;
3944
3945         return 0;
3946 }
3947
3948 static int tracing_release(struct inode *inode, struct file *file)
3949 {
3950         struct trace_array *tr = inode->i_private;
3951         struct seq_file *m = file->private_data;
3952         struct trace_iterator *iter;
3953         int cpu;
3954
3955         if (!(file->f_mode & FMODE_READ)) {
3956                 trace_array_put(tr);
3957                 return 0;
3958         }
3959
3960         /* Writes do not use seq_file */
3961         iter = m->private;
3962         mutex_lock(&trace_types_lock);
3963
3964         for_each_tracing_cpu(cpu) {
3965                 if (iter->buffer_iter[cpu])
3966                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3967         }
3968
3969         if (iter->trace && iter->trace->close)
3970                 iter->trace->close(iter);
3971
3972         if (!iter->snapshot)
3973                 /* reenable tracing if it was previously enabled */
3974                 tracing_start_tr(tr);
3975
3976         __trace_array_put(tr);
3977
3978         mutex_unlock(&trace_types_lock);
3979
3980         mutex_destroy(&iter->mutex);
3981         free_cpumask_var(iter->started);
3982         kfree(iter->trace);
3983         kfree(iter->buffer_iter);
3984         seq_release_private(inode, file);
3985
3986         return 0;
3987 }
3988
3989 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3990 {
3991         struct trace_array *tr = inode->i_private;
3992
3993         trace_array_put(tr);
3994         return 0;
3995 }
3996
3997 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3998 {
3999         struct trace_array *tr = inode->i_private;
4000
4001         trace_array_put(tr);
4002
4003         return single_release(inode, file);
4004 }
4005
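/*
 * Open handler for the "trace" file.  Opening with O_TRUNC for write
 * resets the selected buffer(s); opening for read builds the full
 * iterator via __tracing_open() and honours the latency-format option.
 */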
4006 static int tracing_open(struct inode *inode, struct file *file)
4007 {
4008         struct trace_array *tr = inode->i_private;
4009         struct trace_iterator *iter;
4010         int ret = 0;
4011
4012         if (trace_array_get(tr) < 0)
4013                 return -ENODEV;
4014
4015         /* If this file was open for write, then erase contents */
4016         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4017                 int cpu = tracing_get_cpu(inode);
4018                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4019
4020 #ifdef CONFIG_TRACER_MAX_TRACE
4021                 if (tr->current_trace->print_max)
4022                         trace_buf = &tr->max_buffer;
4023 #endif
4024
4025                 if (cpu == RING_BUFFER_ALL_CPUS)
4026                         tracing_reset_online_cpus(trace_buf);
4027                 else
4028                         tracing_reset(trace_buf, cpu);
4029         }
4030
4031         if (file->f_mode & FMODE_READ) {
4032                 iter = __tracing_open(inode, file, false);
4033                 if (IS_ERR(iter))
4034                         ret = PTR_ERR(iter);
4035                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4036                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4037         }
4038
4039         if (ret < 0)
4040                 trace_array_put(tr);
4041
4042         return ret;
4043 }
4044
4045 /*
4046  * Some tracers are not suitable for instance buffers.
4047  * A tracer is always available for the global array (toplevel)
4048  * or if it explicitly states that it is.
4049  */
4050 static bool
4051 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4052 {
4053         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4054 }
4055
4056 /* Find the next tracer that this trace array may use */
4057 static struct tracer *
4058 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4059 {
4060         while (t && !trace_ok_for_array(t, tr))
4061                 t = t->next;
4062
4063         return t;
4064 }
4065
4066 static void *
4067 t_next(struct seq_file *m, void *v, loff_t *pos)
4068 {
4069         struct trace_array *tr = m->private;
4070         struct tracer *t = v;
4071
4072         (*pos)++;
4073
4074         if (t)
4075                 t = get_tracer_for_array(tr, t->next);
4076
4077         return t;
4078 }
4079
4080 static void *t_start(struct seq_file *m, loff_t *pos)
4081 {
4082         struct trace_array *tr = m->private;
4083         struct tracer *t;
4084         loff_t l = 0;
4085
4086         mutex_lock(&trace_types_lock);
4087
4088         t = get_tracer_for_array(tr, trace_types);
4089         for (; t && l < *pos; t = t_next(m, t, &l))
4090                         ;
4091                 ;
4092         return t;
4093 }
4094
4095 static void t_stop(struct seq_file *m, void *p)
4096 {
4097         mutex_unlock(&trace_types_lock);
4098 }
4099
4100 static int t_show(struct seq_file *m, void *v)
4101 {
4102         struct tracer *t = v;
4103
4104         if (!t)
4105                 return 0;
4106
4107         seq_puts(m, t->name);
4108         if (t->next)
4109                 seq_putc(m, ' ');
4110         else
4111                 seq_putc(m, '\n');
4112
4113         return 0;
4114 }
4115
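/*
 * seq_file operations backing "available_tracers".  t_start() takes
 * trace_types_lock and t_stop() drops it, so the tracer list cannot
 * change while it is being printed.
 */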
4116 static const struct seq_operations show_traces_seq_ops = {
4117         .start          = t_start,
4118         .next           = t_next,
4119         .stop           = t_stop,
4120         .show           = t_show,
4121 };
4122
4123 static int show_traces_open(struct inode *inode, struct file *file)
4124 {
4125         struct trace_array *tr = inode->i_private;
4126         struct seq_file *m;
4127         int ret;
4128
4129         if (tracing_disabled)
4130                 return -ENODEV;
4131
4132         ret = seq_open(file, &show_traces_seq_ops);
4133         if (ret)
4134                 return ret;
4135
4136         m = file->private_data;
4137         m->private = tr;
4138
4139         return 0;
4140 }
4141
4142 static ssize_t
4143 tracing_write_stub(struct file *filp, const char __user *ubuf,
4144                    size_t count, loff_t *ppos)
4145 {
4146         return count;
4147 }
4148
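/*
 * Seeking is only meaningful for readers, where it is handed off to
 * seq_lseek().  Write-only opens have no seq_file, so their file
 * position is simply reset to zero.
 */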
4149 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4150 {
4151         int ret;
4152
4153         if (file->f_mode & FMODE_READ)
4154                 ret = seq_lseek(file, offset, whence);
4155         else
4156                 file->f_pos = ret = 0;
4157
4158         return ret;
4159 }
4160
4161 static const struct file_operations tracing_fops = {
4162         .open           = tracing_open,
4163         .read           = seq_read,
4164         .write          = tracing_write_stub,
4165         .llseek         = tracing_lseek,
4166         .release        = tracing_release,
4167 };
4168
4169 static const struct file_operations show_traces_fops = {
4170         .open           = show_traces_open,
4171         .read           = seq_read,
4172         .release        = seq_release,
4173         .llseek         = seq_lseek,
4174 };
4175
4176 static ssize_t
4177 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4178                      size_t count, loff_t *ppos)
4179 {
4180         struct trace_array *tr = file_inode(filp)->i_private;
4181         char *mask_str;
4182         int len;
4183
4184         len = snprintf(NULL, 0, "%*pb\n",
4185                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4186         mask_str = kmalloc(len, GFP_KERNEL);
4187         if (!mask_str)
4188                 return -ENOMEM;
4189
4190         len = snprintf(mask_str, len, "%*pb\n",
4191                        cpumask_pr_args(tr->tracing_cpumask));
4192         if (len >= count) {
4193                 count = -EINVAL;
4194                 goto out_err;
4195         }
4196         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4197
4198 out_err:
4199         kfree(mask_str);
4200
4201         return count;
4202 }
4203
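/*
 * Update tracing_cpumask from user space.  For every CPU whose bit
 * changes, adjust the per-CPU disabled counter and enable or disable
 * recording on that CPU's ring buffer.  The whole update runs under
 * max_lock with interrupts disabled.
 */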
4204 static ssize_t
4205 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4206                       size_t count, loff_t *ppos)
4207 {
4208         struct trace_array *tr = file_inode(filp)->i_private;
4209         cpumask_var_t tracing_cpumask_new;
4210         int err, cpu;
4211
4212         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4213                 return -ENOMEM;
4214
4215         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4216         if (err)
4217                 goto err_unlock;
4218
4219         local_irq_disable();
4220         arch_spin_lock(&tr->max_lock);
4221         for_each_tracing_cpu(cpu) {
4222                 /*
4223                  * Increase/decrease the disabled counter if we are
4224                  * about to flip a bit in the cpumask:
4225                  */
4226                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4227                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4228                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4229                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4230                 }
4231                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4232                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4233                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4234                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4235                 }
4236         }
4237         arch_spin_unlock(&tr->max_lock);
4238         local_irq_enable();
4239
4240         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4241         free_cpumask_var(tracing_cpumask_new);
4242
4243         return count;
4244
4245 err_unlock:
4246         free_cpumask_var(tracing_cpumask_new);
4247
4248         return err;
4249 }
4250
4251 static const struct file_operations tracing_cpumask_fops = {
4252         .open           = tracing_open_generic_tr,
4253         .read           = tracing_cpumask_read,
4254         .write          = tracing_cpumask_write,
4255         .release        = tracing_release_generic_tr,
4256         .llseek         = generic_file_llseek,
4257 };
4258
4259 static int tracing_trace_options_show(struct seq_file *m, void *v)
4260 {
4261         struct tracer_opt *trace_opts;
4262         struct trace_array *tr = m->private;
4263         u32 tracer_flags;
4264         int i;
4265
4266         mutex_lock(&trace_types_lock);
4267         tracer_flags = tr->current_trace->flags->val;
4268         trace_opts = tr->current_trace->flags->opts;
4269
4270         for (i = 0; trace_options[i]; i++) {
4271                 if (tr->trace_flags & (1 << i))
4272                         seq_printf(m, "%s\n", trace_options[i]);
4273                 else
4274                         seq_printf(m, "no%s\n", trace_options[i]);
4275         }
4276
4277         for (i = 0; trace_opts[i].name; i++) {
4278                 if (tracer_flags & trace_opts[i].bit)
4279                         seq_printf(m, "%s\n", trace_opts[i].name);
4280                 else
4281                         seq_printf(m, "no%s\n", trace_opts[i].name);
4282         }
4283         mutex_unlock(&trace_types_lock);
4284
4285         return 0;
4286 }
4287
4288 static int __set_tracer_option(struct trace_array *tr,
4289                                struct tracer_flags *tracer_flags,
4290                                struct tracer_opt *opts, int neg)
4291 {
4292         struct tracer *trace = tracer_flags->trace;
4293         int ret;
4294
4295         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4296         if (ret)
4297                 return ret;
4298
4299         if (neg)
4300                 tracer_flags->val &= ~opts->bit;
4301         else
4302                 tracer_flags->val |= opts->bit;
4303         return 0;
4304 }
4305
4306 /* Try to assign a tracer specific option */
4307 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4308 {
4309         struct tracer *trace = tr->current_trace;
4310         struct tracer_flags *tracer_flags = trace->flags;
4311         struct tracer_opt *opts = NULL;
4312         int i;
4313
4314         for (i = 0; tracer_flags->opts[i].name; i++) {
4315                 opts = &tracer_flags->opts[i];
4316
4317                 if (strcmp(cmp, opts->name) == 0)
4318                         return __set_tracer_option(tr, trace->flags, opts, neg);
4319         }
4320
4321         return -EINVAL;
4322 }
4323
4324 /* Some tracers require overwrite to stay enabled */
4325 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4326 {
4327         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4328                 return -1;
4329
4330         return 0;
4331 }
4332
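/*
 * Set or clear a single TRACE_ITER_* flag on @tr.  The current tracer
 * may veto the change via ->flag_changed(); some flags need follow-up
 * work here, such as allocating tgid_map for RECORD_TGID or toggling
 * ring buffer overwrite mode.
 */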
4333 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4334 {
4335         /* do nothing if flag is already set */
4336         if (!!(tr->trace_flags & mask) == !!enabled)
4337                 return 0;
4338
4339         /* Give the tracer a chance to approve the change */
4340         if (tr->current_trace->flag_changed)
4341                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4342                         return -EINVAL;
4343
4344         if (enabled)
4345                 tr->trace_flags |= mask;
4346         else
4347                 tr->trace_flags &= ~mask;
4348
4349         if (mask == TRACE_ITER_RECORD_CMD)
4350                 trace_event_enable_cmd_record(enabled);
4351
4352         if (mask == TRACE_ITER_RECORD_TGID) {
4353                 if (!tgid_map)
4354                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1, sizeof(*tgid_map),
4355                                            GFP_KERNEL);
4356                 if (!tgid_map) {
4357                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4358                         return -ENOMEM;
4359                 }
4360
4361                 trace_event_enable_tgid_record(enabled);
4362         }
4363
4364         if (mask == TRACE_ITER_EVENT_FORK)
4365                 trace_event_follow_fork(tr, enabled);
4366
4367         if (mask == TRACE_ITER_FUNC_FORK)
4368                 ftrace_pid_follow_fork(tr, enabled);
4369
4370         if (mask == TRACE_ITER_OVERWRITE) {
4371                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4372 #ifdef CONFIG_TRACER_MAX_TRACE
4373                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4374 #endif
4375         }
4376
4377         if (mask == TRACE_ITER_PRINTK) {
4378                 trace_printk_start_stop_comm(enabled);
4379                 trace_printk_control(enabled);
4380         }
4381
4382         return 0;
4383 }
4384
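/*
 * Apply one option token as written to trace_options: a leading "no"
 * clears the flag, core options are tried first and then the current
 * tracer's private options.  Any trailing whitespace removed by
 * strstrip() is restored before returning.
 */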
4385 static int trace_set_options(struct trace_array *tr, char *option)
4386 {
4387         char *cmp;
4388         int neg = 0;
4389         int ret = -ENODEV;
4390         int i;
4391         size_t orig_len = strlen(option);
4392
4393         cmp = strstrip(option);
4394
4395         if (strncmp(cmp, "no", 2) == 0) {
4396                 neg = 1;
4397                 cmp += 2;
4398         }
4399
4400         mutex_lock(&trace_types_lock);
4401
4402         for (i = 0; trace_options[i]; i++) {
4403                 if (strcmp(cmp, trace_options[i]) == 0) {
4404                         ret = set_tracer_flag(tr, 1 << i, !neg);
4405                         break;
4406                 }
4407         }
4408
4409         /* If no option could be set, test the specific tracer options */
4410         if (!trace_options[i])
4411                 ret = set_tracer_option(tr, cmp, neg);
4412
4413         mutex_unlock(&trace_types_lock);
4414
4415         /*
4416          * If the first trailing whitespace is replaced with '\0' by strstrip,
4417          * turn it back into a space.
4418          */
4419         if (orig_len > strlen(option))
4420                 option[strlen(option)] = ' ';
4421
4422         return ret;
4423 }
4424
4425 static void __init apply_trace_boot_options(void)
4426 {
4427         char *buf = trace_boot_options_buf;
4428         char *option;
4429
4430         while (true) {
4431                 option = strsep(&buf, ",");
4432
4433                 if (!option)
4434                         break;
4435
4436                 if (*option)
4437                         trace_set_options(&global_trace, option);
4438
4439                 /* Put back the comma to allow this to be called again */
4440                 if (buf)
4441                         *(buf - 1) = ',';
4442         }
4443 }
4444
4445 static ssize_t
4446 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4447                         size_t cnt, loff_t *ppos)
4448 {
4449         struct seq_file *m = filp->private_data;
4450         struct trace_array *tr = m->private;
4451         char buf[64];
4452         int ret;
4453
4454         if (cnt >= sizeof(buf))
4455                 return -EINVAL;
4456
4457         if (copy_from_user(buf, ubuf, cnt))
4458                 return -EFAULT;
4459
4460         buf[cnt] = 0;
4461
4462         ret = trace_set_options(tr, buf);
4463         if (ret < 0)
4464                 return ret;
4465
4466         *ppos += cnt;
4467
4468         return cnt;
4469 }
4470
4471 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4472 {
4473         struct trace_array *tr = inode->i_private;
4474         int ret;
4475
4476         if (tracing_disabled)
4477                 return -ENODEV;
4478
4479         if (trace_array_get(tr) < 0)
4480                 return -ENODEV;
4481
4482         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4483         if (ret < 0)
4484                 trace_array_put(tr);
4485
4486         return ret;
4487 }
4488
4489 static const struct file_operations tracing_iter_fops = {
4490         .open           = tracing_trace_options_open,
4491         .read           = seq_read,
4492         .llseek         = seq_lseek,
4493         .release        = tracing_single_release_tr,
4494         .write          = tracing_trace_options_write,
4495 };
4496
4497 static const char readme_msg[] =
4498         "tracing mini-HOWTO:\n\n"
4499         "# echo 0 > tracing_on : quick way to disable tracing\n"
4500         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4501         " Important files:\n"
4502         "  trace\t\t\t- The static contents of the buffer\n"
4503         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4504         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4505         "  current_tracer\t- function and latency tracers\n"
4506         "  available_tracers\t- list of configured tracers for current_tracer\n"
4507         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4508         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4509         "  trace_clock\t\t- change the clock used to order events\n"
4510         "       local:   Per cpu clock but may not be synced across CPUs\n"
4511         "      global:   Synced across CPUs but slows tracing down.\n"
4512         "     counter:   Not a clock, but just an increment\n"
4513         "      uptime:   Jiffy counter from time of boot\n"
4514         "        perf:   Same clock that perf events use\n"
4515 #ifdef CONFIG_X86_64
4516         "     x86-tsc:   TSC cycle counter\n"
4517 #endif
4518         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4519         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4520         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4521         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4522         "\t\t\t  Remove sub-buffer with rmdir\n"
4523         "  trace_options\t\t- Set format or modify how tracing happens\n"
4524         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4525         "\t\t\t  option name\n"
4526         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4527 #ifdef CONFIG_DYNAMIC_FTRACE
4528         "\n  available_filter_functions - list of functions that can be filtered on\n"
4529         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4530         "\t\t\t  functions\n"
4531         "\t     accepts: func_full_name or glob-matching-pattern\n"
4532         "\t     modules: Can select a group via module\n"
4533         "\t      Format: :mod:<module-name>\n"
4534         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4535         "\t    triggers: a command to perform when function is hit\n"
4536         "\t      Format: <function>:<trigger>[:count]\n"
4537         "\t     trigger: traceon, traceoff\n"
4538         "\t\t      enable_event:<system>:<event>\n"
4539         "\t\t      disable_event:<system>:<event>\n"
4540 #ifdef CONFIG_STACKTRACE
4541         "\t\t      stacktrace\n"
4542 #endif
4543 #ifdef CONFIG_TRACER_SNAPSHOT
4544         "\t\t      snapshot\n"
4545 #endif
4546         "\t\t      dump\n"
4547         "\t\t      cpudump\n"
4548         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4549         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4550         "\t     The first one will disable tracing every time do_fault is hit\n"
4551         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4552         "\t       The first time do_trap is hit and it disables tracing, the\n"
4553         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4554         "\t       the counter will not decrement. It only decrements when the\n"
4555         "\t       trigger did work\n"
4556         "\t     To remove trigger without count:\n"
4557         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4558         "\t     To remove trigger with a count:\n"
4559         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4560         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4561         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4562         "\t    modules: Can select a group via module command :mod:\n"
4563         "\t    Does not accept triggers\n"
4564 #endif /* CONFIG_DYNAMIC_FTRACE */
4565 #ifdef CONFIG_FUNCTION_TRACER
4566         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4567         "\t\t    (function)\n"
4568 #endif
4569 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4570         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4571         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4572         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4573 #endif
4574 #ifdef CONFIG_TRACER_SNAPSHOT
4575         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4576         "\t\t\t  snapshot buffer. Read the contents for more\n"
4577         "\t\t\t  information\n"
4578 #endif
4579 #ifdef CONFIG_STACK_TRACER
4580         "  stack_trace\t\t- Shows the max stack trace when active\n"
4581         "  stack_max_size\t- Shows current max stack size that was traced\n"
4582         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4583         "\t\t\t  new trace)\n"
4584 #ifdef CONFIG_DYNAMIC_FTRACE
4585         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4586         "\t\t\t  traces\n"
4587 #endif
4588 #endif /* CONFIG_STACK_TRACER */
4589 #ifdef CONFIG_KPROBE_EVENTS
4590         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4591         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4592 #endif
4593 #ifdef CONFIG_UPROBE_EVENTS
4594         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4595         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4596 #endif
4597 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4598         "\t  accepts: event-definitions (one definition per line)\n"
4599         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4600         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4601         "\t           -:[<group>/]<event>\n"
4602 #ifdef CONFIG_KPROBE_EVENTS
4603         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4604   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4605 #endif
4606 #ifdef CONFIG_UPROBE_EVENTS
4607         "\t    place: <path>:<offset>\n"
4608 #endif
4609         "\t     args: <name>=fetcharg[:type]\n"
4610         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4611         "\t           $stack<index>, $stack, $retval, $comm\n"
4612         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4613         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4614 #endif
4615         "  events/\t\t- Directory containing all trace event subsystems:\n"
4616         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4617         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4618         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4619         "\t\t\t  events\n"
4620         "      filter\t\t- If set, only events passing filter are traced\n"
4621         "  events/<system>/<event>/\t- Directory containing control files for\n"
4622         "\t\t\t  <event>:\n"
4623         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4624         "      filter\t\t- If set, only events passing filter are traced\n"
4625         "      trigger\t\t- If set, a command to perform when event is hit\n"
4626         "\t    Format: <trigger>[:count][if <filter>]\n"
4627         "\t   trigger: traceon, traceoff\n"
4628         "\t            enable_event:<system>:<event>\n"
4629         "\t            disable_event:<system>:<event>\n"
4630 #ifdef CONFIG_HIST_TRIGGERS
4631         "\t            enable_hist:<system>:<event>\n"
4632         "\t            disable_hist:<system>:<event>\n"
4633 #endif
4634 #ifdef CONFIG_STACKTRACE
4635         "\t\t    stacktrace\n"
4636 #endif
4637 #ifdef CONFIG_TRACER_SNAPSHOT
4638         "\t\t    snapshot\n"
4639 #endif
4640 #ifdef CONFIG_HIST_TRIGGERS
4641         "\t\t    hist (see below)\n"
4642 #endif
4643         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4644         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4645         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4646         "\t                  events/block/block_unplug/trigger\n"
4647         "\t   The first disables tracing every time block_unplug is hit.\n"
4648         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4649         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4650         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4651         "\t   Like function triggers, the counter is only decremented if it\n"
4652         "\t    enabled or disabled tracing.\n"
4653         "\t   To remove a trigger without a count:\n"
4654         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4655         "\t   To remove a trigger with a count:\n"
4656         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4657         "\t   Filters can be ignored when removing a trigger.\n"
4658 #ifdef CONFIG_HIST_TRIGGERS
4659         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4660         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4661         "\t            [:values=<field1[,field2,...]>]\n"
4662         "\t            [:sort=<field1[,field2,...]>]\n"
4663         "\t            [:size=#entries]\n"
4664         "\t            [:pause][:continue][:clear]\n"
4665         "\t            [:name=histname1]\n"
4666         "\t            [if <filter>]\n\n"
4667         "\t    When a matching event is hit, an entry is added to a hash\n"
4668         "\t    table using the key(s) and value(s) named, and the value of a\n"
4669         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4670         "\t    correspond to fields in the event's format description.  Keys\n"
4671         "\t    can be any field, or the special string 'stacktrace'.\n"
4672         "\t    Compound keys consisting of up to two fields can be specified\n"
4673         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4674         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4675         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4676         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4677         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4678         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4679         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4680         "\t    its histogram data will be shared with other triggers of the\n"
4681         "\t    same name, and trigger hits will update this common data.\n\n"
4682         "\t    Reading the 'hist' file for the event will dump the hash\n"
4683         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4684         "\t    triggers attached to an event, there will be a table for each\n"
4685         "\t    trigger in the output.  The table displayed for a named\n"
4686         "\t    trigger will be the same as any other instance having the\n"
4687         "\t    same name.  The default format used to display a given field\n"
4688         "\t    can be modified by appending any of the following modifiers\n"
4689         "\t    to the field name, as applicable:\n\n"
4690         "\t            .hex        display a number as a hex value\n"
4691         "\t            .sym        display an address as a symbol\n"
4692         "\t            .sym-offset display an address as a symbol and offset\n"
4693         "\t            .execname   display a common_pid as a program name\n"
4694         "\t            .syscall    display a syscall id as a syscall name\n"
4695         "\t            .log2       display log2 value rather than raw number\n\n"
4696         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4697         "\t    trigger or to start a hist trigger but not log any events\n"
4698         "\t    until told to do so.  'continue' can be used to start or\n"
4699         "\t    restart a paused hist trigger.\n\n"
4700         "\t    The 'clear' parameter will clear the contents of a running\n"
4701         "\t    hist trigger and leave its current paused/active state\n"
4702         "\t    unchanged.\n\n"
4703         "\t    The enable_hist and disable_hist triggers can be used to\n"
4704         "\t    have one event conditionally start and stop another event's\n"
4705         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4706         "\t    the enable_event and disable_event triggers.\n"
4707 #endif
4708 ;
4709
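/*
 * Illustrative example only (not part of the help text above): assuming a
 * tracefs mount and the kmem:kmalloc event, a hist trigger in the format
 * documented above could be exercised from the shell roughly like this:
 *
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */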
4710 static ssize_t
4711 tracing_readme_read(struct file *filp, char __user *ubuf,
4712                        size_t cnt, loff_t *ppos)
4713 {
4714         return simple_read_from_buffer(ubuf, cnt, ppos,
4715                                         readme_msg, strlen(readme_msg));
4716 }
4717
4718 static const struct file_operations tracing_readme_fops = {
4719         .open           = tracing_open_generic,
4720         .read           = tracing_readme_read,
4721         .llseek         = generic_file_llseek,
4722 };
4723
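/*
 * The seq_file callbacks below implement the "saved_tgids" file: they walk
 * the tgid_map[] array and emit one "<pid> <tgid>" pair per line for every
 * pid that has a saved tgid.
 */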
4724 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4725 {
4726         int *ptr = v;
4727
4728         if (*pos || m->count)
4729                 ptr++;
4730
4731         (*pos)++;
4732
4733         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4734                 if (trace_find_tgid(*ptr))
4735                         return ptr;
4736         }
4737
4738         return NULL;
4739 }
4740
4741 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4742 {
4743         void *v;
4744         loff_t l = 0;
4745
4746         if (!tgid_map)
4747                 return NULL;
4748
4749         v = &tgid_map[0];
4750         while (l <= *pos) {
4751                 v = saved_tgids_next(m, v, &l);
4752                 if (!v)
4753                         return NULL;
4754         }
4755
4756         return v;
4757 }
4758
4759 static void saved_tgids_stop(struct seq_file *m, void *v)
4760 {
4761 }
4762
4763 static int saved_tgids_show(struct seq_file *m, void *v)
4764 {
4765         int pid = (int *)v - tgid_map;
4766
4767         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4768         return 0;
4769 }
4770
4771 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4772         .start          = saved_tgids_start,
4773         .stop           = saved_tgids_stop,
4774         .next           = saved_tgids_next,
4775         .show           = saved_tgids_show,
4776 };
4777
4778 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4779 {
4780         if (tracing_disabled)
4781                 return -ENODEV;
4782
4783         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4784 }
4785
4786
4787 static const struct file_operations tracing_saved_tgids_fops = {
4788         .open           = tracing_saved_tgids_open,
4789         .read           = seq_read,
4790         .llseek         = seq_lseek,
4791         .release        = seq_release,
4792 };
4793
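/*
 * The "saved_cmdlines" file: walk savedcmd->map_cmdline_to_pid[] under
 * trace_cmdline_lock (with preemption disabled) and print one
 * "<pid> <comm>" pair per recorded entry.
 */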
4794 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4795 {
4796         unsigned int *ptr = v;
4797
4798         if (*pos || m->count)
4799                 ptr++;
4800
4801         (*pos)++;
4802
4803         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4804              ptr++) {
4805                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4806                         continue;
4807
4808                 return ptr;
4809         }
4810
4811         return NULL;
4812 }
4813
4814 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4815 {
4816         void *v;
4817         loff_t l = 0;
4818
4819         preempt_disable();
4820         arch_spin_lock(&trace_cmdline_lock);
4821
4822         v = &savedcmd->map_cmdline_to_pid[0];
4823         while (l <= *pos) {
4824                 v = saved_cmdlines_next(m, v, &l);
4825                 if (!v)
4826                         return NULL;
4827         }
4828
4829         return v;
4830 }
4831
4832 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4833 {
4834         arch_spin_unlock(&trace_cmdline_lock);
4835         preempt_enable();
4836 }
4837
4838 static int saved_cmdlines_show(struct seq_file *m, void *v)
4839 {
4840         char buf[TASK_COMM_LEN];
4841         unsigned int *pid = v;
4842
4843         __trace_find_cmdline(*pid, buf);
4844         seq_printf(m, "%d %s\n", *pid, buf);
4845         return 0;
4846 }
4847
4848 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4849         .start          = saved_cmdlines_start,
4850         .next           = saved_cmdlines_next,
4851         .stop           = saved_cmdlines_stop,
4852         .show           = saved_cmdlines_show,
4853 };
4854
4855 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4856 {
4857         if (tracing_disabled)
4858                 return -ENODEV;
4859
4860         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4861 }
4862
4863 static const struct file_operations tracing_saved_cmdlines_fops = {
4864         .open           = tracing_saved_cmdlines_open,
4865         .read           = seq_read,
4866         .llseek         = seq_lseek,
4867         .release        = seq_release,
4868 };
4869
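/*
 * "saved_cmdlines_size" reports and adjusts how many cmdline entries are
 * kept. A write swaps in a freshly allocated buffer of the requested size
 * (1 .. PID_MAX_DEFAULT) under trace_cmdline_lock. Sketch of usage from the
 * shell, assuming a tracefs mount:
 *
 *   # cat saved_cmdlines_size
 *   # echo 1024 > saved_cmdlines_size
 */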
4870 static ssize_t
4871 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4872                                  size_t cnt, loff_t *ppos)
4873 {
4874         char buf[64];
4875         int r;
4876
4877         arch_spin_lock(&trace_cmdline_lock);
4878         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4879         arch_spin_unlock(&trace_cmdline_lock);
4880
4881         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4882 }
4883
4884 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4885 {
4886         kfree(s->saved_cmdlines);
4887         kfree(s->map_cmdline_to_pid);
4888         kfree(s);
4889 }
4890
4891 static int tracing_resize_saved_cmdlines(unsigned int val)
4892 {
4893         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4894
4895         s = kmalloc(sizeof(*s), GFP_KERNEL);
4896         if (!s)
4897                 return -ENOMEM;
4898
4899         if (allocate_cmdlines_buffer(val, s) < 0) {
4900                 kfree(s);
4901                 return -ENOMEM;
4902         }
4903
4904         arch_spin_lock(&trace_cmdline_lock);
4905         savedcmd_temp = savedcmd;
4906         savedcmd = s;
4907         arch_spin_unlock(&trace_cmdline_lock);
4908         free_saved_cmdlines_buffer(savedcmd_temp);
4909
4910         return 0;
4911 }
4912
4913 static ssize_t
4914 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4915                                   size_t cnt, loff_t *ppos)
4916 {
4917         unsigned long val;
4918         int ret;
4919
4920         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4921         if (ret)
4922                 return ret;
4923
4924         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4925         if (!val || val > PID_MAX_DEFAULT)
4926                 return -EINVAL;
4927
4928         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4929         if (ret < 0)
4930                 return ret;
4931
4932         *ppos += cnt;
4933
4934         return cnt;
4935 }
4936
4937 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4938         .open           = tracing_open_generic,
4939         .read           = tracing_saved_cmdlines_size_read,
4940         .write          = tracing_saved_cmdlines_size_write,
4941 };
4942
4943 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
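/*
 * The "eval_map" file walks the trace_eval_maps chain (see
 * trace_insert_eval_map_file() below for the layout) and prints one
 * "<eval string> <value> (<system>)" line per eval (enum) mapping.
 */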
4944 static union trace_eval_map_item *
4945 update_eval_map(union trace_eval_map_item *ptr)
4946 {
4947         if (!ptr->map.eval_string) {
4948                 if (ptr->tail.next) {
4949                         ptr = ptr->tail.next;
4950                         /* Set ptr to the next real item (skip head) */
4951                         ptr++;
4952                 } else
4953                         return NULL;
4954         }
4955         return ptr;
4956 }
4957
4958 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4959 {
4960         union trace_eval_map_item *ptr = v;
4961
4962         /*
4963          * Paranoid! If ptr points to end, we don't want to increment past it.
4964          * This really should never happen.
4965          */
4966         ptr = update_eval_map(ptr);
4967         if (WARN_ON_ONCE(!ptr))
4968                 return NULL;
4969
4970         ptr++;
4971
4972         (*pos)++;
4973
4974         ptr = update_eval_map(ptr);
4975
4976         return ptr;
4977 }
4978
4979 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4980 {
4981         union trace_eval_map_item *v;
4982         loff_t l = 0;
4983
4984         mutex_lock(&trace_eval_mutex);
4985
4986         v = trace_eval_maps;
4987         if (v)
4988                 v++;
4989
4990         while (v && l < *pos) {
4991                 v = eval_map_next(m, v, &l);
4992         }
4993
4994         return v;
4995 }
4996
4997 static void eval_map_stop(struct seq_file *m, void *v)
4998 {
4999         mutex_unlock(&trace_eval_mutex);
5000 }
5001
5002 static int eval_map_show(struct seq_file *m, void *v)
5003 {
5004         union trace_eval_map_item *ptr = v;
5005
5006         seq_printf(m, "%s %ld (%s)\n",
5007                    ptr->map.eval_string, ptr->map.eval_value,
5008                    ptr->map.system);
5009
5010         return 0;
5011 }
5012
5013 static const struct seq_operations tracing_eval_map_seq_ops = {
5014         .start          = eval_map_start,
5015         .next           = eval_map_next,
5016         .stop           = eval_map_stop,
5017         .show           = eval_map_show,
5018 };
5019
5020 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5021 {
5022         if (tracing_disabled)
5023                 return -ENODEV;
5024
5025         return seq_open(filp, &tracing_eval_map_seq_ops);
5026 }
5027
5028 static const struct file_operations tracing_eval_map_fops = {
5029         .open           = tracing_eval_map_open,
5030         .read           = seq_read,
5031         .llseek         = seq_lseek,
5032         .release        = seq_release,
5033 };
5034
5035 static inline union trace_eval_map_item *
5036 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5037 {
5038         /* Return tail of array given the head */
5039         return ptr + ptr->head.length + 1;
5040 }
5041
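/*
 * Layout of one allocation in the trace_eval_maps chain (a sketch derived
 * from the code below):
 *
 *   [ head: mod, length ][ map 0 ][ map 1 ] ... [ map len-1 ][ tail: next ]
 *
 * The tail element is zeroed on allocation and its ->tail.next links to the
 * head of the next module's array, if any.
 */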
5042 static void
5043 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5044                            int len)
5045 {
5046         struct trace_eval_map **stop;
5047         struct trace_eval_map **map;
5048         union trace_eval_map_item *map_array;
5049         union trace_eval_map_item *ptr;
5050
5051         stop = start + len;
5052
5053         /*
5054          * The trace_eval_maps contains the map plus a head and tail item,
5055          * where the head holds the module and length of array, and the
5056          * tail holds a pointer to the next list.
5057          */
5058         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5059         if (!map_array) {
5060                 pr_warn("Unable to allocate trace eval mapping\n");
5061                 return;
5062         }
5063
5064         mutex_lock(&trace_eval_mutex);
5065
5066         if (!trace_eval_maps)
5067                 trace_eval_maps = map_array;
5068         else {
5069                 ptr = trace_eval_maps;
5070                 for (;;) {
5071                         ptr = trace_eval_jmp_to_tail(ptr);
5072                         if (!ptr->tail.next)
5073                                 break;
5074                         ptr = ptr->tail.next;
5075
5076                 }
5077                 ptr->tail.next = map_array;
5078         }
5079         map_array->head.mod = mod;
5080         map_array->head.length = len;
5081         map_array++;
5082
5083         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5084                 map_array->map = **map;
5085                 map_array++;
5086         }
5087         memset(map_array, 0, sizeof(*map_array));
5088
5089         mutex_unlock(&trace_eval_mutex);
5090 }
5091
5092 static void trace_create_eval_file(struct dentry *d_tracer)
5093 {
5094         trace_create_file("eval_map", 0444, d_tracer,
5095                           NULL, &tracing_eval_map_fops);
5096 }
5097
5098 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5099 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5100 static inline void trace_insert_eval_map_file(struct module *mod,
5101                               struct trace_eval_map **start, int len) { }
5102 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5103
5104 static void trace_insert_eval_map(struct module *mod,
5105                                   struct trace_eval_map **start, int len)
5106 {
5107         struct trace_eval_map **map;
5108
5109         if (len <= 0)
5110                 return;
5111
5112         map = start;
5113
5114         trace_event_eval_update(map, len);
5115
5116         trace_insert_eval_map_file(mod, start, len);
5117 }
5118
5119 static ssize_t
5120 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5121                        size_t cnt, loff_t *ppos)
5122 {
5123         struct trace_array *tr = filp->private_data;
5124         char buf[MAX_TRACER_SIZE+2];
5125         int r;
5126
5127         mutex_lock(&trace_types_lock);
5128         r = sprintf(buf, "%s\n", tr->current_trace->name);
5129         mutex_unlock(&trace_types_lock);
5130
5131         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5132 }
5133
5134 int tracer_init(struct tracer *t, struct trace_array *tr)
5135 {
5136         tracing_reset_online_cpus(&tr->trace_buffer);
5137         return t->init(tr);
5138 }
5139
5140 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5141 {
5142         int cpu;
5143
5144         for_each_tracing_cpu(cpu)
5145                 per_cpu_ptr(buf->data, cpu)->entries = val;
5146 }
5147
5148 #ifdef CONFIG_TRACER_MAX_TRACE
5149 /* resize @trace_buf to match the per-cpu entry counts of @size_buf */
5150 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5151                                         struct trace_buffer *size_buf, int cpu_id)
5152 {
5153         int cpu, ret = 0;
5154
5155         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5156                 for_each_tracing_cpu(cpu) {
5157                         ret = ring_buffer_resize(trace_buf->buffer,
5158                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5159                         if (ret < 0)
5160                                 break;
5161                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5162                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5163                 }
5164         } else {
5165                 ret = ring_buffer_resize(trace_buf->buffer,
5166                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5167                 if (ret == 0)
5168                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5169                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5170         }
5171
5172         return ret;
5173 }
5174 #endif /* CONFIG_TRACER_MAX_TRACE */
5175
5176 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5177                                         unsigned long size, int cpu)
5178 {
5179         int ret;
5180
5181         /*
5182          * If kernel or user changes the size of the ring buffer
5183          * we use the size that was given, and we can forget about
5184          * expanding it later.
5185          */
5186         ring_buffer_expanded = true;
5187
5188         /* May be called before buffers are initialized */
5189         if (!tr->trace_buffer.buffer)
5190                 return 0;
5191
5192         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5193         if (ret < 0)
5194                 return ret;
5195
5196 #ifdef CONFIG_TRACER_MAX_TRACE
5197         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5198             !tr->current_trace->use_max_tr)
5199                 goto out;
5200
5201         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5202         if (ret < 0) {
5203                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5204                                                      &tr->trace_buffer, cpu);
5205                 if (r < 0) {
5206                         /*
5207                          * AARGH! We are left with different
5208                          * size max buffer!!!!
5209                          * The max buffer is our "snapshot" buffer.
5210                          * When a tracer needs a snapshot (one of the
5211                          * latency tracers), it swaps the max buffer
5212                          * with the saved snapshot. We succeeded in
5213                          * updating the size of the main buffer, but failed to
5214                          * update the size of the max buffer. But when we tried
5215                          * to reset the main buffer to the original size, we
5216                          * failed there too. This is very unlikely to
5217                          * happen, but if it does, warn and kill all
5218                          * tracing.
5219                          */
5220                         WARN_ON(1);
5221                         tracing_disabled = 1;
5222                 }
5223                 return ret;
5224         }
5225
5226         if (cpu == RING_BUFFER_ALL_CPUS)
5227                 set_buffer_entries(&tr->max_buffer, size);
5228         else
5229                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5230
5231  out:
5232 #endif /* CONFIG_TRACER_MAX_TRACE */
5233
5234         if (cpu == RING_BUFFER_ALL_CPUS)
5235                 set_buffer_entries(&tr->trace_buffer, size);
5236         else
5237                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5238
5239         return ret;
5240 }
5241
5242 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5243                                           unsigned long size, int cpu_id)
5244 {
5245         int ret = size;
5246
5247         mutex_lock(&trace_types_lock);
5248
5249         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5250                 /* make sure this cpu is enabled in the mask */
5251                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5252                         ret = -EINVAL;
5253                         goto out;
5254                 }
5255         }
5256
5257         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5258         if (ret < 0)
5259                 ret = -ENOMEM;
5260
5261 out:
5262         mutex_unlock(&trace_types_lock);
5263
5264         return ret;
5265 }
5266
5267
5268 /**
5269  * tracing_update_buffers - used by tracing facility to expand ring buffers
5270  *
5271  * To save on memory when the tracing is never used on a system with it
5272  * configured in. The ring buffers are set to a minimum size. But once
5273  * a user starts to use the tracing facility, then they need to grow
5274  * to their default size.
5275  *
5276  * This function is to be called when a tracer is about to be used.
5277  */
5278 int tracing_update_buffers(void)
5279 {
5280         int ret = 0;
5281
5282         mutex_lock(&trace_types_lock);
5283         if (!ring_buffer_expanded)
5284                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5285                                                 RING_BUFFER_ALL_CPUS);
5286         mutex_unlock(&trace_types_lock);
5287
5288         return ret;
5289 }
5290
5291 struct trace_option_dentry;
5292
5293 static void
5294 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5295
5296 /*
5297  * Used to clear out the tracer before deletion of an instance.
5298  * Must have trace_types_lock held.
5299  */
5300 static void tracing_set_nop(struct trace_array *tr)
5301 {
5302         if (tr->current_trace == &nop_trace)
5303                 return;
5304
5305         tr->current_trace->enabled--;
5306
5307         if (tr->current_trace->reset)
5308                 tr->current_trace->reset(tr);
5309
5310         tr->current_trace = &nop_trace;
5311 }
5312
5313 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5314 {
5315         /* Only enable if the directory has been created already. */
5316         if (!tr->dir)
5317                 return;
5318
5319         create_trace_option_files(tr, t);
5320 }
5321
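/*
 * Switch the current tracer of @tr to the one named @buf. In outline:
 * expand the ring buffer if it is still at its boot-time minimum, look the
 * tracer up by name, reject tracers that are not allowed here (command
 * line, non-top-level instance, or pipe readers present), tear down the
 * old tracer, drop or allocate the snapshot buffer as needed, then init
 * and enable the new one.
 */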
5322 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5323 {
5324         struct tracer *t;
5325 #ifdef CONFIG_TRACER_MAX_TRACE
5326         bool had_max_tr;
5327 #endif
5328         int ret = 0;
5329
5330         mutex_lock(&trace_types_lock);
5331
5332         if (!ring_buffer_expanded) {
5333                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5334                                                 RING_BUFFER_ALL_CPUS);
5335                 if (ret < 0)
5336                         goto out;
5337                 ret = 0;
5338         }
5339
5340         for (t = trace_types; t; t = t->next) {
5341                 if (strcmp(t->name, buf) == 0)
5342                         break;
5343         }
5344         if (!t) {
5345                 ret = -EINVAL;
5346                 goto out;
5347         }
5348         if (t == tr->current_trace)
5349                 goto out;
5350
5351         /* Some tracers won't work when set from the kernel command line */
5352         if (system_state < SYSTEM_RUNNING && t->noboot) {
5353                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5354                         t->name);
5355                 goto out;
5356         }
5357
5358         /* Some tracers are only allowed for the top level buffer */
5359         if (!trace_ok_for_array(t, tr)) {
5360                 ret = -EINVAL;
5361                 goto out;
5362         }
5363
5364         /* If trace pipe files are being read, we can't change the tracer */
5365         if (tr->current_trace->ref) {
5366                 ret = -EBUSY;
5367                 goto out;
5368         }
5369
5370         trace_branch_disable();
5371
5372         tr->current_trace->enabled--;
5373
5374         if (tr->current_trace->reset)
5375                 tr->current_trace->reset(tr);
5376
5377         /* Current trace needs to be nop_trace before synchronize_sched */
5378         tr->current_trace = &nop_trace;
5379
5380 #ifdef CONFIG_TRACER_MAX_TRACE
5381         had_max_tr = tr->allocated_snapshot;
5382
5383         if (had_max_tr && !t->use_max_tr) {
5384                 /*
5385                  * We need to make sure that the update_max_tr sees that
5386                  * current_trace changed to nop_trace to keep it from
5387                  * swapping the buffers after we resize it.
5388                  * The update_max_tr is called with interrupts disabled,
5389                  * so a synchronize_sched() is sufficient.
5390                  */
5391                 synchronize_sched();
5392                 free_snapshot(tr);
5393         }
5394 #endif
5395
5396 #ifdef CONFIG_TRACER_MAX_TRACE
5397         if (t->use_max_tr && !had_max_tr) {
5398                 ret = alloc_snapshot(tr);
5399                 if (ret < 0)
5400                         goto out;
5401         }
5402 #endif
5403
5404         if (t->init) {
5405                 ret = tracer_init(t, tr);
5406                 if (ret)
5407                         goto out;
5408         }
5409
5410         tr->current_trace = t;
5411         tr->current_trace->enabled++;
5412         trace_branch_enable(tr);
5413  out:
5414         mutex_unlock(&trace_types_lock);
5415
5416         return ret;
5417 }
5418
5419 static ssize_t
5420 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5421                         size_t cnt, loff_t *ppos)
5422 {
5423         struct trace_array *tr = filp->private_data;
5424         char buf[MAX_TRACER_SIZE+1];
5425         int i;
5426         size_t ret;
5427         int err;
5428
5429         ret = cnt;
5430
5431         if (cnt > MAX_TRACER_SIZE)
5432                 cnt = MAX_TRACER_SIZE;
5433
5434         if (copy_from_user(buf, ubuf, cnt))
5435                 return -EFAULT;
5436
5437         buf[cnt] = 0;
5438
5439         /* strip ending whitespace. */
5440         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5441                 buf[i] = 0;
5442
5443         err = tracing_set_tracer(tr, buf);
5444         if (err)
5445                 return err;
5446
5447         *ppos += ret;
5448
5449         return ret;
5450 }
5451
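/*
 * Helpers for files that store a latency in nanoseconds internally but are
 * read and written in microseconds (tracing_thresh and tracing_max_latency).
 * A stored value of -1 is reported as "-1" rather than being converted.
 */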
5452 static ssize_t
5453 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5454                    size_t cnt, loff_t *ppos)
5455 {
5456         char buf[64];
5457         int r;
5458
5459         r = snprintf(buf, sizeof(buf), "%ld\n",
5460                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5461         if (r > sizeof(buf))
5462                 r = sizeof(buf);
5463         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5464 }
5465
5466 static ssize_t
5467 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5468                     size_t cnt, loff_t *ppos)
5469 {
5470         unsigned long val;
5471         int ret;
5472
5473         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5474         if (ret)
5475                 return ret;
5476
5477         *ptr = val * 1000;
5478
5479         return cnt;
5480 }
5481
5482 static ssize_t
5483 tracing_thresh_read(struct file *filp, char __user *ubuf,
5484                     size_t cnt, loff_t *ppos)
5485 {
5486         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5487 }
5488
5489 static ssize_t
5490 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5491                      size_t cnt, loff_t *ppos)
5492 {
5493         struct trace_array *tr = filp->private_data;
5494         int ret;
5495
5496         mutex_lock(&trace_types_lock);
5497         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5498         if (ret < 0)
5499                 goto out;
5500
5501         if (tr->current_trace->update_thresh) {
5502                 ret = tr->current_trace->update_thresh(tr);
5503                 if (ret < 0)
5504                         goto out;
5505         }
5506
5507         ret = cnt;
5508 out:
5509         mutex_unlock(&trace_types_lock);
5510
5511         return ret;
5512 }
5513
5514 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5515
5516 static ssize_t
5517 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5518                      size_t cnt, loff_t *ppos)
5519 {
5520         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5521 }
5522
5523 static ssize_t
5524 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5525                       size_t cnt, loff_t *ppos)
5526 {
5527         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5528 }
5529
5530 #endif
5531
5532 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5533 {
5534         struct trace_array *tr = inode->i_private;
5535         struct trace_iterator *iter;
5536         int ret = 0;
5537
5538         if (tracing_disabled)
5539                 return -ENODEV;
5540
5541         if (trace_array_get(tr) < 0)
5542                 return -ENODEV;
5543
5544         mutex_lock(&trace_types_lock);
5545
5546         /* create a buffer to store the information to pass to userspace */
5547         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5548         if (!iter) {
5549                 ret = -ENOMEM;
5550                 __trace_array_put(tr);
5551                 goto out;
5552         }
5553
5554         trace_seq_init(&iter->seq);
5555         iter->trace = tr->current_trace;
5556
5557         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5558                 ret = -ENOMEM;
5559                 goto fail;
5560         }
5561
5562         /* trace pipe does not show start of buffer */
5563         cpumask_setall(iter->started);
5564
5565         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5566                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5567
5568         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5569         if (trace_clocks[tr->clock_id].in_ns)
5570                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5571
5572         iter->tr = tr;
5573         iter->trace_buffer = &tr->trace_buffer;
5574         iter->cpu_file = tracing_get_cpu(inode);
5575         mutex_init(&iter->mutex);
5576         filp->private_data = iter;
5577
5578         if (iter->trace->pipe_open)
5579                 iter->trace->pipe_open(iter);
5580
5581         nonseekable_open(inode, filp);
5582
5583         tr->current_trace->ref++;
5584 out:
5585         mutex_unlock(&trace_types_lock);
5586         return ret;
5587
5588 fail:
5589         kfree(iter->trace);
5590         kfree(iter);
5591         __trace_array_put(tr);
5592         mutex_unlock(&trace_types_lock);
5593         return ret;
5594 }
5595
5596 static int tracing_release_pipe(struct inode *inode, struct file *file)
5597 {
5598         struct trace_iterator *iter = file->private_data;
5599         struct trace_array *tr = inode->i_private;
5600
5601         mutex_lock(&trace_types_lock);
5602
5603         tr->current_trace->ref--;
5604
5605         if (iter->trace->pipe_close)
5606                 iter->trace->pipe_close(iter);
5607
5608         mutex_unlock(&trace_types_lock);
5609
5610         free_cpumask_var(iter->started);
5611         mutex_destroy(&iter->mutex);
5612         kfree(iter);
5613
5614         trace_array_put(tr);
5615
5616         return 0;
5617 }
5618
5619 static __poll_t
5620 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5621 {
5622         struct trace_array *tr = iter->tr;
5623
5624         /* Iterators are static; they should be either filled or empty */
5625         if (trace_buffer_iter(iter, iter->cpu_file))
5626                 return EPOLLIN | EPOLLRDNORM;
5627
5628         if (tr->trace_flags & TRACE_ITER_BLOCK)
5629                 /*
5630                  * Always select as readable when in blocking mode
5631                  */
5632                 return EPOLLIN | EPOLLRDNORM;
5633         else
5634                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5635                                              filp, poll_table);
5636 }
5637
5638 static __poll_t
5639 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5640 {
5641         struct trace_iterator *iter = filp->private_data;
5642
5643         return trace_poll(iter, filp, poll_table);
5644 }
5645
5646 /* Must be called with iter->mutex held. */
5647 static int tracing_wait_pipe(struct file *filp)
5648 {
5649         struct trace_iterator *iter = filp->private_data;
5650         int ret;
5651
5652         while (trace_empty(iter)) {
5653
5654                 if ((filp->f_flags & O_NONBLOCK)) {
5655                         return -EAGAIN;
5656                 }
5657
5658                 /*
5659                  * We keep blocking while tracing is enabled, and also while
5660                  * tracing is disabled but nothing has been read yet. This
5661                  * allows a user to cat this file, and then enable tracing.
5662                  * But once something has been read, we give an EOF when
5663                  * tracing is disabled again.
5664                  *
5665                  * iter->pos will be 0 if we haven't read anything.
5666                  */
5667                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5668                         break;
5669
5670                 mutex_unlock(&iter->mutex);
5671
5672                 ret = wait_on_pipe(iter, false);
5673
5674                 mutex_lock(&iter->mutex);
5675
5676                 if (ret)
5677                         return ret;
5678         }
5679
5680         return 1;
5681 }
5682
5683 /*
5684  * Consumer reader.
5685  */
5686 static ssize_t
5687 tracing_read_pipe(struct file *filp, char __user *ubuf,
5688                   size_t cnt, loff_t *ppos)
5689 {
5690         struct trace_iterator *iter = filp->private_data;
5691         ssize_t sret;
5692
5693         /*
5694          * Avoid more than one consumer on a single file descriptor
5695          * This is just a matter of traces coherency, the ring buffer itself
5696          * is protected.
5697          */
5698         mutex_lock(&iter->mutex);
5699
5700         /* return any leftover data */
5701         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5702         if (sret != -EBUSY)
5703                 goto out;
5704
5705         trace_seq_init(&iter->seq);
5706
5707         if (iter->trace->read) {
5708                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5709                 if (sret)
5710                         goto out;
5711         }
5712
5713 waitagain:
5714         sret = tracing_wait_pipe(filp);
5715         if (sret <= 0)
5716                 goto out;
5717
5718         /* stop when tracing is finished */
5719         if (trace_empty(iter)) {
5720                 sret = 0;
5721                 goto out;
5722         }
5723
5724         if (cnt >= PAGE_SIZE)
5725                 cnt = PAGE_SIZE - 1;
5726
5727         /* reset all but tr, trace, and overruns */
5728         memset(&iter->seq, 0,
5729                sizeof(struct trace_iterator) -
5730                offsetof(struct trace_iterator, seq));
5731         cpumask_clear(iter->started);
5732         iter->pos = -1;
5733
5734         trace_event_read_lock();
5735         trace_access_lock(iter->cpu_file);
5736         while (trace_find_next_entry_inc(iter) != NULL) {
5737                 enum print_line_t ret;
5738                 int save_len = iter->seq.seq.len;
5739
5740                 ret = print_trace_line(iter);
5741                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5742                         /* don't print partial lines */
5743                         iter->seq.seq.len = save_len;
5744                         break;
5745                 }
5746                 if (ret != TRACE_TYPE_NO_CONSUME)
5747                         trace_consume(iter);
5748
5749                 if (trace_seq_used(&iter->seq) >= cnt)
5750                         break;
5751
5752                 /*
5753                  * Setting the full flag means we reached the trace_seq buffer
5754                  * size and we should leave by partial output condition above.
5755                  * One of the trace_seq_* functions is not used properly.
5756                  */
5757                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5758                           iter->ent->type);
5759         }
5760         trace_access_unlock(iter->cpu_file);
5761         trace_event_read_unlock();
5762
5763         /* Now copy what we have to the user */
5764         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5765         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5766                 trace_seq_init(&iter->seq);
5767
5768         /*
5769          * If there was nothing to send to user, in spite of consuming trace
5770          * entries, go back to wait for more entries.
5771          */
5772         if (sret == -EBUSY)
5773                 goto waitagain;
5774
5775 out:
5776         mutex_unlock(&iter->mutex);
5777
5778         return sret;
5779 }
5780
5781 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5782                                      unsigned int idx)
5783 {
5784         __free_page(spd->pages[idx]);
5785 }
5786
5787 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5788         .can_merge              = 0,
5789         .confirm                = generic_pipe_buf_confirm,
5790         .release                = generic_pipe_buf_release,
5791         .steal                  = generic_pipe_buf_steal,
5792         .get                    = generic_pipe_buf_get,
5793 };
5794
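/*
 * Format trace entries into iter->seq until either the page-sized seq
 * buffer fills up or the remaining splice budget @rem is exhausted.
 * Returns how much of @rem is left for the next page.
 */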
5795 static size_t
5796 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5797 {
5798         size_t count;
5799         int save_len;
5800         int ret;
5801
5802         /* Seq buffer is page-sized, exactly what we need. */
5803         for (;;) {
5804                 save_len = iter->seq.seq.len;
5805                 ret = print_trace_line(iter);
5806
5807                 if (trace_seq_has_overflowed(&iter->seq)) {
5808                         iter->seq.seq.len = save_len;
5809                         break;
5810                 }
5811
5812                 /*
5813                  * This should not be hit, because it should only
5814                  * be set if the iter->seq overflowed. But check it
5815                  * anyway to be safe.
5816                  */
5817                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5818                         iter->seq.seq.len = save_len;
5819                         break;
5820                 }
5821
5822                 count = trace_seq_used(&iter->seq) - save_len;
5823                 if (rem < count) {
5824                         rem = 0;
5825                         iter->seq.seq.len = save_len;
5826                         break;
5827                 }
5828
5829                 if (ret != TRACE_TYPE_NO_CONSUME)
5830                         trace_consume(iter);
5831                 rem -= count;
5832                 if (!trace_find_next_entry_inc(iter))   {
5833                         rem = 0;
5834                         iter->ent = NULL;
5835                         break;
5836                 }
5837         }
5838
5839         return rem;
5840 }
5841
5842 static ssize_t tracing_splice_read_pipe(struct file *filp,
5843                                         loff_t *ppos,
5844                                         struct pipe_inode_info *pipe,
5845                                         size_t len,
5846                                         unsigned int flags)
5847 {
5848         struct page *pages_def[PIPE_DEF_BUFFERS];
5849         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5850         struct trace_iterator *iter = filp->private_data;
5851         struct splice_pipe_desc spd = {
5852                 .pages          = pages_def,
5853                 .partial        = partial_def,
5854                 .nr_pages       = 0, /* This gets updated below. */
5855                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5856                 .ops            = &tracing_pipe_buf_ops,
5857                 .spd_release    = tracing_spd_release_pipe,
5858         };
5859         ssize_t ret;
5860         size_t rem;
5861         unsigned int i;
5862
5863         if (splice_grow_spd(pipe, &spd))
5864                 return -ENOMEM;
5865
5866         mutex_lock(&iter->mutex);
5867
5868         if (iter->trace->splice_read) {
5869                 ret = iter->trace->splice_read(iter, filp,
5870                                                ppos, pipe, len, flags);
5871                 if (ret)
5872                         goto out_err;
5873         }
5874
5875         ret = tracing_wait_pipe(filp);
5876         if (ret <= 0)
5877                 goto out_err;
5878
5879         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5880                 ret = -EFAULT;
5881                 goto out_err;
5882         }
5883
5884         trace_event_read_lock();
5885         trace_access_lock(iter->cpu_file);
5886
5887         /* Fill as many pages as possible. */
5888         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5889                 spd.pages[i] = alloc_page(GFP_KERNEL);
5890                 if (!spd.pages[i])
5891                         break;
5892
5893                 rem = tracing_fill_pipe_page(rem, iter);
5894
5895                 /* Copy the data into the page, so we can start over. */
5896                 ret = trace_seq_to_buffer(&iter->seq,
5897                                           page_address(spd.pages[i]),
5898                                           trace_seq_used(&iter->seq));
5899                 if (ret < 0) {
5900                         __free_page(spd.pages[i]);
5901                         break;
5902                 }
5903                 spd.partial[i].offset = 0;
5904                 spd.partial[i].len = trace_seq_used(&iter->seq);
5905
5906                 trace_seq_init(&iter->seq);
5907         }
5908
5909         trace_access_unlock(iter->cpu_file);
5910         trace_event_read_unlock();
5911         mutex_unlock(&iter->mutex);
5912
5913         spd.nr_pages = i;
5914
5915         if (i)
5916                 ret = splice_to_pipe(pipe, &spd);
5917         else
5918                 ret = 0;
5919 out:
5920         splice_shrink_spd(&spd);
5921         return ret;
5922
5923 out_err:
5924         mutex_unlock(&iter->mutex);
5925         goto out;
5926 }
5927
5928 static ssize_t
5929 tracing_entries_read(struct file *filp, char __user *ubuf,
5930                      size_t cnt, loff_t *ppos)
5931 {
5932         struct inode *inode = file_inode(filp);
5933         struct trace_array *tr = inode->i_private;
5934         int cpu = tracing_get_cpu(inode);
5935         char buf[64];
5936         int r = 0;
5937         ssize_t ret;
5938
5939         mutex_lock(&trace_types_lock);
5940
5941         if (cpu == RING_BUFFER_ALL_CPUS) {
5942                 int cpu, buf_size_same;
5943                 unsigned long size;
5944
5945                 size = 0;
5946                 buf_size_same = 1;
5947                 /* check if all cpu sizes are same */
5948                 for_each_tracing_cpu(cpu) {
5949                         /* fill in the size from first enabled cpu */
5950                         if (size == 0)
5951                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5952                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5953                                 buf_size_same = 0;
5954                                 break;
5955                         }
5956                 }
5957
5958                 if (buf_size_same) {
5959                         if (!ring_buffer_expanded)
5960                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5961                                             size >> 10,
5962                                             trace_buf_size >> 10);
5963                         else
5964                                 r = sprintf(buf, "%lu\n", size >> 10);
5965                 } else
5966                         r = sprintf(buf, "X\n");
5967         } else
5968                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5969
5970         mutex_unlock(&trace_types_lock);
5971
5972         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5973         return ret;
5974 }
5975
5976 static ssize_t
5977 tracing_entries_write(struct file *filp, const char __user *ubuf,
5978                       size_t cnt, loff_t *ppos)
5979 {
5980         struct inode *inode = file_inode(filp);
5981         struct trace_array *tr = inode->i_private;
5982         unsigned long val;
5983         int ret;
5984
5985         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5986         if (ret)
5987                 return ret;
5988
5989         /* must have at least 1 entry */
5990         if (!val)
5991                 return -EINVAL;
5992
5993         /* value is in KB */
5994         val <<= 10;
5995         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5996         if (ret < 0)
5997                 return ret;
5998
5999         *ppos += cnt;
6000
6001         return cnt;
6002 }
6003
6004 static ssize_t
6005 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6006                                 size_t cnt, loff_t *ppos)
6007 {
6008         struct trace_array *tr = filp->private_data;
6009         char buf[64];
6010         int r, cpu;
6011         unsigned long size = 0, expanded_size = 0;
6012
6013         mutex_lock(&trace_types_lock);
6014         for_each_tracing_cpu(cpu) {
6015                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6016                 if (!ring_buffer_expanded)
6017                         expanded_size += trace_buf_size >> 10;
6018         }
6019         if (ring_buffer_expanded)
6020                 r = sprintf(buf, "%lu\n", size);
6021         else
6022                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6023         mutex_unlock(&trace_types_lock);
6024
6025         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6026 }
6027
6028 static ssize_t
6029 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6030                           size_t cnt, loff_t *ppos)
6031 {
6032         /*
6033          * There is no need to read what the user has written; this function
6034          * only exists so that writing with "echo" does not return an error
6035          */
6036
6037         *ppos += cnt;
6038
6039         return cnt;
6040 }
6041
6042 static int
6043 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6044 {
6045         struct trace_array *tr = inode->i_private;
6046
6047         /* disable tracing ? */
6048         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6049                 tracer_tracing_off(tr);
6050         /* resize the ring buffer to 0 */
6051         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6052
6053         trace_array_put(tr);
6054
6055         return 0;
6056 }
6057
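/*
 * Writes to the "trace_marker" file inject a TRACE_PRINT event containing
 * the user's text (truncated to TRACE_BUF_SIZE) directly into the ring
 * buffer, e.g. from the shell:
 *
 *   # echo "hit the interesting spot" > trace_marker
 */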
6058 static ssize_t
6059 tracing_mark_write(struct file *filp, const char __user *ubuf,
6060                                         size_t cnt, loff_t *fpos)
6061 {
6062         struct trace_array *tr = filp->private_data;
6063         struct ring_buffer_event *event;
6064         struct ring_buffer *buffer;
6065         struct print_entry *entry;
6066         unsigned long irq_flags;
6067         const char faulted[] = "<faulted>";
6068         ssize_t written;
6069         int size;
6070         int len;
6071
6072 /* Used in tracing_mark_raw_write() as well */
6073 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6074
6075         if (tracing_disabled)
6076                 return -EINVAL;
6077
6078         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6079                 return -EINVAL;
6080
6081         if (cnt > TRACE_BUF_SIZE)
6082                 cnt = TRACE_BUF_SIZE;
6083
6084         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6085
6086         local_save_flags(irq_flags);
6087         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6088
6089         /* If less than "<faulted>", then make sure we can still add that */
6090         if (cnt < FAULTED_SIZE)
6091                 size += FAULTED_SIZE - cnt;
6092
6093         buffer = tr->trace_buffer.buffer;
6094         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6095                                             irq_flags, preempt_count());
6096         if (unlikely(!event))
6097                 /* Ring buffer disabled, return as if not open for write */
6098                 return -EBADF;
6099
6100         entry = ring_buffer_event_data(event);
6101         entry->ip = _THIS_IP_;
6102
6103         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6104         if (len) {
6105                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6106                 cnt = FAULTED_SIZE;
6107                 written = -EFAULT;
6108         } else
6109                 written = cnt;
6110         len = cnt;
6111
6112         if (entry->buf[cnt - 1] != '\n') {
6113                 entry->buf[cnt] = '\n';
6114                 entry->buf[cnt + 1] = '\0';
6115         } else
6116                 entry->buf[cnt] = '\0';
6117
6118         __buffer_unlock_commit(buffer, event);
6119
6120         if (written > 0)
6121                 *fpos += written;
6122
6123         return written;
6124 }
6125
6126 /* Limit it for now to 3K (including tag) */
6127 #define RAW_DATA_MAX_SIZE (1024*3)
6128
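/*
 * "trace_marker_raw" is the binary counterpart: the payload's first
 * sizeof(int) bytes are an application-chosen tag id and the rest is
 * opaque data, recorded as a TRACE_RAW_DATA event.
 */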
6129 static ssize_t
6130 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6131                                         size_t cnt, loff_t *fpos)
6132 {
6133         struct trace_array *tr = filp->private_data;
6134         struct ring_buffer_event *event;
6135         struct ring_buffer *buffer;
6136         struct raw_data_entry *entry;
6137         const char faulted[] = "<faulted>";
6138         unsigned long irq_flags;
6139         ssize_t written;
6140         int size;
6141         int len;
6142
6143 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6144
6145         if (tracing_disabled)
6146                 return -EINVAL;
6147
6148         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6149                 return -EINVAL;
6150
6151         /* The marker must at least have a tag id */
6152         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6153                 return -EINVAL;
6154
6155         if (cnt > TRACE_BUF_SIZE)
6156                 cnt = TRACE_BUF_SIZE;
6157
6158         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6159
6160         local_save_flags(irq_flags);
6161         size = sizeof(*entry) + cnt;
6162         if (cnt < FAULT_SIZE_ID)
6163                 size += FAULT_SIZE_ID - cnt;
6164
6165         buffer = tr->trace_buffer.buffer;
6166         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6167                                             irq_flags, preempt_count());
6168         if (!event)
6169                 /* Ring buffer disabled, return as if not open for write */
6170                 return -EBADF;
6171
6172         entry = ring_buffer_event_data(event);
6173
6174         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6175         if (len) {
6176                 entry->id = -1;
6177                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6178                 written = -EFAULT;
6179         } else
6180                 written = cnt;
6181
6182         __buffer_unlock_commit(buffer, event);
6183
6184         if (written > 0)
6185                 *fpos += written;
6186
6187         return written;
6188 }
6189
6190 static int tracing_clock_show(struct seq_file *m, void *v)
6191 {
6192         struct trace_array *tr = m->private;
6193         int i;
6194
6195         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6196                 seq_printf(m,
6197                         "%s%s%s%s", i ? " " : "",
6198                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6199                         i == tr->clock_id ? "]" : "");
6200         seq_putc(m, '\n');
6201
6202         return 0;
6203 }
6204
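/*
 * Clock selection via the "trace_clock" file, for example:
 *
 *   # cat trace_clock          (the current clock is shown in brackets)
 *   # echo global > trace_clock
 *
 * Note that switching clocks resets the buffers below, since timestamps
 * taken with different clocks are not comparable.
 */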
6205 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6206 {
6207         int i;
6208
6209         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6210                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6211                         break;
6212         }
6213         if (i == ARRAY_SIZE(trace_clocks))
6214                 return -EINVAL;
6215
6216         mutex_lock(&trace_types_lock);
6217
6218         tr->clock_id = i;
6219
6220         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6221
6222         /*
6223          * New clock may not be consistent with the previous clock.
6224          * Reset the buffer so that it doesn't have incomparable timestamps.
6225          */
6226         tracing_reset_online_cpus(&tr->trace_buffer);
6227
6228 #ifdef CONFIG_TRACER_MAX_TRACE
6229         if (tr->max_buffer.buffer)
6230                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6231         tracing_reset_online_cpus(&tr->max_buffer);
6232 #endif
6233
6234         mutex_unlock(&trace_types_lock);
6235
6236         return 0;
6237 }
6238
6239 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6240                                    size_t cnt, loff_t *fpos)
6241 {
6242         struct seq_file *m = filp->private_data;
6243         struct trace_array *tr = m->private;
6244         char buf[64];
6245         const char *clockstr;
6246         int ret;
6247
6248         if (cnt >= sizeof(buf))
6249                 return -EINVAL;
6250
6251         if (copy_from_user(buf, ubuf, cnt))
6252                 return -EFAULT;
6253
6254         buf[cnt] = 0;
6255
6256         clockstr = strstrip(buf);
6257
6258         ret = tracing_set_clock(tr, clockstr);
6259         if (ret)
6260                 return ret;
6261
6262         *fpos += cnt;
6263
6264         return cnt;
6265 }
6266
6267 static int tracing_clock_open(struct inode *inode, struct file *file)
6268 {
6269         struct trace_array *tr = inode->i_private;
6270         int ret;
6271
6272         if (tracing_disabled)
6273                 return -ENODEV;
6274
6275         if (trace_array_get(tr))
6276                 return -ENODEV;
6277
6278         ret = single_open(file, tracing_clock_show, inode->i_private);
6279         if (ret < 0)
6280                 trace_array_put(tr);
6281
6282         return ret;
6283 }
6284
6285 struct ftrace_buffer_info {
6286         struct trace_iterator   iter;
6287         void                    *spare;
6288         unsigned int            spare_cpu;
6289         unsigned int            read;
6290 };
6291
6292 #ifdef CONFIG_TRACER_SNAPSHOT
6293 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6294 {
6295         struct trace_array *tr = inode->i_private;
6296         struct trace_iterator *iter;
6297         struct seq_file *m;
6298         int ret = 0;
6299
6300         if (trace_array_get(tr) < 0)
6301                 return -ENODEV;
6302
6303         if (file->f_mode & FMODE_READ) {
6304                 iter = __tracing_open(inode, file, true);
6305                 if (IS_ERR(iter))
6306                         ret = PTR_ERR(iter);
6307         } else {
6308                 /* Writes still need the seq_file to hold the private data */
6309                 ret = -ENOMEM;
6310                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6311                 if (!m)
6312                         goto out;
6313                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6314                 if (!iter) {
6315                         kfree(m);
6316                         goto out;
6317                 }
6318                 ret = 0;
6319
6320                 iter->tr = tr;
6321                 iter->trace_buffer = &tr->max_buffer;
6322                 iter->cpu_file = tracing_get_cpu(inode);
6323                 m->private = iter;
6324                 file->private_data = m;
6325         }
6326 out:
6327         if (ret < 0)
6328                 trace_array_put(tr);
6329
6330         return ret;
6331 }
6332
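/*
 * Semantics of writing to the "snapshot" file, as implemented by the
 * switch below:
 *   0 - free the snapshot buffer (only valid on the all-CPUs file)
 *   1 - allocate the snapshot buffer if needed and swap it with the live
 *       buffer (per-CPU swap only if the ring buffer supports it)
 *   * - any other value clears the snapshot buffer contents
 */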
6333 static ssize_t
6334 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6335                        loff_t *ppos)
6336 {
6337         struct seq_file *m = filp->private_data;
6338         struct trace_iterator *iter = m->private;
6339         struct trace_array *tr = iter->tr;
6340         unsigned long val;
6341         int ret;
6342
6343         ret = tracing_update_buffers();
6344         if (ret < 0)
6345                 return ret;
6346
6347         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6348         if (ret)
6349                 return ret;
6350
6351         mutex_lock(&trace_types_lock);
6352
6353         if (tr->current_trace->use_max_tr) {
6354                 ret = -EBUSY;
6355                 goto out;
6356         }
6357
6358         switch (val) {
6359         case 0:
6360                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6361                         ret = -EINVAL;
6362                         break;
6363                 }
6364                 if (tr->allocated_snapshot)
6365                         free_snapshot(tr);
6366                 break;
6367         case 1:
6368 /* Only allow per-cpu swap if the ring buffer supports it */
6369 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6370                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6371                         ret = -EINVAL;
6372                         break;
6373                 }
6374 #endif
6375                 if (!tr->allocated_snapshot) {
6376                         ret = alloc_snapshot(tr);
6377                         if (ret < 0)
6378                                 break;
6379                 }
6380                 local_irq_disable();
6381                 /* Now, we're going to swap */
6382                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6383                         update_max_tr(tr, current, smp_processor_id());
6384                 else
6385                         update_max_tr_single(tr, current, iter->cpu_file);
6386                 local_irq_enable();
6387                 break;
6388         default:
6389                 if (tr->allocated_snapshot) {
6390                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6391                                 tracing_reset_online_cpus(&tr->max_buffer);
6392                         else
6393                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6394                 }
6395                 break;
6396         }
6397
6398         if (ret >= 0) {
6399                 *ppos += cnt;
6400                 ret = cnt;
6401         }
6402 out:
6403         mutex_unlock(&trace_types_lock);
6404         return ret;
6405 }
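
/*
 * A minimal usage sketch of the write semantics implemented above, assuming
 * the "snapshot" file that snapshot_fops exposes further down is accessed
 * from the usual tracefs mount point:
 *
 *	echo 1 > snapshot	# allocate the snapshot buffer if needed and swap
 *	echo 0 > snapshot	# free the snapshot buffer again
 *	echo 2 > snapshot	# any other value just clears the snapshot contents
 */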
6406
6407 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6408 {
6409         struct seq_file *m = file->private_data;
6410         int ret;
6411
6412         ret = tracing_release(inode, file);
6413
6414         if (file->f_mode & FMODE_READ)
6415                 return ret;
6416
6417         /* If write only, the seq_file is just a stub */
6418         if (m)
6419                 kfree(m->private);
6420         kfree(m);
6421
6422         return 0;
6423 }
6424
6425 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6426 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6427                                     size_t count, loff_t *ppos);
6428 static int tracing_buffers_release(struct inode *inode, struct file *file);
6429 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6430                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6431
6432 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6433 {
6434         struct ftrace_buffer_info *info;
6435         int ret;
6436
6437         ret = tracing_buffers_open(inode, filp);
6438         if (ret < 0)
6439                 return ret;
6440
6441         info = filp->private_data;
6442
6443         if (info->iter.trace->use_max_tr) {
6444                 tracing_buffers_release(inode, filp);
6445                 return -EBUSY;
6446         }
6447
6448         info->iter.snapshot = true;
6449         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6450
6451         return ret;
6452 }
6453
6454 #endif /* CONFIG_TRACER_SNAPSHOT */
6455
6456
6457 static const struct file_operations tracing_thresh_fops = {
6458         .open           = tracing_open_generic,
6459         .read           = tracing_thresh_read,
6460         .write          = tracing_thresh_write,
6461         .llseek         = generic_file_llseek,
6462 };
6463
6464 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6465 static const struct file_operations tracing_max_lat_fops = {
6466         .open           = tracing_open_generic,
6467         .read           = tracing_max_lat_read,
6468         .write          = tracing_max_lat_write,
6469         .llseek         = generic_file_llseek,
6470 };
6471 #endif
6472
6473 static const struct file_operations set_tracer_fops = {
6474         .open           = tracing_open_generic,
6475         .read           = tracing_set_trace_read,
6476         .write          = tracing_set_trace_write,
6477         .llseek         = generic_file_llseek,
6478 };
6479
6480 static const struct file_operations tracing_pipe_fops = {
6481         .open           = tracing_open_pipe,
6482         .poll           = tracing_poll_pipe,
6483         .read           = tracing_read_pipe,
6484         .splice_read    = tracing_splice_read_pipe,
6485         .release        = tracing_release_pipe,
6486         .llseek         = no_llseek,
6487 };
6488
6489 static const struct file_operations tracing_entries_fops = {
6490         .open           = tracing_open_generic_tr,
6491         .read           = tracing_entries_read,
6492         .write          = tracing_entries_write,
6493         .llseek         = generic_file_llseek,
6494         .release        = tracing_release_generic_tr,
6495 };
6496
6497 static const struct file_operations tracing_total_entries_fops = {
6498         .open           = tracing_open_generic_tr,
6499         .read           = tracing_total_entries_read,
6500         .llseek         = generic_file_llseek,
6501         .release        = tracing_release_generic_tr,
6502 };
6503
6504 static const struct file_operations tracing_free_buffer_fops = {
6505         .open           = tracing_open_generic_tr,
6506         .write          = tracing_free_buffer_write,
6507         .release        = tracing_free_buffer_release,
6508 };
6509
6510 static const struct file_operations tracing_mark_fops = {
6511         .open           = tracing_open_generic_tr,
6512         .write          = tracing_mark_write,
6513         .llseek         = generic_file_llseek,
6514         .release        = tracing_release_generic_tr,
6515 };
6516
6517 static const struct file_operations tracing_mark_raw_fops = {
6518         .open           = tracing_open_generic_tr,
6519         .write          = tracing_mark_raw_write,
6520         .llseek         = generic_file_llseek,
6521         .release        = tracing_release_generic_tr,
6522 };
6523
6524 static const struct file_operations trace_clock_fops = {
6525         .open           = tracing_clock_open,
6526         .read           = seq_read,
6527         .llseek         = seq_lseek,
6528         .release        = tracing_single_release_tr,
6529         .write          = tracing_clock_write,
6530 };
6531
6532 #ifdef CONFIG_TRACER_SNAPSHOT
6533 static const struct file_operations snapshot_fops = {
6534         .open           = tracing_snapshot_open,
6535         .read           = seq_read,
6536         .write          = tracing_snapshot_write,
6537         .llseek         = tracing_lseek,
6538         .release        = tracing_snapshot_release,
6539 };
6540
6541 static const struct file_operations snapshot_raw_fops = {
6542         .open           = snapshot_raw_open,
6543         .read           = tracing_buffers_read,
6544         .release        = tracing_buffers_release,
6545         .splice_read    = tracing_buffers_splice_read,
6546         .llseek         = no_llseek,
6547 };
6548
6549 #endif /* CONFIG_TRACER_SNAPSHOT */
6550
6551 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6552 {
6553         struct trace_array *tr = inode->i_private;
6554         struct ftrace_buffer_info *info;
6555         int ret;
6556
6557         if (tracing_disabled)
6558                 return -ENODEV;
6559
6560         if (trace_array_get(tr) < 0)
6561                 return -ENODEV;
6562
6563         info = kzalloc(sizeof(*info), GFP_KERNEL);
6564         if (!info) {
6565                 trace_array_put(tr);
6566                 return -ENOMEM;
6567         }
6568
6569         mutex_lock(&trace_types_lock);
6570
6571         info->iter.tr           = tr;
6572         info->iter.cpu_file     = tracing_get_cpu(inode);
6573         info->iter.trace        = tr->current_trace;
6574         info->iter.trace_buffer = &tr->trace_buffer;
6575         info->spare             = NULL;
6576         /* Force reading ring buffer for first read */
6577         info->read              = (unsigned int)-1;
6578
6579         filp->private_data = info;
6580
6581         tr->current_trace->ref++;
6582
6583         mutex_unlock(&trace_types_lock);
6584
6585         ret = nonseekable_open(inode, filp);
6586         if (ret < 0)
6587                 trace_array_put(tr);
6588
6589         return ret;
6590 }
6591
6592 static __poll_t
6593 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6594 {
6595         struct ftrace_buffer_info *info = filp->private_data;
6596         struct trace_iterator *iter = &info->iter;
6597
6598         return trace_poll(iter, filp, poll_table);
6599 }
6600
6601 static ssize_t
6602 tracing_buffers_read(struct file *filp, char __user *ubuf,
6603                      size_t count, loff_t *ppos)
6604 {
6605         struct ftrace_buffer_info *info = filp->private_data;
6606         struct trace_iterator *iter = &info->iter;
6607         ssize_t ret = 0;
6608         ssize_t size;
6609
6610         if (!count)
6611                 return 0;
6612
6613 #ifdef CONFIG_TRACER_MAX_TRACE
6614         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6615                 return -EBUSY;
6616 #endif
6617
6618         if (!info->spare) {
6619                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6620                                                           iter->cpu_file);
6621                 if (IS_ERR(info->spare)) {
6622                         ret = PTR_ERR(info->spare);
6623                         info->spare = NULL;
6624                 } else {
6625                         info->spare_cpu = iter->cpu_file;
6626                 }
6627         }
6628         if (!info->spare)
6629                 return ret;
6630
6631         /* Do we have previous read data to read? */
6632         if (info->read < PAGE_SIZE)
6633                 goto read;
6634
6635  again:
6636         trace_access_lock(iter->cpu_file);
6637         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6638                                     &info->spare,
6639                                     count,
6640                                     iter->cpu_file, 0);
6641         trace_access_unlock(iter->cpu_file);
6642
6643         if (ret < 0) {
6644                 if (trace_empty(iter)) {
6645                         if ((filp->f_flags & O_NONBLOCK))
6646                                 return -EAGAIN;
6647
6648                         ret = wait_on_pipe(iter, false);
6649                         if (ret)
6650                                 return ret;
6651
6652                         goto again;
6653                 }
6654                 return 0;
6655         }
6656
6657         info->read = 0;
6658  read:
6659         size = PAGE_SIZE - info->read;
6660         if (size > count)
6661                 size = count;
6662
6663         ret = copy_to_user(ubuf, info->spare + info->read, size);
6664         if (ret == size)
6665                 return -EFAULT;
6666
6667         size -= ret;
6668
6669         *ppos += size;
6670         info->read += size;
6671
6672         return size;
6673 }
6674
6675 static int tracing_buffers_release(struct inode *inode, struct file *file)
6676 {
6677         struct ftrace_buffer_info *info = file->private_data;
6678         struct trace_iterator *iter = &info->iter;
6679
6680         mutex_lock(&trace_types_lock);
6681
6682         iter->tr->current_trace->ref--;
6683
6684         __trace_array_put(iter->tr);
6685
6686         if (info->spare)
6687                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6688                                            info->spare_cpu, info->spare);
6689         kfree(info);
6690
6691         mutex_unlock(&trace_types_lock);
6692
6693         return 0;
6694 }
6695
6696 struct buffer_ref {
6697         struct ring_buffer      *buffer;
6698         void                    *page;
6699         int                     cpu;
6700         int                     ref;
6701 };
6702
6703 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6704                                     struct pipe_buffer *buf)
6705 {
6706         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6707
6708         if (--ref->ref)
6709                 return;
6710
6711         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6712         kfree(ref);
6713         buf->private = 0;
6714 }
6715
6716 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6717                                 struct pipe_buffer *buf)
6718 {
6719         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6720
6721         ref->ref++;
6722 }
6723
6724 /* Pipe buffer operations for a buffer. */
6725 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6726         .can_merge              = 0,
6727         .confirm                = generic_pipe_buf_confirm,
6728         .release                = buffer_pipe_buf_release,
6729         .steal                  = generic_pipe_buf_steal,
6730         .get                    = buffer_pipe_buf_get,
6731 };
6732
6733 /*
6734  * Callback from splice_to_pipe(), if we need to release some pages
6735  * at the end of the spd in case we errored out while filling the pipe.
6736  */
6737 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6738 {
6739         struct buffer_ref *ref =
6740                 (struct buffer_ref *)spd->partial[i].private;
6741
6742         if (--ref->ref)
6743                 return;
6744
6745         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6746         kfree(ref);
6747         spd->partial[i].private = 0;
6748 }
6749
6750 static ssize_t
6751 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6752                             struct pipe_inode_info *pipe, size_t len,
6753                             unsigned int flags)
6754 {
6755         struct ftrace_buffer_info *info = file->private_data;
6756         struct trace_iterator *iter = &info->iter;
6757         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6758         struct page *pages_def[PIPE_DEF_BUFFERS];
6759         struct splice_pipe_desc spd = {
6760                 .pages          = pages_def,
6761                 .partial        = partial_def,
6762                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6763                 .ops            = &buffer_pipe_buf_ops,
6764                 .spd_release    = buffer_spd_release,
6765         };
6766         struct buffer_ref *ref;
6767         int entries, i;
6768         ssize_t ret = 0;
6769
6770 #ifdef CONFIG_TRACER_MAX_TRACE
6771         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6772                 return -EBUSY;
6773 #endif
6774
6775         if (*ppos & (PAGE_SIZE - 1))
6776                 return -EINVAL;
6777
6778         if (len & (PAGE_SIZE - 1)) {
6779                 if (len < PAGE_SIZE)
6780                         return -EINVAL;
6781                 len &= PAGE_MASK;
6782         }
6783
6784         if (splice_grow_spd(pipe, &spd))
6785                 return -ENOMEM;
6786
6787  again:
6788         trace_access_lock(iter->cpu_file);
6789         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6790
6791         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6792                 struct page *page;
6793                 int r;
6794
6795                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6796                 if (!ref) {
6797                         ret = -ENOMEM;
6798                         break;
6799                 }
6800
6801                 ref->ref = 1;
6802                 ref->buffer = iter->trace_buffer->buffer;
6803                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6804                 if (IS_ERR(ref->page)) {
6805                         ret = PTR_ERR(ref->page);
6806                         ref->page = NULL;
6807                         kfree(ref);
6808                         break;
6809                 }
6810                 ref->cpu = iter->cpu_file;
6811
6812                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6813                                           len, iter->cpu_file, 1);
6814                 if (r < 0) {
6815                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6816                                                    ref->page);
6817                         kfree(ref);
6818                         break;
6819                 }
6820
6821                 page = virt_to_page(ref->page);
6822
6823                 spd.pages[i] = page;
6824                 spd.partial[i].len = PAGE_SIZE;
6825                 spd.partial[i].offset = 0;
6826                 spd.partial[i].private = (unsigned long)ref;
6827                 spd.nr_pages++;
6828                 *ppos += PAGE_SIZE;
6829
6830                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6831         }
6832
6833         trace_access_unlock(iter->cpu_file);
6834         spd.nr_pages = i;
6835
6836         /* did we read anything? */
6837         if (!spd.nr_pages) {
6838                 if (ret)
6839                         goto out;
6840
6841                 ret = -EAGAIN;
6842                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6843                         goto out;
6844
6845                 ret = wait_on_pipe(iter, true);
6846                 if (ret)
6847                         goto out;
6848
6849                 goto again;
6850         }
6851
6852         ret = splice_to_pipe(pipe, &spd);
6853 out:
6854         splice_shrink_spd(&spd);
6855
6856         return ret;
6857 }
6858
6859 static const struct file_operations tracing_buffers_fops = {
6860         .open           = tracing_buffers_open,
6861         .read           = tracing_buffers_read,
6862         .poll           = tracing_buffers_poll,
6863         .release        = tracing_buffers_release,
6864         .splice_read    = tracing_buffers_splice_read,
6865         .llseek         = no_llseek,
6866 };
6867
6868 static ssize_t
6869 tracing_stats_read(struct file *filp, char __user *ubuf,
6870                    size_t count, loff_t *ppos)
6871 {
6872         struct inode *inode = file_inode(filp);
6873         struct trace_array *tr = inode->i_private;
6874         struct trace_buffer *trace_buf = &tr->trace_buffer;
6875         int cpu = tracing_get_cpu(inode);
6876         struct trace_seq *s;
6877         unsigned long cnt;
6878         unsigned long long t;
6879         unsigned long usec_rem;
6880
6881         s = kmalloc(sizeof(*s), GFP_KERNEL);
6882         if (!s)
6883                 return -ENOMEM;
6884
6885         trace_seq_init(s);
6886
6887         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6888         trace_seq_printf(s, "entries: %ld\n", cnt);
6889
6890         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6891         trace_seq_printf(s, "overrun: %ld\n", cnt);
6892
6893         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6894         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6895
6896         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6897         trace_seq_printf(s, "bytes: %ld\n", cnt);
6898
6899         if (trace_clocks[tr->clock_id].in_ns) {
6900                 /* local or global for trace_clock */
6901                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6902                 usec_rem = do_div(t, USEC_PER_SEC);
6903                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6904                                                                 t, usec_rem);
6905
6906                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6907                 usec_rem = do_div(t, USEC_PER_SEC);
6908                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6909         } else {
6910                 /* counter or tsc mode for trace_clock */
6911                 trace_seq_printf(s, "oldest event ts: %llu\n",
6912                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6913
6914                 trace_seq_printf(s, "now ts: %llu\n",
6915                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6916         }
6917
6918         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6919         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6920
6921         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6922         trace_seq_printf(s, "read events: %ld\n", cnt);
6923
6924         count = simple_read_from_buffer(ubuf, count, ppos,
6925                                         s->buffer, trace_seq_used(s));
6926
6927         kfree(s);
6928
6929         return count;
6930 }
6931
6932 static const struct file_operations tracing_stats_fops = {
6933         .open           = tracing_open_generic_tr,
6934         .read           = tracing_stats_read,
6935         .llseek         = generic_file_llseek,
6936         .release        = tracing_release_generic_tr,
6937 };
6938
6939 #ifdef CONFIG_DYNAMIC_FTRACE
6940
6941 static ssize_t
6942 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6943                   size_t cnt, loff_t *ppos)
6944 {
6945         unsigned long *p = filp->private_data;
6946         char buf[64]; /* Not too big for a shallow stack */
6947         int r;
6948
6949         r = scnprintf(buf, 63, "%ld", *p);
6950         buf[r++] = '\n';
6951
6952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6953 }
6954
6955 static const struct file_operations tracing_dyn_info_fops = {
6956         .open           = tracing_open_generic,
6957         .read           = tracing_read_dyn_info,
6958         .llseek         = generic_file_llseek,
6959 };
6960 #endif /* CONFIG_DYNAMIC_FTRACE */
6961
6962 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6963 static void
6964 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6965                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6966                 void *data)
6967 {
6968         tracing_snapshot_instance(tr);
6969 }
6970
6971 static void
6972 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6973                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6974                       void *data)
6975 {
6976         struct ftrace_func_mapper *mapper = data;
6977         long *count = NULL;
6978
6979         if (mapper)
6980                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6981
6982         if (count) {
6983
6984                 if (*count <= 0)
6985                         return;
6986
6987                 (*count)--;
6988         }
6989
6990         tracing_snapshot_instance(tr);
6991 }
6992
6993 static int
6994 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6995                       struct ftrace_probe_ops *ops, void *data)
6996 {
6997         struct ftrace_func_mapper *mapper = data;
6998         long *count = NULL;
6999
7000         seq_printf(m, "%ps:", (void *)ip);
7001
7002         seq_puts(m, "snapshot");
7003
7004         if (mapper)
7005                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7006
7007         if (count)
7008                 seq_printf(m, ":count=%ld\n", *count);
7009         else
7010                 seq_puts(m, ":unlimited\n");
7011
7012         return 0;
7013 }
7014
7015 static int
7016 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7017                      unsigned long ip, void *init_data, void **data)
7018 {
7019         struct ftrace_func_mapper *mapper = *data;
7020
7021         if (!mapper) {
7022                 mapper = allocate_ftrace_func_mapper();
7023                 if (!mapper)
7024                         return -ENOMEM;
7025                 *data = mapper;
7026         }
7027
7028         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7029 }
7030
7031 static void
7032 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7033                      unsigned long ip, void *data)
7034 {
7035         struct ftrace_func_mapper *mapper = data;
7036
7037         if (!ip) {
7038                 if (!mapper)
7039                         return;
7040                 free_ftrace_func_mapper(mapper, NULL);
7041                 return;
7042         }
7043
7044         ftrace_func_mapper_remove_ip(mapper, ip);
7045 }
7046
7047 static struct ftrace_probe_ops snapshot_probe_ops = {
7048         .func                   = ftrace_snapshot,
7049         .print                  = ftrace_snapshot_print,
7050 };
7051
7052 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7053         .func                   = ftrace_count_snapshot,
7054         .print                  = ftrace_snapshot_print,
7055         .init                   = ftrace_snapshot_init,
7056         .free                   = ftrace_snapshot_free,
7057 };
7058
7059 static int
7060 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7061                                char *glob, char *cmd, char *param, int enable)
7062 {
7063         struct ftrace_probe_ops *ops;
7064         void *count = (void *)-1;
7065         char *number;
7066         int ret;
7067
7068         if (!tr)
7069                 return -ENODEV;
7070
7071         /* hash funcs only work with set_ftrace_filter */
7072         if (!enable)
7073                 return -EINVAL;
7074
7075         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7076
7077         if (glob[0] == '!')
7078                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7079
7080         if (!param)
7081                 goto out_reg;
7082
7083         number = strsep(&param, ":");
7084
7085         if (!strlen(number))
7086                 goto out_reg;
7087
7088         /*
7089          * We use the callback data field (which is a pointer)
7090          * as our counter.
7091          */
7092         ret = kstrtoul(number, 0, (unsigned long *)&count);
7093         if (ret)
7094                 return ret;
7095
7096  out_reg:
7097         ret = alloc_snapshot(tr);
7098         if (ret < 0)
7099                 goto out;
7100
7101         ret = register_ftrace_function_probe(glob, tr, ops, count);
7102
7103  out:
7104         return ret < 0 ? ret : 0;
7105 }
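
/*
 * A rough usage sketch for the "snapshot" ftrace command this callback
 * implements (registered as ftrace_snapshot_cmd below), assuming the usual
 * set_ftrace_filter interface and a traceable function called "some_func":
 *
 *	echo 'some_func:snapshot' > set_ftrace_filter	 # snapshot on every hit
 *	echo 'some_func:snapshot:3' > set_ftrace_filter	 # only the first 3 hits
 *	echo '!some_func:snapshot' > set_ftrace_filter	 # remove the probe again
 */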
7106
7107 static struct ftrace_func_command ftrace_snapshot_cmd = {
7108         .name                   = "snapshot",
7109         .func                   = ftrace_trace_snapshot_callback,
7110 };
7111
7112 static __init int register_snapshot_cmd(void)
7113 {
7114         return register_ftrace_command(&ftrace_snapshot_cmd);
7115 }
7116 #else
7117 static inline __init int register_snapshot_cmd(void) { return 0; }
7118 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7119
7120 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7121 {
7122         if (WARN_ON(!tr->dir))
7123                 return ERR_PTR(-ENODEV);
7124
7125         /* Top directory uses NULL as the parent */
7126         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7127                 return NULL;
7128
7129         /* All sub buffers have a descriptor */
7130         return tr->dir;
7131 }
7132
7133 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7134 {
7135         struct dentry *d_tracer;
7136
7137         if (tr->percpu_dir)
7138                 return tr->percpu_dir;
7139
7140         d_tracer = tracing_get_dentry(tr);
7141         if (IS_ERR(d_tracer))
7142                 return NULL;
7143
7144         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7145
7146         WARN_ONCE(!tr->percpu_dir,
7147                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7148
7149         return tr->percpu_dir;
7150 }
7151
7152 static struct dentry *
7153 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7154                       void *data, long cpu, const struct file_operations *fops)
7155 {
7156         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7157
7158         if (ret) /* See tracing_get_cpu() */
7159                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7160         return ret;
7161 }
7162
7163 static void
7164 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7165 {
7166         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7167         struct dentry *d_cpu;
7168         char cpu_dir[30]; /* 30 characters should be more than enough */
7169
7170         if (!d_percpu)
7171                 return;
7172
7173         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7174         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7175         if (!d_cpu) {
7176                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7177                 return;
7178         }
7179
7180         /* per cpu trace_pipe */
7181         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7182                                 tr, cpu, &tracing_pipe_fops);
7183
7184         /* per cpu trace */
7185         trace_create_cpu_file("trace", 0644, d_cpu,
7186                                 tr, cpu, &tracing_fops);
7187
7188         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7189                                 tr, cpu, &tracing_buffers_fops);
7190
7191         trace_create_cpu_file("stats", 0444, d_cpu,
7192                                 tr, cpu, &tracing_stats_fops);
7193
7194         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7195                                 tr, cpu, &tracing_entries_fops);
7196
7197 #ifdef CONFIG_TRACER_SNAPSHOT
7198         trace_create_cpu_file("snapshot", 0644, d_cpu,
7199                                 tr, cpu, &snapshot_fops);
7200
7201         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7202                                 tr, cpu, &snapshot_raw_fops);
7203 #endif
7204 }
7205
7206 #ifdef CONFIG_FTRACE_SELFTEST
7207 /* Let selftest have access to static functions in this file */
7208 #include "trace_selftest.c"
7209 #endif
7210
7211 static ssize_t
7212 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7213                         loff_t *ppos)
7214 {
7215         struct trace_option_dentry *topt = filp->private_data;
7216         char *buf;
7217
7218         if (topt->flags->val & topt->opt->bit)
7219                 buf = "1\n";
7220         else
7221                 buf = "0\n";
7222
7223         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7224 }
7225
7226 static ssize_t
7227 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7228                          loff_t *ppos)
7229 {
7230         struct trace_option_dentry *topt = filp->private_data;
7231         unsigned long val;
7232         int ret;
7233
7234         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7235         if (ret)
7236                 return ret;
7237
7238         if (val != 0 && val != 1)
7239                 return -EINVAL;
7240
7241         if (!!(topt->flags->val & topt->opt->bit) != val) {
7242                 mutex_lock(&trace_types_lock);
7243                 ret = __set_tracer_option(topt->tr, topt->flags,
7244                                           topt->opt, !val);
7245                 mutex_unlock(&trace_types_lock);
7246                 if (ret)
7247                         return ret;
7248         }
7249
7250         *ppos += cnt;
7251
7252         return cnt;
7253 }
7254
7255
7256 static const struct file_operations trace_options_fops = {
7257         .open = tracing_open_generic,
7258         .read = trace_options_read,
7259         .write = trace_options_write,
7260         .llseek = generic_file_llseek,
7261 };
7262
7263 /*
7264  * In order to pass in both the trace_array descriptor as well as the index
7265  * to the flag that the trace option file represents, the trace_array
7266  * has a character array of trace_flags_index[], which holds the index
7267  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7268  * The address of this character array is passed to the flag option file
7269  * read/write callbacks.
7270  *
7271  * In order to extract both the index and the trace_array descriptor,
7272  * get_tr_index() uses the following algorithm.
7273  *
7274  *   idx = *ptr;
7275  *
7276  * This works because the pointer holds the address of an index entry
7277  * whose value equals its own position (remember index[1] == 1).
7278  *
7279  * Then, to get the trace_array descriptor, subtract that index from the
7280  * pointer to reach the start of the index array:
7281  *
7282  *   ptr - idx == &index[0]
7283  *
7284  * Then a simple container_of() from that pointer gets us to the
7285  * trace_array descriptor.
7286  */
7287 static void get_tr_index(void *data, struct trace_array **ptr,
7288                          unsigned int *pindex)
7289 {
7290         *pindex = *(unsigned char *)data;
7291
7292         *ptr = container_of(data - *pindex, struct trace_array,
7293                             trace_flags_index);
7294 }
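
/*
 * A minimal sketch of the round trip described above, assuming
 * trace_flags_index[] has been filled by init_trace_flags_index() further
 * down; shown here for flag bit 3 of some trace_array *tr:
 *
 *	void *data = &tr->trace_flags_index[3];	(what the option file stores)
 *	struct trace_array *found;
 *	unsigned int index;
 *
 *	get_tr_index(data, &found, &index);
 *
 * afterwards index == 3 and found == tr, since data - 3 points at
 * &tr->trace_flags_index[0] and container_of() walks back to the
 * enclosing trace_array.
 */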
7295
7296 static ssize_t
7297 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7298                         loff_t *ppos)
7299 {
7300         void *tr_index = filp->private_data;
7301         struct trace_array *tr;
7302         unsigned int index;
7303         char *buf;
7304
7305         get_tr_index(tr_index, &tr, &index);
7306
7307         if (tr->trace_flags & (1 << index))
7308                 buf = "1\n";
7309         else
7310                 buf = "0\n";
7311
7312         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7313 }
7314
7315 static ssize_t
7316 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7317                          loff_t *ppos)
7318 {
7319         void *tr_index = filp->private_data;
7320         struct trace_array *tr;
7321         unsigned int index;
7322         unsigned long val;
7323         int ret;
7324
7325         get_tr_index(tr_index, &tr, &index);
7326
7327         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7328         if (ret)
7329                 return ret;
7330
7331         if (val != 0 && val != 1)
7332                 return -EINVAL;
7333
7334         mutex_lock(&trace_types_lock);
7335         ret = set_tracer_flag(tr, 1 << index, val);
7336         mutex_unlock(&trace_types_lock);
7337
7338         if (ret < 0)
7339                 return ret;
7340
7341         *ppos += cnt;
7342
7343         return cnt;
7344 }
7345
7346 static const struct file_operations trace_options_core_fops = {
7347         .open = tracing_open_generic,
7348         .read = trace_options_core_read,
7349         .write = trace_options_core_write,
7350         .llseek = generic_file_llseek,
7351 };
7352
7353 struct dentry *trace_create_file(const char *name,
7354                                  umode_t mode,
7355                                  struct dentry *parent,
7356                                  void *data,
7357                                  const struct file_operations *fops)
7358 {
7359         struct dentry *ret;
7360
7361         ret = tracefs_create_file(name, mode, parent, data, fops);
7362         if (!ret)
7363                 pr_warn("Could not create tracefs '%s' entry\n", name);
7364
7365         return ret;
7366 }
7367
7368
7369 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7370 {
7371         struct dentry *d_tracer;
7372
7373         if (tr->options)
7374                 return tr->options;
7375
7376         d_tracer = tracing_get_dentry(tr);
7377         if (IS_ERR(d_tracer))
7378                 return NULL;
7379
7380         tr->options = tracefs_create_dir("options", d_tracer);
7381         if (!tr->options) {
7382                 pr_warn("Could not create tracefs directory 'options'\n");
7383                 return NULL;
7384         }
7385
7386         return tr->options;
7387 }
7388
7389 static void
7390 create_trace_option_file(struct trace_array *tr,
7391                          struct trace_option_dentry *topt,
7392                          struct tracer_flags *flags,
7393                          struct tracer_opt *opt)
7394 {
7395         struct dentry *t_options;
7396
7397         t_options = trace_options_init_dentry(tr);
7398         if (!t_options)
7399                 return;
7400
7401         topt->flags = flags;
7402         topt->opt = opt;
7403         topt->tr = tr;
7404
7405         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7406                                     &trace_options_fops);
7407
7408 }
7409
7410 static void
7411 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7412 {
7413         struct trace_option_dentry *topts;
7414         struct trace_options *tr_topts;
7415         struct tracer_flags *flags;
7416         struct tracer_opt *opts;
7417         int cnt;
7418         int i;
7419
7420         if (!tracer)
7421                 return;
7422
7423         flags = tracer->flags;
7424
7425         if (!flags || !flags->opts)
7426                 return;
7427
7428         /*
7429          * If this is an instance, only create flags for tracers
7430          * the instance may have.
7431          */
7432         if (!trace_ok_for_array(tracer, tr))
7433                 return;
7434
7435         for (i = 0; i < tr->nr_topts; i++) {
7436                 /* Make sure there are no duplicate flags. */
7437                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7438                         return;
7439         }
7440
7441         opts = flags->opts;
7442
7443         for (cnt = 0; opts[cnt].name; cnt++)
7444                 ;
7445
7446         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7447         if (!topts)
7448                 return;
7449
7450         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7451                             GFP_KERNEL);
7452         if (!tr_topts) {
7453                 kfree(topts);
7454                 return;
7455         }
7456
7457         tr->topts = tr_topts;
7458         tr->topts[tr->nr_topts].tracer = tracer;
7459         tr->topts[tr->nr_topts].topts = topts;
7460         tr->nr_topts++;
7461
7462         for (cnt = 0; opts[cnt].name; cnt++) {
7463                 create_trace_option_file(tr, &topts[cnt], flags,
7464                                          &opts[cnt]);
7465                 WARN_ONCE(topts[cnt].entry == NULL,
7466                           "Failed to create trace option: %s",
7467                           opts[cnt].name);
7468         }
7469 }
7470
7471 static struct dentry *
7472 create_trace_option_core_file(struct trace_array *tr,
7473                               const char *option, long index)
7474 {
7475         struct dentry *t_options;
7476
7477         t_options = trace_options_init_dentry(tr);
7478         if (!t_options)
7479                 return NULL;
7480
7481         return trace_create_file(option, 0644, t_options,
7482                                  (void *)&tr->trace_flags_index[index],
7483                                  &trace_options_core_fops);
7484 }
7485
7486 static void create_trace_options_dir(struct trace_array *tr)
7487 {
7488         struct dentry *t_options;
7489         bool top_level = tr == &global_trace;
7490         int i;
7491
7492         t_options = trace_options_init_dentry(tr);
7493         if (!t_options)
7494                 return;
7495
7496         for (i = 0; trace_options[i]; i++) {
7497                 if (top_level ||
7498                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7499                         create_trace_option_core_file(tr, trace_options[i], i);
7500         }
7501 }
7502
7503 static ssize_t
7504 rb_simple_read(struct file *filp, char __user *ubuf,
7505                size_t cnt, loff_t *ppos)
7506 {
7507         struct trace_array *tr = filp->private_data;
7508         char buf[64];
7509         int r;
7510
7511         r = tracer_tracing_is_on(tr);
7512         r = sprintf(buf, "%d\n", r);
7513
7514         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7515 }
7516
7517 static ssize_t
7518 rb_simple_write(struct file *filp, const char __user *ubuf,
7519                 size_t cnt, loff_t *ppos)
7520 {
7521         struct trace_array *tr = filp->private_data;
7522         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7523         unsigned long val;
7524         int ret;
7525
7526         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7527         if (ret)
7528                 return ret;
7529
7530         if (buffer) {
7531                 mutex_lock(&trace_types_lock);
7532                 if (val) {
7533                         tracer_tracing_on(tr);
7534                         if (tr->current_trace->start)
7535                                 tr->current_trace->start(tr);
7536                 } else {
7537                         tracer_tracing_off(tr);
7538                         if (tr->current_trace->stop)
7539                                 tr->current_trace->stop(tr);
7540                 }
7541                 mutex_unlock(&trace_types_lock);
7542         }
7543
7544         (*ppos)++;
7545
7546         return cnt;
7547 }
7548
7549 static const struct file_operations rb_simple_fops = {
7550         .open           = tracing_open_generic_tr,
7551         .read           = rb_simple_read,
7552         .write          = rb_simple_write,
7553         .release        = tracing_release_generic_tr,
7554         .llseek         = default_llseek,
7555 };
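
/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. A minimal usage sketch, assuming the usual
 * tracefs layout:
 *
 *	echo 0 > tracing_on	# stop writing into the ring buffer
 *	echo 1 > tracing_on	# resume writing into the ring buffer
 *	cat tracing_on		# read back the current state (0 or 1)
 */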
7556
7557 struct dentry *trace_instance_dir;
7558
7559 static void
7560 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7561
7562 static int
7563 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7564 {
7565         enum ring_buffer_flags rb_flags;
7566
7567         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7568
7569         buf->tr = tr;
7570
7571         buf->buffer = ring_buffer_alloc(size, rb_flags);
7572         if (!buf->buffer)
7573                 return -ENOMEM;
7574
7575         buf->data = alloc_percpu(struct trace_array_cpu);
7576         if (!buf->data) {
7577                 ring_buffer_free(buf->buffer);
7578                 buf->buffer = NULL;
7579                 return -ENOMEM;
7580         }
7581
7582         /* Allocate the first page for all buffers */
7583         set_buffer_entries(&tr->trace_buffer,
7584                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7585
7586         return 0;
7587 }
7588
7589 static int allocate_trace_buffers(struct trace_array *tr, int size)
7590 {
7591         int ret;
7592
7593         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7594         if (ret)
7595                 return ret;
7596
7597 #ifdef CONFIG_TRACER_MAX_TRACE
7598         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7599                                     allocate_snapshot ? size : 1);
7600         if (WARN_ON(ret)) {
7601                 ring_buffer_free(tr->trace_buffer.buffer);
7602                 tr->trace_buffer.buffer = NULL;
7603                 free_percpu(tr->trace_buffer.data);
7604                 tr->trace_buffer.data = NULL;
7605                 return -ENOMEM;
7606         }
7607         tr->allocated_snapshot = allocate_snapshot;
7608
7609         /*
7610          * Only the top level trace array gets its snapshot allocated
7611          * from the kernel command line.
7612          */
7613         allocate_snapshot = false;
7614 #endif
7615         return 0;
7616 }
7617
7618 static void free_trace_buffer(struct trace_buffer *buf)
7619 {
7620         if (buf->buffer) {
7621                 ring_buffer_free(buf->buffer);
7622                 buf->buffer = NULL;
7623                 free_percpu(buf->data);
7624                 buf->data = NULL;
7625         }
7626 }
7627
7628 static void free_trace_buffers(struct trace_array *tr)
7629 {
7630         if (!tr)
7631                 return;
7632
7633         free_trace_buffer(&tr->trace_buffer);
7634
7635 #ifdef CONFIG_TRACER_MAX_TRACE
7636         free_trace_buffer(&tr->max_buffer);
7637 #endif
7638 }
7639
7640 static void init_trace_flags_index(struct trace_array *tr)
7641 {
7642         int i;
7643
7644         /* Used by the trace options files */
7645         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7646                 tr->trace_flags_index[i] = i;
7647 }
7648
7649 static void __update_tracer_options(struct trace_array *tr)
7650 {
7651         struct tracer *t;
7652
7653         for (t = trace_types; t; t = t->next)
7654                 add_tracer_options(tr, t);
7655 }
7656
7657 static void update_tracer_options(struct trace_array *tr)
7658 {
7659         mutex_lock(&trace_types_lock);
7660         __update_tracer_options(tr);
7661         mutex_unlock(&trace_types_lock);
7662 }
7663
7664 static int instance_mkdir(const char *name)
7665 {
7666         struct trace_array *tr;
7667         int ret;
7668
7669         mutex_lock(&event_mutex);
7670         mutex_lock(&trace_types_lock);
7671
7672         ret = -EEXIST;
7673         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7674                 if (tr->name && strcmp(tr->name, name) == 0)
7675                         goto out_unlock;
7676         }
7677
7678         ret = -ENOMEM;
7679         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7680         if (!tr)
7681                 goto out_unlock;
7682
7683         tr->name = kstrdup(name, GFP_KERNEL);
7684         if (!tr->name)
7685                 goto out_free_tr;
7686
7687         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7688                 goto out_free_tr;
7689
7690         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7691
7692         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7693
7694         raw_spin_lock_init(&tr->start_lock);
7695
7696         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7697
7698         tr->current_trace = &nop_trace;
7699
7700         INIT_LIST_HEAD(&tr->systems);
7701         INIT_LIST_HEAD(&tr->events);
7702
7703         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7704                 goto out_free_tr;
7705
7706         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7707         if (!tr->dir)
7708                 goto out_free_tr;
7709
7710         ret = event_trace_add_tracer(tr->dir, tr);
7711         if (ret) {
7712                 tracefs_remove_recursive(tr->dir);
7713                 goto out_free_tr;
7714         }
7715
7716         ftrace_init_trace_array(tr);
7717
7718         init_tracer_tracefs(tr, tr->dir);
7719         init_trace_flags_index(tr);
7720         __update_tracer_options(tr);
7721
7722         list_add(&tr->list, &ftrace_trace_arrays);
7723
7724         mutex_unlock(&trace_types_lock);
7725         mutex_unlock(&event_mutex);
7726
7727         return 0;
7728
7729  out_free_tr:
7730         free_trace_buffers(tr);
7731         free_cpumask_var(tr->tracing_cpumask);
7732         kfree(tr->name);
7733         kfree(tr);
7734
7735  out_unlock:
7736         mutex_unlock(&trace_types_lock);
7737         mutex_unlock(&event_mutex);
7738
7739         return ret;
7740
7741 }
7742
7743 static int instance_rmdir(const char *name)
7744 {
7745         struct trace_array *tr;
7746         int found = 0;
7747         int ret;
7748         int i;
7749
7750         mutex_lock(&event_mutex);
7751         mutex_lock(&trace_types_lock);
7752
7753         ret = -ENODEV;
7754         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7755                 if (tr->name && strcmp(tr->name, name) == 0) {
7756                         found = 1;
7757                         break;
7758                 }
7759         }
7760         if (!found)
7761                 goto out_unlock;
7762
7763         ret = -EBUSY;
7764         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7765                 goto out_unlock;
7766
7767         list_del(&tr->list);
7768
7769         /* Disable all the flags that were enabled coming in */
7770         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7771                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7772                         set_tracer_flag(tr, 1 << i, 0);
7773         }
7774
7775         tracing_set_nop(tr);
7776         clear_ftrace_function_probes(tr);
7777         event_trace_del_tracer(tr);
7778         ftrace_clear_pids(tr);
7779         ftrace_destroy_function_files(tr);
7780         tracefs_remove_recursive(tr->dir);
7781         free_trace_buffers(tr);
7782
7783         for (i = 0; i < tr->nr_topts; i++) {
7784                 kfree(tr->topts[i].topts);
7785         }
7786         kfree(tr->topts);
7787
7788         free_cpumask_var(tr->tracing_cpumask);
7789         kfree(tr->name);
7790         kfree(tr);
7791
7792         ret = 0;
7793
7794  out_unlock:
7795         mutex_unlock(&trace_types_lock);
7796         mutex_unlock(&event_mutex);
7797
7798         return ret;
7799 }
7800
7801 static __init void create_trace_instances(struct dentry *d_tracer)
7802 {
7803         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7804                                                          instance_mkdir,
7805                                                          instance_rmdir);
7806         if (WARN_ON(!trace_instance_dir))
7807                 return;
7808 }
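
/*
 * instance_mkdir() and instance_rmdir() above are reached through the
 * "instances" directory created here. A minimal usage sketch, assuming the
 * usual tracefs mount point and an arbitrary instance name "foo":
 *
 *	mkdir /sys/kernel/tracing/instances/foo		# calls instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo		# calls instance_rmdir("foo")
 */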
7809
7810 static void
7811 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7812 {
7813         int cpu;
7814
7815         trace_create_file("available_tracers", 0444, d_tracer,
7816                         tr, &show_traces_fops);
7817
7818         trace_create_file("current_tracer", 0644, d_tracer,
7819                         tr, &set_tracer_fops);
7820
7821         trace_create_file("tracing_cpumask", 0644, d_tracer,
7822                           tr, &tracing_cpumask_fops);
7823
7824         trace_create_file("trace_options", 0644, d_tracer,
7825                           tr, &tracing_iter_fops);
7826
7827         trace_create_file("trace", 0644, d_tracer,
7828                           tr, &tracing_fops);
7829
7830         trace_create_file("trace_pipe", 0444, d_tracer,
7831                           tr, &tracing_pipe_fops);
7832
7833         trace_create_file("buffer_size_kb", 0644, d_tracer,
7834                           tr, &tracing_entries_fops);
7835
7836         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7837                           tr, &tracing_total_entries_fops);
7838
7839         trace_create_file("free_buffer", 0200, d_tracer,
7840                           tr, &tracing_free_buffer_fops);
7841
7842         trace_create_file("trace_marker", 0220, d_tracer,
7843                           tr, &tracing_mark_fops);
7844
7845         trace_create_file("trace_marker_raw", 0220, d_tracer,
7846                           tr, &tracing_mark_raw_fops);
7847
7848         trace_create_file("trace_clock", 0644, d_tracer, tr,
7849                           &trace_clock_fops);
7850
7851         trace_create_file("tracing_on", 0644, d_tracer,
7852                           tr, &rb_simple_fops);
7853
7854         create_trace_options_dir(tr);
7855
7856 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7857         trace_create_file("tracing_max_latency", 0644, d_tracer,
7858                         &tr->max_latency, &tracing_max_lat_fops);
7859 #endif
7860
7861         if (ftrace_create_function_files(tr, d_tracer))
7862                 WARN(1, "Could not allocate function filter files");
7863
7864 #ifdef CONFIG_TRACER_SNAPSHOT
7865         trace_create_file("snapshot", 0644, d_tracer,
7866                           tr, &snapshot_fops);
7867 #endif
7868
7869         for_each_tracing_cpu(cpu)
7870                 tracing_init_tracefs_percpu(tr, cpu);
7871
7872         ftrace_init_tracefs(tr, d_tracer);
7873 }
7874
7875 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7876 {
7877         struct vfsmount *mnt;
7878         struct file_system_type *type;
7879
7880         /*
7881          * To maintain backward compatibility for tools that mount
7882          * debugfs to get to the tracing facility, tracefs is automatically
7883          * mounted to the debugfs/tracing directory.
7884          */
7885         type = get_fs_type("tracefs");
7886         if (!type)
7887                 return NULL;
7888         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7889         put_filesystem(type);
7890         if (IS_ERR(mnt))
7891                 return NULL;
7892         mntget(mnt);
7893
7894         return mnt;
7895 }
7896
7897 /**
7898  * tracing_init_dentry - initialize top level trace array
7899  *
7900  * This is called when creating files or directories in the tracing
7901  * directory. It is called via fs_initcall() by any of the boot up code
7902  * and expects to return the dentry of the top level tracing directory.
7903  */
7904 struct dentry *tracing_init_dentry(void)
7905 {
7906         struct trace_array *tr = &global_trace;
7907
7908         /* The top level trace array uses NULL as parent */
7909         if (tr->dir)
7910                 return NULL;
7911
7912         if (WARN_ON(!tracefs_initialized()) ||
7913                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7914                  WARN_ON(!debugfs_initialized())))
7915                 return ERR_PTR(-ENODEV);
7916
7917         /*
7918          * As there may still be users that expect the tracing
7919          * files to exist in debugfs/tracing, we must automount
7920          * the tracefs file system there, so older tools still
7921          * work with the newer kernel.
7922          */
7923         tr->dir = debugfs_create_automount("tracing", NULL,
7924                                            trace_automount, NULL);
7925         if (!tr->dir) {
7926                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7927                 return ERR_PTR(-ENOMEM);
7928         }
7929
7930         return NULL;
7931 }
7932
7933 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7934 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7935
7936 static void __init trace_eval_init(void)
7937 {
7938         int len;
7939
7940         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7941         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7942 }
7943
7944 #ifdef CONFIG_MODULES
7945 static void trace_module_add_evals(struct module *mod)
7946 {
7947         if (!mod->num_trace_evals)
7948                 return;
7949
7950         /*
7951          * Modules with bad taint do not have events created, so do
7952          * not bother with enums either.
7953          */
7954         if (trace_module_has_bad_taint(mod))
7955                 return;
7956
7957         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7958 }
7959
7960 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7961 static void trace_module_remove_evals(struct module *mod)
7962 {
7963         union trace_eval_map_item *map;
7964         union trace_eval_map_item **last = &trace_eval_maps;
7965
7966         if (!mod->num_trace_evals)
7967                 return;
7968
7969         mutex_lock(&trace_eval_mutex);
7970
7971         map = trace_eval_maps;
7972
7973         while (map) {
7974                 if (map->head.mod == mod)
7975                         break;
7976                 map = trace_eval_jmp_to_tail(map);
7977                 last = &map->tail.next;
7978                 map = map->tail.next;
7979         }
7980         if (!map)
7981                 goto out;
7982
7983         *last = trace_eval_jmp_to_tail(map)->tail.next;
7984         kfree(map);
7985  out:
7986         mutex_unlock(&trace_eval_mutex);
7987 }
7988 #else
7989 static inline void trace_module_remove_evals(struct module *mod) { }
7990 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7991
7992 static int trace_module_notify(struct notifier_block *self,
7993                                unsigned long val, void *data)
7994 {
7995         struct module *mod = data;
7996
7997         switch (val) {
7998         case MODULE_STATE_COMING:
7999                 trace_module_add_evals(mod);
8000                 break;
8001         case MODULE_STATE_GOING:
8002                 trace_module_remove_evals(mod);
8003                 break;
8004         }
8005
8006         return 0;
8007 }
8008
8009 static struct notifier_block trace_module_nb = {
8010         .notifier_call = trace_module_notify,
8011         .priority = 0,
8012 };
8013 #endif /* CONFIG_MODULES */
8014
8015 static __init int tracer_init_tracefs(void)
8016 {
8017         struct dentry *d_tracer;
8018
8019         trace_access_lock_init();
8020
8021         d_tracer = tracing_init_dentry();
8022         if (IS_ERR(d_tracer))
8023                 return 0;
8024
8025         init_tracer_tracefs(&global_trace, d_tracer);
8026         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8027
8028         trace_create_file("tracing_thresh", 0644, d_tracer,
8029                         &global_trace, &tracing_thresh_fops);
8030
8031         trace_create_file("README", 0444, d_tracer,
8032                         NULL, &tracing_readme_fops);
8033
8034         trace_create_file("saved_cmdlines", 0444, d_tracer,
8035                         NULL, &tracing_saved_cmdlines_fops);
8036
8037         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8038                           NULL, &tracing_saved_cmdlines_size_fops);
8039
8040         trace_create_file("saved_tgids", 0444, d_tracer,
8041                         NULL, &tracing_saved_tgids_fops);
8042
8043         trace_eval_init();
8044
8045         trace_create_eval_file(d_tracer);
8046
8047 #ifdef CONFIG_MODULES
8048         register_module_notifier(&trace_module_nb);
8049 #endif
8050
8051 #ifdef CONFIG_DYNAMIC_FTRACE
8052         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8053                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8054 #endif
8055
8056         create_trace_instances(d_tracer);
8057
8058         update_tracer_options(&global_trace);
8059
8060         return 0;
8061 }
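
/*
 * Illustrative use of a few of the files created above, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *        echo 100 > /sys/kernel/tracing/tracing_thresh
 *        cat /sys/kernel/tracing/saved_cmdlines
 *        echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */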
8062
8063 static int trace_panic_handler(struct notifier_block *this,
8064                                unsigned long event, void *unused)
8065 {
8066         if (ftrace_dump_on_oops)
8067                 ftrace_dump(ftrace_dump_on_oops);
8068         return NOTIFY_OK;
8069 }
8070
8071 static struct notifier_block trace_panic_notifier = {
8072         .notifier_call  = trace_panic_handler,
8073         .next           = NULL,
8074         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8075 };
8076
8077 static int trace_die_handler(struct notifier_block *self,
8078                              unsigned long val,
8079                              void *data)
8080 {
8081         switch (val) {
8082         case DIE_OOPS:
8083                 if (ftrace_dump_on_oops)
8084                         ftrace_dump(ftrace_dump_on_oops);
8085                 break;
8086         default:
8087                 break;
8088         }
8089         return NOTIFY_OK;
8090 }
8091
8092 static struct notifier_block trace_die_notifier = {
8093         .notifier_call = trace_die_handler,
8094         .priority = 200
8095 };
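
/*
 * ftrace_dump_on_oops is typically enabled either on the kernel command
 * line ("ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu") or at
 * run time via /proc/sys/kernel/ftrace_dump_on_oops; the panic and die
 * notifiers above then dump the ring buffer contents to the console.
 */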
8096
8097 /*
8098  * printk is limited to a maximum of 1024 bytes; we really don't need
8099  * it that big.  Nothing should be printing 1000 characters anyway.
8100  */
8101 #define TRACE_MAX_PRINT         1000
8102
8103 /*
8104  * Define here KERN_TRACE so that we have one place to modify
8105  * it if we decide to change what log level the ftrace dump
8106  * should be at.
8107  */
8108 #define KERN_TRACE              KERN_EMERG
8109
8110 void
8111 trace_printk_seq(struct trace_seq *s)
8112 {
8113         /* Probably should print a warning here. */
8114         if (s->seq.len >= TRACE_MAX_PRINT)
8115                 s->seq.len = TRACE_MAX_PRINT;
8116
8117         /*
8118          * More paranoid code. Although the buffer size is set to
8119          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8120          * an extra layer of protection.
8121          */
8122         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8123                 s->seq.len = s->seq.size - 1;
8124
8125         /* Should already be NUL terminated, but we are paranoid. */
8126         s->buffer[s->seq.len] = 0;
8127
8128         printk(KERN_TRACE "%s", s->buffer);
8129
8130         trace_seq_init(s);
8131 }
8132
8133 void trace_init_global_iter(struct trace_iterator *iter)
8134 {
8135         iter->tr = &global_trace;
8136         iter->trace = iter->tr->current_trace;
8137         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8138         iter->trace_buffer = &global_trace.trace_buffer;
8139
8140         if (iter->trace && iter->trace->open)
8141                 iter->trace->open(iter);
8142
8143         /* Annotate start of buffers if we had overruns */
8144         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8145                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8146
8147         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8148         if (trace_clocks[iter->tr->clock_id].in_ns)
8149                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8150 }
8151
8152 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8153 {
8154         /* use static because iter can be a bit big for the stack */
8155         static struct trace_iterator iter;
8156         static atomic_t dump_running;
8157         struct trace_array *tr = &global_trace;
8158         unsigned int old_userobj;
8159         unsigned long flags;
8160         int cnt = 0, cpu;
8161
8162         /* Only allow one dump user at a time. */
8163         if (atomic_inc_return(&dump_running) != 1) {
8164                 atomic_dec(&dump_running);
8165                 return;
8166         }
8167
8168         /*
8169          * Always turn off tracing when we dump.
8170          * We don't need to show trace output of what happens
8171          * between multiple crashes.
8172          *
8173          * If the user does a sysrq-z, then they can re-enable
8174          * tracing with echo 1 > tracing_on.
8175          */
8176         tracing_off();
8177
8178         local_irq_save(flags);
8179
8180         /* Simulate the iterator */
8181         trace_init_global_iter(&iter);
8182
8183         for_each_tracing_cpu(cpu) {
8184                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8185         }
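
        /*
         * Bumping the per-cpu "disabled" count above keeps new trace
         * events from being recorded into the buffers while we walk
         * and print them.
         */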
8186
8187         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8188
8189         /* don't look at user memory in panic mode */
8190         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8191
8192         switch (oops_dump_mode) {
8193         case DUMP_ALL:
8194                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8195                 break;
8196         case DUMP_ORIG:
8197                 iter.cpu_file = raw_smp_processor_id();
8198                 break;
8199         case DUMP_NONE:
8200                 goto out_enable;
8201         default:
8202                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8203                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8204         }
8205
8206         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8207
8208         /* Did function tracer already get disabled? */
8209         if (ftrace_is_dead()) {
8210                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8211                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8212         }
8213
8214         /*
8215          * We need to stop all tracing on all CPUs to read the
8216          * next buffer.  This is a bit expensive, but it is not
8217          * done often.  We fill in all that we can read, and then
8218          * release the locks again.
8219          */
8220
8221         while (!trace_empty(&iter)) {
8222
8223                 if (!cnt)
8224                         printk(KERN_TRACE "---------------------------------\n");
8225
8226                 cnt++;
8227
8228                 /* reset all but tr, trace, and overruns */
8229                 memset(&iter.seq, 0,
8230                        sizeof(struct trace_iterator) -
8231                        offsetof(struct trace_iterator, seq));
8232                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8233                 iter.pos = -1;
8234
8235                 if (trace_find_next_entry_inc(&iter) != NULL) {
8236                         int ret;
8237
8238                         ret = print_trace_line(&iter);
8239                         if (ret != TRACE_TYPE_NO_CONSUME)
8240                                 trace_consume(&iter);
8241                 }
8242                 touch_nmi_watchdog();
8243
8244                 trace_printk_seq(&iter.seq);
8245         }
8246
8247         if (!cnt)
8248                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8249         else
8250                 printk(KERN_TRACE "---------------------------------\n");
8251
8252  out_enable:
8253         tr->trace_flags |= old_userobj;
8254
8255         for_each_tracing_cpu(cpu) {
8256                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8257         }
8258         atomic_dec(&dump_running);
8259         local_irq_restore(flags);
8260 }
8261 EXPORT_SYMBOL_GPL(ftrace_dump);
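
/*
 * Besides the oops/panic notifiers and sysrq-z, ftrace_dump() can be
 * called directly from suspect kernel code while debugging, for
 * example (illustrative only, unexpected_state is hypothetical):
 *
 *        if (WARN_ON(unexpected_state))
 *                ftrace_dump(DUMP_ALL);   (or DUMP_ORIG for this CPU only)
 */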
8262
8263 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8264 {
8265         char **argv;
8266         int argc, ret;
8267
8268         argc = 0;
8269         ret = 0;
8270         argv = argv_split(GFP_KERNEL, buf, &argc);
8271         if (!argv)
8272                 return -ENOMEM;
8273
8274         if (argc)
8275                 ret = createfn(argc, argv);
8276
8277         argv_free(argv);
8278
8279         return ret;
8280 }
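
/*
 * A createfn callback receives the argv_split() of one input line.
 * For example (illustrative), writing "p:myprobe do_sys_open" to
 * kprobe_events reaches its createfn with argc == 2 and
 * argv == { "p:myprobe", "do_sys_open" }.
 */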
8281
8282 #define WRITE_BUFSIZE  4096
8283
8284 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8285                                 size_t count, loff_t *ppos,
8286                                 int (*createfn)(int, char **))
8287 {
8288         char *kbuf, *buf, *tmp;
8289         int ret = 0;
8290         size_t done = 0;
8291         size_t size;
8292
8293         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8294         if (!kbuf)
8295                 return -ENOMEM;
8296
8297         while (done < count) {
8298                 size = count - done;
8299
8300                 if (size >= WRITE_BUFSIZE)
8301                         size = WRITE_BUFSIZE - 1;
8302
8303                 if (copy_from_user(kbuf, buffer + done, size)) {
8304                         ret = -EFAULT;
8305                         goto out;
8306                 }
8307                 kbuf[size] = '\0';
8308                 buf = kbuf;
8309                 do {
8310                         tmp = strchr(buf, '\n');
8311                         if (tmp) {
8312                                 *tmp = '\0';
8313                                 size = tmp - buf + 1;
8314                         } else {
8315                                 size = strlen(buf);
8316                                 if (done + size < count) {
8317                                         if (buf != kbuf)
8318                                                 break;
8319                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8320                                         pr_warn("Line length is too long: Should be less than %d\n",
8321                                                 WRITE_BUFSIZE - 2);
8322                                         ret = -EINVAL;
8323                                         goto out;
8324                                 }
8325                         }
8326                         done += size;
8327
8328                         /* Remove comments */
8329                         tmp = strchr(buf, '#');
8330
8331                         if (tmp)
8332                                 *tmp = '\0';
8333
8334                         ret = trace_run_command(buf, createfn);
8335                         if (ret)
8336                                 goto out;
8337                         buf += size;
8338
8339                 } while (done < count);
8340         }
8341         ret = done;
8342
8343 out:
8344         kfree(kbuf);
8345
8346         return ret;
8347 }
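
/*
 * A single write may carry several newline-separated commands; '#'
 * starts a comment that is stripped to the end of the line, and any
 * line longer than WRITE_BUFSIZE - 2 characters is rejected with
 * -EINVAL before createfn ever sees it.
 */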
8348
8349 __init static int tracer_alloc_buffers(void)
8350 {
8351         int ring_buf_size;
8352         int ret = -ENOMEM;
8353
8354         /*
8355          * Make sure we don't accidentally add more trace options
8356          * than we have bits for.
8357          */
8358         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8359
8360         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8361                 goto out;
8362
8363         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8364                 goto out_free_buffer_mask;
8365
8366         /* Only allocate trace_printk buffers if a trace_printk exists */
8367         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8368                 /* Must be called before global_trace.buffer is allocated */
8369                 trace_printk_init_buffers();
8370
8371         /* To save memory, keep the ring buffer size at its minimum */
8372         if (ring_buffer_expanded)
8373                 ring_buf_size = trace_buf_size;
8374         else
8375                 ring_buf_size = 1;
8376
8377         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8378         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8379
8380         raw_spin_lock_init(&global_trace.start_lock);
8381
8382         /*
8383          * The prepare callback allocates some memory for the ring buffer. We
8384          * don't free the buffer if the CPU goes down. If we were to free
8385          * the buffer, then the user would lose any trace that was in the
8386          * buffer. The memory will be removed once the "instance" is removed.
8387          */
8388         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8389                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8390                                       NULL);
8391         if (ret < 0)
8392                 goto out_free_cpumask;
8393         /* Used for event triggers */
8394         ret = -ENOMEM;
8395         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8396         if (!temp_buffer)
8397                 goto out_rm_hp_state;
8398
8399         if (trace_create_savedcmd() < 0)
8400                 goto out_free_temp_buffer;
8401
8402         /* TODO: make the number of buffers hot pluggable with CPUS */
8403         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8404                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8405                 WARN_ON(1);
8406                 goto out_free_savedcmd;
8407         }
8408
8409         if (global_trace.buffer_disabled)
8410                 tracing_off();
8411
8412         if (trace_boot_clock) {
8413                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8414                 if (ret < 0)
8415                         pr_warn("Trace clock %s not defined, going back to default\n",
8416                                 trace_boot_clock);
8417         }
8418
8419         /*
8420          * register_tracer() might reference current_trace, so it
8421          * needs to be set before we register anything. This is
8422          * just a bootstrap of current_trace anyway.
8423          */
8424         global_trace.current_trace = &nop_trace;
8425
8426         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8427
8428         ftrace_init_global_array_ops(&global_trace);
8429
8430         init_trace_flags_index(&global_trace);
8431
8432         register_tracer(&nop_trace);
8433
8434         /* Function tracing may start here (via kernel command line) */
8435         init_function_trace();
8436
8437         /* All seems OK, enable tracing */
8438         tracing_disabled = 0;
8439
8440         atomic_notifier_chain_register(&panic_notifier_list,
8441                                        &trace_panic_notifier);
8442
8443         register_die_notifier(&trace_die_notifier);
8444
8445         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8446
8447         INIT_LIST_HEAD(&global_trace.systems);
8448         INIT_LIST_HEAD(&global_trace.events);
8449         list_add(&global_trace.list, &ftrace_trace_arrays);
8450
8451         apply_trace_boot_options();
8452
8453         register_snapshot_cmd();
8454
8455         return 0;
8456
8457 out_free_savedcmd:
8458         free_saved_cmdlines_buffer(savedcmd);
8459 out_free_temp_buffer:
8460         ring_buffer_free(temp_buffer);
8461 out_rm_hp_state:
8462         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8463 out_free_cpumask:
8464         free_cpumask_var(global_trace.tracing_cpumask);
8465 out_free_buffer_mask:
8466         free_cpumask_var(tracing_buffer_mask);
8467 out:
8468         return ret;
8469 }
8470
8471 void __init early_trace_init(void)
8472 {
8473         if (tracepoint_printk) {
8474                 tracepoint_print_iter =
8475                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8476                 if (WARN_ON(!tracepoint_print_iter))
8477                         tracepoint_printk = 0;
8478                 else
8479                         static_key_enable(&tracepoint_printk_key.key);
8480         }
8481         tracer_alloc_buffers();
8482 }
8483
8484 void __init trace_init(void)
8485 {
8486         trace_event_init();
8487 }
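
/*
 * Rough initialization ordering: early_trace_init() runs very early in
 * start_kernel() and allocates the ring buffers, trace_init() follows
 * once the event infrastructure can be brought up, and the tracefs
 * files themselves are only created by the fs_initcall() below.
 */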
8488
8489 __init static int clear_boot_tracer(void)
8490 {
8491         /*
8492          * The default bootup tracer string lives in an init section.
8493          * This function is called as a late initcall. If we did not
8494          * find the boot tracer, then clear it out, to prevent
8495          * later registration from accessing the buffer that is
8496          * about to be freed.
8497          */
8498         if (!default_bootup_tracer)
8499                 return 0;
8500
8501         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8502                default_bootup_tracer);
8503         default_bootup_tracer = NULL;
8504
8505         return 0;
8506 }
8507
8508 fs_initcall(tracer_init_tracefs);
8509 late_initcall_sync(clear_boot_tracer);