kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46
47 #include "trace.h"
48 #include "trace_output.h"
49
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring-buffer to count the
59  * entries inserted during the selftest, although some concurrent
60  * insertions into the ring-buffer, such as trace_printk, could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77         { }
78 };
79
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83         return 0;
84 }
85
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100
101 cpumask_var_t __read_mostly     tracing_buffer_mask;
102
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputting them to a
110  * serial console.
111  *
112  * It is off by default, but you can enable it either by specifying
113  * "ftrace_dump_on_oops" on the kernel command line, or by setting
114  * /proc/sys/kernel/ftrace_dump_on_oops.
115  * Set it to 1 to dump the buffers of all CPUs.
116  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
117  */
118
119 enum ftrace_dump_mode ftrace_dump_on_oops;
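/*
 * Example (illustrative sketch): the dump can be requested either on
 * the kernel command line or at run time through the sysctl, e.g.:
 *
 *        ftrace_dump_on_oops            (boot: dump all CPU buffers)
 *        ftrace_dump_on_oops=orig_cpu   (boot: dump only the oops'ing CPU)
 *        echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * These map to DUMP_ALL and DUMP_ORIG as parsed by
 * set_ftrace_dump_on_oops() below.
 */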
120
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127         struct module                   *mod;
128         unsigned long                   length;
129 };
130
131 union trace_eval_map_item;
132
133 struct trace_eval_map_tail {
134         /*
135          * "end" is first and points to NULL as it must be different
136          * from "mod" or "eval_string"
137          */
138         union trace_eval_map_item       *next;
139         const char                      *end;   /* points to NULL */
140 };
141
142 static DEFINE_MUTEX(trace_eval_mutex);
143
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152         struct trace_eval_map           map;
153         struct trace_eval_map_head      head;
154         struct trace_eval_map_tail      tail;
155 };
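/*
 * Illustrative layout sketch for one saved block holding N maps
 * (a restatement of the description above, with N as a placeholder):
 *
 *        [0]      head: .length = N, .mod = owning module (if not built in)
 *        [1..N]   map:  the saved trace_eval_map entries
 *        [N+1]    tail: .next = pointer to the next saved block, if any
 */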
156
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161
162 #define MAX_TRACER_SIZE         100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165
166 static bool allocate_snapshot;
167
168 static int __init set_cmdline_ftrace(char *str)
169 {
170         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171         default_bootup_tracer = bootup_tracer_buf;
172         /* We are using ftrace early, expand it */
173         ring_buffer_expanded = true;
174         return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180         if (*str++ != '=' || !*str) {
181                 ftrace_dump_on_oops = DUMP_ALL;
182                 return 1;
183         }
184
185         if (!strcmp("orig_cpu", str)) {
186                 ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193
194 static int __init stop_trace_on_warning(char *str)
195 {
196         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197                 __disable_trace_on_warning = 1;
198         return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201
202 static int __init boot_alloc_snapshot(char *str)
203 {
204         allocate_snapshot = true;
205         /* We also need the main ring buffer expanded */
206         ring_buffer_expanded = true;
207         return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210
211
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213
214 static int __init set_trace_boot_options(char *str)
215 {
216         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217         return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223
224 static int __init set_trace_boot_clock(char *str)
225 {
226         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227         trace_boot_clock = trace_boot_clock_buf;
228         return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231
232 static int __init set_tracepoint_printk(char *str)
233 {
234         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235                 tracepoint_printk = 1;
236         return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
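/*
 * Example (illustrative, configuration dependent): the boot options
 * handled above can be combined on the kernel command line, e.g.:
 *
 *        ftrace=function trace_clock=global trace_options=sym-addr tp_printk
 *
 * where "function" and "sym-addr" are example tracer/option names that
 * only exist with the corresponding config enabled.
 */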
239
240 unsigned long long ns2usecs(u64 nsec)
241 {
242         nsec += 500;
243         do_div(nsec, 1000);
244         return nsec;
245 }
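/*
 * The +500 rounds to the nearest microsecond before the divide,
 * e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */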
246
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS                                             \
249         (FUNCTION_DEFAULT_FLAGS |                                       \
250          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
251          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
252          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
253          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
257                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268         .trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270
271 LIST_HEAD(ftrace_trace_arrays);
272
273 int trace_array_get(struct trace_array *this_tr)
274 {
275         struct trace_array *tr;
276         int ret = -ENODEV;
277
278         mutex_lock(&trace_types_lock);
279         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280                 if (tr == this_tr) {
281                         tr->ref++;
282                         ret = 0;
283                         break;
284                 }
285         }
286         mutex_unlock(&trace_types_lock);
287
288         return ret;
289 }
290
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293         WARN_ON(!this_tr->ref);
294         this_tr->ref--;
295 }
296
297 void trace_array_put(struct trace_array *this_tr)
298 {
299         mutex_lock(&trace_types_lock);
300         __trace_array_put(this_tr);
301         mutex_unlock(&trace_types_lock);
302 }
303
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305                               struct ring_buffer *buffer,
306                               struct ring_buffer_event *event)
307 {
308         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309             !filter_match_preds(call->filter, rec)) {
310                 __trace_event_discard_commit(buffer, event);
311                 return 1;
312         }
313
314         return 0;
315 }
316
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319         vfree(pid_list->pids);
320         kfree(pid_list);
321 }
322
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333         /*
334          * If pid_max changed after filtered_pids was created, we
335          * by default ignore all pids greater than the previous pid_max.
336          */
337         if (search_pid >= filtered_pids->pid_max)
338                 return false;
339
340         return test_bit(search_pid, filtered_pids->pids);
341 }
342
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355         /*
356          * Return false, because if filtered_pids does not exist,
357          * all pids are good to trace.
358          */
359         if (!filtered_pids)
360                 return false;
361
362         return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378                                   struct task_struct *self,
379                                   struct task_struct *task)
380 {
381         if (!pid_list)
382                 return;
383
384         /* For forks, we only add if the forking task is listed */
385         if (self) {
386                 if (!trace_find_filtered_pid(pid_list, self->pid))
387                         return;
388         }
389
390         /* Sorry, but we don't support pid_max changing after setting */
391         if (task->pid >= pid_list->pid_max)
392                 return;
393
394         /* "self" is set for forks, and NULL for exits */
395         if (self)
396                 set_bit(task->pid, pid_list->pids);
397         else
398                 clear_bit(task->pid, pid_list->pids);
399 }
400
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415         unsigned long pid = (unsigned long)v;
416
417         (*pos)++;
418
419         /* pid is already +1 of the actual previous bit */
420         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421
422         /* Return pid + 1 to allow zero to be represented */
423         if (pid < pid_list->pid_max)
424                 return (void *)(pid + 1);
425
426         return NULL;
427 }
428
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442         unsigned long pid;
443         loff_t l = 0;
444
445         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446         if (pid >= pid_list->pid_max)
447                 return NULL;
448
449         /* Return pid + 1 so that zero can be the exit value */
450         for (pid++; pid && l < *pos;
451              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452                 ;
453         return (void *)pid;
454 }
455
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466         unsigned long pid = (unsigned long)v - 1;
467
468         seq_printf(m, "%lu\n", pid);
469         return 0;
470 }
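/*
 * Sketch of how the helpers above are typically wired into a seq_file
 * (the example_* names are hypothetical wrappers that look up their own
 * trace_pid_list and then delegate to trace_pid_start(), trace_pid_next()
 * and a matching stop callback):
 *
 *        static const struct seq_operations example_pid_sops = {
 *                .start  = example_pid_start,
 *                .next   = example_pid_next,
 *                .stop   = example_pid_stop,
 *                .show   = trace_pid_show,
 *        };
 */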
471
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE            127
474
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476                     struct trace_pid_list **new_pid_list,
477                     const char __user *ubuf, size_t cnt)
478 {
479         struct trace_pid_list *pid_list;
480         struct trace_parser parser;
481         unsigned long val;
482         int nr_pids = 0;
483         ssize_t read = 0;
484         ssize_t ret = 0;
485         loff_t pos;
486         pid_t pid;
487
488         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489                 return -ENOMEM;
490
491         /*
492          * Always create a new array. The write is an all-or-nothing
493          * operation: a new array is built when the user adds new pids,
494          * and if the operation fails, the current list is
495          * not modified.
496          */
497         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498         if (!pid_list)
499                 return -ENOMEM;
500
501         pid_list->pid_max = READ_ONCE(pid_max);
502
503         /* Only truncating will shrink pid_max */
504         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505                 pid_list->pid_max = filtered_pids->pid_max;
506
507         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508         if (!pid_list->pids) {
509                 kfree(pid_list);
510                 return -ENOMEM;
511         }
512
513         if (filtered_pids) {
514                 /* copy the current bits to the new max */
515                 for_each_set_bit(pid, filtered_pids->pids,
516                                  filtered_pids->pid_max) {
517                         set_bit(pid, pid_list->pids);
518                         nr_pids++;
519                 }
520         }
521
522         while (cnt > 0) {
523
524                 pos = 0;
525
526                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
527                 if (ret < 0 || !trace_parser_loaded(&parser))
528                         break;
529
530                 read += ret;
531                 ubuf += ret;
532                 cnt -= ret;
533
534                 ret = -EINVAL;
535                 if (kstrtoul(parser.buffer, 0, &val))
536                         break;
537                 if (val >= pid_list->pid_max)
538                         break;
539
540                 pid = (pid_t)val;
541
542                 set_bit(pid, pid_list->pids);
543                 nr_pids++;
544
545                 trace_parser_clear(&parser);
546                 ret = 0;
547         }
548         trace_parser_put(&parser);
549
550         if (ret < 0) {
551                 trace_free_pid_list(pid_list);
552                 return ret;
553         }
554
555         if (!nr_pids) {
556                 /* Cleared the list of pids */
557                 trace_free_pid_list(pid_list);
558                 read = ret;
559                 pid_list = NULL;
560         }
561
562         *new_pid_list = pid_list;
563
564         return read;
565 }
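/*
 * Example (illustrative): this is what backs the pid filter files in
 * tracefs, so a write such as
 *
 *        echo 123 456 > set_event_pid
 *
 * ends up here, with "123 456" parsed one token at a time by
 * trace_get_user()/kstrtoul() into a freshly allocated pid list.
 */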
566
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569         u64 ts;
570
571         /* Early boot up does not have a buffer yet */
572         if (!buf->buffer)
573                 return trace_clock_local();
574
575         ts = ring_buffer_time_stamp(buf->buffer, cpu);
576         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577
578         return ts;
579 }
580
581 u64 ftrace_now(int cpu)
582 {
583         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597         /*
598          * For quick access (irqsoff uses this in fast path), just
599          * return the mirror variable of the state of the ring buffer.
600          * It's a little racy, but we don't really care.
601          */
602         smp_rmb();
603         return !global_trace.buffer_disabled;
604 }
605
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If a dump on oops happens, we do not want to have to wait
613  * for all that output. Anyway, this is configurable at both
614  * boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
617
618 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer            *trace_types __read_mostly;
622
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627
628 /*
629  * serialize the access of the ring buffer
630  *
631  * The ring buffer serializes readers, but that is only low-level protection.
632  * The validity of the events (returned by ring_buffer_peek(), etc.)
633  * is not protected by the ring buffer.
634  *
635  * The content of events may become garbage if we allow another process to
636  * consume these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not a reader page) in the ring buffer, and this page will be
639  *      rewritten by the events producer.
640  *   B) the page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to the system.
642  *
643  * These primitives allow multiple processes to access different per-cpu
644  * ring buffers concurrently.
645  *
646  * These primitives don't distinguish between read-only and read-consume
647  * access. Multiple read-only accesses are also serialized.
648  */
649
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653
654 static inline void trace_access_lock(int cpu)
655 {
656         if (cpu == RING_BUFFER_ALL_CPUS) {
657                 /* gain it for accessing the whole ring buffer. */
658                 down_write(&all_cpu_access_lock);
659         } else {
660                 /* gain it for accessing a cpu ring buffer. */
661
662                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663                 down_read(&all_cpu_access_lock);
664
665                 /* Secondly block other access to this @cpu ring buffer. */
666                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
667         }
668 }
669
670 static inline void trace_access_unlock(int cpu)
671 {
672         if (cpu == RING_BUFFER_ALL_CPUS) {
673                 up_write(&all_cpu_access_lock);
674         } else {
675                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676                 up_read(&all_cpu_access_lock);
677         }
678 }
679
680 static inline void trace_access_lock_init(void)
681 {
682         int cpu;
683
684         for_each_possible_cpu(cpu)
685                 mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687
688 #else
689
690 static DEFINE_MUTEX(access_lock);
691
692 static inline void trace_access_lock(int cpu)
693 {
694         (void)cpu;
695         mutex_lock(&access_lock);
696 }
697
698 static inline void trace_access_unlock(int cpu)
699 {
700         (void)cpu;
701         mutex_unlock(&access_lock);
702 }
703
704 static inline void trace_access_lock_init(void)
705 {
706 }
707
708 #endif
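/*
 * Typical usage of the helpers above (sketch):
 *
 *        trace_access_lock(cpu);
 *        ... read or consume events from that cpu's ring buffer ...
 *        trace_access_unlock(cpu);
 *
 * where cpu == RING_BUFFER_ALL_CPUS takes the access lock exclusively.
 */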
709
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712                                  unsigned long flags,
713                                  int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715                                       struct ring_buffer *buffer,
716                                       unsigned long flags,
717                                       int skip, int pc, struct pt_regs *regs);
718
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                         unsigned long flags,
722                                         int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs)
729 {
730 }
731
732 #endif
733
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736                   int type, unsigned long flags, int pc)
737 {
738         struct trace_entry *ent = ring_buffer_event_data(event);
739
740         tracing_generic_entry_update(ent, flags, pc);
741         ent->type = type;
742 }
743
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746                           int type,
747                           unsigned long len,
748                           unsigned long flags, int pc)
749 {
750         struct ring_buffer_event *event;
751
752         event = ring_buffer_lock_reserve(buffer, len);
753         if (event != NULL)
754                 trace_event_setup(event, type, flags, pc);
755
756         return event;
757 }
758
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761         if (tr->trace_buffer.buffer)
762                 ring_buffer_record_on(tr->trace_buffer.buffer);
763         /*
764          * This flag is looked at when buffers haven't been allocated
765          * yet, or by some tracers (like irqsoff), that just want to
766          * know if the ring buffer has been disabled, but it can handle
767          * races where it gets disabled while we still do a record.
768          * As the check is in the fast path of the tracers, it is more
769          * important to be fast than accurate.
770          */
771         tr->buffer_disabled = 0;
772         /* Make the flag seen by readers */
773         smp_wmb();
774 }
775
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784         tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787
788
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792         __this_cpu_write(trace_taskinfo_save, true);
793
794         /* If this is the temp buffer, we need to commit fully */
795         if (this_cpu_read(trace_buffered_event) == event) {
796                 /* Length is in event->array[0] */
797                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
798                 /* Release the temp buffer */
799                 this_cpu_dec(trace_buffered_event_cnt);
800         } else
801                 ring_buffer_unlock_commit(buffer, event);
802 }
803
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:    The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812         struct ring_buffer_event *event;
813         struct ring_buffer *buffer;
814         struct print_entry *entry;
815         unsigned long irq_flags;
816         int alloc;
817         int pc;
818
819         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820                 return 0;
821
822         pc = preempt_count();
823
824         if (unlikely(tracing_selftest_running || tracing_disabled))
825                 return 0;
826
827         alloc = sizeof(*entry) + size + 2; /* possible \n added */
828
829         local_save_flags(irq_flags);
830         buffer = global_trace.trace_buffer.buffer;
831         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
832                                             irq_flags, pc);
833         if (!event)
834                 return 0;
835
836         entry = ring_buffer_event_data(event);
837         entry->ip = ip;
838
839         memcpy(&entry->buf, str, size);
840
841         /* Add a newline if necessary */
842         if (entry->buf[size - 1] != '\n') {
843                 entry->buf[size] = '\n';
844                 entry->buf[size + 1] = '\0';
845         } else
846                 entry->buf[size] = '\0';
847
848         __buffer_unlock_commit(buffer, event);
849         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850
851         return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:    The address of the caller
858  * @str:   The constant string to write to the buffer
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862         struct ring_buffer_event *event;
863         struct ring_buffer *buffer;
864         struct bputs_entry *entry;
865         unsigned long irq_flags;
866         int size = sizeof(struct bputs_entry);
867         int pc;
868
869         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870                 return 0;
871
872         pc = preempt_count();
873
874         if (unlikely(tracing_selftest_running || tracing_disabled))
875                 return 0;
876
877         local_save_flags(irq_flags);
878         buffer = global_trace.trace_buffer.buffer;
879         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880                                             irq_flags, pc);
881         if (!event)
882                 return 0;
883
884         entry = ring_buffer_event_data(event);
885         entry->ip                       = ip;
886         entry->str                      = str;
887
888         __buffer_unlock_commit(buffer, event);
889         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890
891         return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898         struct tracer *tracer = tr->current_trace;
899         unsigned long flags;
900
901         if (in_nmi()) {
902                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903                 internal_trace_puts("*** snapshot is being ignored        ***\n");
904                 return;
905         }
906
907         if (!tr->allocated_snapshot) {
908                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909                 internal_trace_puts("*** stopping trace here!   ***\n");
910                 tracing_off();
911                 return;
912         }
913
914         /* Note, snapshot can not be used when the tracer uses it */
915         if (tracer->use_max_tr) {
916                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918                 return;
919         }
920
921         local_irq_save(flags);
922         update_max_tr(tr, current, smp_processor_id());
923         local_irq_restore(flags);
924 }
925
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942         struct trace_array *tr = &global_trace;
943
944         tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949                                         struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954         int ret;
955
956         if (!tr->allocated_snapshot) {
957
958                 /* allocate spare buffer */
959                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
960                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961                 if (ret < 0)
962                         return ret;
963
964                 tr->allocated_snapshot = true;
965         }
966
967         return 0;
968 }
969
970 static void free_snapshot(struct trace_array *tr)
971 {
972          * We don't free the ring buffer; instead, we resize it because
973          * the max_tr ring buffer has some state (e.g. ring->clock) and
974          * we want to preserve it.
975          * we want preserve it.
976          */
977         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978         set_buffer_entries(&tr->max_buffer, 1);
979         tracing_reset_online_cpus(&tr->max_buffer);
980         tr->allocated_snapshot = false;
981 }
982
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996         int ret;
997
998         ret = tracing_alloc_snapshot_instance(tr);
999         WARN_ON(ret < 0);
1000
1001         return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018         int ret;
1019
1020         ret = tracing_alloc_snapshot();
1021         if (ret < 0)
1022                 return;
1023
1024         tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
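/*
 * Example (illustrative sketch) for a caller in sleepable context that
 * wants to arm the snapshot early and trigger it later:
 *
 *        if (tracing_alloc_snapshot() == 0)
 *                tracing_snapshot();
 *
 * or simply call tracing_snapshot_alloc(), which does both steps at once.
 */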
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036         return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041         /* Give warning */
1042         tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049         if (tr->trace_buffer.buffer)
1050                 ring_buffer_record_off(tr->trace_buffer.buffer);
1051         /*
1052          * This flag is looked at when buffers haven't been allocated
1053          * yet, or by some tracers (like irqsoff), that just want to
1054          * know if the ring buffer has been disabled, but it can handle
1055          * races where it gets disabled while we still do a record.
1056          * As the check is in the fast path of the tracers, it is more
1057          * important to be fast than accurate.
1058          */
1059         tr->buffer_disabled = 1;
1060         /* Make the flag seen by readers */
1061         smp_wmb();
1062 }
1063
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074         tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077
1078 void disable_trace_on_warning(void)
1079 {
1080         if (__disable_trace_on_warning)
1081                 tracing_off();
1082 }
1083
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092         if (tr->trace_buffer.buffer)
1093                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094         return !tr->buffer_disabled;
1095 }
1096
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102         return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105
1106 static int __init set_buf_size(char *str)
1107 {
1108         unsigned long buf_size;
1109
1110         if (!str)
1111                 return 0;
1112         buf_size = memparse(str, &str);
1113         /* nr_entries can not be zero */
1114         if (buf_size == 0)
1115                 return 0;
1116         trace_buf_size = buf_size;
1117         return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123         unsigned long threshold;
1124         int ret;
1125
1126         if (!str)
1127                 return 0;
1128         ret = kstrtoul(str, 0, &threshold);
1129         if (ret < 0)
1130                 return 0;
1131         tracing_thresh = threshold * 1000;
1132         return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138         return nsecs / 1000;
1139 }
1140
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152         TRACE_FLAGS
1153         NULL
1154 };
1155
1156 static struct {
1157         u64 (*func)(void);
1158         const char *name;
1159         int in_ns;              /* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161         { trace_clock_local,            "local",        1 },
1162         { trace_clock_global,           "global",       1 },
1163         { trace_clock_counter,          "counter",      0 },
1164         { trace_clock_jiffies,          "uptime",       0 },
1165         { trace_clock,                  "perf",         1 },
1166         { ktime_get_mono_fast_ns,       "mono",         1 },
1167         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1168         { ktime_get_boot_fast_ns,       "boot",         1 },
1169         ARCH_TRACE_CLOCKS
1170 };
1171
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174         if (trace_clocks[tr->clock_id].in_ns)
1175                 return true;
1176
1177         return false;
1178 }
1179
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185         memset(parser, 0, sizeof(*parser));
1186
1187         parser->buffer = kmalloc(size, GFP_KERNEL);
1188         if (!parser->buffer)
1189                 return 1;
1190
1191         parser->size = size;
1192         return 0;
1193 }
1194
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200         kfree(parser->buffer);
1201         parser->buffer = NULL;
1202 }
1203
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216         size_t cnt, loff_t *ppos)
1217 {
1218         char ch;
1219         size_t read = 0;
1220         ssize_t ret;
1221
1222         if (!*ppos)
1223                 trace_parser_clear(parser);
1224
1225         ret = get_user(ch, ubuf++);
1226         if (ret)
1227                 goto out;
1228
1229         read++;
1230         cnt--;
1231
1232         /*
1233          * The parser is not finished with the last write,
1234          * continue reading the user input without skipping spaces.
1235          */
1236         if (!parser->cont) {
1237                 /* skip white space */
1238                 while (cnt && isspace(ch)) {
1239                         ret = get_user(ch, ubuf++);
1240                         if (ret)
1241                                 goto out;
1242                         read++;
1243                         cnt--;
1244                 }
1245
1246                 parser->idx = 0;
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch) || !ch) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254         }
1255
1256         /* read the non-space input */
1257         while (cnt && !isspace(ch) && ch) {
1258                 if (parser->idx < parser->size - 1)
1259                         parser->buffer[parser->idx++] = ch;
1260                 else {
1261                         ret = -EINVAL;
1262                         goto out;
1263                 }
1264                 ret = get_user(ch, ubuf++);
1265                 if (ret)
1266                         goto out;
1267                 read++;
1268                 cnt--;
1269         }
1270
1271         /* We either got finished input or we have to wait for another call. */
1272         if (isspace(ch) || !ch) {
1273                 parser->buffer[parser->idx] = 0;
1274                 parser->cont = false;
1275         } else if (parser->idx < parser->size - 1) {
1276                 parser->cont = true;
1277                 parser->buffer[parser->idx++] = ch;
1278                 /* Make sure the parsed string always terminates with '\0'. */
1279                 parser->buffer[parser->idx] = 0;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this task's comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         struct ring_buffer *buf;
1364
1365         if (tr->stop_count)
1366                 return;
1367
1368         WARN_ON_ONCE(!irqs_disabled());
1369
1370         if (!tr->allocated_snapshot) {
1371                 /* Only the nop tracer should hit this when disabling */
1372                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373                 return;
1374         }
1375
1376         arch_spin_lock(&tr->max_lock);
1377
1378         buf = tr->trace_buffer.buffer;
1379         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380         tr->max_buffer.buffer = buf;
1381
1382         __update_max_tr(tr, tsk, cpu);
1383         arch_spin_unlock(&tr->max_lock);
1384 }
1385
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr: tracer
1389  * @tsk: task with the latency
1390  * @cpu: the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397         int ret;
1398
1399         if (tr->stop_count)
1400                 return;
1401
1402         WARN_ON_ONCE(!irqs_disabled());
1403         if (!tr->allocated_snapshot) {
1404                 /* Only the nop tracer should hit this when disabling */
1405                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406                 return;
1407         }
1408
1409         arch_spin_lock(&tr->max_lock);
1410
1411         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412
1413         if (ret == -EBUSY) {
1414                 /*
1415                  * We failed to swap the buffer due to a commit taking
1416                  * place on this CPU. We fail to record, but we reset
1417                  * the max trace buffer (no one writes directly to it)
1418                  * and flag that it failed.
1419                  */
1420                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421                         "Failed to swap buffers due to commit in progress\n");
1422         }
1423
1424         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425
1426         __update_max_tr(tr, tsk, cpu);
1427         arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433         /* Iterators are static, they should be filled or empty */
1434         if (trace_buffer_iter(iter, iter->cpu_file))
1435                 return 0;
1436
1437         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438                                 full);
1439 }
1440
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443
1444 struct trace_selftests {
1445         struct list_head                list;
1446         struct tracer                   *type;
1447 };
1448
1449 static LIST_HEAD(postponed_selftests);
1450
1451 static int save_selftest(struct tracer *type)
1452 {
1453         struct trace_selftests *selftest;
1454
1455         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456         if (!selftest)
1457                 return -ENOMEM;
1458
1459         selftest->type = type;
1460         list_add(&selftest->list, &postponed_selftests);
1461         return 0;
1462 }
1463
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466         struct trace_array *tr = &global_trace;
1467         struct tracer *saved_tracer = tr->current_trace;
1468         int ret;
1469
1470         if (!type->selftest || tracing_selftest_disabled)
1471                 return 0;
1472
1473         /*
1474          * If a tracer registers early in boot up (before scheduling is
1475          * initialized and such), then do not run its selftests yet.
1476          * Instead, run it a little later in the boot process.
1477          */
1478         if (!selftests_can_run)
1479                 return save_selftest(type);
1480
1481         /*
1482          * Run a selftest on this tracer.
1483          * Here we reset the trace buffer, and set the current
1484          * tracer to be this tracer. The tracer can then run some
1485          * internal tracing to verify that everything is in order.
1486          * If we fail, we do not register this tracer.
1487          */
1488         tracing_reset_online_cpus(&tr->trace_buffer);
1489
1490         tr->current_trace = type;
1491
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493         if (type->use_max_tr) {
1494                 /* If we expanded the buffers, make sure the max is expanded too */
1495                 if (ring_buffer_expanded)
1496                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497                                            RING_BUFFER_ALL_CPUS);
1498                 tr->allocated_snapshot = true;
1499         }
1500 #endif
1501
1502         /* the test is responsible for initializing and enabling */
1503         pr_info("Testing tracer %s: ", type->name);
1504         ret = type->selftest(type, tr);
1505         /* the test is responsible for resetting too */
1506         tr->current_trace = saved_tracer;
1507         if (ret) {
1508                 printk(KERN_CONT "FAILED!\n");
1509                 /* Add the warning after printing 'FAILED' */
1510                 WARN_ON(1);
1511                 return -1;
1512         }
1513         /* Only reset on passing, to avoid touching corrupted buffers */
1514         tracing_reset_online_cpus(&tr->trace_buffer);
1515
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517         if (type->use_max_tr) {
1518                 tr->allocated_snapshot = false;
1519
1520                 /* Shrink the max buffer again */
1521                 if (ring_buffer_expanded)
1522                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1523                                            RING_BUFFER_ALL_CPUS);
1524         }
1525 #endif
1526
1527         printk(KERN_CONT "PASSED\n");
1528         return 0;
1529 }
1530
1531 static __init int init_trace_selftests(void)
1532 {
1533         struct trace_selftests *p, *n;
1534         struct tracer *t, **last;
1535         int ret;
1536
1537         selftests_can_run = true;
1538
1539         mutex_lock(&trace_types_lock);
1540
1541         if (list_empty(&postponed_selftests))
1542                 goto out;
1543
1544         pr_info("Running postponed tracer tests:\n");
1545
1546         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547                 ret = run_tracer_selftest(p->type);
1548                 /* If the test fails, then warn and remove from available_tracers */
1549                 if (ret < 0) {
1550                         WARN(1, "tracer: %s failed selftest, disabling\n",
1551                              p->type->name);
1552                         last = &trace_types;
1553                         for (t = trace_types; t; t = t->next) {
1554                                 if (t == p->type) {
1555                                         *last = t->next;
1556                                         break;
1557                                 }
1558                                 last = &t->next;
1559                         }
1560                 }
1561                 list_del(&p->list);
1562                 kfree(p);
1563         }
1564
1565  out:
1566         mutex_unlock(&trace_types_lock);
1567
1568         return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574         return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579
1580 static void __init apply_trace_boot_options(void);
1581
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type: the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590         struct tracer *t;
1591         int ret = 0;
1592
1593         if (!type->name) {
1594                 pr_info("Tracer must have a name\n");
1595                 return -1;
1596         }
1597
1598         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600                 return -1;
1601         }
1602
1603         mutex_lock(&trace_types_lock);
1604
1605         tracing_selftest_running = true;
1606
1607         for (t = trace_types; t; t = t->next) {
1608                 if (strcmp(type->name, t->name) == 0) {
1609                         /* already found */
1610                         pr_info("Tracer %s already registered\n",
1611                                 type->name);
1612                         ret = -1;
1613                         goto out;
1614                 }
1615         }
1616
1617         if (!type->set_flag)
1618                 type->set_flag = &dummy_set_flag;
1619         if (!type->flags) {
1620                 /* allocate a dummy tracer_flags */
1621                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622                 if (!type->flags) {
1623                         ret = -ENOMEM;
1624                         goto out;
1625                 }
1626                 type->flags->val = 0;
1627                 type->flags->opts = dummy_tracer_opt;
1628         } else
1629                 if (!type->flags->opts)
1630                         type->flags->opts = dummy_tracer_opt;
1631
1632         /* store the tracer for __set_tracer_option */
1633         type->flags->trace = type;
1634
1635         ret = run_tracer_selftest(type);
1636         if (ret < 0)
1637                 goto out;
1638
1639         type->next = trace_types;
1640         trace_types = type;
1641         add_tracer_options(&global_trace, type);
1642
1643  out:
1644         tracing_selftest_running = false;
1645         mutex_unlock(&trace_types_lock);
1646
1647         if (ret || !default_bootup_tracer)
1648                 goto out_unlock;
1649
1650         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651                 goto out_unlock;
1652
1653         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654         /* Do we want this tracer to start on bootup? */
1655         tracing_set_tracer(&global_trace, type->name);
1656         default_bootup_tracer = NULL;
1657
1658         apply_trace_boot_options();
1659
1660         /* Disable other selftests, since running this tracer will break them. */
1661         tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664                type->name);
1665 #endif
1666
1667  out_unlock:
1668         return ret;
1669 }
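/*
 * Illustrative sketch (not part of this file): a built-in tracer plugin
 * registers itself from its own __init code roughly as below. The field
 * names come from struct tracer in trace.h; "nop_example" and
 * nop_example_init() are hypothetical names used only for the example.
 *
 *	static int nop_example_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer nop_example __read_mostly = {
 *		.name	= "nop_example",
 *		.init	= nop_example_init,
 *	};
 *
 *	static __init int init_nop_example(void)
 *	{
 *		return register_tracer(&nop_example);
 *	}
 *	core_initcall(init_nop_example);
 */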
1670
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673         struct ring_buffer *buffer = buf->buffer;
1674
1675         if (!buffer)
1676                 return;
1677
1678         ring_buffer_record_disable(buffer);
1679
1680         /* Make sure all commits have finished */
1681         synchronize_sched();
1682         ring_buffer_reset_cpu(buffer, cpu);
1683
1684         ring_buffer_record_enable(buffer);
1685 }
1686
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689         struct ring_buffer *buffer = buf->buffer;
1690         int cpu;
1691
1692         if (!buffer)
1693                 return;
1694
1695         ring_buffer_record_disable(buffer);
1696
1697         /* Make sure all commits have finished */
1698         synchronize_sched();
1699
1700         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701
1702         for_each_online_cpu(cpu)
1703                 ring_buffer_reset_cpu(buffer, cpu);
1704
1705         ring_buffer_record_enable(buffer);
1706 }
1707
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711         struct trace_array *tr;
1712
1713         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714                 if (!tr->clear_trace)
1715                         continue;
1716                 tr->clear_trace = false;
1717                 tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719                 tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721         }
1722 }
1723
1724 static int *tgid_map;
1725
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731         unsigned *map_cmdline_to_pid;
1732         unsigned cmdline_num;
1733         int cmdline_idx;
1734         char *saved_cmdlines;
1735 };
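/*
 * The saved_cmdlines buffer keeps a bounded, two-way mapping between pids
 * and recorded comms: map_pid_to_cmdline[pid] holds the slot index of the
 * comm saved for that pid (or NO_CMDLINE_MAP), map_cmdline_to_pid[slot]
 * holds the pid that currently owns that slot, and saved_cmdlines stores
 * cmdline_num strings of TASK_COMM_LEN bytes each. Slots are handed out
 * round-robin via cmdline_idx, so old entries are evicted once more than
 * cmdline_num distinct pids have been recorded.
 */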
1736 static struct saved_cmdlines_buffer *savedcmd;
1737
1738 /* temporarily disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752                                     struct saved_cmdlines_buffer *s)
1753 {
1754         s->map_cmdline_to_pid = kmalloc_array(val,
1755                                               sizeof(*s->map_cmdline_to_pid),
1756                                               GFP_KERNEL);
1757         if (!s->map_cmdline_to_pid)
1758                 return -ENOMEM;
1759
1760         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1761         if (!s->saved_cmdlines) {
1762                 kfree(s->map_cmdline_to_pid);
1763                 return -ENOMEM;
1764         }
1765
1766         s->cmdline_idx = 0;
1767         s->cmdline_num = val;
1768         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1769                sizeof(s->map_pid_to_cmdline));
1770         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1771                val * sizeof(*s->map_cmdline_to_pid));
1772
1773         return 0;
1774 }
1775
1776 static int trace_create_savedcmd(void)
1777 {
1778         int ret;
1779
1780         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1781         if (!savedcmd)
1782                 return -ENOMEM;
1783
1784         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1785         if (ret < 0) {
1786                 kfree(savedcmd);
1787                 savedcmd = NULL;
1788                 return -ENOMEM;
1789         }
1790
1791         return 0;
1792 }
1793
1794 int is_tracing_stopped(void)
1795 {
1796         return global_trace.stop_count;
1797 }
1798
1799 /**
1800  * tracing_start - quick start of the tracer
1801  *
1802  * If tracing is enabled but was stopped by tracing_stop,
1803  * this will start the tracer back up.
1804  */
1805 void tracing_start(void)
1806 {
1807         struct ring_buffer *buffer;
1808         unsigned long flags;
1809
1810         if (tracing_disabled)
1811                 return;
1812
1813         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1814         if (--global_trace.stop_count) {
1815                 if (global_trace.stop_count < 0) {
1816                         /* Someone screwed up their debugging */
1817                         WARN_ON_ONCE(1);
1818                         global_trace.stop_count = 0;
1819                 }
1820                 goto out;
1821         }
1822
1823         /* Prevent the buffers from switching */
1824         arch_spin_lock(&global_trace.max_lock);
1825
1826         buffer = global_trace.trace_buffer.buffer;
1827         if (buffer)
1828                 ring_buffer_record_enable(buffer);
1829
1830 #ifdef CONFIG_TRACER_MAX_TRACE
1831         buffer = global_trace.max_buffer.buffer;
1832         if (buffer)
1833                 ring_buffer_record_enable(buffer);
1834 #endif
1835
1836         arch_spin_unlock(&global_trace.max_lock);
1837
1838  out:
1839         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1840 }
1841
1842 static void tracing_start_tr(struct trace_array *tr)
1843 {
1844         struct ring_buffer *buffer;
1845         unsigned long flags;
1846
1847         if (tracing_disabled)
1848                 return;
1849
1850         /* If global, we need to also start the max tracer */
1851         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1852                 return tracing_start();
1853
1854         raw_spin_lock_irqsave(&tr->start_lock, flags);
1855
1856         if (--tr->stop_count) {
1857                 if (tr->stop_count < 0) {
1858                         /* Someone screwed up their debugging */
1859                         WARN_ON_ONCE(1);
1860                         tr->stop_count = 0;
1861                 }
1862                 goto out;
1863         }
1864
1865         buffer = tr->trace_buffer.buffer;
1866         if (buffer)
1867                 ring_buffer_record_enable(buffer);
1868
1869  out:
1870         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1871 }
1872
1873 /**
1874  * tracing_stop - quick stop of the tracer
1875  *
1876  * Light weight way to stop tracing. Use in conjunction with
1877  * tracing_start.
1878  */
1879 void tracing_stop(void)
1880 {
1881         struct ring_buffer *buffer;
1882         unsigned long flags;
1883
1884         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1885         if (global_trace.stop_count++)
1886                 goto out;
1887
1888         /* Prevent the buffers from switching */
1889         arch_spin_lock(&global_trace.max_lock);
1890
1891         buffer = global_trace.trace_buffer.buffer;
1892         if (buffer)
1893                 ring_buffer_record_disable(buffer);
1894
1895 #ifdef CONFIG_TRACER_MAX_TRACE
1896         buffer = global_trace.max_buffer.buffer;
1897         if (buffer)
1898                 ring_buffer_record_disable(buffer);
1899 #endif
1900
1901         arch_spin_unlock(&global_trace.max_lock);
1902
1903  out:
1904         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1905 }
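/*
 * Note that tracing_stop()/tracing_start() nest via stop_count: recording
 * is only re-enabled once every tracing_stop() has been matched by a
 * tracing_start() and the count drops back to zero. For example, two
 * consecutive tracing_stop() calls require two tracing_start() calls
 * before the ring buffers record again.
 */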
1906
1907 static void tracing_stop_tr(struct trace_array *tr)
1908 {
1909         struct ring_buffer *buffer;
1910         unsigned long flags;
1911
1912         /* If global, we need to also stop the max tracer */
1913         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1914                 return tracing_stop();
1915
1916         raw_spin_lock_irqsave(&tr->start_lock, flags);
1917         if (tr->stop_count++)
1918                 goto out;
1919
1920         buffer = tr->trace_buffer.buffer;
1921         if (buffer)
1922                 ring_buffer_record_disable(buffer);
1923
1924  out:
1925         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1926 }
1927
1928 static int trace_save_cmdline(struct task_struct *tsk)
1929 {
1930         unsigned pid, idx;
1931
1932         /* treat recording of idle task as a success */
1933         if (!tsk->pid)
1934                 return 1;
1935
1936         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1937                 return 0;
1938
1939         /*
1940          * It's not the end of the world if we don't get
1941          * the lock, but we also don't want to spin
1942          * nor do we want to disable interrupts,
1943          * so if we miss here, then better luck next time.
1944          */
1945         if (!arch_spin_trylock(&trace_cmdline_lock))
1946                 return 0;
1947
1948         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1949         if (idx == NO_CMDLINE_MAP) {
1950                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1951
1952                 /*
1953                  * Check whether the cmdline buffer at idx has a pid
1954                  * mapped. We are going to overwrite that entry so we
1955                  * need to clear the map_pid_to_cmdline. Otherwise we
1956                  * would read the new comm for the old pid.
1957                  */
1958                 pid = savedcmd->map_cmdline_to_pid[idx];
1959                 if (pid != NO_CMDLINE_MAP)
1960                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1961
1962                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1963                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1964
1965                 savedcmd->cmdline_idx = idx;
1966         }
1967
1968         set_cmdline(idx, tsk->comm);
1969
1970         arch_spin_unlock(&trace_cmdline_lock);
1971
1972         return 1;
1973 }
1974
1975 static void __trace_find_cmdline(int pid, char comm[])
1976 {
1977         unsigned map;
1978
1979         if (!pid) {
1980                 strcpy(comm, "<idle>");
1981                 return;
1982         }
1983
1984         if (WARN_ON_ONCE(pid < 0)) {
1985                 strcpy(comm, "<XXX>");
1986                 return;
1987         }
1988
1989         if (pid > PID_MAX_DEFAULT) {
1990                 strcpy(comm, "<...>");
1991                 return;
1992         }
1993
1994         map = savedcmd->map_pid_to_cmdline[pid];
1995         if (map != NO_CMDLINE_MAP)
1996                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1997         else
1998                 strcpy(comm, "<...>");
1999 }
2000
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003         preempt_disable();
2004         arch_spin_lock(&trace_cmdline_lock);
2005
2006         __trace_find_cmdline(pid, comm);
2007
2008         arch_spin_unlock(&trace_cmdline_lock);
2009         preempt_enable();
2010 }
2011
2012 int trace_find_tgid(int pid)
2013 {
2014         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2015                 return 0;
2016
2017         return tgid_map[pid];
2018 }
2019
2020 static int trace_save_tgid(struct task_struct *tsk)
2021 {
2022         /* treat recording of idle task as a success */
2023         if (!tsk->pid)
2024                 return 1;
2025
2026         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2027                 return 0;
2028
2029         tgid_map[tsk->pid] = tsk->tgid;
2030         return 1;
2031 }
2032
2033 static bool tracing_record_taskinfo_skip(int flags)
2034 {
2035         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2036                 return true;
2037         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2038                 return true;
2039         if (!__this_cpu_read(trace_taskinfo_save))
2040                 return true;
2041         return false;
2042 }
2043
2044 /**
2045  * tracing_record_taskinfo - record the task info of a task
2046  *
2047  * @task: task to record
2048  * @flags: TRACE_RECORD_CMDLINE for recording comm
2049  *         TRACE_RECORD_TGID for recording tgid
2050  */
2051 void tracing_record_taskinfo(struct task_struct *task, int flags)
2052 {
2053         bool done;
2054
2055         if (tracing_record_taskinfo_skip(flags))
2056                 return;
2057
2058         /*
2059          * Record as much task information as possible. If some fail, continue
2060          * to try to record the others.
2061          */
2062         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2063         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2064
2065         /* If recording any information failed, retry again soon. */
2066         if (!done)
2067                 return;
2068
2069         __this_cpu_write(trace_taskinfo_save, false);
2070 }
2071
2072 /**
2073  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2074  *
2075  * @prev: previous task during sched_switch
2076  * @next: next task during sched_switch
2077  * @flags: TRACE_RECORD_CMDLINE for recording comm
2078  *          TRACE_RECORD_TGID for recording tgid
2079  */
2080 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2081                                           struct task_struct *next, int flags)
2082 {
2083         bool done;
2084
2085         if (tracing_record_taskinfo_skip(flags))
2086                 return;
2087
2088         /*
2089          * Record as much task information as possible. If some fail, continue
2090          * to try to record the others.
2091          */
2092         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2093         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2094         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2095         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2096
2097         /* If recording any information failed, retry again soon. */
2098         if (!done)
2099                 return;
2100
2101         __this_cpu_write(trace_taskinfo_save, false);
2102 }
2103
2104 /* Helpers to record a specific task information */
2105 void tracing_record_cmdline(struct task_struct *task)
2106 {
2107         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2108 }
2109
2110 void tracing_record_tgid(struct task_struct *task)
2111 {
2112         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2113 }
2114
2115 /*
2116  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2117  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2118  * simplifies those functions and keeps them in sync.
2119  */
2120 enum print_line_t trace_handle_return(struct trace_seq *s)
2121 {
2122         return trace_seq_has_overflowed(s) ?
2123                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2124 }
2125 EXPORT_SYMBOL_GPL(trace_handle_return);
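/*
 * Typical use (sketch): an event's output callback writes into iter->seq
 * and then lets this helper pick the return value, e.g.
 *
 *	trace_seq_printf(&iter->seq, "%d\n", field);
 *	return trace_handle_return(&iter->seq);
 */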
2126
2127 void
2128 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2129                              int pc)
2130 {
2131         struct task_struct *tsk = current;
2132
2133         entry->preempt_count            = pc & 0xff;
2134         entry->pid                      = (tsk) ? tsk->pid : 0;
2135         entry->flags =
2136 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2137                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2138 #else
2139                 TRACE_FLAG_IRQS_NOSUPPORT |
2140 #endif
2141                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2142                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2143                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2144                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2145                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2146 }
2147 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
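/*
 * The flags and preempt_count recorded above are what later show up in
 * the latency-format columns printed by print_lat_help_header() further
 * down in this file: irqs-off, need-resched, hardirq/softirq and
 * preempt-depth.
 */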
2148
2149 struct ring_buffer_event *
2150 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2151                           int type,
2152                           unsigned long len,
2153                           unsigned long flags, int pc)
2154 {
2155         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2156 }
2157
2158 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2159 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2160 static int trace_buffered_event_ref;
2161
2162 /**
2163  * trace_buffered_event_enable - enable buffering events
2164  *
2165  * When events are being filtered, it is quicker to use a temporary
2166  * buffer to write the event data into if there's a likely chance
2167  * that it will not be committed. The discard of the ring buffer
2168  * is not as fast as committing, and is much slower than copying
2169  * a commit.
2170  *
2171  * When an event is to be filtered, allocate per cpu buffers to
2172  * write the event data into, and if the event is filtered and discarded
2173  * it is simply dropped, otherwise, the entire data is to be committed
2174  * in one shot.
2175  */
2176 void trace_buffered_event_enable(void)
2177 {
2178         struct ring_buffer_event *event;
2179         struct page *page;
2180         int cpu;
2181
2182         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2183
2184         if (trace_buffered_event_ref++)
2185                 return;
2186
2187         for_each_tracing_cpu(cpu) {
2188                 page = alloc_pages_node(cpu_to_node(cpu),
2189                                         GFP_KERNEL | __GFP_NORETRY, 0);
2190                 if (!page)
2191                         goto failed;
2192
2193                 event = page_address(page);
2194                 memset(event, 0, sizeof(*event));
2195
2196                 per_cpu(trace_buffered_event, cpu) = event;
2197
2198                 preempt_disable();
2199                 if (cpu == smp_processor_id() &&
2200                     this_cpu_read(trace_buffered_event) !=
2201                     per_cpu(trace_buffered_event, cpu))
2202                         WARN_ON_ONCE(1);
2203                 preempt_enable();
2204         }
2205
2206         return;
2207  failed:
2208         trace_buffered_event_disable();
2209 }
2210
2211 static void enable_trace_buffered_event(void *data)
2212 {
2213         /* Probably not needed, but do it anyway */
2214         smp_rmb();
2215         this_cpu_dec(trace_buffered_event_cnt);
2216 }
2217
2218 static void disable_trace_buffered_event(void *data)
2219 {
2220         this_cpu_inc(trace_buffered_event_cnt);
2221 }
2222
2223 /**
2224  * trace_buffered_event_disable - disable buffering events
2225  *
2226  * When a filter is removed, it is faster to not use the buffered
2227  * events, and to commit directly into the ring buffer. Free up
2228  * the temp buffers when there are no more users. This requires
2229  * special synchronization with current events.
2230  */
2231 void trace_buffered_event_disable(void)
2232 {
2233         int cpu;
2234
2235         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2236
2237         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2238                 return;
2239
2240         if (--trace_buffered_event_ref)
2241                 return;
2242
2243         preempt_disable();
2244         /* For each CPU, set the buffer as used. */
2245         smp_call_function_many(tracing_buffer_mask,
2246                                disable_trace_buffered_event, NULL, 1);
2247         preempt_enable();
2248
2249         /* Wait for all current users to finish */
2250         synchronize_sched();
2251
2252         for_each_tracing_cpu(cpu) {
2253                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2254                 per_cpu(trace_buffered_event, cpu) = NULL;
2255         }
2256         /*
2257          * Make sure trace_buffered_event is NULL before clearing
2258          * trace_buffered_event_cnt.
2259          */
2260         smp_wmb();
2261
2262         preempt_disable();
2263         /* Do the work on each cpu */
2264         smp_call_function_many(tracing_buffer_mask,
2265                                enable_trace_buffered_event, NULL, 1);
2266         preempt_enable();
2267 }
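/*
 * trace_buffered_event_enable() and trace_buffered_event_disable() are
 * reference counted and must be called under event_mutex (both WARN if
 * it is not held). The per-cpu pages are only allocated on the first
 * enable and only freed when the last user calls disable.
 */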
2268
2269 static struct ring_buffer *temp_buffer;
2270
2271 struct ring_buffer_event *
2272 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2273                           struct trace_event_file *trace_file,
2274                           int type, unsigned long len,
2275                           unsigned long flags, int pc)
2276 {
2277         struct ring_buffer_event *entry;
2278         int val;
2279
2280         *current_rb = trace_file->tr->trace_buffer.buffer;
2281
2282         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2283              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2284             (entry = this_cpu_read(trace_buffered_event))) {
2285                 /* Try to use the per cpu buffer first */
2286                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2287                 if (val == 1) {
2288                         trace_event_setup(entry, type, flags, pc);
2289                         entry->array[0] = len;
2290                         return entry;
2291                 }
2292                 this_cpu_dec(trace_buffered_event_cnt);
2293         }
2294
2295         entry = __trace_buffer_lock_reserve(*current_rb,
2296                                             type, len, flags, pc);
2297         /*
2298          * If tracing is off, but we have triggers enabled
2299          * we still need to look at the event data. Use the temp_buffer
2300          * to store the trace event for the trigger to use. It's recursion
2301          * safe and will not be recorded anywhere.
2302          */
2303         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2304                 *current_rb = temp_buffer;
2305                 entry = __trace_buffer_lock_reserve(*current_rb,
2306                                                     type, len, flags, pc);
2307         }
2308         return entry;
2309 }
2310 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2311
2312 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2313 static DEFINE_MUTEX(tracepoint_printk_mutex);
2314
2315 static void output_printk(struct trace_event_buffer *fbuffer)
2316 {
2317         struct trace_event_call *event_call;
2318         struct trace_event *event;
2319         unsigned long flags;
2320         struct trace_iterator *iter = tracepoint_print_iter;
2321
2322         /* We should never get here if iter is NULL */
2323         if (WARN_ON_ONCE(!iter))
2324                 return;
2325
2326         event_call = fbuffer->trace_file->event_call;
2327         if (!event_call || !event_call->event.funcs ||
2328             !event_call->event.funcs->trace)
2329                 return;
2330
2331         event = &fbuffer->trace_file->event_call->event;
2332
2333         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2334         trace_seq_init(&iter->seq);
2335         iter->ent = fbuffer->entry;
2336         event_call->event.funcs->trace(iter, 0, event);
2337         trace_seq_putc(&iter->seq, 0);
2338         printk("%s", iter->seq.buffer);
2339
2340         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2341 }
2342
2343 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2344                              void __user *buffer, size_t *lenp,
2345                              loff_t *ppos)
2346 {
2347         int save_tracepoint_printk;
2348         int ret;
2349
2350         mutex_lock(&tracepoint_printk_mutex);
2351         save_tracepoint_printk = tracepoint_printk;
2352
2353         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2354
2355         /*
2356          * This will force exiting early, as tracepoint_printk
2357          * is always zero when tracepoint_print_iter is not allocated.
2358          */
2359         if (!tracepoint_print_iter)
2360                 tracepoint_printk = 0;
2361
2362         if (save_tracepoint_printk == tracepoint_printk)
2363                 goto out;
2364
2365         if (tracepoint_printk)
2366                 static_key_enable(&tracepoint_printk_key.key);
2367         else
2368                 static_key_disable(&tracepoint_printk_key.key);
2369
2370  out:
2371         mutex_unlock(&tracepoint_printk_mutex);
2372
2373         return ret;
2374 }
2375
2376 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2377 {
2378         if (static_key_false(&tracepoint_printk_key.key))
2379                 output_printk(fbuffer);
2380
2381         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2382                                     fbuffer->event, fbuffer->entry,
2383                                     fbuffer->flags, fbuffer->pc);
2384 }
2385 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2386
2387 /*
2388  * Skip 3:
2389  *
2390  *   trace_buffer_unlock_commit_regs()
2391  *   trace_event_buffer_commit()
2392  *   trace_event_raw_event_xxx()
2393  */
2394 # define STACK_SKIP 3
2395
2396 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2397                                      struct ring_buffer *buffer,
2398                                      struct ring_buffer_event *event,
2399                                      unsigned long flags, int pc,
2400                                      struct pt_regs *regs)
2401 {
2402         __buffer_unlock_commit(buffer, event);
2403
2404         /*
2405          * If regs is not set, then skip the necessary functions.
2406          * Note, we can still get here via blktrace, wakeup tracer
2407          * and mmiotrace, but that's ok if they lose a function or
2408          * two. They are not that meaningful.
2409          */
2410         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2411         ftrace_trace_userstack(buffer, flags, pc);
2412 }
2413
2414 /*
2415  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2416  */
2417 void
2418 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2419                                    struct ring_buffer_event *event)
2420 {
2421         __buffer_unlock_commit(buffer, event);
2422 }
2423
2424 static void
2425 trace_process_export(struct trace_export *export,
2426                struct ring_buffer_event *event)
2427 {
2428         struct trace_entry *entry;
2429         unsigned int size = 0;
2430
2431         entry = ring_buffer_event_data(event);
2432         size = ring_buffer_event_length(event);
2433         export->write(export, entry, size);
2434 }
2435
2436 static DEFINE_MUTEX(ftrace_export_lock);
2437
2438 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2439
2440 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2441
2442 static inline void ftrace_exports_enable(void)
2443 {
2444         static_branch_enable(&ftrace_exports_enabled);
2445 }
2446
2447 static inline void ftrace_exports_disable(void)
2448 {
2449         static_branch_disable(&ftrace_exports_enabled);
2450 }
2451
2452 void ftrace_exports(struct ring_buffer_event *event)
2453 {
2454         struct trace_export *export;
2455
2456         preempt_disable_notrace();
2457
2458         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2459         while (export) {
2460                 trace_process_export(export, event);
2461                 export = rcu_dereference_raw_notrace(export->next);
2462         }
2463
2464         preempt_enable_notrace();
2465 }
2466
2467 static inline void
2468 add_trace_export(struct trace_export **list, struct trace_export *export)
2469 {
2470         rcu_assign_pointer(export->next, *list);
2471         /*
2472          * We are adding export to the list, but another
2473          * CPU might be walking that list. We need to make sure
2474          * the export->next pointer is valid before another CPU sees
2475          * the export pointer included in the list.
2476          */
2477         rcu_assign_pointer(*list, export);
2478 }
2479
2480 static inline int
2481 rm_trace_export(struct trace_export **list, struct trace_export *export)
2482 {
2483         struct trace_export **p;
2484
2485         for (p = list; *p != NULL; p = &(*p)->next)
2486                 if (*p == export)
2487                         break;
2488
2489         if (*p != export)
2490                 return -1;
2491
2492         rcu_assign_pointer(*p, (*p)->next);
2493
2494         return 0;
2495 }
2496
2497 static inline void
2498 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2499 {
2500         if (*list == NULL)
2501                 ftrace_exports_enable();
2502
2503         add_trace_export(list, export);
2504 }
2505
2506 static inline int
2507 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2508 {
2509         int ret;
2510
2511         ret = rm_trace_export(list, export);
2512         if (*list == NULL)
2513                 ftrace_exports_disable();
2514
2515         return ret;
2516 }
2517
2518 int register_ftrace_export(struct trace_export *export)
2519 {
2520         if (WARN_ON_ONCE(!export->write))
2521                 return -1;
2522
2523         mutex_lock(&ftrace_export_lock);
2524
2525         add_ftrace_export(&ftrace_exports_list, export);
2526
2527         mutex_unlock(&ftrace_export_lock);
2528
2529         return 0;
2530 }
2531 EXPORT_SYMBOL_GPL(register_ftrace_export);
2532
2533 int unregister_ftrace_export(struct trace_export *export)
2534 {
2535         int ret;
2536
2537         mutex_lock(&ftrace_export_lock);
2538
2539         ret = rm_ftrace_export(&ftrace_exports_list, export);
2540
2541         mutex_unlock(&ftrace_export_lock);
2542
2543         return ret;
2544 }
2545 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
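/*
 * Illustrative sketch of an ftrace export user (hypothetical names,
 * assuming the struct trace_export definition from <linux/trace.h>):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to some other channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */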
2546
2547 void
2548 trace_function(struct trace_array *tr,
2549                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2550                int pc)
2551 {
2552         struct trace_event_call *call = &event_function;
2553         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2554         struct ring_buffer_event *event;
2555         struct ftrace_entry *entry;
2556
2557         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2558                                             flags, pc);
2559         if (!event)
2560                 return;
2561         entry   = ring_buffer_event_data(event);
2562         entry->ip                       = ip;
2563         entry->parent_ip                = parent_ip;
2564
2565         if (!call_filter_check_discard(call, entry, buffer, event)) {
2566                 if (static_branch_unlikely(&ftrace_exports_enabled))
2567                         ftrace_exports(event);
2568                 __buffer_unlock_commit(buffer, event);
2569         }
2570 }
2571
2572 #ifdef CONFIG_STACKTRACE
2573
2574 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2575 struct ftrace_stack {
2576         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2577 };
2578
2579 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2580 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2581
2582 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2583                                  unsigned long flags,
2584                                  int skip, int pc, struct pt_regs *regs)
2585 {
2586         struct trace_event_call *call = &event_kernel_stack;
2587         struct ring_buffer_event *event;
2588         struct stack_entry *entry;
2589         struct stack_trace trace;
2590         int use_stack;
2591         int size = FTRACE_STACK_ENTRIES;
2592
2593         trace.nr_entries        = 0;
2594         trace.skip              = skip;
2595
2596         /*
2597          * Add one, for this function and the call to save_stack_trace().
2598          * If regs is set, then these functions will not be in the way.
2599          */
2600 #ifndef CONFIG_UNWINDER_ORC
2601         if (!regs)
2602                 trace.skip++;
2603 #endif
2604
2605         /*
2606          * Since events can happen in NMIs, there's no safe way to
2607          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2608          * or NMI comes in, it will just have to fall back to the default
2609          * FTRACE_STACK_ENTRIES-sized array in the stack event itself.
2610          */
2611         preempt_disable_notrace();
2612
2613         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2614         /*
2615          * We don't need any atomic variables, just a barrier.
2616          * If an interrupt comes in, we don't care, because it would
2617          * have exited and put the counter back to what we want.
2618          * We just need a barrier to keep gcc from moving things
2619          * around.
2620          */
2621         barrier();
2622         if (use_stack == 1) {
2623                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2624                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2625
2626                 if (regs)
2627                         save_stack_trace_regs(regs, &trace);
2628                 else
2629                         save_stack_trace(&trace);
2630
2631                 if (trace.nr_entries > size)
2632                         size = trace.nr_entries;
2633         } else
2634                 /* From now on, use_stack is a boolean */
2635                 use_stack = 0;
2636
2637         size *= sizeof(unsigned long);
2638
2639         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2640                                             sizeof(*entry) + size, flags, pc);
2641         if (!event)
2642                 goto out;
2643         entry = ring_buffer_event_data(event);
2644
2645         memset(&entry->caller, 0, size);
2646
2647         if (use_stack)
2648                 memcpy(&entry->caller, trace.entries,
2649                        trace.nr_entries * sizeof(unsigned long));
2650         else {
2651                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2652                 trace.entries           = entry->caller;
2653                 if (regs)
2654                         save_stack_trace_regs(regs, &trace);
2655                 else
2656                         save_stack_trace(&trace);
2657         }
2658
2659         entry->size = trace.nr_entries;
2660
2661         if (!call_filter_check_discard(call, entry, buffer, event))
2662                 __buffer_unlock_commit(buffer, event);
2663
2664  out:
2665         /* Again, don't let gcc optimize things here */
2666         barrier();
2667         __this_cpu_dec(ftrace_stack_reserve);
2668         preempt_enable_notrace();
2669
2670 }
2671
2672 static inline void ftrace_trace_stack(struct trace_array *tr,
2673                                       struct ring_buffer *buffer,
2674                                       unsigned long flags,
2675                                       int skip, int pc, struct pt_regs *regs)
2676 {
2677         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2678                 return;
2679
2680         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2681 }
2682
2683 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2684                    int pc)
2685 {
2686         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2687
2688         if (rcu_is_watching()) {
2689                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2690                 return;
2691         }
2692
2693         /*
2694          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2695          * but if the above rcu_is_watching() failed, then the NMI
2696          * triggered someplace critical, and rcu_irq_enter() should
2697          * not be called from NMI.
2698          */
2699         if (unlikely(in_nmi()))
2700                 return;
2701
2702         rcu_irq_enter_irqson();
2703         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2704         rcu_irq_exit_irqson();
2705 }
2706
2707 /**
2708  * trace_dump_stack - record a stack back trace in the trace buffer
2709  * @skip: Number of functions to skip (helper handlers)
2710  */
2711 void trace_dump_stack(int skip)
2712 {
2713         unsigned long flags;
2714
2715         if (tracing_disabled || tracing_selftest_running)
2716                 return;
2717
2718         local_save_flags(flags);
2719
2720 #ifndef CONFIG_UNWINDER_ORC
2721         /* Skip 1 to skip this function. */
2722         skip++;
2723 #endif
2724         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2725                              flags, skip, preempt_count(), NULL);
2726 }
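/*
 * Example: kernel code chasing a bug can drop a trace_dump_stack(0) into
 * a suspect path to record the current kernel stack into the trace buffer
 * instead of spamming the console with dump_stack().
 */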
2727
2728 static DEFINE_PER_CPU(int, user_stack_count);
2729
2730 void
2731 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2732 {
2733         struct trace_event_call *call = &event_user_stack;
2734         struct ring_buffer_event *event;
2735         struct userstack_entry *entry;
2736         struct stack_trace trace;
2737
2738         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2739                 return;
2740
2741         /*
2742          * NMIs can not handle page faults, even with fixups.
2743          * Saving the user stack can (and often does) fault.
2744          */
2745         if (unlikely(in_nmi()))
2746                 return;
2747
2748         /*
2749          * prevent recursion, since the user stack tracing may
2750          * trigger other kernel events.
2751          */
2752         preempt_disable();
2753         if (__this_cpu_read(user_stack_count))
2754                 goto out;
2755
2756         __this_cpu_inc(user_stack_count);
2757
2758         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2759                                             sizeof(*entry), flags, pc);
2760         if (!event)
2761                 goto out_drop_count;
2762         entry   = ring_buffer_event_data(event);
2763
2764         entry->tgid             = current->tgid;
2765         memset(&entry->caller, 0, sizeof(entry->caller));
2766
2767         trace.nr_entries        = 0;
2768         trace.max_entries       = FTRACE_STACK_ENTRIES;
2769         trace.skip              = 0;
2770         trace.entries           = entry->caller;
2771
2772         save_stack_trace_user(&trace);
2773         if (!call_filter_check_discard(call, entry, buffer, event))
2774                 __buffer_unlock_commit(buffer, event);
2775
2776  out_drop_count:
2777         __this_cpu_dec(user_stack_count);
2778  out:
2779         preempt_enable();
2780 }
2781
2782 #ifdef UNUSED
2783 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2784 {
2785         ftrace_trace_userstack(tr, flags, preempt_count());
2786 }
2787 #endif /* UNUSED */
2788
2789 #endif /* CONFIG_STACKTRACE */
2790
2791 /* created for use with alloc_percpu */
2792 struct trace_buffer_struct {
2793         int nesting;
2794         char buffer[4][TRACE_BUF_SIZE];
2795 };
2796
2797 static struct trace_buffer_struct *trace_percpu_buffer;
2798
2799 /*
2800  * This allows for lockless recording.  If we're nested too deeply, then
2801  * this returns NULL.
2802  */
2803 static char *get_trace_buf(void)
2804 {
2805         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2806
2807         if (!buffer || buffer->nesting >= 4)
2808                 return NULL;
2809
2810         buffer->nesting++;
2811
2812         /* Interrupts must see nesting incremented before we use the buffer */
2813         barrier();
2814         return &buffer->buffer[buffer->nesting][0];
2815 }
2816
2817 static void put_trace_buf(void)
2818 {
2819         /* Don't let the decrement of nesting leak before this */
2820         barrier();
2821         this_cpu_dec(trace_percpu_buffer->nesting);
2822 }
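/*
 * The four per-cpu buffers above are enough for the usual nesting of
 * contexts that can call trace_printk(): process context, softirq, irq
 * and NMI. get_trace_buf() hands out one level per nested caller and
 * returns NULL beyond that, so a trace_printk() from an NMI can never
 * scribble over a buffer that an interrupted task is still formatting.
 */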
2823
2824 static int alloc_percpu_trace_buffer(void)
2825 {
2826         struct trace_buffer_struct *buffers;
2827
2828         buffers = alloc_percpu(struct trace_buffer_struct);
2829         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2830                 return -ENOMEM;
2831
2832         trace_percpu_buffer = buffers;
2833         return 0;
2834 }
2835
2836 static int buffers_allocated;
2837
2838 void trace_printk_init_buffers(void)
2839 {
2840         if (buffers_allocated)
2841                 return;
2842
2843         if (alloc_percpu_trace_buffer())
2844                 return;
2845
2846         /* trace_printk() is for debug use only. Don't use it in production. */
2847
2848         pr_warn("\n");
2849         pr_warn("**********************************************************\n");
2850         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2851         pr_warn("**                                                      **\n");
2852         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2853         pr_warn("**                                                      **\n");
2854         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2855         pr_warn("** unsafe for production use.                           **\n");
2856         pr_warn("**                                                      **\n");
2857         pr_warn("** If you see this message and you are not debugging    **\n");
2858         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2859         pr_warn("**                                                      **\n");
2860         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2861         pr_warn("**********************************************************\n");
2862
2863         /* Expand the buffers to set size */
2864         tracing_update_buffers();
2865
2866         buffers_allocated = 1;
2867
2868         /*
2869          * trace_printk_init_buffers() can be called by modules.
2870          * If that happens, then we need to start cmdline recording
2871          * directly here. If the global_trace.buffer is already
2872          * allocated here, then this was called by module code.
2873          */
2874         if (global_trace.trace_buffer.buffer)
2875                 tracing_start_cmdline_record();
2876 }
2877
2878 void trace_printk_start_comm(void)
2879 {
2880         /* Start tracing comms if trace printk is set */
2881         if (!buffers_allocated)
2882                 return;
2883         tracing_start_cmdline_record();
2884 }
2885
2886 static void trace_printk_start_stop_comm(int enabled)
2887 {
2888         if (!buffers_allocated)
2889                 return;
2890
2891         if (enabled)
2892                 tracing_start_cmdline_record();
2893         else
2894                 tracing_stop_cmdline_record();
2895 }
2896
2897 /**
2898  * trace_vbprintk - write a binary message to the tracing buffer
2899  *
2900  */
2901 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2902 {
2903         struct trace_event_call *call = &event_bprint;
2904         struct ring_buffer_event *event;
2905         struct ring_buffer *buffer;
2906         struct trace_array *tr = &global_trace;
2907         struct bprint_entry *entry;
2908         unsigned long flags;
2909         char *tbuffer;
2910         int len = 0, size, pc;
2911
2912         if (unlikely(tracing_selftest_running || tracing_disabled))
2913                 return 0;
2914
2915         /* Don't pollute graph traces with trace_vprintk internals */
2916         pause_graph_tracing();
2917
2918         pc = preempt_count();
2919         preempt_disable_notrace();
2920
2921         tbuffer = get_trace_buf();
2922         if (!tbuffer) {
2923                 len = 0;
2924                 goto out_nobuffer;
2925         }
2926
2927         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2928
2929         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2930                 goto out;
2931
2932         local_save_flags(flags);
2933         size = sizeof(*entry) + sizeof(u32) * len;
2934         buffer = tr->trace_buffer.buffer;
2935         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2936                                             flags, pc);
2937         if (!event)
2938                 goto out;
2939         entry = ring_buffer_event_data(event);
2940         entry->ip                       = ip;
2941         entry->fmt                      = fmt;
2942
2943         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2944         if (!call_filter_check_discard(call, entry, buffer, event)) {
2945                 __buffer_unlock_commit(buffer, event);
2946                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2947         }
2948
2949 out:
2950         put_trace_buf();
2951
2952 out_nobuffer:
2953         preempt_enable_notrace();
2954         unpause_graph_tracing();
2955
2956         return len;
2957 }
2958 EXPORT_SYMBOL_GPL(trace_vbprintk);
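/*
 * This is the backend for trace_printk() when the format string is a
 * compile-time constant: only the format pointer and the raw arguments
 * are stored in the ring buffer, and the string is formatted when the
 * trace is read. A plain
 *
 *	trace_printk("x=%d y=%d\n", x, y);
 *
 * in kernel code typically ends up here via __trace_bprintk().
 */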
2959
2960 static int
2961 __trace_array_vprintk(struct ring_buffer *buffer,
2962                       unsigned long ip, const char *fmt, va_list args)
2963 {
2964         struct trace_event_call *call = &event_print;
2965         struct ring_buffer_event *event;
2966         int len = 0, size, pc;
2967         struct print_entry *entry;
2968         unsigned long flags;
2969         char *tbuffer;
2970
2971         if (tracing_disabled || tracing_selftest_running)
2972                 return 0;
2973
2974         /* Don't pollute graph traces with trace_vprintk internals */
2975         pause_graph_tracing();
2976
2977         pc = preempt_count();
2978         preempt_disable_notrace();
2979
2980
2981         tbuffer = get_trace_buf();
2982         if (!tbuffer) {
2983                 len = 0;
2984                 goto out_nobuffer;
2985         }
2986
2987         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2988
2989         local_save_flags(flags);
2990         size = sizeof(*entry) + len + 1;
2991         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2992                                             flags, pc);
2993         if (!event)
2994                 goto out;
2995         entry = ring_buffer_event_data(event);
2996         entry->ip = ip;
2997
2998         memcpy(&entry->buf, tbuffer, len + 1);
2999         if (!call_filter_check_discard(call, entry, buffer, event)) {
3000                 __buffer_unlock_commit(buffer, event);
3001                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3002         }
3003
3004 out:
3005         put_trace_buf();
3006
3007 out_nobuffer:
3008         preempt_enable_notrace();
3009         unpause_graph_tracing();
3010
3011         return len;
3012 }
3013
3014 int trace_array_vprintk(struct trace_array *tr,
3015                         unsigned long ip, const char *fmt, va_list args)
3016 {
3017         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3018 }
3019
3020 int trace_array_printk(struct trace_array *tr,
3021                        unsigned long ip, const char *fmt, ...)
3022 {
3023         int ret;
3024         va_list ap;
3025
3026         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3027                 return 0;
3028
3029         va_start(ap, fmt);
3030         ret = trace_array_vprintk(tr, ip, fmt, ap);
3031         va_end(ap);
3032         return ret;
3033 }
3034
3035 int trace_array_printk_buf(struct ring_buffer *buffer,
3036                            unsigned long ip, const char *fmt, ...)
3037 {
3038         int ret;
3039         va_list ap;
3040
3041         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3042                 return 0;
3043
3044         va_start(ap, fmt);
3045         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3046         va_end(ap);
3047         return ret;
3048 }
3049
3050 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3051 {
3052         return trace_array_vprintk(&global_trace, ip, fmt, args);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_vprintk);
3055
3056 static void trace_iterator_increment(struct trace_iterator *iter)
3057 {
3058         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3059
3060         iter->idx++;
3061         if (buf_iter)
3062                 ring_buffer_read(buf_iter, NULL);
3063 }
3064
3065 static struct trace_entry *
3066 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3067                 unsigned long *lost_events)
3068 {
3069         struct ring_buffer_event *event;
3070         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3071
3072         if (buf_iter)
3073                 event = ring_buffer_iter_peek(buf_iter, ts);
3074         else
3075                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3076                                          lost_events);
3077
3078         if (event) {
3079                 iter->ent_size = ring_buffer_event_length(event);
3080                 return ring_buffer_event_data(event);
3081         }
3082         iter->ent_size = 0;
3083         return NULL;
3084 }
3085
3086 static struct trace_entry *
3087 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3088                   unsigned long *missing_events, u64 *ent_ts)
3089 {
3090         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3091         struct trace_entry *ent, *next = NULL;
3092         unsigned long lost_events = 0, next_lost = 0;
3093         int cpu_file = iter->cpu_file;
3094         u64 next_ts = 0, ts;
3095         int next_cpu = -1;
3096         int next_size = 0;
3097         int cpu;
3098
3099         /*
3100          * If we are in a per_cpu trace file, don't bother iterating over
3101          * all cpus, just peek at that cpu directly.
3102          */
3103         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3104                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3105                         return NULL;
3106                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3107                 if (ent_cpu)
3108                         *ent_cpu = cpu_file;
3109
3110                 return ent;
3111         }
3112
3113         for_each_tracing_cpu(cpu) {
3114
3115                 if (ring_buffer_empty_cpu(buffer, cpu))
3116                         continue;
3117
3118                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3119
3120                 /*
3121                  * Pick the entry with the smallest timestamp:
3122                  */
3123                 if (ent && (!next || ts < next_ts)) {
3124                         next = ent;
3125                         next_cpu = cpu;
3126                         next_ts = ts;
3127                         next_lost = lost_events;
3128                         next_size = iter->ent_size;
3129                 }
3130         }
3131
3132         iter->ent_size = next_size;
3133
3134         if (ent_cpu)
3135                 *ent_cpu = next_cpu;
3136
3137         if (ent_ts)
3138                 *ent_ts = next_ts;
3139
3140         if (missing_events)
3141                 *missing_events = next_lost;
3142
3143         return next;
3144 }
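/*
 * In other words, __find_next_entry() merges the per-cpu ring buffers by
 * repeatedly picking the entry with the smallest timestamp, which is what
 * makes the trace file appear as one globally ordered stream even though
 * each CPU records into its own buffer.
 */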
3145
3146 /* Find the next real entry, without updating the iterator itself */
3147 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3148                                           int *ent_cpu, u64 *ent_ts)
3149 {
3150         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3151 }
3152
3153 /* Find the next real entry, and increment the iterator to the next entry */
3154 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3155 {
3156         iter->ent = __find_next_entry(iter, &iter->cpu,
3157                                       &iter->lost_events, &iter->ts);
3158
3159         if (iter->ent)
3160                 trace_iterator_increment(iter);
3161
3162         return iter->ent ? iter : NULL;
3163 }
3164
3165 static void trace_consume(struct trace_iterator *iter)
3166 {
3167         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3168                             &iter->lost_events);
3169 }
3170
3171 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3172 {
3173         struct trace_iterator *iter = m->private;
3174         int i = (int)*pos;
3175         void *ent;
3176
3177         WARN_ON_ONCE(iter->leftover);
3178
3179         (*pos)++;
3180
3181         /* can't go backwards */
3182         if (iter->idx > i)
3183                 return NULL;
3184
3185         if (iter->idx < 0)
3186                 ent = trace_find_next_entry_inc(iter);
3187         else
3188                 ent = iter;
3189
3190         while (ent && iter->idx < i)
3191                 ent = trace_find_next_entry_inc(iter);
3192
3193         iter->pos = *pos;
3194
3195         return ent;
3196 }
3197
3198 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3199 {
3200         struct ring_buffer_event *event;
3201         struct ring_buffer_iter *buf_iter;
3202         unsigned long entries = 0;
3203         u64 ts;
3204
3205         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3206
3207         buf_iter = trace_buffer_iter(iter, cpu);
3208         if (!buf_iter)
3209                 return;
3210
3211         ring_buffer_iter_reset(buf_iter);
3212
3213         /*
3214          * We could have the case with the max latency tracers
3215          * that a reset never took place on a cpu. This is evidenced
3216          * by the timestamp being before the start of the buffer.
3217          */
3218         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3219                 if (ts >= iter->trace_buffer->time_start)
3220                         break;
3221                 entries++;
3222                 ring_buffer_read(buf_iter, NULL);
3223         }
3224
3225         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3226 }
3227
3228 /*
3229  * The current tracer is copied to avoid taking a global lock
3230  * all around.
3231  */
3232 static void *s_start(struct seq_file *m, loff_t *pos)
3233 {
3234         struct trace_iterator *iter = m->private;
3235         struct trace_array *tr = iter->tr;
3236         int cpu_file = iter->cpu_file;
3237         void *p = NULL;
3238         loff_t l = 0;
3239         int cpu;
3240
3241         /*
3242          * copy the tracer to avoid using a global lock all around.
3243          * iter->trace is a copy of current_trace, the pointer to the
3244          * name may be used instead of a strcmp(), as iter->trace->name
3245          * will point to the same string as current_trace->name.
3246          */
3247         mutex_lock(&trace_types_lock);
3248         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3249                 *iter->trace = *tr->current_trace;
3250         mutex_unlock(&trace_types_lock);
3251
3252 #ifdef CONFIG_TRACER_MAX_TRACE
3253         if (iter->snapshot && iter->trace->use_max_tr)
3254                 return ERR_PTR(-EBUSY);
3255 #endif
3256
3257         if (!iter->snapshot)
3258                 atomic_inc(&trace_record_taskinfo_disabled);
3259
3260         if (*pos != iter->pos) {
3261                 iter->ent = NULL;
3262                 iter->cpu = 0;
3263                 iter->idx = -1;
3264
3265                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3266                         for_each_tracing_cpu(cpu)
3267                                 tracing_iter_reset(iter, cpu);
3268                 } else
3269                         tracing_iter_reset(iter, cpu_file);
3270
3271                 iter->leftover = 0;
3272                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3273                         ;
3274
3275         } else {
3276                 /*
3277                  * If we overflowed the seq_file before, then we want
3278                  * to just reuse the trace_seq buffer again.
3279                  */
3280                 if (iter->leftover)
3281                         p = iter;
3282                 else {
3283                         l = *pos - 1;
3284                         p = s_next(m, p, &l);
3285                 }
3286         }
3287
3288         trace_event_read_lock();
3289         trace_access_lock(cpu_file);
3290         return p;
3291 }
3292
3293 static void s_stop(struct seq_file *m, void *p)
3294 {
3295         struct trace_iterator *iter = m->private;
3296
3297 #ifdef CONFIG_TRACER_MAX_TRACE
3298         if (iter->snapshot && iter->trace->use_max_tr)
3299                 return;
3300 #endif
3301
3302         if (!iter->snapshot)
3303                 atomic_dec(&trace_record_taskinfo_disabled);
3304
3305         trace_access_unlock(iter->cpu_file);
3306         trace_event_read_unlock();
3307 }
3308
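     /*
      * Sum up every per-cpu buffer: @entries is the number of events still
      * readable, @total additionally counts events lost to overruns (except
      * on CPUs where entries were skipped at iterator reset).
      */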
3309 static void
3310 get_total_entries(struct trace_buffer *buf,
3311                   unsigned long *total, unsigned long *entries)
3312 {
3313         unsigned long count;
3314         int cpu;
3315
3316         *total = 0;
3317         *entries = 0;
3318
3319         for_each_tracing_cpu(cpu) {
3320                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3321                 /*
3322                  * If this buffer has skipped entries, then we hold all
3323                  * entries for the trace and we need to ignore the
3324                  * ones before the time stamp.
3325                  */
3326                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3327                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3328                         /* total is the same as the entries */
3329                         *total += count;
3330                 } else
3331                         *total += count +
3332                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3333                 *entries += count;
3334         }
3335 }
3336
3337 static void print_lat_help_header(struct seq_file *m)
3338 {
3339         seq_puts(m, "#                  _------=> CPU#            \n"
3340                     "#                 / _-----=> irqs-off        \n"
3341                     "#                | / _----=> need-resched    \n"
3342                     "#                || / _---=> hardirq/softirq \n"
3343                     "#                ||| / _--=> preempt-depth   \n"
3344                     "#                |||| /     delay            \n"
3345                     "#  cmd     pid   ||||| time  |   caller      \n"
3346                     "#     \\   /      |||||  \\    |   /         \n");
3347 }
3348
3349 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3350 {
3351         unsigned long total;
3352         unsigned long entries;
3353
3354         get_total_entries(buf, &total, &entries);
3355         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3356                    entries, total, num_online_cpus());
3357         seq_puts(m, "#\n");
3358 }
3359
3360 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3361                                    unsigned int flags)
3362 {
3363         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3364
3365         print_event_info(buf, m);
3366
3367         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3368         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3369 }
3370
3371 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3372                                        unsigned int flags)
3373 {
3374         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375         const char tgid_space[] = "          ";
3376         const char space[] = "  ";
3377
3378         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3379                    tgid ? tgid_space : space);
3380         seq_printf(m, "#                          %s / _----=> need-resched\n",
3381                    tgid ? tgid_space : space);
3382         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3383                    tgid ? tgid_space : space);
3384         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3385                    tgid ? tgid_space : space);
3386         seq_printf(m, "#                          %s||| /     delay\n",
3387                    tgid ? tgid_space : space);
3388         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3389                    tgid ? "   TGID   " : space);
3390         seq_printf(m, "#              | |       | %s||||       |         |\n",
3391                    tgid ? "     |    " : space);
3392 }
3393
3394 void
3395 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3396 {
3397         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3398         struct trace_buffer *buf = iter->trace_buffer;
3399         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3400         struct tracer *type = iter->trace;
3401         unsigned long entries;
3402         unsigned long total;
3403         const char *name = type->name;
3406
3407         get_total_entries(buf, &total, &entries);
3408
3409         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3410                    name, UTS_RELEASE);
3411         seq_puts(m, "# -----------------------------------"
3412                  "---------------------------------\n");
3413         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3414                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3415                    nsecs_to_usecs(data->saved_latency),
3416                    entries,
3417                    total,
3418                    buf->cpu,
3419 #if defined(CONFIG_PREEMPT_NONE)
3420                    "server",
3421 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3422                    "desktop",
3423 #elif defined(CONFIG_PREEMPT)
3424                    "preempt",
3425 #else
3426                    "unknown",
3427 #endif
3428                    /* These are reserved for later use */
3429                    0, 0, 0, 0);
3430 #ifdef CONFIG_SMP
3431         seq_printf(m, " #P:%d)\n", num_online_cpus());
3432 #else
3433         seq_puts(m, ")\n");
3434 #endif
3435         seq_puts(m, "#    -----------------\n");
3436         seq_printf(m, "#    | task: %.16s-%d "
3437                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3438                    data->comm, data->pid,
3439                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3440                    data->policy, data->rt_priority);
3441         seq_puts(m, "#    -----------------\n");
3442
3443         if (data->critical_start) {
3444                 seq_puts(m, "#  => started at: ");
3445                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3446                 trace_print_seq(m, &iter->seq);
3447                 seq_puts(m, "\n#  => ended at:   ");
3448                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3449                 trace_print_seq(m, &iter->seq);
3450                 seq_puts(m, "\n#\n");
3451         }
3452
3453         seq_puts(m, "#\n");
3454 }
3455
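     /*
      * When buffer annotation is enabled (set when buffers overran), print a
      * one-time "CPU buffer started" marker the first time output switches to
      * a given CPU's buffer, except at the very beginning of the trace.
      */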
3456 static void test_cpu_buff_start(struct trace_iterator *iter)
3457 {
3458         struct trace_seq *s = &iter->seq;
3459         struct trace_array *tr = iter->tr;
3460
3461         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3462                 return;
3463
3464         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3465                 return;
3466
3467         if (cpumask_available(iter->started) &&
3468             cpumask_test_cpu(iter->cpu, iter->started))
3469                 return;
3470
3471         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3472                 return;
3473
3474         if (cpumask_available(iter->started))
3475                 cpumask_set_cpu(iter->cpu, iter->started);
3476
3477         /* Don't print started cpu buffer for the first entry of the trace */
3478         if (iter->idx > 1)
3479                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3480                                 iter->cpu);
3481 }
3482
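     /*
      * Default human readable output: print the context info (latency or
      * normal format) and then hand the entry to its event's trace() handler.
      */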
3483 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3484 {
3485         struct trace_array *tr = iter->tr;
3486         struct trace_seq *s = &iter->seq;
3487         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3488         struct trace_entry *entry;
3489         struct trace_event *event;
3490
3491         entry = iter->ent;
3492
3493         test_cpu_buff_start(iter);
3494
3495         event = ftrace_find_event(entry->type);
3496
3497         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3498                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3499                         trace_print_lat_context(iter);
3500                 else
3501                         trace_print_context(iter);
3502         }
3503
3504         if (trace_seq_has_overflowed(s))
3505                 return TRACE_TYPE_PARTIAL_LINE;
3506
3507         if (event)
3508                 return event->funcs->trace(iter, sym_flags, event);
3509
3510         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3511
3512         return trace_handle_return(s);
3513 }
3514
3515 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3516 {
3517         struct trace_array *tr = iter->tr;
3518         struct trace_seq *s = &iter->seq;
3519         struct trace_entry *entry;
3520         struct trace_event *event;
3521
3522         entry = iter->ent;
3523
3524         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3525                 trace_seq_printf(s, "%d %d %llu ",
3526                                  entry->pid, iter->cpu, iter->ts);
3527
3528         if (trace_seq_has_overflowed(s))
3529                 return TRACE_TYPE_PARTIAL_LINE;
3530
3531         event = ftrace_find_event(entry->type);
3532         if (event)
3533                 return event->funcs->raw(iter, 0, event);
3534
3535         trace_seq_printf(s, "%d ?\n", entry->type);
3536
3537         return trace_handle_return(s);
3538 }
3539
3540 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3541 {
3542         struct trace_array *tr = iter->tr;
3543         struct trace_seq *s = &iter->seq;
3544         unsigned char newline = '\n';
3545         struct trace_entry *entry;
3546         struct trace_event *event;
3547
3548         entry = iter->ent;
3549
3550         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3551                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3552                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3553                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3554                 if (trace_seq_has_overflowed(s))
3555                         return TRACE_TYPE_PARTIAL_LINE;
3556         }
3557
3558         event = ftrace_find_event(entry->type);
3559         if (event) {
3560                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3561                 if (ret != TRACE_TYPE_HANDLED)
3562                         return ret;
3563         }
3564
3565         SEQ_PUT_FIELD(s, newline);
3566
3567         return trace_handle_return(s);
3568 }
3569
3570 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3571 {
3572         struct trace_array *tr = iter->tr;
3573         struct trace_seq *s = &iter->seq;
3574         struct trace_entry *entry;
3575         struct trace_event *event;
3576
3577         entry = iter->ent;
3578
3579         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3580                 SEQ_PUT_FIELD(s, entry->pid);
3581                 SEQ_PUT_FIELD(s, iter->cpu);
3582                 SEQ_PUT_FIELD(s, iter->ts);
3583                 if (trace_seq_has_overflowed(s))
3584                         return TRACE_TYPE_PARTIAL_LINE;
3585         }
3586
3587         event = ftrace_find_event(entry->type);
3588         return event ? event->funcs->binary(iter, 0, event) :
3589                 TRACE_TYPE_HANDLED;
3590 }
3591
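     /* Return 1 if there is nothing left to read in the iterated buffer(s). */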
3592 int trace_empty(struct trace_iterator *iter)
3593 {
3594         struct ring_buffer_iter *buf_iter;
3595         int cpu;
3596
3597         /* If we are looking at one CPU buffer, only check that one */
3598         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3599                 cpu = iter->cpu_file;
3600                 buf_iter = trace_buffer_iter(iter, cpu);
3601                 if (buf_iter) {
3602                         if (!ring_buffer_iter_empty(buf_iter))
3603                                 return 0;
3604                 } else {
3605                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3606                                 return 0;
3607                 }
3608                 return 1;
3609         }
3610
3611         for_each_tracing_cpu(cpu) {
3612                 buf_iter = trace_buffer_iter(iter, cpu);
3613                 if (buf_iter) {
3614                         if (!ring_buffer_iter_empty(buf_iter))
3615                                 return 0;
3616                 } else {
3617                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3618                                 return 0;
3619                 }
3620         }
3621
3622         return 1;
3623 }
3624
3625 /*  Called with trace_event_read_lock() held. */
3626 enum print_line_t print_trace_line(struct trace_iterator *iter)
3627 {
3628         struct trace_array *tr = iter->tr;
3629         unsigned long trace_flags = tr->trace_flags;
3630         enum print_line_t ret;
3631
3632         if (iter->lost_events) {
3633                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3634                                  iter->cpu, iter->lost_events);
3635                 if (trace_seq_has_overflowed(&iter->seq))
3636                         return TRACE_TYPE_PARTIAL_LINE;
3637         }
3638
3639         if (iter->trace && iter->trace->print_line) {
3640                 ret = iter->trace->print_line(iter);
3641                 if (ret != TRACE_TYPE_UNHANDLED)
3642                         return ret;
3643         }
3644
3645         if (iter->ent->type == TRACE_BPUTS &&
3646                         trace_flags & TRACE_ITER_PRINTK &&
3647                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3648                 return trace_print_bputs_msg_only(iter);
3649
3650         if (iter->ent->type == TRACE_BPRINT &&
3651                         trace_flags & TRACE_ITER_PRINTK &&
3652                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3653                 return trace_print_bprintk_msg_only(iter);
3654
3655         if (iter->ent->type == TRACE_PRINT &&
3656                         trace_flags & TRACE_ITER_PRINTK &&
3657                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3658                 return trace_print_printk_msg_only(iter);
3659
3660         if (trace_flags & TRACE_ITER_BIN)
3661                 return print_bin_fmt(iter);
3662
3663         if (trace_flags & TRACE_ITER_HEX)
3664                 return print_hex_fmt(iter);
3665
3666         if (trace_flags & TRACE_ITER_RAW)
3667                 return print_raw_fmt(iter);
3668
3669         return print_trace_fmt(iter);
3670 }
3671
3672 void trace_latency_header(struct seq_file *m)
3673 {
3674         struct trace_iterator *iter = m->private;
3675         struct trace_array *tr = iter->tr;
3676
3677         /* print nothing if the buffers are empty */
3678         if (trace_empty(iter))
3679                 return;
3680
3681         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3682                 print_trace_header(m, iter);
3683
3684         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3685                 print_lat_help_header(m);
3686 }
3687
3688 void trace_default_header(struct seq_file *m)
3689 {
3690         struct trace_iterator *iter = m->private;
3691         struct trace_array *tr = iter->tr;
3692         unsigned long trace_flags = tr->trace_flags;
3693
3694         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3695                 return;
3696
3697         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3698                 /* print nothing if the buffers are empty */
3699                 if (trace_empty(iter))
3700                         return;
3701                 print_trace_header(m, iter);
3702                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3703                         print_lat_help_header(m);
3704         } else {
3705                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3706                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3707                                 print_func_help_header_irq(iter->trace_buffer,
3708                                                            m, trace_flags);
3709                         else
3710                                 print_func_help_header(iter->trace_buffer, m,
3711                                                        trace_flags);
3712                 }
3713         }
3714 }
3715
3716 static void test_ftrace_alive(struct seq_file *m)
3717 {
3718         if (!ftrace_is_dead())
3719                 return;
3720         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3721                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3722 }
3723
3724 #ifdef CONFIG_TRACER_MAX_TRACE
3725 static void show_snapshot_main_help(struct seq_file *m)
3726 {
3727         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3728                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3729                     "#                      Takes a snapshot of the main buffer.\n"
3730                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3731                     "#                      (Doesn't have to be '2' works with any number that\n"
3732                     "#                       is not a '0' or '1')\n");
3733 }
3734
3735 static void show_snapshot_percpu_help(struct seq_file *m)
3736 {
3737         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3738 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3739         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3741 #else
3742         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3743                     "#                     Must use main snapshot file to allocate.\n");
3744 #endif
3745         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3746                     "#                      (Doesn't have to be '2' works with any number that\n"
3747                     "#                       is not a '0' or '1')\n");
3748 }
3749
3750 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3751 {
3752         if (iter->tr->allocated_snapshot)
3753                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3754         else
3755                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3756
3757         seq_puts(m, "# Snapshot commands:\n");
3758         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3759                 show_snapshot_main_help(m);
3760         else
3761                 show_snapshot_percpu_help(m);
3762 }
3763 #else
3764 /* Should never be called */
3765 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3766 #endif
3767
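     /*
      * seq_file show(): print the headers when there is no current entry,
      * flush a previously overflowed line, or format the next trace line.
      */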
3768 static int s_show(struct seq_file *m, void *v)
3769 {
3770         struct trace_iterator *iter = v;
3771         int ret;
3772
3773         if (iter->ent == NULL) {
3774                 if (iter->tr) {
3775                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3776                         seq_puts(m, "#\n");
3777                         test_ftrace_alive(m);
3778                 }
3779                 if (iter->snapshot && trace_empty(iter))
3780                         print_snapshot_help(m, iter);
3781                 else if (iter->trace && iter->trace->print_header)
3782                         iter->trace->print_header(m);
3783                 else
3784                         trace_default_header(m);
3785
3786         } else if (iter->leftover) {
3787                 /*
3788                  * If we filled the seq_file buffer earlier, we
3789                  * want to just show it now.
3790                  */
3791                 ret = trace_print_seq(m, &iter->seq);
3792
3793                 /* ret should this time be zero, but you never know */
3794                 iter->leftover = ret;
3795
3796         } else {
3797                 print_trace_line(iter);
3798                 ret = trace_print_seq(m, &iter->seq);
3799                 /*
3800                  * If we overflow the seq_file buffer, then it will
3801                  * ask us for this data again at start up.
3802                  * Use that instead.
3803                  *  ret is 0 if seq_file write succeeded.
3804                  *        -1 otherwise.
3805                  */
3806                 iter->leftover = ret;
3807         }
3808
3809         return 0;
3810 }
3811
3812 /*
3813  * Should be used after trace_array_get(), trace_types_lock
3814  * ensures that i_cdev was already initialized.
3815  */
3816 static inline int tracing_get_cpu(struct inode *inode)
3817 {
3818         if (inode->i_cdev) /* See trace_create_cpu_file() */
3819                 return (long)inode->i_cdev - 1;
3820         return RING_BUFFER_ALL_CPUS;
3821 }
3822
3823 static const struct seq_operations tracer_seq_ops = {
3824         .start          = s_start,
3825         .next           = s_next,
3826         .stop           = s_stop,
3827         .show           = s_show,
3828 };
3829
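     /*
      * Set up a trace_iterator for reading the trace (or snapshot) file:
      * copy the current tracer, allocate the per-cpu ring buffer iterators
      * and, unless a snapshot is being opened, stop tracing while the file
      * is open.
      */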
3830 static struct trace_iterator *
3831 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3832 {
3833         struct trace_array *tr = inode->i_private;
3834         struct trace_iterator *iter;
3835         int cpu;
3836
3837         if (tracing_disabled)
3838                 return ERR_PTR(-ENODEV);
3839
3840         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3841         if (!iter)
3842                 return ERR_PTR(-ENOMEM);
3843
3844         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3845                                     GFP_KERNEL);
3846         if (!iter->buffer_iter)
3847                 goto release;
3848
3849         /*
3850          * We make a copy of the current tracer to avoid concurrent
3851          * changes on it while we are reading.
3852          */
3853         mutex_lock(&trace_types_lock);
3854         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3855         if (!iter->trace)
3856                 goto fail;
3857
3858         *iter->trace = *tr->current_trace;
3859
3860         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3861                 goto fail;
3862
3863         iter->tr = tr;
3864
3865 #ifdef CONFIG_TRACER_MAX_TRACE
3866         /* Currently only the top directory has a snapshot */
3867         if (tr->current_trace->print_max || snapshot)
3868                 iter->trace_buffer = &tr->max_buffer;
3869         else
3870 #endif
3871                 iter->trace_buffer = &tr->trace_buffer;
3872         iter->snapshot = snapshot;
3873         iter->pos = -1;
3874         iter->cpu_file = tracing_get_cpu(inode);
3875         mutex_init(&iter->mutex);
3876
3877         /* Notify the tracer early, before we stop tracing. */
3878         if (iter->trace && iter->trace->open)
3879                 iter->trace->open(iter);
3880
3881         /* Annotate start of buffers if we had overruns */
3882         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3883                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3884
3885         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3886         if (trace_clocks[tr->clock_id].in_ns)
3887                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3888
3889         /* stop the trace while dumping if we are not opening "snapshot" */
3890         if (!iter->snapshot)
3891                 tracing_stop_tr(tr);
3892
3893         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3894                 for_each_tracing_cpu(cpu) {
3895                         iter->buffer_iter[cpu] =
3896                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3897                 }
3898                 ring_buffer_read_prepare_sync();
3899                 for_each_tracing_cpu(cpu) {
3900                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3901                         tracing_iter_reset(iter, cpu);
3902                 }
3903         } else {
3904                 cpu = iter->cpu_file;
3905                 iter->buffer_iter[cpu] =
3906                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3907                 ring_buffer_read_prepare_sync();
3908                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3909                 tracing_iter_reset(iter, cpu);
3910         }
3911
3912         mutex_unlock(&trace_types_lock);
3913
3914         return iter;
3915
3916  fail:
3917         mutex_unlock(&trace_types_lock);
3918         kfree(iter->trace);
3919         kfree(iter->buffer_iter);
3920 release:
3921         seq_release_private(inode, file);
3922         return ERR_PTR(-ENOMEM);
3923 }
3924
3925 int tracing_open_generic(struct inode *inode, struct file *filp)
3926 {
3927         if (tracing_disabled)
3928                 return -ENODEV;
3929
3930         filp->private_data = inode->i_private;
3931         return 0;
3932 }
3933
3934 bool tracing_is_disabled(void)
3935 {
3936         return tracing_disabled ? true : false;
3937 }
3938
3939 /*
3940  * Open and update trace_array ref count.
3941  * Must have the current trace_array passed to it.
3942  */
3943 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3944 {
3945         struct trace_array *tr = inode->i_private;
3946
3947         if (tracing_disabled)
3948                 return -ENODEV;
3949
3950         if (trace_array_get(tr) < 0)
3951                 return -ENODEV;
3952
3953         filp->private_data = inode->i_private;
3954
3955         return 0;
3956 }
3957
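     /*
      * Tear down everything set up by __tracing_open() and restart tracing
      * if it was stopped by the open.
      */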
3958 static int tracing_release(struct inode *inode, struct file *file)
3959 {
3960         struct trace_array *tr = inode->i_private;
3961         struct seq_file *m = file->private_data;
3962         struct trace_iterator *iter;
3963         int cpu;
3964
3965         if (!(file->f_mode & FMODE_READ)) {
3966                 trace_array_put(tr);
3967                 return 0;
3968         }
3969
3970         /* Writes do not use seq_file */
3971         iter = m->private;
3972         mutex_lock(&trace_types_lock);
3973
3974         for_each_tracing_cpu(cpu) {
3975                 if (iter->buffer_iter[cpu])
3976                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3977         }
3978
3979         if (iter->trace && iter->trace->close)
3980                 iter->trace->close(iter);
3981
3982         if (!iter->snapshot)
3983                 /* reenable tracing if it was previously enabled */
3984                 tracing_start_tr(tr);
3985
3986         __trace_array_put(tr);
3987
3988         mutex_unlock(&trace_types_lock);
3989
3990         mutex_destroy(&iter->mutex);
3991         free_cpumask_var(iter->started);
3992         kfree(iter->trace);
3993         kfree(iter->buffer_iter);
3994         seq_release_private(inode, file);
3995
3996         return 0;
3997 }
3998
3999 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4000 {
4001         struct trace_array *tr = inode->i_private;
4002
4003         trace_array_put(tr);
4004         return 0;
4005 }
4006
4007 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4008 {
4009         struct trace_array *tr = inode->i_private;
4010
4011         trace_array_put(tr);
4012
4013         return single_release(inode, file);
4014 }
4015
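     /*
      * Open the trace file: a truncating writer clears the buffer(s), a
      * reader gets a full trace_iterator via __tracing_open().
      */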
4016 static int tracing_open(struct inode *inode, struct file *file)
4017 {
4018         struct trace_array *tr = inode->i_private;
4019         struct trace_iterator *iter;
4020         int ret = 0;
4021
4022         if (trace_array_get(tr) < 0)
4023                 return -ENODEV;
4024
4025         /* If this file was open for write, then erase contents */
4026         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4027                 int cpu = tracing_get_cpu(inode);
4028                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4029
4030 #ifdef CONFIG_TRACER_MAX_TRACE
4031                 if (tr->current_trace->print_max)
4032                         trace_buf = &tr->max_buffer;
4033 #endif
4034
4035                 if (cpu == RING_BUFFER_ALL_CPUS)
4036                         tracing_reset_online_cpus(trace_buf);
4037                 else
4038                         tracing_reset(trace_buf, cpu);
4039         }
4040
4041         if (file->f_mode & FMODE_READ) {
4042                 iter = __tracing_open(inode, file, false);
4043                 if (IS_ERR(iter))
4044                         ret = PTR_ERR(iter);
4045                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4046                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4047         }
4048
4049         if (ret < 0)
4050                 trace_array_put(tr);
4051
4052         return ret;
4053 }
4054
4055 /*
4056  * Some tracers are not suitable for instance buffers.
4057  * A tracer is always available for the global array (toplevel)
4058  * or if it explicitly states that it is.
4059  */
4060 static bool
4061 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4062 {
4063         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4064 }
4065
4066 /* Find the next tracer that this trace array may use */
4067 static struct tracer *
4068 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4069 {
4070         while (t && !trace_ok_for_array(t, tr))
4071                 t = t->next;
4072
4073         return t;
4074 }
4075
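     /*
      * seq_file iterator over the registered tracers that this trace array
      * is allowed to use; used by show_traces_open() below.
      */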
4076 static void *
4077 t_next(struct seq_file *m, void *v, loff_t *pos)
4078 {
4079         struct trace_array *tr = m->private;
4080         struct tracer *t = v;
4081
4082         (*pos)++;
4083
4084         if (t)
4085                 t = get_tracer_for_array(tr, t->next);
4086
4087         return t;
4088 }
4089
4090 static void *t_start(struct seq_file *m, loff_t *pos)
4091 {
4092         struct trace_array *tr = m->private;
4093         struct tracer *t;
4094         loff_t l = 0;
4095
4096         mutex_lock(&trace_types_lock);
4097
4098         t = get_tracer_for_array(tr, trace_types);
4099         for (; t && l < *pos; t = t_next(m, t, &l))
4100                 ;
4101
4102         return t;
4103 }
4104
4105 static void t_stop(struct seq_file *m, void *p)
4106 {
4107         mutex_unlock(&trace_types_lock);
4108 }
4109
4110 static int t_show(struct seq_file *m, void *v)
4111 {
4112         struct tracer *t = v;
4113
4114         if (!t)
4115                 return 0;
4116
4117         seq_puts(m, t->name);
4118         if (t->next)
4119                 seq_putc(m, ' ');
4120         else
4121                 seq_putc(m, '\n');
4122
4123         return 0;
4124 }
4125
4126 static const struct seq_operations show_traces_seq_ops = {
4127         .start          = t_start,
4128         .next           = t_next,
4129         .stop           = t_stop,
4130         .show           = t_show,
4131 };
4132
4133 static int show_traces_open(struct inode *inode, struct file *file)
4134 {
4135         struct trace_array *tr = inode->i_private;
4136         struct seq_file *m;
4137         int ret;
4138
4139         if (tracing_disabled)
4140                 return -ENODEV;
4141
4142         ret = seq_open(file, &show_traces_seq_ops);
4143         if (ret)
4144                 return ret;
4145
4146         m = file->private_data;
4147         m->private = tr;
4148
4149         return 0;
4150 }
4151
4152 static ssize_t
4153 tracing_write_stub(struct file *filp, const char __user *ubuf,
4154                    size_t count, loff_t *ppos)
4155 {
4156         return count;
4157 }
4158
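     /* Seeking is only meaningful for readers; writers always stay at offset 0. */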
4159 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4160 {
4161         int ret;
4162
4163         if (file->f_mode & FMODE_READ)
4164                 ret = seq_lseek(file, offset, whence);
4165         else
4166                 file->f_pos = ret = 0;
4167
4168         return ret;
4169 }
4170
4171 static const struct file_operations tracing_fops = {
4172         .open           = tracing_open,
4173         .read           = seq_read,
4174         .write          = tracing_write_stub,
4175         .llseek         = tracing_lseek,
4176         .release        = tracing_release,
4177 };
4178
4179 static const struct file_operations show_traces_fops = {
4180         .open           = show_traces_open,
4181         .read           = seq_read,
4182         .release        = seq_release,
4183         .llseek         = seq_lseek,
4184 };
4185
4186 static ssize_t
4187 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4188                      size_t count, loff_t *ppos)
4189 {
4190         struct trace_array *tr = file_inode(filp)->i_private;
4191         char *mask_str;
4192         int len;
4193
4194         len = snprintf(NULL, 0, "%*pb\n",
4195                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4196         mask_str = kmalloc(len, GFP_KERNEL);
4197         if (!mask_str)
4198                 return -ENOMEM;
4199
4200         len = snprintf(mask_str, len, "%*pb\n",
4201                        cpumask_pr_args(tr->tracing_cpumask));
4202         if (len >= count) {
4203                 count = -EINVAL;
4204                 goto out_err;
4205         }
4206         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4207
4208 out_err:
4209         kfree(mask_str);
4210
4211         return count;
4212 }
4213
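     /*
      * Update the tracing cpumask: disable recording on CPUs being removed
      * from the mask, re-enable it on CPUs being added, then install the
      * new mask.
      */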
4214 static ssize_t
4215 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4216                       size_t count, loff_t *ppos)
4217 {
4218         struct trace_array *tr = file_inode(filp)->i_private;
4219         cpumask_var_t tracing_cpumask_new;
4220         int err, cpu;
4221
4222         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4223                 return -ENOMEM;
4224
4225         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4226         if (err)
4227                 goto err_unlock;
4228
4229         local_irq_disable();
4230         arch_spin_lock(&tr->max_lock);
4231         for_each_tracing_cpu(cpu) {
4232                 /*
4233                  * Increase/decrease the disabled counter if we are
4234                  * about to flip a bit in the cpumask:
4235                  */
4236                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4237                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4238                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4239                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4240                 }
4241                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4242                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4243                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4244                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4245                 }
4246         }
4247         arch_spin_unlock(&tr->max_lock);
4248         local_irq_enable();
4249
4250         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4251         free_cpumask_var(tracing_cpumask_new);
4252
4253         return count;
4254
4255 err_unlock:
4256         free_cpumask_var(tracing_cpumask_new);
4257
4258         return err;
4259 }
4260
4261 static const struct file_operations tracing_cpumask_fops = {
4262         .open           = tracing_open_generic_tr,
4263         .read           = tracing_cpumask_read,
4264         .write          = tracing_cpumask_write,
4265         .release        = tracing_release_generic_tr,
4266         .llseek         = generic_file_llseek,
4267 };
4268
4269 static int tracing_trace_options_show(struct seq_file *m, void *v)
4270 {
4271         struct tracer_opt *trace_opts;
4272         struct trace_array *tr = m->private;
4273         u32 tracer_flags;
4274         int i;
4275
4276         mutex_lock(&trace_types_lock);
4277         tracer_flags = tr->current_trace->flags->val;
4278         trace_opts = tr->current_trace->flags->opts;
4279
4280         for (i = 0; trace_options[i]; i++) {
4281                 if (tr->trace_flags & (1 << i))
4282                         seq_printf(m, "%s\n", trace_options[i]);
4283                 else
4284                         seq_printf(m, "no%s\n", trace_options[i]);
4285         }
4286
4287         for (i = 0; trace_opts[i].name; i++) {
4288                 if (tracer_flags & trace_opts[i].bit)
4289                         seq_printf(m, "%s\n", trace_opts[i].name);
4290                 else
4291                         seq_printf(m, "no%s\n", trace_opts[i].name);
4292         }
4293         mutex_unlock(&trace_types_lock);
4294
4295         return 0;
4296 }
4297
4298 static int __set_tracer_option(struct trace_array *tr,
4299                                struct tracer_flags *tracer_flags,
4300                                struct tracer_opt *opts, int neg)
4301 {
4302         struct tracer *trace = tracer_flags->trace;
4303         int ret;
4304
4305         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4306         if (ret)
4307                 return ret;
4308
4309         if (neg)
4310                 tracer_flags->val &= ~opts->bit;
4311         else
4312                 tracer_flags->val |= opts->bit;
4313         return 0;
4314 }
4315
4316 /* Try to assign a tracer specific option */
4317 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4318 {
4319         struct tracer *trace = tr->current_trace;
4320         struct tracer_flags *tracer_flags = trace->flags;
4321         struct tracer_opt *opts = NULL;
4322         int i;
4323
4324         for (i = 0; tracer_flags->opts[i].name; i++) {
4325                 opts = &tracer_flags->opts[i];
4326
4327                 if (strcmp(cmp, opts->name) == 0)
4328                         return __set_tracer_option(tr, trace->flags, opts, neg);
4329         }
4330
4331         return -EINVAL;
4332 }
4333
4334 /* Some tracers require overwrite to stay enabled */
4335 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4336 {
4337         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4338                 return -1;
4339
4340         return 0;
4341 }
4342
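     /*
      * Set or clear a trace option bit, giving the current tracer a chance
      * to veto the change, and apply any side effects (cmdline/tgid
      * recording, fork following, overwrite mode, trace_printk).
      */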
4343 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4344 {
4345         /* do nothing if flag is already set */
4346         if (!!(tr->trace_flags & mask) == !!enabled)
4347                 return 0;
4348
4349         /* Give the tracer a chance to approve the change */
4350         if (tr->current_trace->flag_changed)
4351                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4352                         return -EINVAL;
4353
4354         if (enabled)
4355                 tr->trace_flags |= mask;
4356         else
4357                 tr->trace_flags &= ~mask;
4358
4359         if (mask == TRACE_ITER_RECORD_CMD)
4360                 trace_event_enable_cmd_record(enabled);
4361
4362         if (mask == TRACE_ITER_RECORD_TGID) {
4363                 if (!tgid_map)
4364                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4365                                            sizeof(*tgid_map),
4366                                            GFP_KERNEL);
4367                 if (!tgid_map) {
4368                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4369                         return -ENOMEM;
4370                 }
4371
4372                 trace_event_enable_tgid_record(enabled);
4373         }
4374
4375         if (mask == TRACE_ITER_EVENT_FORK)
4376                 trace_event_follow_fork(tr, enabled);
4377
4378         if (mask == TRACE_ITER_FUNC_FORK)
4379                 ftrace_pid_follow_fork(tr, enabled);
4380
4381         if (mask == TRACE_ITER_OVERWRITE) {
4382                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4383 #ifdef CONFIG_TRACER_MAX_TRACE
4384                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4385 #endif
4386         }
4387
4388         if (mask == TRACE_ITER_PRINTK) {
4389                 trace_printk_start_stop_comm(enabled);
4390                 trace_printk_control(enabled);
4391         }
4392
4393         return 0;
4394 }
4395
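     /*
      * Parse a single option token (optionally prefixed with "no"), trying
      * the core trace options first and tracer-specific options second.
      */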
4396 static int trace_set_options(struct trace_array *tr, char *option)
4397 {
4398         char *cmp;
4399         int neg = 0;
4400         int ret;
4401         size_t orig_len = strlen(option);
4402
4403         cmp = strstrip(option);
4404
4405         if (strncmp(cmp, "no", 2) == 0) {
4406                 neg = 1;
4407                 cmp += 2;
4408         }
4409
4410         mutex_lock(&trace_types_lock);
4411
4412         ret = match_string(trace_options, -1, cmp);
4413         /* If no option could be set, test the specific tracer options */
4414         if (ret < 0)
4415                 ret = set_tracer_option(tr, cmp, neg);
4416         else
4417                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4418
4419         mutex_unlock(&trace_types_lock);
4420
4421         /*
4422          * If the first trailing whitespace is replaced with '\0' by strstrip,
4423          * turn it back into a space.
4424          */
4425         if (orig_len > strlen(option))
4426                 option[strlen(option)] = ' ';
4427
4428         return ret;
4429 }
4430
4431 static void __init apply_trace_boot_options(void)
4432 {
4433         char *buf = trace_boot_options_buf;
4434         char *option;
4435
4436         while (true) {
4437                 option = strsep(&buf, ",");
4438
4439                 if (!option)
4440                         break;
4441
4442                 if (*option)
4443                         trace_set_options(&global_trace, option);
4444
4445                 /* Put back the comma to allow this to be called again */
4446                 if (buf)
4447                         *(buf - 1) = ',';
4448         }
4449 }
4450
4451 static ssize_t
4452 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4453                         size_t cnt, loff_t *ppos)
4454 {
4455         struct seq_file *m = filp->private_data;
4456         struct trace_array *tr = m->private;
4457         char buf[64];
4458         int ret;
4459
4460         if (cnt >= sizeof(buf))
4461                 return -EINVAL;
4462
4463         if (copy_from_user(buf, ubuf, cnt))
4464                 return -EFAULT;
4465
4466         buf[cnt] = 0;
4467
4468         ret = trace_set_options(tr, buf);
4469         if (ret < 0)
4470                 return ret;
4471
4472         *ppos += cnt;
4473
4474         return cnt;
4475 }
4476
4477 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4478 {
4479         struct trace_array *tr = inode->i_private;
4480         int ret;
4481
4482         if (tracing_disabled)
4483                 return -ENODEV;
4484
4485         if (trace_array_get(tr) < 0)
4486                 return -ENODEV;
4487
4488         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4489         if (ret < 0)
4490                 trace_array_put(tr);
4491
4492         return ret;
4493 }
4494
4495 static const struct file_operations tracing_iter_fops = {
4496         .open           = tracing_trace_options_open,
4497         .read           = seq_read,
4498         .llseek         = seq_lseek,
4499         .release        = tracing_single_release_tr,
4500         .write          = tracing_trace_options_write,
4501 };
4502
4503 static const char readme_msg[] =
4504         "tracing mini-HOWTO:\n\n"
4505         "# echo 0 > tracing_on : quick way to disable tracing\n"
4506         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4507         " Important files:\n"
4508         "  trace\t\t\t- The static contents of the buffer\n"
4509         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4510         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4511         "  current_tracer\t- function and latency tracers\n"
4512         "  available_tracers\t- list of configured tracers for current_tracer\n"
4513         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4514         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4515         "  trace_clock\t\t-change the clock used to order events\n"
4516         "       local:   Per cpu clock but may not be synced across CPUs\n"
4517         "      global:   Synced across CPUs but slows tracing down.\n"
4518         "     counter:   Not a clock, but just an increment\n"
4519         "      uptime:   Jiffy counter from time of boot\n"
4520         "        perf:   Same clock that perf events use\n"
4521 #ifdef CONFIG_X86_64
4522         "     x86-tsc:   TSC cycle counter\n"
4523 #endif
4524         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4525         "       delta:   Delta difference against a buffer-wide timestamp\n"
4526         "    absolute:   Absolute (standalone) timestamp\n"
4527         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4528         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4529         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4530         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4531         "\t\t\t  Remove sub-buffer with rmdir\n"
4532         "  trace_options\t\t- Set format or modify how tracing happens\n"
4533         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4534         "\t\t\t  option name\n"
4535         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4536 #ifdef CONFIG_DYNAMIC_FTRACE
4537         "\n  available_filter_functions - list of functions that can be filtered on\n"
4538         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4539         "\t\t\t  functions\n"
4540         "\t     accepts: func_full_name or glob-matching-pattern\n"
4541         "\t     modules: Can select a group via module\n"
4542         "\t      Format: :mod:<module-name>\n"
4543         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4544         "\t    triggers: a command to perform when function is hit\n"
4545         "\t      Format: <function>:<trigger>[:count]\n"
4546         "\t     trigger: traceon, traceoff\n"
4547         "\t\t      enable_event:<system>:<event>\n"
4548         "\t\t      disable_event:<system>:<event>\n"
4549 #ifdef CONFIG_STACKTRACE
4550         "\t\t      stacktrace\n"
4551 #endif
4552 #ifdef CONFIG_TRACER_SNAPSHOT
4553         "\t\t      snapshot\n"
4554 #endif
4555         "\t\t      dump\n"
4556         "\t\t      cpudump\n"
4557         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4558         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4559         "\t     The first one will disable tracing every time do_fault is hit\n"
4560         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4561         "\t       The first time do trap is hit and it disables tracing, the\n"
4562         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4563         "\t       the counter will not decrement. It only decrements when the\n"
4564         "\t       trigger did work\n"
4565         "\t     To remove trigger without count:\n"
4566         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4567         "\t     To remove trigger with a count:\n"
4568         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4569         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4570         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4571         "\t    modules: Can select a group via module command :mod:\n"
4572         "\t    Does not accept triggers\n"
4573 #endif /* CONFIG_DYNAMIC_FTRACE */
4574 #ifdef CONFIG_FUNCTION_TRACER
4575         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4576         "\t\t    (function)\n"
4577 #endif
4578 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4579         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4580         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4581         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4582 #endif
4583 #ifdef CONFIG_TRACER_SNAPSHOT
4584         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4585         "\t\t\t  snapshot buffer. Read the contents for more\n"
4586         "\t\t\t  information\n"
4587 #endif
4588 #ifdef CONFIG_STACK_TRACER
4589         "  stack_trace\t\t- Shows the max stack trace when active\n"
4590         "  stack_max_size\t- Shows current max stack size that was traced\n"
4591         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4592         "\t\t\t  new trace)\n"
4593 #ifdef CONFIG_DYNAMIC_FTRACE
4594         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4595         "\t\t\t  traces\n"
4596 #endif
4597 #endif /* CONFIG_STACK_TRACER */
4598 #ifdef CONFIG_KPROBE_EVENTS
4599         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4600         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4601 #endif
4602 #ifdef CONFIG_UPROBE_EVENTS
4603         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4604         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4605 #endif
4606 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4607         "\t  accepts: event-definitions (one definition per line)\n"
4608         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4609         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4610         "\t           -:[<group>/]<event>\n"
4611 #ifdef CONFIG_KPROBE_EVENTS
4612         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4613   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4614 #endif
4615 #ifdef CONFIG_UPROBE_EVENTS
4616         "\t    place: <path>:<offset>\n"
4617 #endif
4618         "\t     args: <name>=fetcharg[:type]\n"
4619         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4620         "\t           $stack<index>, $stack, $retval, $comm\n"
4621         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4622         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4623 #endif
4624         "  events/\t\t- Directory containing all trace event subsystems:\n"
4625         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4626         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4627         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4628         "\t\t\t  events\n"
4629         "      filter\t\t- If set, only events passing filter are traced\n"
4630         "  events/<system>/<event>/\t- Directory containing control files for\n"
4631         "\t\t\t  <event>:\n"
4632         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4633         "      filter\t\t- If set, only events passing filter are traced\n"
4634         "      trigger\t\t- If set, a command to perform when event is hit\n"
4635         "\t    Format: <trigger>[:count][if <filter>]\n"
4636         "\t   trigger: traceon, traceoff\n"
4637         "\t            enable_event:<system>:<event>\n"
4638         "\t            disable_event:<system>:<event>\n"
4639 #ifdef CONFIG_HIST_TRIGGERS
4640         "\t            enable_hist:<system>:<event>\n"
4641         "\t            disable_hist:<system>:<event>\n"
4642 #endif
4643 #ifdef CONFIG_STACKTRACE
4644         "\t\t    stacktrace\n"
4645 #endif
4646 #ifdef CONFIG_TRACER_SNAPSHOT
4647         "\t\t    snapshot\n"
4648 #endif
4649 #ifdef CONFIG_HIST_TRIGGERS
4650         "\t\t    hist (see below)\n"
4651 #endif
4652         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4653         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4654         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4655         "\t                  events/block/block_unplug/trigger\n"
4656         "\t   The first disables tracing every time block_unplug is hit.\n"
4657         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4658         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4659         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4660         "\t   Like function triggers, the counter is only decremented if it\n"
4661         "\t    enabled or disabled tracing.\n"
4662         "\t   To remove a trigger without a count:\n"
4663         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4664         "\t   To remove a trigger with a count:\n"
4665         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4666         "\t   Filters can be ignored when removing a trigger.\n"
4667 #ifdef CONFIG_HIST_TRIGGERS
4668         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4669         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4670         "\t            [:values=<field1[,field2,...]>]\n"
4671         "\t            [:sort=<field1[,field2,...]>]\n"
4672         "\t            [:size=#entries]\n"
4673         "\t            [:pause][:continue][:clear]\n"
4674         "\t            [:name=histname1]\n"
4675         "\t            [if <filter>]\n\n"
4676         "\t    When a matching event is hit, an entry is added to a hash\n"
4677         "\t    table using the key(s) and value(s) named, and the value of a\n"
4678         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4679         "\t    correspond to fields in the event's format description.  Keys\n"
4680         "\t    can be any field, or the special string 'stacktrace'.\n"
4681         "\t    Compound keys consisting of up to two fields can be specified\n"
4682         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4683         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4684         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4685         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4686         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4687         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4688         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4689         "\t    its histogram data will be shared with other triggers of the\n"
4690         "\t    same name, and trigger hits will update this common data.\n\n"
4691         "\t    Reading the 'hist' file for the event will dump the hash\n"
4692         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4693         "\t    triggers attached to an event, there will be a table for each\n"
4694         "\t    trigger in the output.  The table displayed for a named\n"
4695         "\t    trigger will be the same as any other instance having the\n"
4696         "\t    same name.  The default format used to display a given field\n"
4697         "\t    can be modified by appending any of the following modifiers\n"
4698         "\t    to the field name, as applicable:\n\n"
4699         "\t            .hex        display a number as a hex value\n"
4700         "\t            .sym        display an address as a symbol\n"
4701         "\t            .sym-offset display an address as a symbol and offset\n"
4702         "\t            .execname   display a common_pid as a program name\n"
4703         "\t            .syscall    display a syscall id as a syscall name\n"
4704         "\t            .log2       display log2 value rather than raw number\n"
4705         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4706         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4707         "\t    trigger or to start a hist trigger but not log any events\n"
4708         "\t    until told to do so.  'continue' can be used to start or\n"
4709         "\t    restart a paused hist trigger.\n\n"
4710         "\t    The 'clear' parameter will clear the contents of a running\n"
4711         "\t    hist trigger and leave its current paused/active state\n"
4712         "\t    unchanged.\n\n"
4713         "\t    The enable_hist and disable_hist triggers can be used to\n"
4714         "\t    have one event conditionally start and stop another event's\n"
4715         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4716         "\t    the enable_event and disable_event triggers.\n"
4717 #endif
4718 ;
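/*
 * Editorial usage sketch (not part of the original source): the hist
 * trigger syntax documented in readme_msg above is driven entirely by
 * writes to a per-event 'trigger' file and reads of the sibling 'hist'
 * file in tracefs.  The minimal user-space program below attaches a
 * histogram keyed on call_site to the kmem:kmalloc event and dumps it
 * after a short sleep that lets some events accumulate.  The mount point
 * /sys/kernel/tracing and the kmem:kmalloc event with its
 * call_site/bytes_req fields are assumptions used only for illustration.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *trigger =
 *			"/sys/kernel/tracing/events/kmem/kmalloc/trigger";
 *		const char *hist =
 *			"/sys/kernel/tracing/events/kmem/kmalloc/hist";
 *		const char cmd[] = "hist:keys=call_site.sym:values=bytes_req";
 *		char buf[4096];
 *		ssize_t n;
 *		int fd;
 *
 *		fd = open(trigger, O_WRONLY);
 *		if (fd < 0 || write(fd, cmd, strlen(cmd)) < 0)
 *			return 1;
 *		close(fd);
 *
 *		sleep(1);
 *
 *		fd = open(hist, O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */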
4719
4720 static ssize_t
4721 tracing_readme_read(struct file *filp, char __user *ubuf,
4722                        size_t cnt, loff_t *ppos)
4723 {
4724         return simple_read_from_buffer(ubuf, cnt, ppos,
4725                                         readme_msg, strlen(readme_msg));
4726 }
4727
4728 static const struct file_operations tracing_readme_fops = {
4729         .open           = tracing_open_generic,
4730         .read           = tracing_readme_read,
4731         .llseek         = generic_file_llseek,
4732 };
4733
4734 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4735 {
4736         int *ptr = v;
4737
4738         if (*pos || m->count)
4739                 ptr++;
4740
4741         (*pos)++;
4742
4743         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4744                 if (trace_find_tgid(*ptr))
4745                         return ptr;
4746         }
4747
4748         return NULL;
4749 }
4750
4751 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4752 {
4753         void *v;
4754         loff_t l = 0;
4755
4756         if (!tgid_map)
4757                 return NULL;
4758
4759         v = &tgid_map[0];
4760         while (l <= *pos) {
4761                 v = saved_tgids_next(m, v, &l);
4762                 if (!v)
4763                         return NULL;
4764         }
4765
4766         return v;
4767 }
4768
4769 static void saved_tgids_stop(struct seq_file *m, void *v)
4770 {
4771 }
4772
4773 static int saved_tgids_show(struct seq_file *m, void *v)
4774 {
4775         int pid = (int *)v - tgid_map;
4776
4777         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4778         return 0;
4779 }
4780
4781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4782         .start          = saved_tgids_start,
4783         .stop           = saved_tgids_stop,
4784         .next           = saved_tgids_next,
4785         .show           = saved_tgids_show,
4786 };
4787
4788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4789 {
4790         if (tracing_disabled)
4791                 return -ENODEV;
4792
4793         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4794 }
4795
4796
4797 static const struct file_operations tracing_saved_tgids_fops = {
4798         .open           = tracing_saved_tgids_open,
4799         .read           = seq_read,
4800         .llseek         = seq_lseek,
4801         .release        = seq_release,
4802 };
4803
4804 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4805 {
4806         unsigned int *ptr = v;
4807
4808         if (*pos || m->count)
4809                 ptr++;
4810
4811         (*pos)++;
4812
4813         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4814              ptr++) {
4815                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4816                         continue;
4817
4818                 return ptr;
4819         }
4820
4821         return NULL;
4822 }
4823
4824 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4825 {
4826         void *v;
4827         loff_t l = 0;
4828
4829         preempt_disable();
4830         arch_spin_lock(&trace_cmdline_lock);
4831
4832         v = &savedcmd->map_cmdline_to_pid[0];
4833         while (l <= *pos) {
4834                 v = saved_cmdlines_next(m, v, &l);
4835                 if (!v)
4836                         return NULL;
4837         }
4838
4839         return v;
4840 }
4841
4842 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4843 {
4844         arch_spin_unlock(&trace_cmdline_lock);
4845         preempt_enable();
4846 }
4847
4848 static int saved_cmdlines_show(struct seq_file *m, void *v)
4849 {
4850         char buf[TASK_COMM_LEN];
4851         unsigned int *pid = v;
4852
4853         __trace_find_cmdline(*pid, buf);
4854         seq_printf(m, "%d %s\n", *pid, buf);
4855         return 0;
4856 }
4857
4858 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4859         .start          = saved_cmdlines_start,
4860         .next           = saved_cmdlines_next,
4861         .stop           = saved_cmdlines_stop,
4862         .show           = saved_cmdlines_show,
4863 };
4864
4865 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4866 {
4867         if (tracing_disabled)
4868                 return -ENODEV;
4869
4870         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4871 }
4872
4873 static const struct file_operations tracing_saved_cmdlines_fops = {
4874         .open           = tracing_saved_cmdlines_open,
4875         .read           = seq_read,
4876         .llseek         = seq_lseek,
4877         .release        = seq_release,
4878 };
4879
4880 static ssize_t
4881 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4882                                  size_t cnt, loff_t *ppos)
4883 {
4884         char buf[64];
4885         int r;
4886
4887         arch_spin_lock(&trace_cmdline_lock);
4888         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4889         arch_spin_unlock(&trace_cmdline_lock);
4890
4891         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4892 }
4893
4894 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4895 {
4896         kfree(s->saved_cmdlines);
4897         kfree(s->map_cmdline_to_pid);
4898         kfree(s);
4899 }
4900
4901 static int tracing_resize_saved_cmdlines(unsigned int val)
4902 {
4903         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4904
4905         s = kmalloc(sizeof(*s), GFP_KERNEL);
4906         if (!s)
4907                 return -ENOMEM;
4908
4909         if (allocate_cmdlines_buffer(val, s) < 0) {
4910                 kfree(s);
4911                 return -ENOMEM;
4912         }
4913
4914         arch_spin_lock(&trace_cmdline_lock);
4915         savedcmd_temp = savedcmd;
4916         savedcmd = s;
4917         arch_spin_unlock(&trace_cmdline_lock);
4918         free_saved_cmdlines_buffer(savedcmd_temp);
4919
4920         return 0;
4921 }
4922
4923 static ssize_t
4924 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4925                                   size_t cnt, loff_t *ppos)
4926 {
4927         unsigned long val;
4928         int ret;
4929
4930         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4931         if (ret)
4932                 return ret;
4933
4934         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4935         if (!val || val > PID_MAX_DEFAULT)
4936                 return -EINVAL;
4937
4938         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4939         if (ret < 0)
4940                 return ret;
4941
4942         *ppos += cnt;
4943
4944         return cnt;
4945 }
4946
4947 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4948         .open           = tracing_open_generic,
4949         .read           = tracing_saved_cmdlines_size_read,
4950         .write          = tracing_saved_cmdlines_size_write,
4951 };
4952
4953 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4954 static union trace_eval_map_item *
4955 update_eval_map(union trace_eval_map_item *ptr)
4956 {
4957         if (!ptr->map.eval_string) {
4958                 if (ptr->tail.next) {
4959                         ptr = ptr->tail.next;
4960                         /* Set ptr to the next real item (skip head) */
4961                         ptr++;
4962                 } else
4963                         return NULL;
4964         }
4965         return ptr;
4966 }
4967
4968 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4969 {
4970         union trace_eval_map_item *ptr = v;
4971
4972         /*
4973          * Paranoid! If ptr points to end, we don't want to increment past it.
4974          * This really should never happen.
4975          */
4976         ptr = update_eval_map(ptr);
4977         if (WARN_ON_ONCE(!ptr))
4978                 return NULL;
4979
4980         ptr++;
4981
4982         (*pos)++;
4983
4984         ptr = update_eval_map(ptr);
4985
4986         return ptr;
4987 }
4988
4989 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4990 {
4991         union trace_eval_map_item *v;
4992         loff_t l = 0;
4993
4994         mutex_lock(&trace_eval_mutex);
4995
4996         v = trace_eval_maps;
4997         if (v)
4998                 v++;
4999
5000         while (v && l < *pos) {
5001                 v = eval_map_next(m, v, &l);
5002         }
5003
5004         return v;
5005 }
5006
5007 static void eval_map_stop(struct seq_file *m, void *v)
5008 {
5009         mutex_unlock(&trace_eval_mutex);
5010 }
5011
5012 static int eval_map_show(struct seq_file *m, void *v)
5013 {
5014         union trace_eval_map_item *ptr = v;
5015
5016         seq_printf(m, "%s %ld (%s)\n",
5017                    ptr->map.eval_string, ptr->map.eval_value,
5018                    ptr->map.system);
5019
5020         return 0;
5021 }
5022
5023 static const struct seq_operations tracing_eval_map_seq_ops = {
5024         .start          = eval_map_start,
5025         .next           = eval_map_next,
5026         .stop           = eval_map_stop,
5027         .show           = eval_map_show,
5028 };
5029
5030 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5031 {
5032         if (tracing_disabled)
5033                 return -ENODEV;
5034
5035         return seq_open(filp, &tracing_eval_map_seq_ops);
5036 }
5037
5038 static const struct file_operations tracing_eval_map_fops = {
5039         .open           = tracing_eval_map_open,
5040         .read           = seq_read,
5041         .llseek         = seq_lseek,
5042         .release        = seq_release,
5043 };
5044
5045 static inline union trace_eval_map_item *
5046 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5047 {
5048         /* Return tail of array given the head */
5049         return ptr + ptr->head.length + 1;
5050 }
5051
5052 static void
5053 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5054                            int len)
5055 {
5056         struct trace_eval_map **stop;
5057         struct trace_eval_map **map;
5058         union trace_eval_map_item *map_array;
5059         union trace_eval_map_item *ptr;
5060
5061         stop = start + len;
5062
5063         /*
5064          * The trace_eval_maps list contains the maps plus a head and a tail
5065          * item, where the head holds the module and the length of the array,
5066          * and the tail holds a pointer to the next array in the list.
5067          */
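        /*
         * Editorial illustration (added for clarity): the array allocated
         * below holds len + 2 union trace_eval_map_item entries laid out as
         *
         *   [ head (mod, length) | map 0 | map 1 | ... | map len-1 | tail (next) ]
         *
         * trace_eval_jmp_to_tail() uses head.length to hop from the head
         * entry to the tail entry, which is how the per-module arrays are
         * chained together.
         */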
5068         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5069         if (!map_array) {
5070                 pr_warn("Unable to allocate trace eval mapping\n");
5071                 return;
5072         }
5073
5074         mutex_lock(&trace_eval_mutex);
5075
5076         if (!trace_eval_maps)
5077                 trace_eval_maps = map_array;
5078         else {
5079                 ptr = trace_eval_maps;
5080                 for (;;) {
5081                         ptr = trace_eval_jmp_to_tail(ptr);
5082                         if (!ptr->tail.next)
5083                                 break;
5084                         ptr = ptr->tail.next;
5085
5086                 }
5087                 ptr->tail.next = map_array;
5088         }
5089         map_array->head.mod = mod;
5090         map_array->head.length = len;
5091         map_array++;
5092
5093         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5094                 map_array->map = **map;
5095                 map_array++;
5096         }
5097         memset(map_array, 0, sizeof(*map_array));
5098
5099         mutex_unlock(&trace_eval_mutex);
5100 }
5101
5102 static void trace_create_eval_file(struct dentry *d_tracer)
5103 {
5104         trace_create_file("eval_map", 0444, d_tracer,
5105                           NULL, &tracing_eval_map_fops);
5106 }
5107
5108 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5109 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5110 static inline void trace_insert_eval_map_file(struct module *mod,
5111                               struct trace_eval_map **start, int len) { }
5112 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5113
5114 static void trace_insert_eval_map(struct module *mod,
5115                                   struct trace_eval_map **start, int len)
5116 {
5117         struct trace_eval_map **map;
5118
5119         if (len <= 0)
5120                 return;
5121
5122         map = start;
5123
5124         trace_event_eval_update(map, len);
5125
5126         trace_insert_eval_map_file(mod, start, len);
5127 }
5128
5129 static ssize_t
5130 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5131                        size_t cnt, loff_t *ppos)
5132 {
5133         struct trace_array *tr = filp->private_data;
5134         char buf[MAX_TRACER_SIZE+2];
5135         int r;
5136
5137         mutex_lock(&trace_types_lock);
5138         r = sprintf(buf, "%s\n", tr->current_trace->name);
5139         mutex_unlock(&trace_types_lock);
5140
5141         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5142 }
5143
5144 int tracer_init(struct tracer *t, struct trace_array *tr)
5145 {
5146         tracing_reset_online_cpus(&tr->trace_buffer);
5147         return t->init(tr);
5148 }
5149
5150 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5151 {
5152         int cpu;
5153
5154         for_each_tracing_cpu(cpu)
5155                 per_cpu_ptr(buf->data, cpu)->entries = val;
5156 }
5157
5158 #ifdef CONFIG_TRACER_MAX_TRACE
5159 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5160 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5161                                         struct trace_buffer *size_buf, int cpu_id)
5162 {
5163         int cpu, ret = 0;
5164
5165         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5166                 for_each_tracing_cpu(cpu) {
5167                         ret = ring_buffer_resize(trace_buf->buffer,
5168                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5169                         if (ret < 0)
5170                                 break;
5171                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5172                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5173                 }
5174         } else {
5175                 ret = ring_buffer_resize(trace_buf->buffer,
5176                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5177                 if (ret == 0)
5178                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5179                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5180         }
5181
5182         return ret;
5183 }
5184 #endif /* CONFIG_TRACER_MAX_TRACE */
5185
5186 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5187                                         unsigned long size, int cpu)
5188 {
5189         int ret;
5190
5191         /*
5192          * If kernel or user changes the size of the ring buffer
5193          * we use the size that was given, and we can forget about
5194          * expanding it later.
5195          */
5196         ring_buffer_expanded = true;
5197
5198         /* May be called before buffers are initialized */
5199         if (!tr->trace_buffer.buffer)
5200                 return 0;
5201
5202         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5203         if (ret < 0)
5204                 return ret;
5205
5206 #ifdef CONFIG_TRACER_MAX_TRACE
5207         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5208             !tr->current_trace->use_max_tr)
5209                 goto out;
5210
5211         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5212         if (ret < 0) {
5213                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5214                                                      &tr->trace_buffer, cpu);
5215                 if (r < 0) {
5216                         /*
5217                          * AARGH! We are left with a max buffer of a
5218                          * different size!
5219                          * The max buffer is our "snapshot" buffer.
5220                          * When a tracer needs a snapshot (one of the
5221                          * latency tracers), it swaps the max buffer
5222                          * with the saved snapshot. We succeeded in
5223                          * updating the size of the main buffer, but failed to
5224                          * update the size of the max buffer. But when we tried
5225                          * to reset the main buffer to the original size, we
5226                          * failed there too. This is very unlikely to
5227                          * happen, but if it does, warn and kill all
5228                          * tracing.
5229                          */
5230                         WARN_ON(1);
5231                         tracing_disabled = 1;
5232                 }
5233                 return ret;
5234         }
5235
5236         if (cpu == RING_BUFFER_ALL_CPUS)
5237                 set_buffer_entries(&tr->max_buffer, size);
5238         else
5239                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5240
5241  out:
5242 #endif /* CONFIG_TRACER_MAX_TRACE */
5243
5244         if (cpu == RING_BUFFER_ALL_CPUS)
5245                 set_buffer_entries(&tr->trace_buffer, size);
5246         else
5247                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5248
5249         return ret;
5250 }
5251
5252 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5253                                           unsigned long size, int cpu_id)
5254 {
5255         int ret = size;
5256
5257         mutex_lock(&trace_types_lock);
5258
5259         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5260                 /* make sure this cpu is enabled in the mask */
5261                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5262                         ret = -EINVAL;
5263                         goto out;
5264                 }
5265         }
5266
5267         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5268         if (ret < 0)
5269                 ret = -ENOMEM;
5270
5271 out:
5272         mutex_unlock(&trace_types_lock);
5273
5274         return ret;
5275 }
5276
5277
5278 /**
5279  * tracing_update_buffers - used by tracing facility to expand ring buffers
5280  *
5281  * To save memory when tracing is configured into the kernel but never
5282  * used, the ring buffers are set to a minimum size. But once a user
5283  * starts to use the tracing facility, then they need to grow to their
5284  * default size.
5285  *
5286  * This function is to be called when a tracer is about to be used.
5287  */
5288 int tracing_update_buffers(void)
5289 {
5290         int ret = 0;
5291
5292         mutex_lock(&trace_types_lock);
5293         if (!ring_buffer_expanded)
5294                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5295                                                 RING_BUFFER_ALL_CPUS);
5296         mutex_unlock(&trace_types_lock);
5297
5298         return ret;
5299 }
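/*
 * Editorial note (added for clarity): callers that are about to start
 * using the trace buffers, such as the event-enable path, are expected
 * to call this first and bail out on failure.  A minimal sketch, not a
 * verbatim quote of any call site:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */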
5300
5301 struct trace_option_dentry;
5302
5303 static void
5304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5305
5306 /*
5307  * Used to clear out the tracer before deletion of an instance.
5308  * Must have trace_types_lock held.
5309  */
5310 static void tracing_set_nop(struct trace_array *tr)
5311 {
5312         if (tr->current_trace == &nop_trace)
5313                 return;
5314
5315         tr->current_trace->enabled--;
5316
5317         if (tr->current_trace->reset)
5318                 tr->current_trace->reset(tr);
5319
5320         tr->current_trace = &nop_trace;
5321 }
5322
5323 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5324 {
5325         /* Only enable if the directory has been created already. */
5326         if (!tr->dir)
5327                 return;
5328
5329         create_trace_option_files(tr, t);
5330 }
5331
5332 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5333 {
5334         struct tracer *t;
5335 #ifdef CONFIG_TRACER_MAX_TRACE
5336         bool had_max_tr;
5337 #endif
5338         int ret = 0;
5339
5340         mutex_lock(&trace_types_lock);
5341
5342         if (!ring_buffer_expanded) {
5343                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5344                                                 RING_BUFFER_ALL_CPUS);
5345                 if (ret < 0)
5346                         goto out;
5347                 ret = 0;
5348         }
5349
5350         for (t = trace_types; t; t = t->next) {
5351                 if (strcmp(t->name, buf) == 0)
5352                         break;
5353         }
5354         if (!t) {
5355                 ret = -EINVAL;
5356                 goto out;
5357         }
5358         if (t == tr->current_trace)
5359                 goto out;
5360
5361         /* Some tracers won't work on kernel command line */
5362         if (system_state < SYSTEM_RUNNING && t->noboot) {
5363                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5364                         t->name);
5365                 goto out;
5366         }
5367
5368         /* Some tracers are only allowed for the top level buffer */
5369         if (!trace_ok_for_array(t, tr)) {
5370                 ret = -EINVAL;
5371                 goto out;
5372         }
5373
5374         /* If trace pipe files are being read, we can't change the tracer */
5375         if (tr->current_trace->ref) {
5376                 ret = -EBUSY;
5377                 goto out;
5378         }
5379
5380         trace_branch_disable();
5381
5382         tr->current_trace->enabled--;
5383
5384         if (tr->current_trace->reset)
5385                 tr->current_trace->reset(tr);
5386
5387         /* Current trace needs to be nop_trace before synchronize_sched */
5388         tr->current_trace = &nop_trace;
5389
5390 #ifdef CONFIG_TRACER_MAX_TRACE
5391         had_max_tr = tr->allocated_snapshot;
5392
5393         if (had_max_tr && !t->use_max_tr) {
5394                 /*
5395                  * We need to make sure that the update_max_tr sees that
5396                  * current_trace changed to nop_trace to keep it from
5397                  * swapping the buffers after we resize it.
5398                  * update_max_tr() is called with interrupts disabled,
5399                  * so a synchronize_sched() is sufficient.
5400                  */
5401                 synchronize_sched();
5402                 free_snapshot(tr);
5403         }
5404 #endif
5405
5406 #ifdef CONFIG_TRACER_MAX_TRACE
5407         if (t->use_max_tr && !had_max_tr) {
5408                 ret = tracing_alloc_snapshot_instance(tr);
5409                 if (ret < 0)
5410                         goto out;
5411         }
5412 #endif
5413
5414         if (t->init) {
5415                 ret = tracer_init(t, tr);
5416                 if (ret)
5417                         goto out;
5418         }
5419
5420         tr->current_trace = t;
5421         tr->current_trace->enabled++;
5422         trace_branch_enable(tr);
5423  out:
5424         mutex_unlock(&trace_types_lock);
5425
5426         return ret;
5427 }
5428
5429 static ssize_t
5430 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5431                         size_t cnt, loff_t *ppos)
5432 {
5433         struct trace_array *tr = filp->private_data;
5434         char buf[MAX_TRACER_SIZE+1];
5435         int i;
5436         size_t ret;
5437         int err;
5438
5439         ret = cnt;
5440
5441         if (cnt > MAX_TRACER_SIZE)
5442                 cnt = MAX_TRACER_SIZE;
5443
5444         if (copy_from_user(buf, ubuf, cnt))
5445                 return -EFAULT;
5446
5447         buf[cnt] = 0;
5448
5449         /* strip trailing whitespace */
5450         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5451                 buf[i] = 0;
5452
5453         err = tracing_set_tracer(tr, buf);
5454         if (err)
5455                 return err;
5456
5457         *ppos += ret;
5458
5459         return ret;
5460 }
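/*
 * Editorial usage sketch (not part of the original source): from user
 * space, selecting a tracer is a plain write of its name to the
 * 'current_tracer' file; trailing whitespace is stripped above.  The
 * tracefs mount point and the availability of the "function" tracer are
 * assumptions used only for illustration.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char name[] = "function";
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, name, strlen(name)) < 0)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */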
5461
5462 static ssize_t
5463 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5464                    size_t cnt, loff_t *ppos)
5465 {
5466         char buf[64];
5467         int r;
5468
5469         r = snprintf(buf, sizeof(buf), "%ld\n",
5470                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5471         if (r > sizeof(buf))
5472                 r = sizeof(buf);
5473         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5474 }
5475
5476 static ssize_t
5477 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5478                     size_t cnt, loff_t *ppos)
5479 {
5480         unsigned long val;
5481         int ret;
5482
5483         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5484         if (ret)
5485                 return ret;
5486
5487         *ptr = val * 1000;
5488
5489         return cnt;
5490 }
5491
5492 static ssize_t
5493 tracing_thresh_read(struct file *filp, char __user *ubuf,
5494                     size_t cnt, loff_t *ppos)
5495 {
5496         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5497 }
5498
5499 static ssize_t
5500 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5501                      size_t cnt, loff_t *ppos)
5502 {
5503         struct trace_array *tr = filp->private_data;
5504         int ret;
5505
5506         mutex_lock(&trace_types_lock);
5507         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5508         if (ret < 0)
5509                 goto out;
5510
5511         if (tr->current_trace->update_thresh) {
5512                 ret = tr->current_trace->update_thresh(tr);
5513                 if (ret < 0)
5514                         goto out;
5515         }
5516
5517         ret = cnt;
5518 out:
5519         mutex_unlock(&trace_types_lock);
5520
5521         return ret;
5522 }
5523
5524 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5525
5526 static ssize_t
5527 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5528                      size_t cnt, loff_t *ppos)
5529 {
5530         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5531 }
5532
5533 static ssize_t
5534 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5535                       size_t cnt, loff_t *ppos)
5536 {
5537         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5538 }
5539
5540 #endif
5541
5542 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5543 {
5544         struct trace_array *tr = inode->i_private;
5545         struct trace_iterator *iter;
5546         int ret = 0;
5547
5548         if (tracing_disabled)
5549                 return -ENODEV;
5550
5551         if (trace_array_get(tr) < 0)
5552                 return -ENODEV;
5553
5554         mutex_lock(&trace_types_lock);
5555
5556         /* create a buffer to store the information to pass to userspace */
5557         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5558         if (!iter) {
5559                 ret = -ENOMEM;
5560                 __trace_array_put(tr);
5561                 goto out;
5562         }
5563
5564         trace_seq_init(&iter->seq);
5565         iter->trace = tr->current_trace;
5566
5567         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5568                 ret = -ENOMEM;
5569                 goto fail;
5570         }
5571
5572         /* trace pipe does not show start of buffer */
5573         cpumask_setall(iter->started);
5574
5575         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5576                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5577
5578         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5579         if (trace_clocks[tr->clock_id].in_ns)
5580                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5581
5582         iter->tr = tr;
5583         iter->trace_buffer = &tr->trace_buffer;
5584         iter->cpu_file = tracing_get_cpu(inode);
5585         mutex_init(&iter->mutex);
5586         filp->private_data = iter;
5587
5588         if (iter->trace->pipe_open)
5589                 iter->trace->pipe_open(iter);
5590
5591         nonseekable_open(inode, filp);
5592
5593         tr->current_trace->ref++;
5594 out:
5595         mutex_unlock(&trace_types_lock);
5596         return ret;
5597
5598 fail:
5599         /* iter->trace points to tr->current_trace and must not be freed */
5600         kfree(iter);
5601         __trace_array_put(tr);
5602         mutex_unlock(&trace_types_lock);
5603         return ret;
5604 }
5605
5606 static int tracing_release_pipe(struct inode *inode, struct file *file)
5607 {
5608         struct trace_iterator *iter = file->private_data;
5609         struct trace_array *tr = inode->i_private;
5610
5611         mutex_lock(&trace_types_lock);
5612
5613         tr->current_trace->ref--;
5614
5615         if (iter->trace->pipe_close)
5616                 iter->trace->pipe_close(iter);
5617
5618         mutex_unlock(&trace_types_lock);
5619
5620         free_cpumask_var(iter->started);
5621         mutex_destroy(&iter->mutex);
5622         kfree(iter);
5623
5624         trace_array_put(tr);
5625
5626         return 0;
5627 }
5628
5629 static __poll_t
5630 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5631 {
5632         struct trace_array *tr = iter->tr;
5633
5634         /* Iterators are static; they should be either filled or empty */
5635         if (trace_buffer_iter(iter, iter->cpu_file))
5636                 return EPOLLIN | EPOLLRDNORM;
5637
5638         if (tr->trace_flags & TRACE_ITER_BLOCK)
5639                 /*
5640                  * Always select as readable when in blocking mode
5641                  */
5642                 return EPOLLIN | EPOLLRDNORM;
5643         else
5644                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5645                                              filp, poll_table);
5646 }
5647
5648 static __poll_t
5649 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5650 {
5651         struct trace_iterator *iter = filp->private_data;
5652
5653         return trace_poll(iter, filp, poll_table);
5654 }
5655
5656 /* Must be called with iter->mutex held. */
5657 static int tracing_wait_pipe(struct file *filp)
5658 {
5659         struct trace_iterator *iter = filp->private_data;
5660         int ret;
5661
5662         while (trace_empty(iter)) {
5663
5664                 if ((filp->f_flags & O_NONBLOCK)) {
5665                         return -EAGAIN;
5666                 }
5667
5668                 /*
5669                  * We block until we read something and tracing is disabled.
5670                  * We still block if tracing is disabled, but we have never
5671                  * read anything. This allows a user to cat this file, and
5672                  * then enable tracing. But after we have read something,
5673                  * we give an EOF when tracing is again disabled.
5674                  *
5675                  * iter->pos will be 0 if we haven't read anything.
5676                  */
5677                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5678                         break;
5679
5680                 mutex_unlock(&iter->mutex);
5681
5682                 ret = wait_on_pipe(iter, false);
5683
5684                 mutex_lock(&iter->mutex);
5685
5686                 if (ret)
5687                         return ret;
5688         }
5689
5690         return 1;
5691 }
5692
5693 /*
5694  * Consumer reader.
5695  */
5696 static ssize_t
5697 tracing_read_pipe(struct file *filp, char __user *ubuf,
5698                   size_t cnt, loff_t *ppos)
5699 {
5700         struct trace_iterator *iter = filp->private_data;
5701         ssize_t sret;
5702
5703         /*
5704          * Avoid more than one consumer on a single file descriptor.
5705          * This is just a matter of trace coherency; the ring buffer itself
5706          * is protected.
5707          */
5708         mutex_lock(&iter->mutex);
5709
5710         /* return any leftover data */
5711         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5712         if (sret != -EBUSY)
5713                 goto out;
5714
5715         trace_seq_init(&iter->seq);
5716
5717         if (iter->trace->read) {
5718                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5719                 if (sret)
5720                         goto out;
5721         }
5722
5723 waitagain:
5724         sret = tracing_wait_pipe(filp);
5725         if (sret <= 0)
5726                 goto out;
5727
5728         /* stop when tracing is finished */
5729         if (trace_empty(iter)) {
5730                 sret = 0;
5731                 goto out;
5732         }
5733
5734         if (cnt >= PAGE_SIZE)
5735                 cnt = PAGE_SIZE - 1;
5736
5737         /* reset all but tr, trace, and overruns */
5738         memset(&iter->seq, 0,
5739                sizeof(struct trace_iterator) -
5740                offsetof(struct trace_iterator, seq));
5741         cpumask_clear(iter->started);
5742         iter->pos = -1;
5743
5744         trace_event_read_lock();
5745         trace_access_lock(iter->cpu_file);
5746         while (trace_find_next_entry_inc(iter) != NULL) {
5747                 enum print_line_t ret;
5748                 int save_len = iter->seq.seq.len;
5749
5750                 ret = print_trace_line(iter);
5751                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5752                         /* don't print partial lines */
5753                         iter->seq.seq.len = save_len;
5754                         break;
5755                 }
5756                 if (ret != TRACE_TYPE_NO_CONSUME)
5757                         trace_consume(iter);
5758
5759                 if (trace_seq_used(&iter->seq) >= cnt)
5760                         break;
5761
5762                 /*
5763                  * The full flag being set means we hit the end of the trace_seq
5764                  * buffer and should have bailed out via the partial-line check
5765                  * above. One of the trace_seq_* functions is not used properly.
5766                  */
5767                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5768                           iter->ent->type);
5769         }
5770         trace_access_unlock(iter->cpu_file);
5771         trace_event_read_unlock();
5772
5773         /* Now copy what we have to the user */
5774         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5775         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5776                 trace_seq_init(&iter->seq);
5777
5778         /*
5779          * If there was nothing to send to user, in spite of consuming trace
5780          * entries, go back to wait for more entries.
5781          */
5782         if (sret == -EBUSY)
5783                 goto waitagain;
5784
5785 out:
5786         mutex_unlock(&iter->mutex);
5787
5788         return sret;
5789 }
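/*
 * Editorial usage sketch (not part of the original source): trace_pipe is
 * a consuming reader.  A reader normally blocks until data arrives (see
 * tracing_wait_pipe() above); O_NONBLOCK turns that into -EAGAIN, and the
 * file can also be waited on with poll(), which lands in
 * tracing_poll_pipe().  The tracefs mount point is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <poll.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct pollfd pfd = { .events = POLLIN };
 *		char buf[4096];
 *		ssize_t n;
 *
 *		pfd.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *		if (pfd.fd < 0)
 *			return 1;
 *		while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *			n = read(pfd.fd, buf, sizeof(buf));
 *			if (n <= 0)
 *				break;
 *			fwrite(buf, 1, n, stdout);
 *		}
 *		close(pfd.fd);
 *		return 0;
 *	}
 */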
5790
5791 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5792                                      unsigned int idx)
5793 {
5794         __free_page(spd->pages[idx]);
5795 }
5796
5797 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5798         .can_merge              = 0,
5799         .confirm                = generic_pipe_buf_confirm,
5800         .release                = generic_pipe_buf_release,
5801         .steal                  = generic_pipe_buf_steal,
5802         .get                    = generic_pipe_buf_get,
5803 };
5804
5805 static size_t
5806 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5807 {
5808         size_t count;
5809         int save_len;
5810         int ret;
5811
5812         /* Seq buffer is page-sized, exactly what we need. */
5813         for (;;) {
5814                 save_len = iter->seq.seq.len;
5815                 ret = print_trace_line(iter);
5816
5817                 if (trace_seq_has_overflowed(&iter->seq)) {
5818                         iter->seq.seq.len = save_len;
5819                         break;
5820                 }
5821
5822                 /*
5823                  * This should not be hit, because it should only
5824                  * be set if the iter->seq overflowed. But check it
5825                  * anyway to be safe.
5826                  */
5827                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5828                         iter->seq.seq.len = save_len;
5829                         break;
5830                 }
5831
5832                 count = trace_seq_used(&iter->seq) - save_len;
5833                 if (rem < count) {
5834                         rem = 0;
5835                         iter->seq.seq.len = save_len;
5836                         break;
5837                 }
5838
5839                 if (ret != TRACE_TYPE_NO_CONSUME)
5840                         trace_consume(iter);
5841                 rem -= count;
5842                 if (!trace_find_next_entry_inc(iter))   {
5843                         rem = 0;
5844                         iter->ent = NULL;
5845                         break;
5846                 }
5847         }
5848
5849         return rem;
5850 }
5851
5852 static ssize_t tracing_splice_read_pipe(struct file *filp,
5853                                         loff_t *ppos,
5854                                         struct pipe_inode_info *pipe,
5855                                         size_t len,
5856                                         unsigned int flags)
5857 {
5858         struct page *pages_def[PIPE_DEF_BUFFERS];
5859         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5860         struct trace_iterator *iter = filp->private_data;
5861         struct splice_pipe_desc spd = {
5862                 .pages          = pages_def,
5863                 .partial        = partial_def,
5864                 .nr_pages       = 0, /* This gets updated below. */
5865                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5866                 .ops            = &tracing_pipe_buf_ops,
5867                 .spd_release    = tracing_spd_release_pipe,
5868         };
5869         ssize_t ret;
5870         size_t rem;
5871         unsigned int i;
5872
5873         if (splice_grow_spd(pipe, &spd))
5874                 return -ENOMEM;
5875
5876         mutex_lock(&iter->mutex);
5877
5878         if (iter->trace->splice_read) {
5879                 ret = iter->trace->splice_read(iter, filp,
5880                                                ppos, pipe, len, flags);
5881                 if (ret)
5882                         goto out_err;
5883         }
5884
5885         ret = tracing_wait_pipe(filp);
5886         if (ret <= 0)
5887                 goto out_err;
5888
5889         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5890                 ret = -EFAULT;
5891                 goto out_err;
5892         }
5893
5894         trace_event_read_lock();
5895         trace_access_lock(iter->cpu_file);
5896
5897         /* Fill as many pages as possible. */
5898         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5899                 spd.pages[i] = alloc_page(GFP_KERNEL);
5900                 if (!spd.pages[i])
5901                         break;
5902
5903                 rem = tracing_fill_pipe_page(rem, iter);
5904
5905                 /* Copy the data into the page, so we can start over. */
5906                 ret = trace_seq_to_buffer(&iter->seq,
5907                                           page_address(spd.pages[i]),
5908                                           trace_seq_used(&iter->seq));
5909                 if (ret < 0) {
5910                         __free_page(spd.pages[i]);
5911                         break;
5912                 }
5913                 spd.partial[i].offset = 0;
5914                 spd.partial[i].len = trace_seq_used(&iter->seq);
5915
5916                 trace_seq_init(&iter->seq);
5917         }
5918
5919         trace_access_unlock(iter->cpu_file);
5920         trace_event_read_unlock();
5921         mutex_unlock(&iter->mutex);
5922
5923         spd.nr_pages = i;
5924
5925         if (i)
5926                 ret = splice_to_pipe(pipe, &spd);
5927         else
5928                 ret = 0;
5929 out:
5930         splice_shrink_spd(&spd);
5931         return ret;
5932
5933 out_err:
5934         mutex_unlock(&iter->mutex);
5935         goto out;
5936 }
5937
5938 static ssize_t
5939 tracing_entries_read(struct file *filp, char __user *ubuf,
5940                      size_t cnt, loff_t *ppos)
5941 {
5942         struct inode *inode = file_inode(filp);
5943         struct trace_array *tr = inode->i_private;
5944         int cpu = tracing_get_cpu(inode);
5945         char buf[64];
5946         int r = 0;
5947         ssize_t ret;
5948
5949         mutex_lock(&trace_types_lock);
5950
5951         if (cpu == RING_BUFFER_ALL_CPUS) {
5952                 int cpu, buf_size_same;
5953                 unsigned long size;
5954
5955                 size = 0;
5956                 buf_size_same = 1;
5957                 /* check if all cpu sizes are same */
5958                 for_each_tracing_cpu(cpu) {
5959                         /* fill in the size from first enabled cpu */
5960                         if (size == 0)
5961                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5962                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5963                                 buf_size_same = 0;
5964                                 break;
5965                         }
5966                 }
5967
5968                 if (buf_size_same) {
5969                         if (!ring_buffer_expanded)
5970                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5971                                             size >> 10,
5972                                             trace_buf_size >> 10);
5973                         else
5974                                 r = sprintf(buf, "%lu\n", size >> 10);
5975                 } else
5976                         r = sprintf(buf, "X\n");
5977         } else
5978                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5979
5980         mutex_unlock(&trace_types_lock);
5981
5982         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5983         return ret;
5984 }
5985
5986 static ssize_t
5987 tracing_entries_write(struct file *filp, const char __user *ubuf,
5988                       size_t cnt, loff_t *ppos)
5989 {
5990         struct inode *inode = file_inode(filp);
5991         struct trace_array *tr = inode->i_private;
5992         unsigned long val;
5993         int ret;
5994
5995         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5996         if (ret)
5997                 return ret;
5998
5999         /* must have at least 1 entry */
6000         if (!val)
6001                 return -EINVAL;
6002
6003         /* value is in KB */
6004         val <<= 10;
6005         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6006         if (ret < 0)
6007                 return ret;
6008
6009         *ppos += cnt;
6010
6011         return cnt;
6012 }
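/*
 * Editorial usage sketch (not part of the original source): the value
 * written to buffer_size_kb is in KB, matching the "val <<= 10" above,
 * and the same file exists under per_cpu/cpuN/ to resize only one CPU's
 * buffer.  Paths assume tracefs is mounted at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int resize_kb(const char *path, const char *kb)
 *	{
 *		int fd = open(path, O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, kb, strlen(kb));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 *
 *	int main(void)
 *	{
 *		resize_kb("/sys/kernel/tracing/buffer_size_kb", "4096");
 *		return resize_kb("/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb",
 *				 "1408") ? 1 : 0;
 *	}
 */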
6013
6014 static ssize_t
6015 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6016                                 size_t cnt, loff_t *ppos)
6017 {
6018         struct trace_array *tr = filp->private_data;
6019         char buf[64];
6020         int r, cpu;
6021         unsigned long size = 0, expanded_size = 0;
6022
6023         mutex_lock(&trace_types_lock);
6024         for_each_tracing_cpu(cpu) {
6025                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6026                 if (!ring_buffer_expanded)
6027                         expanded_size += trace_buf_size >> 10;
6028         }
6029         if (ring_buffer_expanded)
6030                 r = sprintf(buf, "%lu\n", size);
6031         else
6032                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6033         mutex_unlock(&trace_types_lock);
6034
6035         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6036 }
6037
6038 static ssize_t
6039 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6040                           size_t cnt, loff_t *ppos)
6041 {
6042         /*
6043          * There is no need to read what the user has written; this function
6044          * only exists so that using "echo" on this file does not return an error
6045          */
6046
6047         *ppos += cnt;
6048
6049         return cnt;
6050 }
6051
6052 static int
6053 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6054 {
6055         struct trace_array *tr = inode->i_private;
6056
6057         /* disable tracing ? */
6058         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6059                 tracer_tracing_off(tr);
6060         /* resize the ring buffer to 0 */
6061         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6062
6063         trace_array_put(tr);
6064
6065         return 0;
6066 }
6067
6068 static ssize_t
6069 tracing_mark_write(struct file *filp, const char __user *ubuf,
6070                                         size_t cnt, loff_t *fpos)
6071 {
6072         struct trace_array *tr = filp->private_data;
6073         struct ring_buffer_event *event;
6074         enum event_trigger_type tt = ETT_NONE;
6075         struct ring_buffer *buffer;
6076         struct print_entry *entry;
6077         unsigned long irq_flags;
6078         const char faulted[] = "<faulted>";
6079         ssize_t written;
6080         int size;
6081         int len;
6082
6083 /* Used in tracing_mark_raw_write() as well */
6084 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6085
6086         if (tracing_disabled)
6087                 return -EINVAL;
6088
6089         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6090                 return -EINVAL;
6091
6092         if (cnt > TRACE_BUF_SIZE)
6093                 cnt = TRACE_BUF_SIZE;
6094
6095         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6096
6097         local_save_flags(irq_flags);
6098         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6099
6100         /* If the input is shorter than "<faulted>", keep room to add it */
6101         if (cnt < FAULTED_SIZE)
6102                 size += FAULTED_SIZE - cnt;
6103
6104         buffer = tr->trace_buffer.buffer;
6105         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6106                                             irq_flags, preempt_count());
6107         if (unlikely(!event))
6108                 /* Ring buffer disabled, return as if not open for write */
6109                 return -EBADF;
6110
6111         entry = ring_buffer_event_data(event);
6112         entry->ip = _THIS_IP_;
6113
6114         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6115         if (len) {
6116                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6117                 cnt = FAULTED_SIZE;
6118                 written = -EFAULT;
6119         } else
6120                 written = cnt;
6121         len = cnt;
6122
6123         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6124                 /* do not add \n before testing triggers, but add \0 */
6125                 entry->buf[cnt] = '\0';
6126                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6127         }
6128
6129         if (entry->buf[cnt - 1] != '\n') {
6130                 entry->buf[cnt] = '\n';
6131                 entry->buf[cnt + 1] = '\0';
6132         } else
6133                 entry->buf[cnt] = '\0';
6134
6135         __buffer_unlock_commit(buffer, event);
6136
6137         if (tt)
6138                 event_triggers_post_call(tr->trace_marker_file, tt);
6139
6140         if (written > 0)
6141                 *fpos += written;
6142
6143         return written;
6144 }
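/*
 * Editorial usage sketch (not part of the original source): user space
 * annotates the trace by writing free-form text to trace_marker; the
 * kernel appends a newline if one is missing, as handled above.  The
 * mount point is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *		int i;
 *
 *		if (fd < 0)
 *			return 1;
 *		for (i = 0; i < 3; i++)
 *			dprintf(fd, "app checkpoint %d", i);
 *		close(fd);
 *		return 0;
 *	}
 */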
6145
6146 /* Limit it for now to 3K (including tag) */
6147 #define RAW_DATA_MAX_SIZE (1024*3)
6148
6149 static ssize_t
6150 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6151                                         size_t cnt, loff_t *fpos)
6152 {
6153         struct trace_array *tr = filp->private_data;
6154         struct ring_buffer_event *event;
6155         struct ring_buffer *buffer;
6156         struct raw_data_entry *entry;
6157         const char faulted[] = "<faulted>";
6158         unsigned long irq_flags;
6159         ssize_t written;
6160         int size;
6161         int len;
6162
6163 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6164
6165         if (tracing_disabled)
6166                 return -EINVAL;
6167
6168         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6169                 return -EINVAL;
6170
6171         /* The marker must at least have a tag id */
6172         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6173                 return -EINVAL;
6174
6175         if (cnt > TRACE_BUF_SIZE)
6176                 cnt = TRACE_BUF_SIZE;
6177
6178         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6179
6180         local_save_flags(irq_flags);
6181         size = sizeof(*entry) + cnt;
6182         if (cnt < FAULT_SIZE_ID)
6183                 size += FAULT_SIZE_ID - cnt;
6184
6185         buffer = tr->trace_buffer.buffer;
6186         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6187                                             irq_flags, preempt_count());
6188         if (!event)
6189                 /* Ring buffer disabled, return as if not open for write */
6190                 return -EBADF;
6191
6192         entry = ring_buffer_event_data(event);
6193
6194         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6195         if (len) {
6196                 entry->id = -1;
6197                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6198                 written = -EFAULT;
6199         } else
6200                 written = cnt;
6201
6202         __buffer_unlock_commit(buffer, event);
6203
6204         if (written > 0)
6205                 *fpos += written;
6206
6207         return written;
6208 }
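/*
 * Editorial usage sketch (not part of the original source): writes to
 * trace_marker_raw are binary and must begin with a 4-byte tag id (the
 * 'id' field copied above), optionally followed by payload bytes.  The
 * tag value, payload and mount point below are arbitrary assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		unsigned char buf[sizeof(unsigned int) + 8];
 *		unsigned int tag = 0x1234;
 *		int fd;
 *
 *		memcpy(buf, &tag, sizeof(tag));
 *		memset(buf + sizeof(tag), 0xab, 8);
 *
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, buf, sizeof(buf)) < 0)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */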
6209
6210 static int tracing_clock_show(struct seq_file *m, void *v)
6211 {
6212         struct trace_array *tr = m->private;
6213         int i;
6214
6215         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6216                 seq_printf(m,
6217                         "%s%s%s%s", i ? " " : "",
6218                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6219                         i == tr->clock_id ? "]" : "");
6220         seq_putc(m, '\n');
6221
6222         return 0;
6223 }
6224
6225 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6226 {
6227         int i;
6228
6229         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6230                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6231                         break;
6232         }
6233         if (i == ARRAY_SIZE(trace_clocks))
6234                 return -EINVAL;
6235
6236         mutex_lock(&trace_types_lock);
6237
6238         tr->clock_id = i;
6239
6240         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6241
6242         /*
6243          * New clock may not be consistent with the previous clock.
6244          * Reset the buffer so that it doesn't have incomparable timestamps.
6245          */
6246         tracing_reset_online_cpus(&tr->trace_buffer);
6247
6248 #ifdef CONFIG_TRACER_MAX_TRACE
6249         if (tr->max_buffer.buffer)
6250                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6251         tracing_reset_online_cpus(&tr->max_buffer);
6252 #endif
6253
6254         mutex_unlock(&trace_types_lock);
6255
6256         return 0;
6257 }
6258
6259 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6260                                    size_t cnt, loff_t *fpos)
6261 {
6262         struct seq_file *m = filp->private_data;
6263         struct trace_array *tr = m->private;
6264         char buf[64];
6265         const char *clockstr;
6266         int ret;
6267
6268         if (cnt >= sizeof(buf))
6269                 return -EINVAL;
6270
6271         if (copy_from_user(buf, ubuf, cnt))
6272                 return -EFAULT;
6273
6274         buf[cnt] = 0;
6275
6276         clockstr = strstrip(buf);
6277
6278         ret = tracing_set_clock(tr, clockstr);
6279         if (ret)
6280                 return ret;
6281
6282         *fpos += cnt;
6283
6284         return cnt;
6285 }
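/*
 * Editorial usage sketch (not part of the original source): reading
 * trace_clock lists the available clocks with the current one in square
 * brackets (see tracing_clock_show() above); writing a clock name, as
 * below, switches clocks and resets the buffers.  The mount point and
 * the "mono" clock name are assumptions used only for illustration.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, "mono", 4) != 4)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */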
6286
6287 static int tracing_clock_open(struct inode *inode, struct file *file)
6288 {
6289         struct trace_array *tr = inode->i_private;
6290         int ret;
6291
6292         if (tracing_disabled)
6293                 return -ENODEV;
6294
6295         if (trace_array_get(tr))
6296                 return -ENODEV;
6297
6298         ret = single_open(file, tracing_clock_show, inode->i_private);
6299         if (ret < 0)
6300                 trace_array_put(tr);
6301
6302         return ret;
6303 }
6304
6305 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6306 {
6307         struct trace_array *tr = m->private;
6308
6309         mutex_lock(&trace_types_lock);
6310
6311         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6312                 seq_puts(m, "delta [absolute]\n");
6313         else
6314                 seq_puts(m, "[delta] absolute\n");
6315
6316         mutex_unlock(&trace_types_lock);
6317
6318         return 0;
6319 }
6320
6321 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6322 {
6323         struct trace_array *tr = inode->i_private;
6324         int ret;
6325
6326         if (tracing_disabled)
6327                 return -ENODEV;
6328
6329         if (trace_array_get(tr))
6330                 return -ENODEV;
6331
6332         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6333         if (ret < 0)
6334                 trace_array_put(tr);
6335
6336         return ret;
6337 }
6338
6339 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6340 {
6341         int ret = 0;
6342
6343         mutex_lock(&trace_types_lock);
6344
6345         if (abs && tr->time_stamp_abs_ref++)
6346                 goto out;
6347
6348         if (!abs) {
6349                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6350                         ret = -EINVAL;
6351                         goto out;
6352                 }
6353
6354                 if (--tr->time_stamp_abs_ref)
6355                         goto out;
6356         }
6357
6358         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6359
6360 #ifdef CONFIG_TRACER_MAX_TRACE
6361         if (tr->max_buffer.buffer)
6362                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6363 #endif
6364  out:
6365         mutex_unlock(&trace_types_lock);
6366
6367         return ret;
6368 }
6369
6370 struct ftrace_buffer_info {
6371         struct trace_iterator   iter;
6372         void                    *spare;
6373         unsigned int            spare_cpu;
6374         unsigned int            read;
6375 };
6376
6377 #ifdef CONFIG_TRACER_SNAPSHOT
6378 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6379 {
6380         struct trace_array *tr = inode->i_private;
6381         struct trace_iterator *iter;
6382         struct seq_file *m;
6383         int ret = 0;
6384
6385         if (trace_array_get(tr) < 0)
6386                 return -ENODEV;
6387
6388         if (file->f_mode & FMODE_READ) {
6389                 iter = __tracing_open(inode, file, true);
6390                 if (IS_ERR(iter))
6391                         ret = PTR_ERR(iter);
6392         } else {
6393                 /* Writes still need the seq_file to hold the private data */
6394                 ret = -ENOMEM;
6395                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6396                 if (!m)
6397                         goto out;
6398                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6399                 if (!iter) {
6400                         kfree(m);
6401                         goto out;
6402                 }
6403                 ret = 0;
6404
6405                 iter->tr = tr;
6406                 iter->trace_buffer = &tr->max_buffer;
6407                 iter->cpu_file = tracing_get_cpu(inode);
6408                 m->private = iter;
6409                 file->private_data = m;
6410         }
6411 out:
6412         if (ret < 0)
6413                 trace_array_put(tr);
6414
6415         return ret;
6416 }
6417
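/*
 * Write handler for the "snapshot" file (e.g. "echo 1 > snapshot"):
 * 0 frees the allocated snapshot buffer, 1 allocates it if needed and
 * swaps it with the live buffer (per-cpu only when the ring buffer
 * supports per-cpu swap), and any other value clears the snapshot
 * buffer without freeing it. Tracers that use max_tr themselves get
 * -EBUSY.
 */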
6418 static ssize_t
6419 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6420                        loff_t *ppos)
6421 {
6422         struct seq_file *m = filp->private_data;
6423         struct trace_iterator *iter = m->private;
6424         struct trace_array *tr = iter->tr;
6425         unsigned long val;
6426         int ret;
6427
6428         ret = tracing_update_buffers();
6429         if (ret < 0)
6430                 return ret;
6431
6432         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6433         if (ret)
6434                 return ret;
6435
6436         mutex_lock(&trace_types_lock);
6437
6438         if (tr->current_trace->use_max_tr) {
6439                 ret = -EBUSY;
6440                 goto out;
6441         }
6442
6443         switch (val) {
6444         case 0:
6445                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6446                         ret = -EINVAL;
6447                         break;
6448                 }
6449                 if (tr->allocated_snapshot)
6450                         free_snapshot(tr);
6451                 break;
6452         case 1:
6453 /* Only allow per-cpu swap if the ring buffer supports it */
6454 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6455                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6456                         ret = -EINVAL;
6457                         break;
6458                 }
6459 #endif
6460                 if (!tr->allocated_snapshot) {
6461                         ret = tracing_alloc_snapshot_instance(tr);
6462                         if (ret < 0)
6463                                 break;
6464                 }
6465                 local_irq_disable();
6466                 /* Now, we're going to swap */
6467                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6468                         update_max_tr(tr, current, smp_processor_id());
6469                 else
6470                         update_max_tr_single(tr, current, iter->cpu_file);
6471                 local_irq_enable();
6472                 break;
6473         default:
6474                 if (tr->allocated_snapshot) {
6475                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6476                                 tracing_reset_online_cpus(&tr->max_buffer);
6477                         else
6478                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6479                 }
6480                 break;
6481         }
6482
6483         if (ret >= 0) {
6484                 *ppos += cnt;
6485                 ret = cnt;
6486         }
6487 out:
6488         mutex_unlock(&trace_types_lock);
6489         return ret;
6490 }
6491
6492 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6493 {
6494         struct seq_file *m = file->private_data;
6495         int ret;
6496
6497         ret = tracing_release(inode, file);
6498
6499         if (file->f_mode & FMODE_READ)
6500                 return ret;
6501
6502         /* If write only, the seq_file is just a stub */
6503         if (m)
6504                 kfree(m->private);
6505         kfree(m);
6506
6507         return 0;
6508 }
6509
6510 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6511 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6512                                     size_t count, loff_t *ppos);
6513 static int tracing_buffers_release(struct inode *inode, struct file *file);
6514 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6515                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6516
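/*
 * Open the per-cpu "snapshot_raw" file: reuse tracing_buffers_open()
 * but point the iterator at the max (snapshot) buffer instead of the
 * live one.
 */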
6517 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6518 {
6519         struct ftrace_buffer_info *info;
6520         int ret;
6521
6522         ret = tracing_buffers_open(inode, filp);
6523         if (ret < 0)
6524                 return ret;
6525
6526         info = filp->private_data;
6527
6528         if (info->iter.trace->use_max_tr) {
6529                 tracing_buffers_release(inode, filp);
6530                 return -EBUSY;
6531         }
6532
6533         info->iter.snapshot = true;
6534         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6535
6536         return ret;
6537 }
6538
6539 #endif /* CONFIG_TRACER_SNAPSHOT */
6540
6541
6542 static const struct file_operations tracing_thresh_fops = {
6543         .open           = tracing_open_generic,
6544         .read           = tracing_thresh_read,
6545         .write          = tracing_thresh_write,
6546         .llseek         = generic_file_llseek,
6547 };
6548
6549 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6550 static const struct file_operations tracing_max_lat_fops = {
6551         .open           = tracing_open_generic,
6552         .read           = tracing_max_lat_read,
6553         .write          = tracing_max_lat_write,
6554         .llseek         = generic_file_llseek,
6555 };
6556 #endif
6557
6558 static const struct file_operations set_tracer_fops = {
6559         .open           = tracing_open_generic,
6560         .read           = tracing_set_trace_read,
6561         .write          = tracing_set_trace_write,
6562         .llseek         = generic_file_llseek,
6563 };
6564
6565 static const struct file_operations tracing_pipe_fops = {
6566         .open           = tracing_open_pipe,
6567         .poll           = tracing_poll_pipe,
6568         .read           = tracing_read_pipe,
6569         .splice_read    = tracing_splice_read_pipe,
6570         .release        = tracing_release_pipe,
6571         .llseek         = no_llseek,
6572 };
6573
6574 static const struct file_operations tracing_entries_fops = {
6575         .open           = tracing_open_generic_tr,
6576         .read           = tracing_entries_read,
6577         .write          = tracing_entries_write,
6578         .llseek         = generic_file_llseek,
6579         .release        = tracing_release_generic_tr,
6580 };
6581
6582 static const struct file_operations tracing_total_entries_fops = {
6583         .open           = tracing_open_generic_tr,
6584         .read           = tracing_total_entries_read,
6585         .llseek         = generic_file_llseek,
6586         .release        = tracing_release_generic_tr,
6587 };
6588
6589 static const struct file_operations tracing_free_buffer_fops = {
6590         .open           = tracing_open_generic_tr,
6591         .write          = tracing_free_buffer_write,
6592         .release        = tracing_free_buffer_release,
6593 };
6594
6595 static const struct file_operations tracing_mark_fops = {
6596         .open           = tracing_open_generic_tr,
6597         .write          = tracing_mark_write,
6598         .llseek         = generic_file_llseek,
6599         .release        = tracing_release_generic_tr,
6600 };
6601
6602 static const struct file_operations tracing_mark_raw_fops = {
6603         .open           = tracing_open_generic_tr,
6604         .write          = tracing_mark_raw_write,
6605         .llseek         = generic_file_llseek,
6606         .release        = tracing_release_generic_tr,
6607 };
6608
6609 static const struct file_operations trace_clock_fops = {
6610         .open           = tracing_clock_open,
6611         .read           = seq_read,
6612         .llseek         = seq_lseek,
6613         .release        = tracing_single_release_tr,
6614         .write          = tracing_clock_write,
6615 };
6616
6617 static const struct file_operations trace_time_stamp_mode_fops = {
6618         .open           = tracing_time_stamp_mode_open,
6619         .read           = seq_read,
6620         .llseek         = seq_lseek,
6621         .release        = tracing_single_release_tr,
6622 };
6623
6624 #ifdef CONFIG_TRACER_SNAPSHOT
6625 static const struct file_operations snapshot_fops = {
6626         .open           = tracing_snapshot_open,
6627         .read           = seq_read,
6628         .write          = tracing_snapshot_write,
6629         .llseek         = tracing_lseek,
6630         .release        = tracing_snapshot_release,
6631 };
6632
6633 static const struct file_operations snapshot_raw_fops = {
6634         .open           = snapshot_raw_open,
6635         .read           = tracing_buffers_read,
6636         .release        = tracing_buffers_release,
6637         .splice_read    = tracing_buffers_splice_read,
6638         .llseek         = no_llseek,
6639 };
6640
6641 #endif /* CONFIG_TRACER_SNAPSHOT */
6642
6643 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6644 {
6645         struct trace_array *tr = inode->i_private;
6646         struct ftrace_buffer_info *info;
6647         int ret;
6648
6649         if (tracing_disabled)
6650                 return -ENODEV;
6651
6652         if (trace_array_get(tr) < 0)
6653                 return -ENODEV;
6654
6655         info = kzalloc(sizeof(*info), GFP_KERNEL);
6656         if (!info) {
6657                 trace_array_put(tr);
6658                 return -ENOMEM;
6659         }
6660
6661         mutex_lock(&trace_types_lock);
6662
6663         info->iter.tr           = tr;
6664         info->iter.cpu_file     = tracing_get_cpu(inode);
6665         info->iter.trace        = tr->current_trace;
6666         info->iter.trace_buffer = &tr->trace_buffer;
6667         info->spare             = NULL;
6668         /* Force reading ring buffer for first read */
6669         info->read              = (unsigned int)-1;
6670
6671         filp->private_data = info;
6672
6673         tr->current_trace->ref++;
6674
6675         mutex_unlock(&trace_types_lock);
6676
6677         ret = nonseekable_open(inode, filp);
6678         if (ret < 0)
6679                 trace_array_put(tr);
6680
6681         return ret;
6682 }
6683
6684 static __poll_t
6685 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6686 {
6687         struct ftrace_buffer_info *info = filp->private_data;
6688         struct trace_iterator *iter = &info->iter;
6689
6690         return trace_poll(iter, filp, poll_table);
6691 }
6692
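/*
 * Read handler for the per-cpu "trace_pipe_raw" file: copies whole
 * ring-buffer pages to user space through a cached spare page,
 * blocking while the buffer is empty unless O_NONBLOCK is set.
 */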
6693 static ssize_t
6694 tracing_buffers_read(struct file *filp, char __user *ubuf,
6695                      size_t count, loff_t *ppos)
6696 {
6697         struct ftrace_buffer_info *info = filp->private_data;
6698         struct trace_iterator *iter = &info->iter;
6699         ssize_t ret = 0;
6700         ssize_t size;
6701
6702         if (!count)
6703                 return 0;
6704
6705 #ifdef CONFIG_TRACER_MAX_TRACE
6706         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6707                 return -EBUSY;
6708 #endif
6709
6710         if (!info->spare) {
6711                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6712                                                           iter->cpu_file);
6713                 if (IS_ERR(info->spare)) {
6714                         ret = PTR_ERR(info->spare);
6715                         info->spare = NULL;
6716                 } else {
6717                         info->spare_cpu = iter->cpu_file;
6718                 }
6719         }
6720         if (!info->spare)
6721                 return ret;
6722
6723         /* Do we have previous read data to read? */
6724         if (info->read < PAGE_SIZE)
6725                 goto read;
6726
6727  again:
6728         trace_access_lock(iter->cpu_file);
6729         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6730                                     &info->spare,
6731                                     count,
6732                                     iter->cpu_file, 0);
6733         trace_access_unlock(iter->cpu_file);
6734
6735         if (ret < 0) {
6736                 if (trace_empty(iter)) {
6737                         if ((filp->f_flags & O_NONBLOCK))
6738                                 return -EAGAIN;
6739
6740                         ret = wait_on_pipe(iter, false);
6741                         if (ret)
6742                                 return ret;
6743
6744                         goto again;
6745                 }
6746                 return 0;
6747         }
6748
6749         info->read = 0;
6750  read:
6751         size = PAGE_SIZE - info->read;
6752         if (size > count)
6753                 size = count;
6754
6755         ret = copy_to_user(ubuf, info->spare + info->read, size);
6756         if (ret == size)
6757                 return -EFAULT;
6758
6759         size -= ret;
6760
6761         *ppos += size;
6762         info->read += size;
6763
6764         return size;
6765 }
6766
6767 static int tracing_buffers_release(struct inode *inode, struct file *file)
6768 {
6769         struct ftrace_buffer_info *info = file->private_data;
6770         struct trace_iterator *iter = &info->iter;
6771
6772         mutex_lock(&trace_types_lock);
6773
6774         iter->tr->current_trace->ref--;
6775
6776         __trace_array_put(iter->tr);
6777
6778         if (info->spare)
6779                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6780                                            info->spare_cpu, info->spare);
6781         kfree(info);
6782
6783         mutex_unlock(&trace_types_lock);
6784
6785         return 0;
6786 }
6787
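/*
 * A buffer_ref pins one ring-buffer page that has been handed to a
 * pipe by splice; when the last reference is dropped the page goes
 * back to the ring buffer via ring_buffer_free_read_page().
 */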
6788 struct buffer_ref {
6789         struct ring_buffer      *buffer;
6790         void                    *page;
6791         int                     cpu;
6792         int                     ref;
6793 };
6794
6795 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6796                                     struct pipe_buffer *buf)
6797 {
6798         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6799
6800         if (--ref->ref)
6801                 return;
6802
6803         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6804         kfree(ref);
6805         buf->private = 0;
6806 }
6807
6808 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6809                                 struct pipe_buffer *buf)
6810 {
6811         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6812
6813         ref->ref++;
6814 }
6815
6816 /* Pipe buffer operations for a buffer. */
6817 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6818         .can_merge              = 0,
6819         .confirm                = generic_pipe_buf_confirm,
6820         .release                = buffer_pipe_buf_release,
6821         .steal                  = generic_pipe_buf_steal,
6822         .get                    = buffer_pipe_buf_get,
6823 };
6824
6825 /*
6826  * Callback from splice_to_pipe(); releases any remaining pages
6827  * at the end of the spd if we errored out while filling the pipe.
6828  */
6829 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6830 {
6831         struct buffer_ref *ref =
6832                 (struct buffer_ref *)spd->partial[i].private;
6833
6834         if (--ref->ref)
6835                 return;
6836
6837         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6838         kfree(ref);
6839         spd->partial[i].private = 0;
6840 }
6841
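/*
 * Splice handler for the per-cpu "trace_pipe_raw" file: hands complete
 * ring-buffer pages to the pipe without copying, wrapping each page in
 * a buffer_ref so it is released once the pipe is done with it.
 */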
6842 static ssize_t
6843 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6844                             struct pipe_inode_info *pipe, size_t len,
6845                             unsigned int flags)
6846 {
6847         struct ftrace_buffer_info *info = file->private_data;
6848         struct trace_iterator *iter = &info->iter;
6849         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6850         struct page *pages_def[PIPE_DEF_BUFFERS];
6851         struct splice_pipe_desc spd = {
6852                 .pages          = pages_def,
6853                 .partial        = partial_def,
6854                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6855                 .ops            = &buffer_pipe_buf_ops,
6856                 .spd_release    = buffer_spd_release,
6857         };
6858         struct buffer_ref *ref;
6859         int entries, i;
6860         ssize_t ret = 0;
6861
6862 #ifdef CONFIG_TRACER_MAX_TRACE
6863         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6864                 return -EBUSY;
6865 #endif
6866
6867         if (*ppos & (PAGE_SIZE - 1))
6868                 return -EINVAL;
6869
6870         if (len & (PAGE_SIZE - 1)) {
6871                 if (len < PAGE_SIZE)
6872                         return -EINVAL;
6873                 len &= PAGE_MASK;
6874         }
6875
6876         if (splice_grow_spd(pipe, &spd))
6877                 return -ENOMEM;
6878
6879  again:
6880         trace_access_lock(iter->cpu_file);
6881         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6882
6883         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6884                 struct page *page;
6885                 int r;
6886
6887                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6888                 if (!ref) {
6889                         ret = -ENOMEM;
6890                         break;
6891                 }
6892
6893                 ref->ref = 1;
6894                 ref->buffer = iter->trace_buffer->buffer;
6895                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6896                 if (IS_ERR(ref->page)) {
6897                         ret = PTR_ERR(ref->page);
6898                         ref->page = NULL;
6899                         kfree(ref);
6900                         break;
6901                 }
6902                 ref->cpu = iter->cpu_file;
6903
6904                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6905                                           len, iter->cpu_file, 1);
6906                 if (r < 0) {
6907                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6908                                                    ref->page);
6909                         kfree(ref);
6910                         break;
6911                 }
6912
6913                 page = virt_to_page(ref->page);
6914
6915                 spd.pages[i] = page;
6916                 spd.partial[i].len = PAGE_SIZE;
6917                 spd.partial[i].offset = 0;
6918                 spd.partial[i].private = (unsigned long)ref;
6919                 spd.nr_pages++;
6920                 *ppos += PAGE_SIZE;
6921
6922                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6923         }
6924
6925         trace_access_unlock(iter->cpu_file);
6926         spd.nr_pages = i;
6927
6928         /* did we read anything? */
6929         if (!spd.nr_pages) {
6930                 if (ret)
6931                         goto out;
6932
6933                 ret = -EAGAIN;
6934                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6935                         goto out;
6936
6937                 ret = wait_on_pipe(iter, true);
6938                 if (ret)
6939                         goto out;
6940
6941                 goto again;
6942         }
6943
6944         ret = splice_to_pipe(pipe, &spd);
6945 out:
6946         splice_shrink_spd(&spd);
6947
6948         return ret;
6949 }
6950
6951 static const struct file_operations tracing_buffers_fops = {
6952         .open           = tracing_buffers_open,
6953         .read           = tracing_buffers_read,
6954         .poll           = tracing_buffers_poll,
6955         .release        = tracing_buffers_release,
6956         .splice_read    = tracing_buffers_splice_read,
6957         .llseek         = no_llseek,
6958 };
6959
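/*
 * Read handler for the per-cpu "stats" file: reports entry, overrun,
 * commit-overrun, byte, dropped and read-event counters for one CPU's
 * ring buffer, along with the oldest event and current timestamps.
 */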
6960 static ssize_t
6961 tracing_stats_read(struct file *filp, char __user *ubuf,
6962                    size_t count, loff_t *ppos)
6963 {
6964         struct inode *inode = file_inode(filp);
6965         struct trace_array *tr = inode->i_private;
6966         struct trace_buffer *trace_buf = &tr->trace_buffer;
6967         int cpu = tracing_get_cpu(inode);
6968         struct trace_seq *s;
6969         unsigned long cnt;
6970         unsigned long long t;
6971         unsigned long usec_rem;
6972
6973         s = kmalloc(sizeof(*s), GFP_KERNEL);
6974         if (!s)
6975                 return -ENOMEM;
6976
6977         trace_seq_init(s);
6978
6979         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6980         trace_seq_printf(s, "entries: %ld\n", cnt);
6981
6982         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6983         trace_seq_printf(s, "overrun: %ld\n", cnt);
6984
6985         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6986         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6987
6988         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6989         trace_seq_printf(s, "bytes: %ld\n", cnt);
6990
6991         if (trace_clocks[tr->clock_id].in_ns) {
6992                 /* local or global for trace_clock */
6993                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6994                 usec_rem = do_div(t, USEC_PER_SEC);
6995                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6996                                                                 t, usec_rem);
6997
6998                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6999                 usec_rem = do_div(t, USEC_PER_SEC);
7000                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7001         } else {
7002                 /* counter or tsc mode for trace_clock */
7003                 trace_seq_printf(s, "oldest event ts: %llu\n",
7004                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7005
7006                 trace_seq_printf(s, "now ts: %llu\n",
7007                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7008         }
7009
7010         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7011         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7012
7013         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7014         trace_seq_printf(s, "read events: %ld\n", cnt);
7015
7016         count = simple_read_from_buffer(ubuf, count, ppos,
7017                                         s->buffer, trace_seq_used(s));
7018
7019         kfree(s);
7020
7021         return count;
7022 }
7023
7024 static const struct file_operations tracing_stats_fops = {
7025         .open           = tracing_open_generic_tr,
7026         .read           = tracing_stats_read,
7027         .llseek         = generic_file_llseek,
7028         .release        = tracing_release_generic_tr,
7029 };
7030
7031 #ifdef CONFIG_DYNAMIC_FTRACE
7032
7033 static ssize_t
7034 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7035                   size_t cnt, loff_t *ppos)
7036 {
7037         unsigned long *p = filp->private_data;
7038         char buf[64]; /* Not too big for a shallow stack */
7039         int r;
7040
7041         r = scnprintf(buf, 63, "%ld", *p);
7042         buf[r++] = '\n';
7043
7044         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7045 }
7046
7047 static const struct file_operations tracing_dyn_info_fops = {
7048         .open           = tracing_open_generic,
7049         .read           = tracing_read_dyn_info,
7050         .llseek         = generic_file_llseek,
7051 };
7052 #endif /* CONFIG_DYNAMIC_FTRACE */
7053
7054 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7055 static void
7056 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7057                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7058                 void *data)
7059 {
7060         tracing_snapshot_instance(tr);
7061 }
7062
7063 static void
7064 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7065                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7066                       void *data)
7067 {
7068         struct ftrace_func_mapper *mapper = data;
7069         long *count = NULL;
7070
7071         if (mapper)
7072                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7073
7074         if (count) {
7075
7076                 if (*count <= 0)
7077                         return;
7078
7079                 (*count)--;
7080         }
7081
7082         tracing_snapshot_instance(tr);
7083 }
7084
7085 static int
7086 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7087                       struct ftrace_probe_ops *ops, void *data)
7088 {
7089         struct ftrace_func_mapper *mapper = data;
7090         long *count = NULL;
7091
7092         seq_printf(m, "%ps:", (void *)ip);
7093
7094         seq_puts(m, "snapshot");
7095
7096         if (mapper)
7097                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7098
7099         if (count)
7100                 seq_printf(m, ":count=%ld\n", *count);
7101         else
7102                 seq_puts(m, ":unlimited\n");
7103
7104         return 0;
7105 }
7106
7107 static int
7108 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7109                      unsigned long ip, void *init_data, void **data)
7110 {
7111         struct ftrace_func_mapper *mapper = *data;
7112
7113         if (!mapper) {
7114                 mapper = allocate_ftrace_func_mapper();
7115                 if (!mapper)
7116                         return -ENOMEM;
7117                 *data = mapper;
7118         }
7119
7120         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7121 }
7122
7123 static void
7124 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7125                      unsigned long ip, void *data)
7126 {
7127         struct ftrace_func_mapper *mapper = data;
7128
7129         if (!ip) {
7130                 if (!mapper)
7131                         return;
7132                 free_ftrace_func_mapper(mapper, NULL);
7133                 return;
7134         }
7135
7136         ftrace_func_mapper_remove_ip(mapper, ip);
7137 }
7138
7139 static struct ftrace_probe_ops snapshot_probe_ops = {
7140         .func                   = ftrace_snapshot,
7141         .print                  = ftrace_snapshot_print,
7142 };
7143
7144 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7145         .func                   = ftrace_count_snapshot,
7146         .print                  = ftrace_snapshot_print,
7147         .init                   = ftrace_snapshot_init,
7148         .free                   = ftrace_snapshot_free,
7149 };
7150
7151 static int
7152 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7153                                char *glob, char *cmd, char *param, int enable)
7154 {
7155         struct ftrace_probe_ops *ops;
7156         void *count = (void *)-1;
7157         char *number;
7158         int ret;
7159
7160         if (!tr)
7161                 return -ENODEV;
7162
7163         /* hash funcs only work with set_ftrace_filter */
7164         if (!enable)
7165                 return -EINVAL;
7166
7167         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7168
7169         if (glob[0] == '!')
7170                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7171
7172         if (!param)
7173                 goto out_reg;
7174
7175         number = strsep(&param, ":");
7176
7177         if (!strlen(number))
7178                 goto out_reg;
7179
7180         /*
7181          * We use the callback data field (which is a pointer)
7182          * as our counter.
7183          */
7184         ret = kstrtoul(number, 0, (unsigned long *)&count);
7185         if (ret)
7186                 return ret;
7187
7188  out_reg:
7189         ret = tracing_alloc_snapshot_instance(tr);
7190         if (ret < 0)
7191                 goto out;
7192
7193         ret = register_ftrace_function_probe(glob, tr, ops, count);
7194
7195  out:
7196         return ret < 0 ? ret : 0;
7197 }
7198
7199 static struct ftrace_func_command ftrace_snapshot_cmd = {
7200         .name                   = "snapshot",
7201         .func                   = ftrace_trace_snapshot_callback,
7202 };
7203
7204 static __init int register_snapshot_cmd(void)
7205 {
7206         return register_ftrace_command(&ftrace_snapshot_cmd);
7207 }
7208 #else
7209 static inline __init int register_snapshot_cmd(void) { return 0; }
7210 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7211
7212 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7213 {
7214         if (WARN_ON(!tr->dir))
7215                 return ERR_PTR(-ENODEV);
7216
7217         /* Top directory uses NULL as the parent */
7218         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7219                 return NULL;
7220
7221         /* All sub buffers have a descriptor */
7222         return tr->dir;
7223 }
7224
7225 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7226 {
7227         struct dentry *d_tracer;
7228
7229         if (tr->percpu_dir)
7230                 return tr->percpu_dir;
7231
7232         d_tracer = tracing_get_dentry(tr);
7233         if (IS_ERR(d_tracer))
7234                 return NULL;
7235
7236         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7237
7238         WARN_ONCE(!tr->percpu_dir,
7239                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7240
7241         return tr->percpu_dir;
7242 }
7243
7244 static struct dentry *
7245 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7246                       void *data, long cpu, const struct file_operations *fops)
7247 {
7248         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7249
7250         if (ret) /* See tracing_get_cpu() */
7251                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7252         return ret;
7253 }
7254
7255 static void
7256 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7257 {
7258         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7259         struct dentry *d_cpu;
7260         char cpu_dir[30]; /* 30 characters should be more than enough */
7261
7262         if (!d_percpu)
7263                 return;
7264
7265         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7266         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7267         if (!d_cpu) {
7268                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7269                 return;
7270         }
7271
7272         /* per cpu trace_pipe */
7273         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7274                                 tr, cpu, &tracing_pipe_fops);
7275
7276         /* per cpu trace */
7277         trace_create_cpu_file("trace", 0644, d_cpu,
7278                                 tr, cpu, &tracing_fops);
7279
7280         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7281                                 tr, cpu, &tracing_buffers_fops);
7282
7283         trace_create_cpu_file("stats", 0444, d_cpu,
7284                                 tr, cpu, &tracing_stats_fops);
7285
7286         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7287                                 tr, cpu, &tracing_entries_fops);
7288
7289 #ifdef CONFIG_TRACER_SNAPSHOT
7290         trace_create_cpu_file("snapshot", 0644, d_cpu,
7291                                 tr, cpu, &snapshot_fops);
7292
7293         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7294                                 tr, cpu, &snapshot_raw_fops);
7295 #endif
7296 }
7297
7298 #ifdef CONFIG_FTRACE_SELFTEST
7299 /* Let selftest have access to static functions in this file */
7300 #include "trace_selftest.c"
7301 #endif
7302
7303 static ssize_t
7304 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7305                         loff_t *ppos)
7306 {
7307         struct trace_option_dentry *topt = filp->private_data;
7308         char *buf;
7309
7310         if (topt->flags->val & topt->opt->bit)
7311                 buf = "1\n";
7312         else
7313                 buf = "0\n";
7314
7315         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7316 }
7317
7318 static ssize_t
7319 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7320                          loff_t *ppos)
7321 {
7322         struct trace_option_dentry *topt = filp->private_data;
7323         unsigned long val;
7324         int ret;
7325
7326         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7327         if (ret)
7328                 return ret;
7329
7330         if (val != 0 && val != 1)
7331                 return -EINVAL;
7332
7333         if (!!(topt->flags->val & topt->opt->bit) != val) {
7334                 mutex_lock(&trace_types_lock);
7335                 ret = __set_tracer_option(topt->tr, topt->flags,
7336                                           topt->opt, !val);
7337                 mutex_unlock(&trace_types_lock);
7338                 if (ret)
7339                         return ret;
7340         }
7341
7342         *ppos += cnt;
7343
7344         return cnt;
7345 }
7346
7347
7348 static const struct file_operations trace_options_fops = {
7349         .open = tracing_open_generic,
7350         .read = trace_options_read,
7351         .write = trace_options_write,
7352         .llseek = generic_file_llseek,
7353 };
7354
7355 /*
7356  * In order to pass in both the trace_array descriptor as well as the index
7357  * to the flag that the trace option file represents, the trace_array
7358  * has a character array of trace_flags_index[], which holds the index
7359  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7360  * The address of this character array is passed to the flag option file
7361  * read/write callbacks.
7362  *
7363  * In order to extract both the index and the trace_array descriptor,
7364  * get_tr_index() uses the following algorithm.
7365  *
7366  *   idx = *ptr;
7367  *
7368  * As the pointer itself contains the address of the index (remember
7369  * index[1] == 1).
7370  *
7371  * Then to get the trace_array descriptor, by subtracting that index
7372  * from the ptr, we get to the start of the index itself.
7373  *
7374  *   ptr - idx == &index[0]
7375  *
7376  * Then a simple container_of() from that pointer gets us to the
7377  * trace_array descriptor.
7378  */
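/*
 * For example, if @data points at trace_flags_index[3], then *pindex
 * becomes 3, data - 3 is &trace_flags_index[0], and container_of() on
 * that address yields the enclosing trace_array.
 */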
7379 static void get_tr_index(void *data, struct trace_array **ptr,
7380                          unsigned int *pindex)
7381 {
7382         *pindex = *(unsigned char *)data;
7383
7384         *ptr = container_of(data - *pindex, struct trace_array,
7385                             trace_flags_index);
7386 }
7387
7388 static ssize_t
7389 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7390                         loff_t *ppos)
7391 {
7392         void *tr_index = filp->private_data;
7393         struct trace_array *tr;
7394         unsigned int index;
7395         char *buf;
7396
7397         get_tr_index(tr_index, &tr, &index);
7398
7399         if (tr->trace_flags & (1 << index))
7400                 buf = "1\n";
7401         else
7402                 buf = "0\n";
7403
7404         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7405 }
7406
7407 static ssize_t
7408 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7409                          loff_t *ppos)
7410 {
7411         void *tr_index = filp->private_data;
7412         struct trace_array *tr;
7413         unsigned int index;
7414         unsigned long val;
7415         int ret;
7416
7417         get_tr_index(tr_index, &tr, &index);
7418
7419         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7420         if (ret)
7421                 return ret;
7422
7423         if (val != 0 && val != 1)
7424                 return -EINVAL;
7425
7426         mutex_lock(&trace_types_lock);
7427         ret = set_tracer_flag(tr, 1 << index, val);
7428         mutex_unlock(&trace_types_lock);
7429
7430         if (ret < 0)
7431                 return ret;
7432
7433         *ppos += cnt;
7434
7435         return cnt;
7436 }
7437
7438 static const struct file_operations trace_options_core_fops = {
7439         .open = tracing_open_generic,
7440         .read = trace_options_core_read,
7441         .write = trace_options_core_write,
7442         .llseek = generic_file_llseek,
7443 };
7444
7445 struct dentry *trace_create_file(const char *name,
7446                                  umode_t mode,
7447                                  struct dentry *parent,
7448                                  void *data,
7449                                  const struct file_operations *fops)
7450 {
7451         struct dentry *ret;
7452
7453         ret = tracefs_create_file(name, mode, parent, data, fops);
7454         if (!ret)
7455                 pr_warn("Could not create tracefs '%s' entry\n", name);
7456
7457         return ret;
7458 }
7459
7460
7461 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7462 {
7463         struct dentry *d_tracer;
7464
7465         if (tr->options)
7466                 return tr->options;
7467
7468         d_tracer = tracing_get_dentry(tr);
7469         if (IS_ERR(d_tracer))
7470                 return NULL;
7471
7472         tr->options = tracefs_create_dir("options", d_tracer);
7473         if (!tr->options) {
7474                 pr_warn("Could not create tracefs directory 'options'\n");
7475                 return NULL;
7476         }
7477
7478         return tr->options;
7479 }
7480
7481 static void
7482 create_trace_option_file(struct trace_array *tr,
7483                          struct trace_option_dentry *topt,
7484                          struct tracer_flags *flags,
7485                          struct tracer_opt *opt)
7486 {
7487         struct dentry *t_options;
7488
7489         t_options = trace_options_init_dentry(tr);
7490         if (!t_options)
7491                 return;
7492
7493         topt->flags = flags;
7494         topt->opt = opt;
7495         topt->tr = tr;
7496
7497         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7498                                     &trace_options_fops);
7499
7500 }
7501
7502 static void
7503 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7504 {
7505         struct trace_option_dentry *topts;
7506         struct trace_options *tr_topts;
7507         struct tracer_flags *flags;
7508         struct tracer_opt *opts;
7509         int cnt;
7510         int i;
7511
7512         if (!tracer)
7513                 return;
7514
7515         flags = tracer->flags;
7516
7517         if (!flags || !flags->opts)
7518                 return;
7519
7520         /*
7521          * If this is an instance, only create flags for tracers
7522          * the instance may have.
7523          */
7524         if (!trace_ok_for_array(tracer, tr))
7525                 return;
7526
7527         for (i = 0; i < tr->nr_topts; i++) {
7528                 /* Make sure there are no duplicate flags. */
7529                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7530                         return;
7531         }
7532
7533         opts = flags->opts;
7534
7535         for (cnt = 0; opts[cnt].name; cnt++)
7536                 ;
7537
7538         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7539         if (!topts)
7540                 return;
7541
7542         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7543                             GFP_KERNEL);
7544         if (!tr_topts) {
7545                 kfree(topts);
7546                 return;
7547         }
7548
7549         tr->topts = tr_topts;
7550         tr->topts[tr->nr_topts].tracer = tracer;
7551         tr->topts[tr->nr_topts].topts = topts;
7552         tr->nr_topts++;
7553
7554         for (cnt = 0; opts[cnt].name; cnt++) {
7555                 create_trace_option_file(tr, &topts[cnt], flags,
7556                                          &opts[cnt]);
7557                 WARN_ONCE(topts[cnt].entry == NULL,
7558                           "Failed to create trace option: %s",
7559                           opts[cnt].name);
7560         }
7561 }
7562
7563 static struct dentry *
7564 create_trace_option_core_file(struct trace_array *tr,
7565                               const char *option, long index)
7566 {
7567         struct dentry *t_options;
7568
7569         t_options = trace_options_init_dentry(tr);
7570         if (!t_options)
7571                 return NULL;
7572
7573         return trace_create_file(option, 0644, t_options,
7574                                  (void *)&tr->trace_flags_index[index],
7575                                  &trace_options_core_fops);
7576 }
7577
7578 static void create_trace_options_dir(struct trace_array *tr)
7579 {
7580         struct dentry *t_options;
7581         bool top_level = tr == &global_trace;
7582         int i;
7583
7584         t_options = trace_options_init_dentry(tr);
7585         if (!t_options)
7586                 return;
7587
7588         for (i = 0; trace_options[i]; i++) {
7589                 if (top_level ||
7590                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7591                         create_trace_option_core_file(tr, trace_options[i], i);
7592         }
7593 }
7594
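/*
 * Handlers for the "tracing_on" file: reading reports whether the ring
 * buffer is currently recording; writing zero stops recording (calling
 * the tracer's stop() callback) and a non-zero value restarts it.
 */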
7595 static ssize_t
7596 rb_simple_read(struct file *filp, char __user *ubuf,
7597                size_t cnt, loff_t *ppos)
7598 {
7599         struct trace_array *tr = filp->private_data;
7600         char buf[64];
7601         int r;
7602
7603         r = tracer_tracing_is_on(tr);
7604         r = sprintf(buf, "%d\n", r);
7605
7606         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7607 }
7608
7609 static ssize_t
7610 rb_simple_write(struct file *filp, const char __user *ubuf,
7611                 size_t cnt, loff_t *ppos)
7612 {
7613         struct trace_array *tr = filp->private_data;
7614         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7615         unsigned long val;
7616         int ret;
7617
7618         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7619         if (ret)
7620                 return ret;
7621
7622         if (buffer) {
7623                 mutex_lock(&trace_types_lock);
7624                 if (val) {
7625                         tracer_tracing_on(tr);
7626                         if (tr->current_trace->start)
7627                                 tr->current_trace->start(tr);
7628                 } else {
7629                         tracer_tracing_off(tr);
7630                         if (tr->current_trace->stop)
7631                                 tr->current_trace->stop(tr);
7632                 }
7633                 mutex_unlock(&trace_types_lock);
7634         }
7635
7636         (*ppos)++;
7637
7638         return cnt;
7639 }
7640
7641 static const struct file_operations rb_simple_fops = {
7642         .open           = tracing_open_generic_tr,
7643         .read           = rb_simple_read,
7644         .write          = rb_simple_write,
7645         .release        = tracing_release_generic_tr,
7646         .llseek         = default_llseek,
7647 };
7648
7649 struct dentry *trace_instance_dir;
7650
7651 static void
7652 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7653
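/*
 * Allocate the ring buffer and the per-cpu trace_array_cpu data for
 * one trace_buffer, honoring the current "overwrite" trace flag.
 */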
7654 static int
7655 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7656 {
7657         enum ring_buffer_flags rb_flags;
7658
7659         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7660
7661         buf->tr = tr;
7662
7663         buf->buffer = ring_buffer_alloc(size, rb_flags);
7664         if (!buf->buffer)
7665                 return -ENOMEM;
7666
7667         buf->data = alloc_percpu(struct trace_array_cpu);
7668         if (!buf->data) {
7669                 ring_buffer_free(buf->buffer);
7670                 buf->buffer = NULL;
7671                 return -ENOMEM;
7672         }
7673
7674         /* Allocate the first page for all buffers */
7675         set_buffer_entries(&tr->trace_buffer,
7676                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7677
7678         return 0;
7679 }
7680
7681 static int allocate_trace_buffers(struct trace_array *tr, int size)
7682 {
7683         int ret;
7684
7685         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7686         if (ret)
7687                 return ret;
7688
7689 #ifdef CONFIG_TRACER_MAX_TRACE
7690         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7691                                     allocate_snapshot ? size : 1);
7692         if (WARN_ON(ret)) {
7693                 ring_buffer_free(tr->trace_buffer.buffer);
7694                 tr->trace_buffer.buffer = NULL;
7695                 free_percpu(tr->trace_buffer.data);
7696                 tr->trace_buffer.data = NULL;
7697                 return -ENOMEM;
7698         }
7699         tr->allocated_snapshot = allocate_snapshot;
7700
7701         /*
7702          * Only the top level trace array gets its snapshot allocated
7703          * from the kernel command line.
7704          */
7705         allocate_snapshot = false;
7706 #endif
7707         return 0;
7708 }
7709
7710 static void free_trace_buffer(struct trace_buffer *buf)
7711 {
7712         if (buf->buffer) {
7713                 ring_buffer_free(buf->buffer);
7714                 buf->buffer = NULL;
7715                 free_percpu(buf->data);
7716                 buf->data = NULL;
7717         }
7718 }
7719
7720 static void free_trace_buffers(struct trace_array *tr)
7721 {
7722         if (!tr)
7723                 return;
7724
7725         free_trace_buffer(&tr->trace_buffer);
7726
7727 #ifdef CONFIG_TRACER_MAX_TRACE
7728         free_trace_buffer(&tr->max_buffer);
7729 #endif
7730 }
7731
7732 static void init_trace_flags_index(struct trace_array *tr)
7733 {
7734         int i;
7735
7736         /* Used by the trace options files */
7737         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7738                 tr->trace_flags_index[i] = i;
7739 }
7740
7741 static void __update_tracer_options(struct trace_array *tr)
7742 {
7743         struct tracer *t;
7744
7745         for (t = trace_types; t; t = t->next)
7746                 add_tracer_options(tr, t);
7747 }
7748
7749 static void update_tracer_options(struct trace_array *tr)
7750 {
7751         mutex_lock(&trace_types_lock);
7752         __update_tracer_options(tr);
7753         mutex_unlock(&trace_types_lock);
7754 }
7755
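/*
 * Back end of "mkdir" in the instances directory: allocate a new
 * trace_array, give it its own buffers, tracefs directory and event
 * files, and add it to ftrace_trace_arrays.
 */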
7756 static int instance_mkdir(const char *name)
7757 {
7758         struct trace_array *tr;
7759         int ret;
7760
7761         mutex_lock(&event_mutex);
7762         mutex_lock(&trace_types_lock);
7763
7764         ret = -EEXIST;
7765         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7766                 if (tr->name && strcmp(tr->name, name) == 0)
7767                         goto out_unlock;
7768         }
7769
7770         ret = -ENOMEM;
7771         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7772         if (!tr)
7773                 goto out_unlock;
7774
7775         tr->name = kstrdup(name, GFP_KERNEL);
7776         if (!tr->name)
7777                 goto out_free_tr;
7778
7779         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7780                 goto out_free_tr;
7781
7782         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7783
7784         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7785
7786         raw_spin_lock_init(&tr->start_lock);
7787
7788         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7789
7790         tr->current_trace = &nop_trace;
7791
7792         INIT_LIST_HEAD(&tr->systems);
7793         INIT_LIST_HEAD(&tr->events);
7794         INIT_LIST_HEAD(&tr->hist_vars);
7795
7796         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7797                 goto out_free_tr;
7798
7799         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7800         if (!tr->dir)
7801                 goto out_free_tr;
7802
7803         ret = event_trace_add_tracer(tr->dir, tr);
7804         if (ret) {
7805                 tracefs_remove_recursive(tr->dir);
7806                 goto out_free_tr;
7807         }
7808
7809         ftrace_init_trace_array(tr);
7810
7811         init_tracer_tracefs(tr, tr->dir);
7812         init_trace_flags_index(tr);
7813         __update_tracer_options(tr);
7814
7815         list_add(&tr->list, &ftrace_trace_arrays);
7816
7817         mutex_unlock(&trace_types_lock);
7818         mutex_unlock(&event_mutex);
7819
7820         return 0;
7821
7822  out_free_tr:
7823         free_trace_buffers(tr);
7824         free_cpumask_var(tr->tracing_cpumask);
7825         kfree(tr->name);
7826         kfree(tr);
7827
7828  out_unlock:
7829         mutex_unlock(&trace_types_lock);
7830         mutex_unlock(&event_mutex);
7831
7832         return ret;
7833
7834 }
7835
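/*
 * Back end of "rmdir" in the instances directory: refuse with -EBUSY
 * while the instance is still referenced, otherwise tear down its
 * events, function files, tracefs directory and buffers.
 */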
7836 static int instance_rmdir(const char *name)
7837 {
7838         struct trace_array *tr;
7839         int found = 0;
7840         int ret;
7841         int i;
7842
7843         mutex_lock(&event_mutex);
7844         mutex_lock(&trace_types_lock);
7845
7846         ret = -ENODEV;
7847         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7848                 if (tr->name && strcmp(tr->name, name) == 0) {
7849                         found = 1;
7850                         break;
7851                 }
7852         }
7853         if (!found)
7854                 goto out_unlock;
7855
7856         ret = -EBUSY;
7857         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7858                 goto out_unlock;
7859
7860         list_del(&tr->list);
7861
7862         /* Disable all the flags that were enabled coming in */
7863         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7864                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7865                         set_tracer_flag(tr, 1 << i, 0);
7866         }
7867
7868         tracing_set_nop(tr);
7869         clear_ftrace_function_probes(tr);
7870         event_trace_del_tracer(tr);
7871         ftrace_clear_pids(tr);
7872         ftrace_destroy_function_files(tr);
7873         tracefs_remove_recursive(tr->dir);
7874         free_trace_buffers(tr);
7875
7876         for (i = 0; i < tr->nr_topts; i++) {
7877                 kfree(tr->topts[i].topts);
7878         }
7879         kfree(tr->topts);
7880
7881         free_cpumask_var(tr->tracing_cpumask);
7882         kfree(tr->name);
7883         kfree(tr);
7884
7885         ret = 0;
7886
7887  out_unlock:
7888         mutex_unlock(&trace_types_lock);
7889         mutex_unlock(&event_mutex);
7890
7891         return ret;
7892 }
7893
7894 static __init void create_trace_instances(struct dentry *d_tracer)
7895 {
7896         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7897                                                          instance_mkdir,
7898                                                          instance_rmdir);
7899         if (WARN_ON(!trace_instance_dir))
7900                 return;
7901 }
7902
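/*
 * Create the standard set of tracefs control files for a trace array
 * (current_tracer, trace, trace_pipe, buffer sizes, trace markers,
 * trace_clock, tracing_on, ...) plus the per_cpu directories.
 */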
7903 static void
7904 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7905 {
7906         struct trace_event_file *file;
7907         int cpu;
7908
7909         trace_create_file("available_tracers", 0444, d_tracer,
7910                         tr, &show_traces_fops);
7911
7912         trace_create_file("current_tracer", 0644, d_tracer,
7913                         tr, &set_tracer_fops);
7914
7915         trace_create_file("tracing_cpumask", 0644, d_tracer,
7916                           tr, &tracing_cpumask_fops);
7917
7918         trace_create_file("trace_options", 0644, d_tracer,
7919                           tr, &tracing_iter_fops);
7920
7921         trace_create_file("trace", 0644, d_tracer,
7922                           tr, &tracing_fops);
7923
7924         trace_create_file("trace_pipe", 0444, d_tracer,
7925                           tr, &tracing_pipe_fops);
7926
7927         trace_create_file("buffer_size_kb", 0644, d_tracer,
7928                           tr, &tracing_entries_fops);
7929
7930         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7931                           tr, &tracing_total_entries_fops);
7932
7933         trace_create_file("free_buffer", 0200, d_tracer,
7934                           tr, &tracing_free_buffer_fops);
7935
7936         trace_create_file("trace_marker", 0220, d_tracer,
7937                           tr, &tracing_mark_fops);
7938
7939         file = __find_event_file(tr, "ftrace", "print");
7940         if (file && file->dir)
7941                 trace_create_file("trigger", 0644, file->dir, file,
7942                                   &event_trigger_fops);
7943         tr->trace_marker_file = file;
7944
7945         trace_create_file("trace_marker_raw", 0220, d_tracer,
7946                           tr, &tracing_mark_raw_fops);
7947
7948         trace_create_file("trace_clock", 0644, d_tracer, tr,
7949                           &trace_clock_fops);
7950
7951         trace_create_file("tracing_on", 0644, d_tracer,
7952                           tr, &rb_simple_fops);
7953
7954         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7955                           &trace_time_stamp_mode_fops);
7956
7957         create_trace_options_dir(tr);
7958
7959 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7960         trace_create_file("tracing_max_latency", 0644, d_tracer,
7961                         &tr->max_latency, &tracing_max_lat_fops);
7962 #endif
7963
7964         if (ftrace_create_function_files(tr, d_tracer))
7965                 WARN(1, "Could not allocate function filter files");
7966
7967 #ifdef CONFIG_TRACER_SNAPSHOT
7968         trace_create_file("snapshot", 0644, d_tracer,
7969                           tr, &snapshot_fops);
7970 #endif
7971
7972         for_each_tracing_cpu(cpu)
7973                 tracing_init_tracefs_percpu(tr, cpu);
7974
7975         ftrace_init_tracefs(tr, d_tracer);
7976 }
7977
7978 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7979 {
7980         struct vfsmount *mnt;
7981         struct file_system_type *type;
7982
7983         /*
7984          * To maintain backward compatibility for tools that mount
7985          * debugfs to get to the tracing facility, tracefs is automatically
7986          * mounted to the debugfs/tracing directory.
7987          */
7988         type = get_fs_type("tracefs");
7989         if (!type)
7990                 return NULL;
7991         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7992         put_filesystem(type);
7993         if (IS_ERR(mnt))
7994                 return NULL;
7995         mntget(mnt);
7996
7997         return mnt;
7998 }
7999
8000 /**
8001  * tracing_init_dentry - initialize top level trace array
8002  *
8003  * This is called when creating files or directories in the tracing
8004  * directory. It is called via fs_initcall() by any of the boot up code
8005  * and expects to return the dentry of the top level tracing directory.
8006  */
8007 struct dentry *tracing_init_dentry(void)
8008 {
8009         struct trace_array *tr = &global_trace;
8010
8011         /* The top level trace array uses NULL as parent */
8012         if (tr->dir)
8013                 return NULL;
8014
8015         if (WARN_ON(!tracefs_initialized()) ||
8016                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8017                  WARN_ON(!debugfs_initialized())))
8018                 return ERR_PTR(-ENODEV);
8019
8020         /*
8021          * As there may still be users that expect the tracing
8022          * files to exist in debugfs/tracing, we must automount
8023          * the tracefs file system there, so older tools still
8024          * work with the newer kernel.
8025          */
8026         tr->dir = debugfs_create_automount("tracing", NULL,
8027                                            trace_automount, NULL);
8028         if (!tr->dir) {
8029                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8030                 return ERR_PTR(-ENOMEM);
8031         }
8032
8033         return NULL;
8034 }
8035
8036 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8037 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8038
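/*
 * Register the eval maps (TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF())
 * built into the core kernel, i.e. everything the linker placed between
 * __start_ftrace_eval_maps and __stop_ftrace_eval_maps.
 */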
8039 static void __init trace_eval_init(void)
8040 {
8041         int len;
8042
8043         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8044         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8045 }
8046
8047 #ifdef CONFIG_MODULES
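/*
 * Module load: add the module's eval maps (if it has any) to the global
 * list so the enum/sizeof names used in its event formats can be resolved.
 */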
8048 static void trace_module_add_evals(struct module *mod)
8049 {
8050         if (!mod->num_trace_evals)
8051                 return;
8052
8053         /*
8054          * Modules with bad taint do not have events created; do
8055          * not bother with their eval maps either.
8056          */
8057         if (trace_module_has_bad_taint(mod))
8058                 return;
8059
8060         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8061 }
8062
8063 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
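/*
 * Module unload: walk the trace_eval_maps list, unlink the map section
 * head that belongs to this module and free it.
 */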
8064 static void trace_module_remove_evals(struct module *mod)
8065 {
8066         union trace_eval_map_item *map;
8067         union trace_eval_map_item **last = &trace_eval_maps;
8068
8069         if (!mod->num_trace_evals)
8070                 return;
8071
8072         mutex_lock(&trace_eval_mutex);
8073
8074         map = trace_eval_maps;
8075
8076         while (map) {
8077                 if (map->head.mod == mod)
8078                         break;
8079                 map = trace_eval_jmp_to_tail(map);
8080                 last = &map->tail.next;
8081                 map = map->tail.next;
8082         }
8083         if (!map)
8084                 goto out;
8085
8086         *last = trace_eval_jmp_to_tail(map)->tail.next;
8087         kfree(map);
8088  out:
8089         mutex_unlock(&trace_eval_mutex);
8090 }
8091 #else
8092 static inline void trace_module_remove_evals(struct module *mod) { }
8093 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8094
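/* Module notifier: keep the eval maps in sync as modules come and go. */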
8095 static int trace_module_notify(struct notifier_block *self,
8096                                unsigned long val, void *data)
8097 {
8098         struct module *mod = data;
8099
8100         switch (val) {
8101         case MODULE_STATE_COMING:
8102                 trace_module_add_evals(mod);
8103                 break;
8104         case MODULE_STATE_GOING:
8105                 trace_module_remove_evals(mod);
8106                 break;
8107         }
8108
8109         return 0;
8110 }
8111
8112 static struct notifier_block trace_module_nb = {
8113         .notifier_call = trace_module_notify,
8114         .priority = 0,
8115 };
8116 #endif /* CONFIG_MODULES */
8117
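/*
 * Create the top level tracefs files: the global trace array instance
 * files, README, saved_cmdlines, the eval map file, etc. Registered as
 * an fs_initcall() at the bottom of this file.
 */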
8118 static __init int tracer_init_tracefs(void)
8119 {
8120         struct dentry *d_tracer;
8121
8122         trace_access_lock_init();
8123
8124         d_tracer = tracing_init_dentry();
8125         if (IS_ERR(d_tracer))
8126                 return 0;
8127
8128         event_trace_init();
8129
8130         init_tracer_tracefs(&global_trace, d_tracer);
8131         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8132
8133         trace_create_file("tracing_thresh", 0644, d_tracer,
8134                         &global_trace, &tracing_thresh_fops);
8135
8136         trace_create_file("README", 0444, d_tracer,
8137                         NULL, &tracing_readme_fops);
8138
8139         trace_create_file("saved_cmdlines", 0444, d_tracer,
8140                         NULL, &tracing_saved_cmdlines_fops);
8141
8142         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8143                           NULL, &tracing_saved_cmdlines_size_fops);
8144
8145         trace_create_file("saved_tgids", 0444, d_tracer,
8146                         NULL, &tracing_saved_tgids_fops);
8147
8148         trace_eval_init();
8149
8150         trace_create_eval_file(d_tracer);
8151
8152 #ifdef CONFIG_MODULES
8153         register_module_notifier(&trace_module_nb);
8154 #endif
8155
8156 #ifdef CONFIG_DYNAMIC_FTRACE
8157         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8158                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8159 #endif
8160
8161         create_trace_instances(d_tracer);
8162
8163         update_tracer_options(&global_trace);
8164
8165         return 0;
8166 }
8167
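/*
 * Panic notifier: dump the ftrace ring buffer to the console if the
 * user enabled ftrace_dump_on_oops.
 */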
8168 static int trace_panic_handler(struct notifier_block *this,
8169                                unsigned long event, void *unused)
8170 {
8171         if (ftrace_dump_on_oops)
8172                 ftrace_dump(ftrace_dump_on_oops);
8173         return NOTIFY_OK;
8174 }
8175
8176 static struct notifier_block trace_panic_notifier = {
8177         .notifier_call  = trace_panic_handler,
8178         .next           = NULL,
8179         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8180 };
8181
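/* As above, but triggered from the die (oops) notifier chain rather than panic. */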
8182 static int trace_die_handler(struct notifier_block *self,
8183                              unsigned long val,
8184                              void *data)
8185 {
8186         switch (val) {
8187         case DIE_OOPS:
8188                 if (ftrace_dump_on_oops)
8189                         ftrace_dump(ftrace_dump_on_oops);
8190                 break;
8191         default:
8192                 break;
8193         }
8194         return NOTIFY_OK;
8195 }
8196
8197 static struct notifier_block trace_die_notifier = {
8198         .notifier_call = trace_die_handler,
8199         .priority = 200
8200 };
8201
8202 /*
8203  * printk is capped at a maximum of 1024 characters; we really don't
8204  * need it that big. Nothing should be printing 1000 characters anyway.
8205  */
8206 #define TRACE_MAX_PRINT         1000
8207
8208 /*
8209  * Define here KERN_TRACE so that we have one place to modify
8210  * it if we decide to change what log level the ftrace dump
8211  * should be at.
8212  */
8213 #define KERN_TRACE              KERN_EMERG
8214
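/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level
 * and reinitialize the sequence for reuse. Used by the dump path in
 * ftrace_dump() below.
 */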
8215 void
8216 trace_printk_seq(struct trace_seq *s)
8217 {
8218         /* Probably should print a warning here. */
8219         if (s->seq.len >= TRACE_MAX_PRINT)
8220                 s->seq.len = TRACE_MAX_PRINT;
8221
8222         /*
8223          * More paranoid code. Although the buffer size is set to
8224          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8225          * an extra layer of protection.
8226          */
8227         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8228                 s->seq.len = s->seq.size - 1;
8229
8230         /* The buffer should already be NUL-terminated, but we are paranoid. */
8231         s->buffer[s->seq.len] = 0;
8232
8233         printk(KERN_TRACE "%s", s->buffer);
8234
8235         trace_seq_init(s);
8236 }
8237
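/*
 * Set up a trace_iterator over the global trace buffer (all CPUs),
 * roughly what opening the "trace" file does. Used from dump contexts
 * such as ftrace_dump() below.
 */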
8238 void trace_init_global_iter(struct trace_iterator *iter)
8239 {
8240         iter->tr = &global_trace;
8241         iter->trace = iter->tr->current_trace;
8242         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8243         iter->trace_buffer = &global_trace.trace_buffer;
8244
8245         if (iter->trace && iter->trace->open)
8246                 iter->trace->open(iter);
8247
8248         /* Annotate start of buffers if we had overruns */
8249         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8250                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8251
8252         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8253         if (trace_clocks[iter->tr->clock_id].in_ns)
8254                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8255 }
8256
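/*
 * Dump the contents of the ring buffer straight to the console. This is
 * what the panic/die notifiers above and sysrq-z end up calling; tracing
 * is switched off first so repeated crashes do not pile up output.
 */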
8257 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8258 {
8259         /* use static because iter can be a bit big for the stack */
8260         static struct trace_iterator iter;
8261         static atomic_t dump_running;
8262         struct trace_array *tr = &global_trace;
8263         unsigned int old_userobj;
8264         unsigned long flags;
8265         int cnt = 0, cpu;
8266
8267         /* Only allow one dump user at a time. */
8268         if (atomic_inc_return(&dump_running) != 1) {
8269                 atomic_dec(&dump_running);
8270                 return;
8271         }
8272
8273         /*
8274          * Always turn off tracing when we dump.
8275          * We don't need to show trace output of what happens
8276          * between multiple crashes.
8277          *
8278          * If the user does a sysrq-z, then they can re-enable
8279          * tracing with echo 1 > tracing_on.
8280          */
8281         tracing_off();
8282
8283         local_irq_save(flags);
8284
8285         /* Simulate the iterator */
8286         trace_init_global_iter(&iter);
8287
8288         for_each_tracing_cpu(cpu) {
8289                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8290         }
8291
8292         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8293
8294         /* don't look at user memory in panic mode */
8295         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8296
8297         switch (oops_dump_mode) {
8298         case DUMP_ALL:
8299                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8300                 break;
8301         case DUMP_ORIG:
8302                 iter.cpu_file = raw_smp_processor_id();
8303                 break;
8304         case DUMP_NONE:
8305                 goto out_enable;
8306         default:
8307                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8308                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8309         }
8310
8311         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8312
8313         /* Did function tracer already get disabled? */
8314         if (ftrace_is_dead()) {
8315                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8316                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8317         }
8318
8319         /*
8320          * We need to stop all tracing on all CPUs to read
8321          * the next buffer. This is a bit expensive, but it is
8322          * not done often. We fill in all that we can read,
8323          * and then release the locks again.
8324          */
8325
8326         while (!trace_empty(&iter)) {
8327
8328                 if (!cnt)
8329                         printk(KERN_TRACE "---------------------------------\n");
8330
8331                 cnt++;
8332
8333                 /* reset all but tr, trace, and overruns */
8334                 memset(&iter.seq, 0,
8335                        sizeof(struct trace_iterator) -
8336                        offsetof(struct trace_iterator, seq));
8337                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8338                 iter.pos = -1;
8339
8340                 if (trace_find_next_entry_inc(&iter) != NULL) {
8341                         int ret;
8342
8343                         ret = print_trace_line(&iter);
8344                         if (ret != TRACE_TYPE_NO_CONSUME)
8345                                 trace_consume(&iter);
8346                 }
8347                 touch_nmi_watchdog();
8348
8349                 trace_printk_seq(&iter.seq);
8350         }
8351
8352         if (!cnt)
8353                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8354         else
8355                 printk(KERN_TRACE "---------------------------------\n");
8356
8357  out_enable:
8358         tr->trace_flags |= old_userobj;
8359
8360         for_each_tracing_cpu(cpu) {
8361                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8362         }
8363         atomic_dec(&dump_running);
8364         local_irq_restore(flags);
8365 }
8366 EXPORT_SYMBOL_GPL(ftrace_dump);
8367
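/*
 * Split @buf into an argv[] array and hand it to @createfn. Returns
 * -ENOMEM if the split fails, otherwise whatever @createfn returned
 * (or 0 if the line was empty).
 */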
8368 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8369 {
8370         char **argv;
8371         int argc, ret;
8372
8373         argc = 0;
8374         ret = 0;
8375         argv = argv_split(GFP_KERNEL, buf, &argc);
8376         if (!argv)
8377                 return -ENOMEM;
8378
8379         if (argc)
8380                 ret = createfn(argc, argv);
8381
8382         argv_free(argv);
8383
8384         return ret;
8385 }
8386
8387 #define WRITE_BUFSIZE  4096
8388
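/*
 * Helper for write() handlers that take newline-separated commands
 * (for example the kprobe/uprobe event files). The user buffer is
 * copied in WRITE_BUFSIZE chunks, '#' comments are stripped and each
 * line is handed to @createfn through trace_run_command(). A typical
 * userspace use would be something like:
 *
 *	echo 'p:myprobe do_sys_open' > /sys/kernel/debug/tracing/kprobe_events
 */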
8389 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8390                                 size_t count, loff_t *ppos,
8391                                 int (*createfn)(int, char **))
8392 {
8393         char *kbuf, *buf, *tmp;
8394         int ret = 0;
8395         size_t done = 0;
8396         size_t size;
8397
8398         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8399         if (!kbuf)
8400                 return -ENOMEM;
8401
8402         while (done < count) {
8403                 size = count - done;
8404
8405                 if (size >= WRITE_BUFSIZE)
8406                         size = WRITE_BUFSIZE - 1;
8407
8408                 if (copy_from_user(kbuf, buffer + done, size)) {
8409                         ret = -EFAULT;
8410                         goto out;
8411                 }
8412                 kbuf[size] = '\0';
8413                 buf = kbuf;
8414                 do {
8415                         tmp = strchr(buf, '\n');
8416                         if (tmp) {
8417                                 *tmp = '\0';
8418                                 size = tmp - buf + 1;
8419                         } else {
8420                                 size = strlen(buf);
8421                                 if (done + size < count) {
8422                                         if (buf != kbuf)
8423                                                 break;
8424                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8425                                         pr_warn("Line length is too long: Should be less than %d\n",
8426                                                 WRITE_BUFSIZE - 2);
8427                                         ret = -EINVAL;
8428                                         goto out;
8429                                 }
8430                         }
8431                         done += size;
8432
8433                         /* Remove comments */
8434                         tmp = strchr(buf, '#');
8435
8436                         if (tmp)
8437                                 *tmp = '\0';
8438
8439                         ret = trace_run_command(buf, createfn);
8440                         if (ret)
8441                                 goto out;
8442                         buf += size;
8443
8444                 } while (done < count);
8445         }
8446         ret = done;
8447
8448 out:
8449         kfree(kbuf);
8450
8451         return ret;
8452 }
8453
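/*
 * Allocate the global trace buffers and set up the minimal tracing
 * infrastructure: cpumasks, the nop tracer, the CPU hotplug state and
 * the panic/die notifiers. Called very early in boot from
 * early_trace_init() below.
 */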
8454 __init static int tracer_alloc_buffers(void)
8455 {
8456         int ring_buf_size;
8457         int ret = -ENOMEM;
8458
8459         /*
8460          * Make sure we don't accidentally add more trace options
8461          * than we have bits for.
8462          */
8463         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8464
8465         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8466                 goto out;
8467
8468         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8469                 goto out_free_buffer_mask;
8470
8471         /* Only allocate trace_printk buffers if a trace_printk exists */
8472         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8473                 /* Must be called before global_trace.buffer is allocated */
8474                 trace_printk_init_buffers();
8475
8476         /* To save memory, keep the ring buffer size at its minimum */
8477         if (ring_buffer_expanded)
8478                 ring_buf_size = trace_buf_size;
8479         else
8480                 ring_buf_size = 1;
8481
8482         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8483         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8484
8485         raw_spin_lock_init(&global_trace.start_lock);
8486
8487         /*
8488          * The prepare callback allocates some memory for the ring buffer. We
8489          * don't free the buffer if the CPU goes down. If we were to free
8490          * the buffer, then the user would lose any trace that was in the
8491          * buffer. The memory will be removed once the "instance" is removed.
8492          */
8493         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8494                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8495                                       NULL);
8496         if (ret < 0)
8497                 goto out_free_cpumask;
8498         /* Used for event triggers */
8499         ret = -ENOMEM;
8500         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8501         if (!temp_buffer)
8502                 goto out_rm_hp_state;
8503
8504         if (trace_create_savedcmd() < 0)
8505                 goto out_free_temp_buffer;
8506
8507         /* TODO: make the number of buffers hot pluggable with CPUs */
8508         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8509                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8510                 WARN_ON(1);
8511                 goto out_free_savedcmd;
8512         }
8513
8514         if (global_trace.buffer_disabled)
8515                 tracing_off();
8516
8517         if (trace_boot_clock) {
8518                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8519                 if (ret < 0)
8520                         pr_warn("Trace clock %s not defined, going back to default\n",
8521                                 trace_boot_clock);
8522         }
8523
8524         /*
8525          * register_tracer() might reference current_trace, so it
8526          * needs to be set before we register anything. This is
8527          * just a bootstrap of current_trace anyway.
8528          */
8529         global_trace.current_trace = &nop_trace;
8530
8531         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8532
8533         ftrace_init_global_array_ops(&global_trace);
8534
8535         init_trace_flags_index(&global_trace);
8536
8537         register_tracer(&nop_trace);
8538
8539         /* Function tracing may start here (via kernel command line) */
8540         init_function_trace();
8541
8542         /* All seems OK, enable tracing */
8543         tracing_disabled = 0;
8544
8545         atomic_notifier_chain_register(&panic_notifier_list,
8546                                        &trace_panic_notifier);
8547
8548         register_die_notifier(&trace_die_notifier);
8549
8550         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8551
8552         INIT_LIST_HEAD(&global_trace.systems);
8553         INIT_LIST_HEAD(&global_trace.events);
8554         INIT_LIST_HEAD(&global_trace.hist_vars);
8555         list_add(&global_trace.list, &ftrace_trace_arrays);
8556
8557         apply_trace_boot_options();
8558
8559         register_snapshot_cmd();
8560
8561         return 0;
8562
8563 out_free_savedcmd:
8564         free_saved_cmdlines_buffer(savedcmd);
8565 out_free_temp_buffer:
8566         ring_buffer_free(temp_buffer);
8567 out_rm_hp_state:
8568         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8569 out_free_cpumask:
8570         free_cpumask_var(global_trace.tracing_cpumask);
8571 out_free_buffer_mask:
8572         free_cpumask_var(tracing_buffer_mask);
8573 out:
8574         return ret;
8575 }
8576
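/*
 * Boot-time entry points: early_trace_init() runs very early from
 * start_kernel() to allocate the trace buffers (and the
 * tracepoint_print_iter if "tp_printk" was given on the command line);
 * trace_init() follows later to initialize the trace events.
 */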
8577 void __init early_trace_init(void)
8578 {
8579         if (tracepoint_printk) {
8580                 tracepoint_print_iter =
8581                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8582                 if (WARN_ON(!tracepoint_print_iter))
8583                         tracepoint_printk = 0;
8584                 else
8585                         static_key_enable(&tracepoint_printk_key.key);
8586         }
8587         tracer_alloc_buffers();
8588 }
8589
8590 void __init trace_init(void)
8591 {
8592         trace_event_init();
8593 }
8594
8595 __init static int clear_boot_tracer(void)
8596 {
8597         /*
8598          * The default bootup tracer string lives in an init section and
8599          * will be freed. This function runs via late_initcall_sync(): if
8600          * the boot tracer was never found, clear the pointer so that a
8601          * later registration does not access the buffer that is
8602          * about to be freed.
8603          */
8604         if (!default_bootup_tracer)
8605                 return 0;
8606
8607         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8608                default_bootup_tracer);
8609         default_bootup_tracer = NULL;
8610
8611         return 0;
8612 }
8613
8614 fs_initcall(tracer_init_tracefs);
8615 late_initcall_sync(clear_boot_tracer);
8616
8617 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8618 __init static int tracing_set_default_clock(void)
8619 {
8620         /* sched_clock_stable() is determined in late_initcall */
8621         if (!trace_boot_clock && !sched_clock_stable()) {
8622                 printk(KERN_WARNING
8623                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8624                        "If you want to keep using the local clock, then add:\n"
8625                        "  \"trace_clock=local\"\n"
8626                        "on the kernel command line\n");
8627                 tracing_set_clock(&global_trace, "global");
8628         }
8629
8630         return 0;
8631 }
8632 late_initcall_sync(tracing_set_default_clock);
8633 #endif