kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1, and is set back to zero only when the
97  * initialization of the tracer succeeds; nothing else ever clears
98  * it.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" points to NULL, as it must be different from
137          * "mod" or "eval_string".
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
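
/*
 * Illustrative (not from this file) kernel command line using the
 * parameters handled by the __setup() hooks above, e.g. to boot with the
 * function_graph tracer, dump only the oopsing CPU's buffer on an oops,
 * and send tracepoint output to printk:
 *
 *	ftrace=function_graph ftrace_dump_on_oops=orig_cpu tp_printk
 */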
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
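	/*
	 * The +500 rounds to the nearest microsecond:
	 * e.g. ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
	 */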
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
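/*
 * Illustrative pairing (not part of this file): code holding on to a
 * trace_array pins it with a reference for the duration of the use:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 */
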
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
472
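/*
 * Illustrative sketch (not part of this file): how the helpers above are
 * typically wired into seq_file operations by their real users (e.g. the
 * set_ftrace_pid and set_event_pid files). All "example_" names below are
 * hypothetical; the real implementations also look up the pid_list under
 * RCU and take the relevant mutex in their start/stop callbacks.
 */
#if 0
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* hypothetical */

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* hypothetical */

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,	/* defined above */
};
#endif
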
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always create a new array; the write is an all or nothing
494          * operation. When the user adds new pids, a fresh array is
495          * built, and if the operation fails, the current list is left
496          * unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list) {
500                 trace_parser_put(&parser);
501                 return -ENOMEM;
502         }
503
504         pid_list->pid_max = READ_ONCE(pid_max);
505
506         /* Only truncating will shrink pid_max */
507         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508                 pid_list->pid_max = filtered_pids->pid_max;
509
510         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511         if (!pid_list->pids) {
512                 trace_parser_put(&parser);
513                 kfree(pid_list);
514                 return -ENOMEM;
515         }
516
517         if (filtered_pids) {
518                 /* copy the current bits to the new max */
519                 for_each_set_bit(pid, filtered_pids->pids,
520                                  filtered_pids->pid_max) {
521                         set_bit(pid, pid_list->pids);
522                         nr_pids++;
523                 }
524         }
525
526         while (cnt > 0) {
527
528                 pos = 0;
529
530                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
531                 if (ret < 0 || !trace_parser_loaded(&parser))
532                         break;
533
534                 read += ret;
535                 ubuf += ret;
536                 cnt -= ret;
537
538                 ret = -EINVAL;
539                 if (kstrtoul(parser.buffer, 0, &val))
540                         break;
541                 if (val >= pid_list->pid_max)
542                         break;
543
544                 pid = (pid_t)val;
545
546                 set_bit(pid, pid_list->pids);
547                 nr_pids++;
548
549                 trace_parser_clear(&parser);
550                 ret = 0;
551         }
552         trace_parser_put(&parser);
553
554         if (ret < 0) {
555                 trace_free_pid_list(pid_list);
556                 return ret;
557         }
558
559         if (!nr_pids) {
560                 /* Cleared the list of pids */
561                 trace_free_pid_list(pid_list);
562                 read = ret;
563                 pid_list = NULL;
564         }
565
566         *new_pid_list = pid_list;
567
568         return read;
569 }
570
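/*
 * Illustrative usage (not part of this file): this helper backs the pid
 * filter files in tracefs (e.g. set_event_pid), so a filter is rebuilt
 * from user space by writing a whitespace-separated list of pids:
 *
 *	# echo 123 456 > /sys/kernel/debug/tracing/set_event_pid
 */
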
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573         u64 ts;
574
575         /* Early boot up does not have a buffer yet */
576         if (!buf->buffer)
577                 return trace_clock_local();
578
579         ts = ring_buffer_time_stamp(buf->buffer, cpu);
580         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582         return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled" to be used in fast paths such as for
595  * the irqsoff tracer. But it may be inaccurate due to races. If you
596  * need to know the accurate state, use tracing_is_on() which is a little
597  * slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601         /*
602          * For quick access (irqsoff uses this in fast path), just
603          * return the mirror variable of the state of the ring buffer.
604          * It's a little racy, but we don't really care.
605          */
606         smp_rmb();
607         return !global_trace.buffer_disabled;
608 }
609
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low 16384: if a dump on oops
616  * happens, it is much appreciated not to have to wait for all that
617  * output. In any case, the size is configurable both at boot time
618  * and at run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer            *trace_types __read_mostly;
626
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633  * serialize the access of the ring buffer
634  *
635  * The ring buffer serializes readers, but that is only low-level
636  * protection. The validity of the events (returned by
637  * ring_buffer_peek() and friends) is not protected by the ring buffer.
638  *
639  * The content of events may become garbage if we allow another process
640  * to consume these events concurrently:
641  *   A) the page of the consumed events may become a normal page
642  *      (not a reader page) in the ring buffer, and this page will be
643  *      rewritten by the event producer.
644  *   B) the page of the consumed events may become a page for splice_read,
645  *      and this page will be returned to the system.
646  *
647  * These primitives allow multiple processes to access different CPU
648  * ring buffers concurrently.
649  *
650  * These primitives don't distinguish between read-only and read-consume
651  * access. Multiple read-only accesses are also serialized.
652  */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660         if (cpu == RING_BUFFER_ALL_CPUS) {
661                 /* gain it for accessing the whole ring buffer. */
662                 down_write(&all_cpu_access_lock);
663         } else {
664                 /* gain it for accessing a cpu ring buffer. */
665
666                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667                 down_read(&all_cpu_access_lock);
668
669                 /* Secondly block other access to this @cpu ring buffer. */
670                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671         }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676         if (cpu == RING_BUFFER_ALL_CPUS) {
677                 up_write(&all_cpu_access_lock);
678         } else {
679                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680                 up_read(&all_cpu_access_lock);
681         }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686         int cpu;
687
688         for_each_possible_cpu(cpu)
689                 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698         (void)cpu;
699         mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704         (void)cpu;
705         mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
713
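/*
 * Illustrative pairing (not part of this file): consuming readers later in
 * this file (e.g. tracing_read_pipe()) bracket their ring-buffer access as:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... consume events for this cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */
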
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716                                  unsigned long flags,
717                                  int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719                                       struct ring_buffer *buffer,
720                                       unsigned long flags,
721                                       int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725                                         unsigned long flags,
726                                         int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730                                       struct ring_buffer *buffer,
731                                       unsigned long flags,
732                                       int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740                   int type, unsigned long flags, int pc)
741 {
742         struct trace_entry *ent = ring_buffer_event_data(event);
743
744         tracing_generic_entry_update(ent, flags, pc);
745         ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750                           int type,
751                           unsigned long len,
752                           unsigned long flags, int pc)
753 {
754         struct ring_buffer_event *event;
755
756         event = ring_buffer_lock_reserve(buffer, len);
757         if (event != NULL)
758                 trace_event_setup(event, type, flags, pc);
759
760         return event;
761 }
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765         if (tr->trace_buffer.buffer)
766                 ring_buffer_record_on(tr->trace_buffer.buffer);
767         /*
768          * This flag is looked at when buffers haven't been allocated
769          * yet, or by some tracers (like irqsoff), that just want to
770          * know if the ring buffer has been disabled, but it can handle
771          * races of where it gets disabled but we still do a record.
772          * As the check is in the fast path of the tracers, it is more
773          * important to be fast than accurate.
774          */
775         tr->buffer_disabled = 0;
776         /* Make the flag seen by readers */
777         smp_wmb();
778 }
779
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788         tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796         __this_cpu_write(trace_taskinfo_save, true);
797
798         /* If this is the temp buffer, we need to commit fully */
799         if (this_cpu_read(trace_buffered_event) == event) {
800                 /* Length is in event->array[0] */
801                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802                 /* Release the temp buffer */
803                 this_cpu_dec(trace_buffered_event_cnt);
804         } else
805                 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:    The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816         struct ring_buffer_event *event;
817         struct ring_buffer *buffer;
818         struct print_entry *entry;
819         unsigned long irq_flags;
820         int alloc;
821         int pc;
822
823         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824                 return 0;
825
826         pc = preempt_count();
827
828         if (unlikely(tracing_selftest_running || tracing_disabled))
829                 return 0;
830
831         alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833         local_save_flags(irq_flags);
834         buffer = global_trace.trace_buffer.buffer;
835         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836                                             irq_flags, pc);
837         if (!event)
838                 return 0;
839
840         entry = ring_buffer_event_data(event);
841         entry->ip = ip;
842
843         memcpy(&entry->buf, str, size);
844
845         /* Add a newline if necessary */
846         if (entry->buf[size - 1] != '\n') {
847                 entry->buf[size] = '\n';
848                 entry->buf[size + 1] = '\0';
849         } else
850                 entry->buf[size] = '\0';
851
852         __buffer_unlock_commit(buffer, event);
853         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855         return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:    The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866         struct ring_buffer_event *event;
867         struct ring_buffer *buffer;
868         struct bputs_entry *entry;
869         unsigned long irq_flags;
870         int size = sizeof(struct bputs_entry);
871         int pc;
872
873         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874                 return 0;
875
876         pc = preempt_count();
877
878         if (unlikely(tracing_selftest_running || tracing_disabled))
879                 return 0;
880
881         local_save_flags(irq_flags);
882         buffer = global_trace.trace_buffer.buffer;
883         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884                                             irq_flags, pc);
885         if (!event)
886                 return 0;
887
888         entry = ring_buffer_event_data(event);
889         entry->ip                       = ip;
890         entry->str                      = str;
891
892         __buffer_unlock_commit(buffer, event);
893         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895         return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898
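/*
 * Note (not part of this file): callers normally use the trace_puts()
 * macro declared next to trace_printk(), which picks __trace_bputs() for
 * built-in constant strings and falls back to __trace_puts() otherwise:
 *
 *	trace_puts("hit the interesting path\n");
 */
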
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
901 {
902         struct tracer *tracer = tr->current_trace;
903         unsigned long flags;
904
905         if (in_nmi()) {
906                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907                 internal_trace_puts("*** snapshot is being ignored        ***\n");
908                 return;
909         }
910
911         if (!tr->allocated_snapshot) {
912                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913                 internal_trace_puts("*** stopping trace here!   ***\n");
914                 tracing_off();
915                 return;
916         }
917
918         /* Note, snapshot can not be used when the tracer uses it */
919         if (tracer->use_max_tr) {
920                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922                 return;
923         }
924
925         local_irq_save(flags);
926         update_max_tr(tr, current, smp_processor_id(), cond_data);
927         local_irq_restore(flags);
928 }
929
930 void tracing_snapshot_instance(struct trace_array *tr)
931 {
932         tracing_snapshot_instance_cond(tr, NULL);
933 }
934
935 /**
936  * tracing_snapshot - take a snapshot of the current buffer.
937  *
938  * This causes a swap between the snapshot buffer and the current live
939  * tracing buffer. You can use this to take snapshots of the live
940  * trace when some condition is triggered, but continue to trace.
941  *
942  * Note, make sure to allocate the snapshot with either
943  * a tracing_snapshot_alloc(), or by doing it manually
944  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
945  *
946  * If the snapshot buffer is not allocated, it will stop tracing.
947  * Basically making a permanent snapshot.
948  */
949 void tracing_snapshot(void)
950 {
951         struct trace_array *tr = &global_trace;
952
953         tracing_snapshot_instance(tr);
954 }
955 EXPORT_SYMBOL_GPL(tracing_snapshot);
956
957 /**
958  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
959  * @tr:         The tracing instance to snapshot
960  * @cond_data:  The data to be tested conditionally, and possibly saved
961  *
962  * This is the same as tracing_snapshot() except that the snapshot is
963  * conditional - the snapshot will only happen if the
964  * cond_snapshot.update() implementation receiving the cond_data
965  * returns true, which means that the trace array's cond_snapshot
966  * update() operation used the cond_data to determine whether the
967  * snapshot should be taken, and if it was, presumably saved it along
968  * with the snapshot.
969  */
970 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
971 {
972         tracing_snapshot_instance_cond(tr, cond_data);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
975
976 /**
977  * tracing_snapshot_cond_data - get the user data associated with a snapshot
978  * @tr:         The tracing instance
979  *
980  * When the user enables a conditional snapshot using
981  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
982  * with the snapshot.  This accessor is used to retrieve it.
983  *
984  * Should not be called from cond_snapshot.update(), since it takes
985  * the tr->max_lock lock, which the code calling
986  * cond_snapshot.update() has already done.
987  *
988  * Returns the cond_data associated with the trace array's snapshot.
989  */
990 void *tracing_cond_snapshot_data(struct trace_array *tr)
991 {
992         void *cond_data = NULL;
993
994         arch_spin_lock(&tr->max_lock);
995
996         if (tr->cond_snapshot)
997                 cond_data = tr->cond_snapshot->cond_data;
998
999         arch_spin_unlock(&tr->max_lock);
1000
1001         return cond_data;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1004
1005 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1006                                         struct trace_buffer *size_buf, int cpu_id);
1007 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1008
1009 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1010 {
1011         int ret;
1012
1013         if (!tr->allocated_snapshot) {
1014
1015                 /* allocate spare buffer */
1016                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1017                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1018                 if (ret < 0)
1019                         return ret;
1020
1021                 tr->allocated_snapshot = true;
1022         }
1023
1024         return 0;
1025 }
1026
1027 static void free_snapshot(struct trace_array *tr)
1028 {
1029         /*
1030          * We don't free the ring buffer; instead, we resize it because
1031          * the max_tr ring buffer has some state (e.g. ring->clock) and
1032          * we want to preserve it.
1033          */
1034         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1035         set_buffer_entries(&tr->max_buffer, 1);
1036         tracing_reset_online_cpus(&tr->max_buffer);
1037         tr->allocated_snapshot = false;
1038 }
1039
1040 /**
1041  * tracing_alloc_snapshot - allocate snapshot buffer.
1042  *
1043  * This only allocates the snapshot buffer if it isn't already
1044  * allocated - it doesn't also take a snapshot.
1045  *
1046  * This is meant to be used in cases where the snapshot buffer needs
1047  * to be set up for events that can't sleep but need to be able to
1048  * trigger a snapshot.
1049  */
1050 int tracing_alloc_snapshot(void)
1051 {
1052         struct trace_array *tr = &global_trace;
1053         int ret;
1054
1055         ret = tracing_alloc_snapshot_instance(tr);
1056         WARN_ON(ret < 0);
1057
1058         return ret;
1059 }
1060 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1061
1062 /**
1063  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1064  *
1065  * This is similar to tracing_snapshot(), but it will allocate the
1066  * snapshot buffer if it isn't already allocated. Use this only
1067  * where it is safe to sleep, as the allocation may sleep.
1068  *
1069  * This causes a swap between the snapshot buffer and the current live
1070  * tracing buffer. You can use this to take snapshots of the live
1071  * trace when some condition is triggered, but continue to trace.
1072  */
1073 void tracing_snapshot_alloc(void)
1074 {
1075         int ret;
1076
1077         ret = tracing_alloc_snapshot();
1078         if (ret < 0)
1079                 return;
1080
1081         tracing_snapshot();
1082 }
1083 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1084
1085 /**
1086  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1087  * @tr:         The tracing instance
1088  * @cond_data:  User data to associate with the snapshot
1089  * @update:     Implementation of the cond_snapshot update function
1090  *
1091  * Check whether the conditional snapshot for the given instance has
1092  * already been enabled, or if the current tracer is already using a
1093  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1094  * save the cond_data and update function inside.
1095  *
1096  * Returns 0 if successful, error otherwise.
1097  */
1098 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1099                                  cond_update_fn_t update)
1100 {
1101         struct cond_snapshot *cond_snapshot;
1102         int ret = 0;
1103
1104         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1105         if (!cond_snapshot)
1106                 return -ENOMEM;
1107
1108         cond_snapshot->cond_data = cond_data;
1109         cond_snapshot->update = update;
1110
1111         mutex_lock(&trace_types_lock);
1112
1113         ret = tracing_alloc_snapshot_instance(tr);
1114         if (ret)
1115                 goto fail_unlock;
1116
1117         if (tr->current_trace->use_max_tr) {
1118                 ret = -EBUSY;
1119                 goto fail_unlock;
1120         }
1121
1122         /*
1123          * The cond_snapshot can only change to NULL without the
1124          * trace_types_lock. We don't care if we race with it going
1125          * to NULL, but we want to make sure that it's not set to
1126          * something other than NULL when we get here, which we can
1127          * do safely with only holding the trace_types_lock and not
1128          * having to take the max_lock.
1129          */
1130         if (tr->cond_snapshot) {
1131                 ret = -EBUSY;
1132                 goto fail_unlock;
1133         }
1134
1135         arch_spin_lock(&tr->max_lock);
1136         tr->cond_snapshot = cond_snapshot;
1137         arch_spin_unlock(&tr->max_lock);
1138
1139         mutex_unlock(&trace_types_lock);
1140
1141         return ret;
1142
1143  fail_unlock:
1144         mutex_unlock(&trace_types_lock);
1145         kfree(cond_snapshot);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
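
/*
 * Illustrative sketch (not part of this file): a hypothetical user of the
 * conditional snapshot API. The update callback is handed the cond_data
 * passed to tracing_snapshot_cond() and returns true if the buffer swap
 * should actually be performed. All "example_" names are made up.
 */
#if 0
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *latency = cond_data;	/* hypothetical payload */

	/* Only take the snapshot for latencies above an arbitrary bound. */
	return latency && *latency > 1000;
}

static int example_enable_cond_snapshot(struct trace_array *tr)
{
	/* No data is associated at enable time in this sketch. */
	return tracing_snapshot_cond_enable(tr, NULL, example_cond_update);
}
#endif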
1149
1150 /**
1151  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1152  * @tr:         The tracing instance
1153  *
1154  * Check whether the conditional snapshot for the given instance is
1155  * enabled; if so, free the cond_snapshot associated with it,
1156  * otherwise return -EINVAL.
1157  *
1158  * Returns 0 if successful, error otherwise.
1159  */
1160 int tracing_snapshot_cond_disable(struct trace_array *tr)
1161 {
1162         int ret = 0;
1163
1164         arch_spin_lock(&tr->max_lock);
1165
1166         if (!tr->cond_snapshot)
1167                 ret = -EINVAL;
1168         else {
1169                 kfree(tr->cond_snapshot);
1170                 tr->cond_snapshot = NULL;
1171         }
1172
1173         arch_spin_unlock(&tr->max_lock);
1174
1175         return ret;
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1178 #else
1179 void tracing_snapshot(void)
1180 {
1181         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot);
1184 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1185 {
1186         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1187 }
1188 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1189 int tracing_alloc_snapshot(void)
1190 {
1191         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1192         return -ENODEV;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1195 void tracing_snapshot_alloc(void)
1196 {
1197         /* Give warning */
1198         tracing_snapshot();
1199 }
1200 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1201 void *tracing_cond_snapshot_data(struct trace_array *tr)
1202 {
1203         return NULL;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1207 {
1208         return -ENODEV;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         return false;
1214 }
1215 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1216 #endif /* CONFIG_TRACER_SNAPSHOT */
1217
1218 void tracer_tracing_off(struct trace_array *tr)
1219 {
1220         if (tr->trace_buffer.buffer)
1221                 ring_buffer_record_off(tr->trace_buffer.buffer);
1222         /*
1223          * This flag is looked at when buffers haven't been allocated
1224          * yet, or by some tracers (like irqsoff), that just want to
1225          * know if the ring buffer has been disabled, but it can handle
1226          * races of where it gets disabled but we still do a record.
1227          * As the check is in the fast path of the tracers, it is more
1228          * important to be fast than accurate.
1229          */
1230         tr->buffer_disabled = 1;
1231         /* Make the flag seen by readers */
1232         smp_wmb();
1233 }
1234
1235 /**
1236  * tracing_off - turn off tracing buffers
1237  *
1238  * This function stops the tracing buffers from recording data.
1239  * It does not disable any overhead the tracers themselves may
1240  * be causing. This function simply causes all recording to
1241  * the ring buffers to fail.
1242  */
1243 void tracing_off(void)
1244 {
1245         tracer_tracing_off(&global_trace);
1246 }
1247 EXPORT_SYMBOL_GPL(tracing_off);
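
/*
 * Illustrative usage (not part of this file): a debugging site elsewhere
 * in the kernel can freeze the trace the moment a problem is detected,
 * and the captured buffer can then be read from tracefs:
 *
 *	if (unlikely(something_went_wrong))	/* hypothetical condition */
 *		tracing_off();
 */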
1248
1249 void disable_trace_on_warning(void)
1250 {
1251         if (__disable_trace_on_warning)
1252                 tracing_off();
1253 }
1254
1255 /**
1256  * tracer_tracing_is_on - show real state of ring buffer enabled
1257  * @tr : the trace array to know if ring buffer is enabled
1258  *
1259  * Shows real state of the ring buffer if it is enabled or not.
1260  */
1261 bool tracer_tracing_is_on(struct trace_array *tr)
1262 {
1263         if (tr->trace_buffer.buffer)
1264                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1265         return !tr->buffer_disabled;
1266 }
1267
1268 /**
1269  * tracing_is_on - show state of ring buffers enabled
1270  */
1271 int tracing_is_on(void)
1272 {
1273         return tracer_tracing_is_on(&global_trace);
1274 }
1275 EXPORT_SYMBOL_GPL(tracing_is_on);
1276
1277 static int __init set_buf_size(char *str)
1278 {
1279         unsigned long buf_size;
1280
1281         if (!str)
1282                 return 0;
1283         buf_size = memparse(str, &str);
1284         /* nr_entries can not be zero */
1285         if (buf_size == 0)
1286                 return 0;
1287         trace_buf_size = buf_size;
1288         return 1;
1289 }
1290 __setup("trace_buf_size=", set_buf_size);
1291
1292 static int __init set_tracing_thresh(char *str)
1293 {
1294         unsigned long threshold;
1295         int ret;
1296
1297         if (!str)
1298                 return 0;
1299         ret = kstrtoul(str, 0, &threshold);
1300         if (ret < 0)
1301                 return 0;
1302         tracing_thresh = threshold * 1000;
1303         return 1;
1304 }
1305 __setup("tracing_thresh=", set_tracing_thresh);
1306
1307 unsigned long nsecs_to_usecs(unsigned long nsecs)
1308 {
1309         return nsecs / 1000;
1310 }
1311
1312 /*
1313  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1314  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1315  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1316  * of strings in the order that the evals (enum) were defined.
1317  */
1318 #undef C
1319 #define C(a, b) b
1320
1321 /* These must match the bit positions in trace_iterator_flags */
1322 static const char *trace_options[] = {
1323         TRACE_FLAGS
1324         NULL
1325 };
1326
1327 static struct {
1328         u64 (*func)(void);
1329         const char *name;
1330         int in_ns;              /* is this clock in nanoseconds? */
1331 } trace_clocks[] = {
1332         { trace_clock_local,            "local",        1 },
1333         { trace_clock_global,           "global",       1 },
1334         { trace_clock_counter,          "counter",      0 },
1335         { trace_clock_jiffies,          "uptime",       0 },
1336         { trace_clock,                  "perf",         1 },
1337         { ktime_get_mono_fast_ns,       "mono",         1 },
1338         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1339         { ktime_get_boot_fast_ns,       "boot",         1 },
1340         ARCH_TRACE_CLOCKS
1341 };
1342
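/*
 * Illustrative usage (not part of this file): the clock names above are
 * what the tracefs "trace_clock" file and the trace_clock= boot parameter
 * (parsed earlier in this file) accept, e.g.:
 *
 *	# echo mono > /sys/kernel/debug/tracing/trace_clock
 */
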
1343 bool trace_clock_in_ns(struct trace_array *tr)
1344 {
1345         if (trace_clocks[tr->clock_id].in_ns)
1346                 return true;
1347
1348         return false;
1349 }
1350
1351 /*
1352  * trace_parser_get_init - gets the buffer for trace parser
1353  */
1354 int trace_parser_get_init(struct trace_parser *parser, int size)
1355 {
1356         memset(parser, 0, sizeof(*parser));
1357
1358         parser->buffer = kmalloc(size, GFP_KERNEL);
1359         if (!parser->buffer)
1360                 return 1;
1361
1362         parser->size = size;
1363         return 0;
1364 }
1365
1366 /*
1367  * trace_parser_put - frees the buffer for trace parser
1368  */
1369 void trace_parser_put(struct trace_parser *parser)
1370 {
1371         kfree(parser->buffer);
1372         parser->buffer = NULL;
1373 }
1374
1375 /*
1376  * trace_get_user - reads the user input string separated by space
1377  * (matched by isspace(ch))
1378  *
1379  * For each string found the 'struct trace_parser' is updated,
1380  * and the function returns.
1381  *
1382  * Returns number of bytes read.
1383  *
1384  * See kernel/trace/trace.h for 'struct trace_parser' details.
1385  */
1386 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1387         size_t cnt, loff_t *ppos)
1388 {
1389         char ch;
1390         size_t read = 0;
1391         ssize_t ret;
1392
1393         if (!*ppos)
1394                 trace_parser_clear(parser);
1395
1396         ret = get_user(ch, ubuf++);
1397         if (ret)
1398                 goto out;
1399
1400         read++;
1401         cnt--;
1402
1403         /*
1404          * The parser is not finished with the last write,
1405          * continue reading the user input without skipping spaces.
1406          */
1407         if (!parser->cont) {
1408                 /* skip white space */
1409                 while (cnt && isspace(ch)) {
1410                         ret = get_user(ch, ubuf++);
1411                         if (ret)
1412                                 goto out;
1413                         read++;
1414                         cnt--;
1415                 }
1416
1417                 parser->idx = 0;
1418
1419                 /* only spaces were written */
1420                 if (isspace(ch) || !ch) {
1421                         *ppos += read;
1422                         ret = read;
1423                         goto out;
1424                 }
1425         }
1426
1427         /* read the non-space input */
1428         while (cnt && !isspace(ch) && ch) {
1429                 if (parser->idx < parser->size - 1)
1430                         parser->buffer[parser->idx++] = ch;
1431                 else {
1432                         ret = -EINVAL;
1433                         goto out;
1434                 }
1435                 ret = get_user(ch, ubuf++);
1436                 if (ret)
1437                         goto out;
1438                 read++;
1439                 cnt--;
1440         }
1441
1442         /* We either got finished input or we have to wait for another call. */
1443         if (isspace(ch) || !ch) {
1444                 parser->buffer[parser->idx] = 0;
1445                 parser->cont = false;
1446         } else if (parser->idx < parser->size - 1) {
1447                 parser->cont = true;
1448                 parser->buffer[parser->idx++] = ch;
1449                 /* Make sure the parsed string always terminates with '\0'. */
1450                 parser->buffer[parser->idx] = 0;
1451         } else {
1452                 ret = -EINVAL;
1453                 goto out;
1454         }
1455
1456         *ppos += read;
1457         ret = read;
1458
1459 out:
1460         return ret;
1461 }
1462
1463 /* TODO add a seq_buf_to_buffer() */
1464 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1465 {
1466         int len;
1467
1468         if (trace_seq_used(s) <= s->seq.readpos)
1469                 return -EBUSY;
1470
1471         len = trace_seq_used(s) - s->seq.readpos;
1472         if (cnt > len)
1473                 cnt = len;
1474         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1475
1476         s->seq.readpos += cnt;
1477         return cnt;
1478 }
1479
1480 unsigned long __read_mostly     tracing_thresh;
1481
1482 #ifdef CONFIG_TRACER_MAX_TRACE
1483 /*
1484  * Copy the new maximum trace into the separate maximum-trace
1485  * structure. (this way the maximum trace is permanently saved,
1486  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1487  */
1488 static void
1489 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1490 {
1491         struct trace_buffer *trace_buf = &tr->trace_buffer;
1492         struct trace_buffer *max_buf = &tr->max_buffer;
1493         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1494         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1495
1496         max_buf->cpu = cpu;
1497         max_buf->time_start = data->preempt_timestamp;
1498
1499         max_data->saved_latency = tr->max_latency;
1500         max_data->critical_start = data->critical_start;
1501         max_data->critical_end = data->critical_end;
1502
1503         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1504         max_data->pid = tsk->pid;
1505         /*
1506          * If tsk == current, then use current_uid(), as that does not use
1507          * RCU. The irq tracer can be called out of RCU scope.
1508          */
1509         if (tsk == current)
1510                 max_data->uid = current_uid();
1511         else
1512                 max_data->uid = task_uid(tsk);
1513
1514         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1515         max_data->policy = tsk->policy;
1516         max_data->rt_priority = tsk->rt_priority;
1517
1518         /* record this tasks comm */
1519         tracing_record_cmdline(tsk);
1520 }
1521
1522 /**
1523  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1524  * @tr: tracer
1525  * @tsk: the task with the latency
1526  * @cpu: The cpu that initiated the trace.
1527  * @cond_data: User data associated with a conditional snapshot
1528  *
1529  * Flip the buffers between the @tr and the max_tr and record information
1530  * about which task was the cause of this latency.
1531  */
1532 void
1533 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1534               void *cond_data)
1535 {
1536         if (tr->stop_count)
1537                 return;
1538
1539         WARN_ON_ONCE(!irqs_disabled());
1540
1541         if (!tr->allocated_snapshot) {
1542                 /* Only the nop tracer should hit this when disabling */
1543                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1544                 return;
1545         }
1546
1547         arch_spin_lock(&tr->max_lock);
1548
1549         /* Inherit the recordable setting from trace_buffer */
1550         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1551                 ring_buffer_record_on(tr->max_buffer.buffer);
1552         else
1553                 ring_buffer_record_off(tr->max_buffer.buffer);
1554
1555 #ifdef CONFIG_TRACER_SNAPSHOT
1556         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1557                 goto out_unlock;
1558 #endif
1559         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1560
1561         __update_max_tr(tr, tsk, cpu);
1562
1563  out_unlock:
1564         arch_spin_unlock(&tr->max_lock);
1565 }
1566
1567 /**
1568  * update_max_tr_single - only copy one trace over, and reset the rest
1569  * @tr: tracer
1570  * @tsk: task with the latency
1571  * @cpu: the cpu of the buffer to copy.
1572  *
1573  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1574  */
1575 void
1576 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1577 {
1578         int ret;
1579
1580         if (tr->stop_count)
1581                 return;
1582
1583         WARN_ON_ONCE(!irqs_disabled());
1584         if (!tr->allocated_snapshot) {
1585                 /* Only the nop tracer should hit this when disabling */
1586                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1587                 return;
1588         }
1589
1590         arch_spin_lock(&tr->max_lock);
1591
1592         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1593
1594         if (ret == -EBUSY) {
1595                 /*
1596                  * We failed to swap the buffer due to a commit taking
1597                  * place on this CPU. We fail to record, but we reset
1598                  * the max trace buffer (no one writes directly to it)
1599                  * and flag that it failed.
1600                  */
1601                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1602                         "Failed to swap buffers due to commit in progress\n");
1603         }
1604
1605         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1606
1607         __update_max_tr(tr, tsk, cpu);
1608         arch_spin_unlock(&tr->max_lock);
1609 }
1610 #endif /* CONFIG_TRACER_MAX_TRACE */
1611
1612 static int wait_on_pipe(struct trace_iterator *iter, int full)
1613 {
1614         /* Iterators are static, they should be filled or empty */
1615         if (trace_buffer_iter(iter, iter->cpu_file))
1616                 return 0;
1617
1618         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1619                                 full);
1620 }
1621
1622 #ifdef CONFIG_FTRACE_STARTUP_TEST
1623 static bool selftests_can_run;
1624
1625 struct trace_selftests {
1626         struct list_head                list;
1627         struct tracer                   *type;
1628 };
1629
1630 static LIST_HEAD(postponed_selftests);
1631
1632 static int save_selftest(struct tracer *type)
1633 {
1634         struct trace_selftests *selftest;
1635
1636         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1637         if (!selftest)
1638                 return -ENOMEM;
1639
1640         selftest->type = type;
1641         list_add(&selftest->list, &postponed_selftests);
1642         return 0;
1643 }
1644
1645 static int run_tracer_selftest(struct tracer *type)
1646 {
1647         struct trace_array *tr = &global_trace;
1648         struct tracer *saved_tracer = tr->current_trace;
1649         int ret;
1650
1651         if (!type->selftest || tracing_selftest_disabled)
1652                 return 0;
1653
1654         /*
1655          * If a tracer registers early in boot up (before scheduling is
1656          * initialized and such), then do not run its selftests yet.
1657          * Instead, run it a little later in the boot process.
1658          */
1659         if (!selftests_can_run)
1660                 return save_selftest(type);
1661
1662         /*
1663          * Run a selftest on this tracer.
1664          * Here we reset the trace buffer, and set the current
1665          * tracer to be this tracer. The tracer can then run some
1666          * internal tracing to verify that everything is in order.
1667          * If we fail, we do not register this tracer.
1668          */
1669         tracing_reset_online_cpus(&tr->trace_buffer);
1670
1671         tr->current_trace = type;
1672
1673 #ifdef CONFIG_TRACER_MAX_TRACE
1674         if (type->use_max_tr) {
1675                 /* If we expanded the buffers, make sure the max is expanded too */
1676                 if (ring_buffer_expanded)
1677                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1678                                            RING_BUFFER_ALL_CPUS);
1679                 tr->allocated_snapshot = true;
1680         }
1681 #endif
1682
1683         /* the test is responsible for initializing and enabling */
1684         pr_info("Testing tracer %s: ", type->name);
1685         ret = type->selftest(type, tr);
1686         /* the test is responsible for resetting too */
1687         tr->current_trace = saved_tracer;
1688         if (ret) {
1689                 printk(KERN_CONT "FAILED!\n");
1690                 /* Add the warning after printing 'FAILED' */
1691                 WARN_ON(1);
1692                 return -1;
1693         }
1694         /* Only reset on passing, to avoid touching corrupted buffers */
1695         tracing_reset_online_cpus(&tr->trace_buffer);
1696
1697 #ifdef CONFIG_TRACER_MAX_TRACE
1698         if (type->use_max_tr) {
1699                 tr->allocated_snapshot = false;
1700
1701                 /* Shrink the max buffer again */
1702                 if (ring_buffer_expanded)
1703                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1704                                            RING_BUFFER_ALL_CPUS);
1705         }
1706 #endif
1707
1708         printk(KERN_CONT "PASSED\n");
1709         return 0;
1710 }
1711
1712 static __init int init_trace_selftests(void)
1713 {
1714         struct trace_selftests *p, *n;
1715         struct tracer *t, **last;
1716         int ret;
1717
1718         selftests_can_run = true;
1719
1720         mutex_lock(&trace_types_lock);
1721
1722         if (list_empty(&postponed_selftests))
1723                 goto out;
1724
1725         pr_info("Running postponed tracer tests:\n");
1726
1727         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1728                 ret = run_tracer_selftest(p->type);
1729                 /* If the test fails, then warn and remove from available_tracers */
1730                 if (ret < 0) {
1731                         WARN(1, "tracer: %s failed selftest, disabling\n",
1732                              p->type->name);
1733                         last = &trace_types;
1734                         for (t = trace_types; t; t = t->next) {
1735                                 if (t == p->type) {
1736                                         *last = t->next;
1737                                         break;
1738                                 }
1739                                 last = &t->next;
1740                         }
1741                 }
1742                 list_del(&p->list);
1743                 kfree(p);
1744         }
1745
1746  out:
1747         mutex_unlock(&trace_types_lock);
1748
1749         return 0;
1750 }
1751 core_initcall(init_trace_selftests);
1752 #else
1753 static inline int run_tracer_selftest(struct tracer *type)
1754 {
1755         return 0;
1756 }
1757 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1758
1759 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1760
1761 static void __init apply_trace_boot_options(void);
1762
1763 /**
1764  * register_tracer - register a tracer with the ftrace system.
1765  * @type: the plugin for the tracer
1766  *
1767  * Register a new plugin tracer.
1768  */
1769 int __init register_tracer(struct tracer *type)
1770 {
1771         struct tracer *t;
1772         int ret = 0;
1773
1774         if (!type->name) {
1775                 pr_info("Tracer must have a name\n");
1776                 return -1;
1777         }
1778
1779         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1780                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1781                 return -1;
1782         }
1783
1784         mutex_lock(&trace_types_lock);
1785
1786         tracing_selftest_running = true;
1787
1788         for (t = trace_types; t; t = t->next) {
1789                 if (strcmp(type->name, t->name) == 0) {
1790                         /* already found */
1791                         pr_info("Tracer %s already registered\n",
1792                                 type->name);
1793                         ret = -1;
1794                         goto out;
1795                 }
1796         }
1797
1798         if (!type->set_flag)
1799                 type->set_flag = &dummy_set_flag;
1800         if (!type->flags) {
1801                 /* Allocate a dummy tracer_flags */
1802                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1803                 if (!type->flags) {
1804                         ret = -ENOMEM;
1805                         goto out;
1806                 }
1807                 type->flags->val = 0;
1808                 type->flags->opts = dummy_tracer_opt;
1809         } else
1810                 if (!type->flags->opts)
1811                         type->flags->opts = dummy_tracer_opt;
1812
1813         /* store the tracer for __set_tracer_option */
1814         type->flags->trace = type;
1815
1816         ret = run_tracer_selftest(type);
1817         if (ret < 0)
1818                 goto out;
1819
1820         type->next = trace_types;
1821         trace_types = type;
1822         add_tracer_options(&global_trace, type);
1823
1824  out:
1825         tracing_selftest_running = false;
1826         mutex_unlock(&trace_types_lock);
1827
1828         if (ret || !default_bootup_tracer)
1829                 goto out_unlock;
1830
1831         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1832                 goto out_unlock;
1833
1834         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1835         /* Do we want this tracer to start on bootup? */
1836         tracing_set_tracer(&global_trace, type->name);
1837         default_bootup_tracer = NULL;
1838
1839         apply_trace_boot_options();
1840
1841         /* Disable other selftests; running them now would break this tracer. */
1842         tracing_selftest_disabled = true;
1843 #ifdef CONFIG_FTRACE_STARTUP_TEST
1844         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1845                type->name);
1846 #endif
1847
1848  out_unlock:
1849         return ret;
1850 }
1851
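/*
 * Example of what a registration could look like for an in-tree tracer
 * (illustrative sketch only; the "example" name and the no-op callbacks
 * below are hypothetical):
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static int __init example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */
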
1852 void tracing_reset(struct trace_buffer *buf, int cpu)
1853 {
1854         struct ring_buffer *buffer = buf->buffer;
1855
1856         if (!buffer)
1857                 return;
1858
1859         ring_buffer_record_disable(buffer);
1860
1861         /* Make sure all commits have finished */
1862         synchronize_rcu();
1863         ring_buffer_reset_cpu(buffer, cpu);
1864
1865         ring_buffer_record_enable(buffer);
1866 }
1867
1868 void tracing_reset_online_cpus(struct trace_buffer *buf)
1869 {
1870         struct ring_buffer *buffer = buf->buffer;
1871         int cpu;
1872
1873         if (!buffer)
1874                 return;
1875
1876         ring_buffer_record_disable(buffer);
1877
1878         /* Make sure all commits have finished */
1879         synchronize_rcu();
1880
1881         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1882
1883         for_each_online_cpu(cpu)
1884                 ring_buffer_reset_cpu(buffer, cpu);
1885
1886         ring_buffer_record_enable(buffer);
1887 }
1888
1889 /* Must have trace_types_lock held */
1890 void tracing_reset_all_online_cpus(void)
1891 {
1892         struct trace_array *tr;
1893
1894         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1895                 if (!tr->clear_trace)
1896                         continue;
1897                 tr->clear_trace = false;
1898                 tracing_reset_online_cpus(&tr->trace_buffer);
1899 #ifdef CONFIG_TRACER_MAX_TRACE
1900                 tracing_reset_online_cpus(&tr->max_buffer);
1901 #endif
1902         }
1903 }
1904
1905 static int *tgid_map;
1906
1907 #define SAVED_CMDLINES_DEFAULT 128
1908 #define NO_CMDLINE_MAP UINT_MAX
1909 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1910 struct saved_cmdlines_buffer {
1911         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1912         unsigned *map_cmdline_to_pid;
1913         unsigned cmdline_num;
1914         int cmdline_idx;
1915         char *saved_cmdlines;
1916 };
1917 static struct saved_cmdlines_buffer *savedcmd;
1918
1919 /* Temporarily disable recording */
1920 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1921
1922 static inline char *get_saved_cmdlines(int idx)
1923 {
1924         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1925 }
1926
1927 static inline void set_cmdline(int idx, const char *cmdline)
1928 {
1929         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1930 }
1931
1932 static int allocate_cmdlines_buffer(unsigned int val,
1933                                     struct saved_cmdlines_buffer *s)
1934 {
1935         s->map_cmdline_to_pid = kmalloc_array(val,
1936                                               sizeof(*s->map_cmdline_to_pid),
1937                                               GFP_KERNEL);
1938         if (!s->map_cmdline_to_pid)
1939                 return -ENOMEM;
1940
1941         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1942         if (!s->saved_cmdlines) {
1943                 kfree(s->map_cmdline_to_pid);
1944                 return -ENOMEM;
1945         }
1946
1947         s->cmdline_idx = 0;
1948         s->cmdline_num = val;
1949         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1950                sizeof(s->map_pid_to_cmdline));
1951         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1952                val * sizeof(*s->map_cmdline_to_pid));
1953
1954         return 0;
1955 }
1956
1957 static int trace_create_savedcmd(void)
1958 {
1959         int ret;
1960
1961         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1962         if (!savedcmd)
1963                 return -ENOMEM;
1964
1965         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1966         if (ret < 0) {
1967                 kfree(savedcmd);
1968                 savedcmd = NULL;
1969                 return -ENOMEM;
1970         }
1971
1972         return 0;
1973 }
1974
1975 int is_tracing_stopped(void)
1976 {
1977         return global_trace.stop_count;
1978 }
1979
1980 /**
1981  * tracing_start - quick start of the tracer
1982  *
1983  * If tracing is enabled but was stopped by tracing_stop,
1984  * this will start the tracer back up.
1985  */
1986 void tracing_start(void)
1987 {
1988         struct ring_buffer *buffer;
1989         unsigned long flags;
1990
1991         if (tracing_disabled)
1992                 return;
1993
1994         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1995         if (--global_trace.stop_count) {
1996                 if (global_trace.stop_count < 0) {
1997                         /* Someone screwed up their debugging */
1998                         WARN_ON_ONCE(1);
1999                         global_trace.stop_count = 0;
2000                 }
2001                 goto out;
2002         }
2003
2004         /* Prevent the buffers from switching */
2005         arch_spin_lock(&global_trace.max_lock);
2006
2007         buffer = global_trace.trace_buffer.buffer;
2008         if (buffer)
2009                 ring_buffer_record_enable(buffer);
2010
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012         buffer = global_trace.max_buffer.buffer;
2013         if (buffer)
2014                 ring_buffer_record_enable(buffer);
2015 #endif
2016
2017         arch_spin_unlock(&global_trace.max_lock);
2018
2019  out:
2020         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2021 }
2022
2023 static void tracing_start_tr(struct trace_array *tr)
2024 {
2025         struct ring_buffer *buffer;
2026         unsigned long flags;
2027
2028         if (tracing_disabled)
2029                 return;
2030
2031         /* If global, we need to also start the max tracer */
2032         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2033                 return tracing_start();
2034
2035         raw_spin_lock_irqsave(&tr->start_lock, flags);
2036
2037         if (--tr->stop_count) {
2038                 if (tr->stop_count < 0) {
2039                         /* Someone screwed up their debugging */
2040                         WARN_ON_ONCE(1);
2041                         tr->stop_count = 0;
2042                 }
2043                 goto out;
2044         }
2045
2046         buffer = tr->trace_buffer.buffer;
2047         if (buffer)
2048                 ring_buffer_record_enable(buffer);
2049
2050  out:
2051         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2052 }
2053
2054 /**
2055  * tracing_stop - quick stop of the tracer
2056  *
2057  * Lightweight way to stop tracing. Use in conjunction with
2058  * tracing_start.
2059  */
2060 void tracing_stop(void)
2061 {
2062         struct ring_buffer *buffer;
2063         unsigned long flags;
2064
2065         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2066         if (global_trace.stop_count++)
2067                 goto out;
2068
2069         /* Prevent the buffers from switching */
2070         arch_spin_lock(&global_trace.max_lock);
2071
2072         buffer = global_trace.trace_buffer.buffer;
2073         if (buffer)
2074                 ring_buffer_record_disable(buffer);
2075
2076 #ifdef CONFIG_TRACER_MAX_TRACE
2077         buffer = global_trace.max_buffer.buffer;
2078         if (buffer)
2079                 ring_buffer_record_disable(buffer);
2080 #endif
2081
2082         arch_spin_unlock(&global_trace.max_lock);
2083
2084  out:
2085         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2086 }
2087
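/*
 * tracing_stop() and tracing_start() are meant to be used as a pair
 * around a region whose events should not be recorded, e.g. (sketch
 * only, do_something_noisy() is a made-up placeholder):
 *
 *	tracing_stop();
 *	do_something_noisy();
 *	tracing_start();
 *
 * The stop_count handling above makes nested stop/start pairs safe:
 * only the outermost tracing_start() re-enables the buffers.
 */
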
2088 static void tracing_stop_tr(struct trace_array *tr)
2089 {
2090         struct ring_buffer *buffer;
2091         unsigned long flags;
2092
2093         /* If global, we need to also stop the max tracer */
2094         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2095                 return tracing_stop();
2096
2097         raw_spin_lock_irqsave(&tr->start_lock, flags);
2098         if (tr->stop_count++)
2099                 goto out;
2100
2101         buffer = tr->trace_buffer.buffer;
2102         if (buffer)
2103                 ring_buffer_record_disable(buffer);
2104
2105  out:
2106         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2107 }
2108
2109 static int trace_save_cmdline(struct task_struct *tsk)
2110 {
2111         unsigned pid, idx;
2112
2113         /* treat recording of idle task as a success */
2114         if (!tsk->pid)
2115                 return 1;
2116
2117         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2118                 return 0;
2119
2120         /*
2121          * It's not the end of the world if we don't get
2122          * the lock, but we also don't want to spin
2123          * nor do we want to disable interrupts,
2124          * so if we miss here, then better luck next time.
2125          */
2126         if (!arch_spin_trylock(&trace_cmdline_lock))
2127                 return 0;
2128
2129         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2130         if (idx == NO_CMDLINE_MAP) {
2131                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2132
2133                 /*
2134                  * Check whether the cmdline buffer at idx has a pid
2135                  * mapped. We are going to overwrite that entry so we
2136                  * need to clear the map_pid_to_cmdline. Otherwise we
2137                  * would read the new comm for the old pid.
2138                  */
2139                 pid = savedcmd->map_cmdline_to_pid[idx];
2140                 if (pid != NO_CMDLINE_MAP)
2141                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2142
2143                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2144                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2145
2146                 savedcmd->cmdline_idx = idx;
2147         }
2148
2149         set_cmdline(idx, tsk->comm);
2150
2151         arch_spin_unlock(&trace_cmdline_lock);
2152
2153         return 1;
2154 }
2155
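/*
 * A worked example of the slot reuse above (the numbers are purely
 * illustrative): with cmdline_num == 128, a task with pid 4000 that has
 * no slot yet gets idx = (cmdline_idx + 1) % 128. If that slot still
 * maps an old pid, say 312, then map_pid_to_cmdline[312] is first set
 * back to NO_CMDLINE_MAP, so a later lookup of pid 312 prints "<...>"
 * instead of wrongly showing pid 4000's comm.
 */
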
2156 static void __trace_find_cmdline(int pid, char comm[])
2157 {
2158         unsigned map;
2159
2160         if (!pid) {
2161                 strcpy(comm, "<idle>");
2162                 return;
2163         }
2164
2165         if (WARN_ON_ONCE(pid < 0)) {
2166                 strcpy(comm, "<XXX>");
2167                 return;
2168         }
2169
2170         if (pid > PID_MAX_DEFAULT) {
2171                 strcpy(comm, "<...>");
2172                 return;
2173         }
2174
2175         map = savedcmd->map_pid_to_cmdline[pid];
2176         if (map != NO_CMDLINE_MAP)
2177                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2178         else
2179                 strcpy(comm, "<...>");
2180 }
2181
2182 void trace_find_cmdline(int pid, char comm[])
2183 {
2184         preempt_disable();
2185         arch_spin_lock(&trace_cmdline_lock);
2186
2187         __trace_find_cmdline(pid, comm);
2188
2189         arch_spin_unlock(&trace_cmdline_lock);
2190         preempt_enable();
2191 }
2192
2193 int trace_find_tgid(int pid)
2194 {
2195         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2196                 return 0;
2197
2198         return tgid_map[pid];
2199 }
2200
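/*
 * Example use of the two lookup helpers above when formatting an entry
 * (sketch only; "s" is a trace_seq and "entry" a trace_entry, both
 * hypothetical here):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%8.8s-%d (%d)", comm, entry->pid,
 *			 trace_find_tgid(entry->pid));
 */
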
2201 static int trace_save_tgid(struct task_struct *tsk)
2202 {
2203         /* treat recording of idle task as a success */
2204         if (!tsk->pid)
2205                 return 1;
2206
2207         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2208                 return 0;
2209
2210         tgid_map[tsk->pid] = tsk->tgid;
2211         return 1;
2212 }
2213
2214 static bool tracing_record_taskinfo_skip(int flags)
2215 {
2216         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2217                 return true;
2218         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2219                 return true;
2220         if (!__this_cpu_read(trace_taskinfo_save))
2221                 return true;
2222         return false;
2223 }
2224
2225 /**
2226  * tracing_record_taskinfo - record the task info of a task
2227  *
2228  * @task:  task to record
2229  * @flags: TRACE_RECORD_CMDLINE for recording comm
2230  *         TRACE_RECORD_TGID for recording tgid
2231  */
2232 void tracing_record_taskinfo(struct task_struct *task, int flags)
2233 {
2234         bool done;
2235
2236         if (tracing_record_taskinfo_skip(flags))
2237                 return;
2238
2239         /*
2240          * Record as much task information as possible. If some fail, continue
2241          * to try to record the others.
2242          */
2243         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2244         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2245
2246         /* If recording any information failed, retry again soon. */
2247         if (!done)
2248                 return;
2249
2250         __this_cpu_write(trace_taskinfo_save, false);
2251 }
2252
2253 /**
2254  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2255  *
2256  * @prev:  previous task during sched_switch
2257  * @next:  next task during sched_switch
2258  * @flags: TRACE_RECORD_CMDLINE for recording comm
2259  *          TRACE_RECORD_TGID for recording tgid
2260  */
2261 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2262                                           struct task_struct *next, int flags)
2263 {
2264         bool done;
2265
2266         if (tracing_record_taskinfo_skip(flags))
2267                 return;
2268
2269         /*
2270          * Record as much task information as possible. If some fail, continue
2271          * to try to record the others.
2272          */
2273         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2274         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2275         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2276         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2277
2278         /* If recording any information failed, retry again soon. */
2279         if (!done)
2280                 return;
2281
2282         __this_cpu_write(trace_taskinfo_save, false);
2283 }
2284
2285 /* Helpers to record a specific task information */
2286 void tracing_record_cmdline(struct task_struct *task)
2287 {
2288         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2289 }
2290
2291 void tracing_record_tgid(struct task_struct *task)
2292 {
2293         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2294 }
2295
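/*
 * For example, a sched_switch probe can record both tasks in one call:
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 *
 * while a probe that only cares about the current task can simply use
 * tracing_record_cmdline(current).
 */
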
2296 /*
2297  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2298  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2299  * simplifies those functions and keeps them in sync.
2300  */
2301 enum print_line_t trace_handle_return(struct trace_seq *s)
2302 {
2303         return trace_seq_has_overflowed(s) ?
2304                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_handle_return);
2307
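/*
 * Typical use in an event's output callback (sketch; trace_example_print()
 * is a hypothetical handler):
 *
 *	static enum print_line_t trace_example_print(struct trace_iterator *iter,
 *						     int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: pid=%d\n", iter->ent->pid);
 *		return trace_handle_return(&iter->seq);
 *	}
 */
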
2308 void
2309 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2310                              int pc)
2311 {
2312         struct task_struct *tsk = current;
2313
2314         entry->preempt_count            = pc & 0xff;
2315         entry->pid                      = (tsk) ? tsk->pid : 0;
2316         entry->flags =
2317 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2318                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2319 #else
2320                 TRACE_FLAG_IRQS_NOSUPPORT |
2321 #endif
2322                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2323                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2324                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2325                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2326                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2327 }
2328 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2329
2330 struct ring_buffer_event *
2331 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2332                           int type,
2333                           unsigned long len,
2334                           unsigned long flags, int pc)
2335 {
2336         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2337 }
2338
2339 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2340 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2341 static int trace_buffered_event_ref;
2342
2343 /**
2344  * trace_buffered_event_enable - enable buffering events
2345  *
2346  * When events are being filtered, it is quicker to use a temporary
2347  * buffer to write the event data into if there's a likely chance
2348  * that it will not be committed. The discard of the ring buffer
2349  * is not as fast as committing, and is much slower than copying
2350  * a commit.
2351  *
2352  * When an event is to be filtered, per-CPU buffers are allocated to
2353  * write the event data into; if the event is then filtered and discarded,
2354  * it is simply dropped, otherwise the entire data is committed
2355  * in one shot.
2356  */
2357 void trace_buffered_event_enable(void)
2358 {
2359         struct ring_buffer_event *event;
2360         struct page *page;
2361         int cpu;
2362
2363         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2364
2365         if (trace_buffered_event_ref++)
2366                 return;
2367
2368         for_each_tracing_cpu(cpu) {
2369                 page = alloc_pages_node(cpu_to_node(cpu),
2370                                         GFP_KERNEL | __GFP_NORETRY, 0);
2371                 if (!page)
2372                         goto failed;
2373
2374                 event = page_address(page);
2375                 memset(event, 0, sizeof(*event));
2376
2377                 per_cpu(trace_buffered_event, cpu) = event;
2378
2379                 preempt_disable();
2380                 if (cpu == smp_processor_id() &&
2381                     this_cpu_read(trace_buffered_event) !=
2382                     per_cpu(trace_buffered_event, cpu))
2383                         WARN_ON_ONCE(1);
2384                 preempt_enable();
2385         }
2386
2387         return;
2388  failed:
2389         trace_buffered_event_disable();
2390 }
2391
2392 static void enable_trace_buffered_event(void *data)
2393 {
2394         /* Probably not needed, but do it anyway */
2395         smp_rmb();
2396         this_cpu_dec(trace_buffered_event_cnt);
2397 }
2398
2399 static void disable_trace_buffered_event(void *data)
2400 {
2401         this_cpu_inc(trace_buffered_event_cnt);
2402 }
2403
2404 /**
2405  * trace_buffered_event_disable - disable buffering events
2406  *
2407  * When a filter is removed, it is faster to not use the buffered
2408  * events, and to commit directly into the ring buffer. Free up
2409  * the temp buffers when there are no more users. This requires
2410  * special synchronization with current events.
2411  */
2412 void trace_buffered_event_disable(void)
2413 {
2414         int cpu;
2415
2416         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2417
2418         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2419                 return;
2420
2421         if (--trace_buffered_event_ref)
2422                 return;
2423
2424         preempt_disable();
2425         /* For each CPU, set the buffer as used. */
2426         smp_call_function_many(tracing_buffer_mask,
2427                                disable_trace_buffered_event, NULL, 1);
2428         preempt_enable();
2429
2430         /* Wait for all current users to finish */
2431         synchronize_rcu();
2432
2433         for_each_tracing_cpu(cpu) {
2434                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2435                 per_cpu(trace_buffered_event, cpu) = NULL;
2436         }
2437         /*
2438          * Make sure trace_buffered_event is NULL before clearing
2439          * trace_buffered_event_cnt.
2440          */
2441         smp_wmb();
2442
2443         preempt_disable();
2444         /* Do the work on each cpu */
2445         smp_call_function_many(tracing_buffer_mask,
2446                                enable_trace_buffered_event, NULL, 1);
2447         preempt_enable();
2448 }
2449
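/*
 * trace_buffered_event_enable()/disable() are reference counted and must
 * be balanced under event_mutex, roughly (sketch only):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable(), again under
 * event_mutex, once the filter is removed.
 */
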
2450 static struct ring_buffer *temp_buffer;
2451
2452 struct ring_buffer_event *
2453 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2454                           struct trace_event_file *trace_file,
2455                           int type, unsigned long len,
2456                           unsigned long flags, int pc)
2457 {
2458         struct ring_buffer_event *entry;
2459         int val;
2460
2461         *current_rb = trace_file->tr->trace_buffer.buffer;
2462
2463         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2464              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2465             (entry = this_cpu_read(trace_buffered_event))) {
2466                 /* Try to use the per cpu buffer first */
2467                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2468                 if (val == 1) {
2469                         trace_event_setup(entry, type, flags, pc);
2470                         entry->array[0] = len;
2471                         return entry;
2472                 }
2473                 this_cpu_dec(trace_buffered_event_cnt);
2474         }
2475
2476         entry = __trace_buffer_lock_reserve(*current_rb,
2477                                             type, len, flags, pc);
2478         /*
2479          * If tracing is off, but we have triggers enabled
2480          * we still need to look at the event data. Use the temp_buffer
2481          * to store the trace event for the trigger to use. It's recursion
2482          * safe and will not be recorded anywhere.
2483          */
2484         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2485                 *current_rb = temp_buffer;
2486                 entry = __trace_buffer_lock_reserve(*current_rb,
2487                                                     type, len, flags, pc);
2488         }
2489         return entry;
2490 }
2491 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2492
2493 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2494 static DEFINE_MUTEX(tracepoint_printk_mutex);
2495
2496 static void output_printk(struct trace_event_buffer *fbuffer)
2497 {
2498         struct trace_event_call *event_call;
2499         struct trace_event *event;
2500         unsigned long flags;
2501         struct trace_iterator *iter = tracepoint_print_iter;
2502
2503         /* We should never get here if iter is NULL */
2504         if (WARN_ON_ONCE(!iter))
2505                 return;
2506
2507         event_call = fbuffer->trace_file->event_call;
2508         if (!event_call || !event_call->event.funcs ||
2509             !event_call->event.funcs->trace)
2510                 return;
2511
2512         event = &fbuffer->trace_file->event_call->event;
2513
2514         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2515         trace_seq_init(&iter->seq);
2516         iter->ent = fbuffer->entry;
2517         event_call->event.funcs->trace(iter, 0, event);
2518         trace_seq_putc(&iter->seq, 0);
2519         printk("%s", iter->seq.buffer);
2520
2521         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2522 }
2523
2524 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2525                              void __user *buffer, size_t *lenp,
2526                              loff_t *ppos)
2527 {
2528         int save_tracepoint_printk;
2529         int ret;
2530
2531         mutex_lock(&tracepoint_printk_mutex);
2532         save_tracepoint_printk = tracepoint_printk;
2533
2534         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2535
2536         /*
2537          * This will force exiting early, as tracepoint_printk
2538          * is always zero when tracepoint_print_iter is not allocated.
2539          */
2540         if (!tracepoint_print_iter)
2541                 tracepoint_printk = 0;
2542
2543         if (save_tracepoint_printk == tracepoint_printk)
2544                 goto out;
2545
2546         if (tracepoint_printk)
2547                 static_key_enable(&tracepoint_printk_key.key);
2548         else
2549                 static_key_disable(&tracepoint_printk_key.key);
2550
2551  out:
2552         mutex_unlock(&tracepoint_printk_mutex);
2553
2554         return ret;
2555 }
2556
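/*
 * This is the handler behind the kernel.tracepoint_printk sysctl. When
 * the "tp_printk" boot option has allocated tracepoint_print_iter,
 * writing 0 or 1 to /proc/sys/kernel/tracepoint_printk toggles the
 * static key so that trace_event_buffer_commit() below mirrors events
 * to printk() or stops doing so.
 */
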
2557 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2558 {
2559         if (static_key_false(&tracepoint_printk_key.key))
2560                 output_printk(fbuffer);
2561
2562         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2563                                     fbuffer->event, fbuffer->entry,
2564                                     fbuffer->flags, fbuffer->pc);
2565 }
2566 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2567
2568 /*
2569  * Skip 3:
2570  *
2571  *   trace_buffer_unlock_commit_regs()
2572  *   trace_event_buffer_commit()
2573  *   trace_event_raw_event_xxx()
2574  */
2575 # define STACK_SKIP 3
2576
2577 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2578                                      struct ring_buffer *buffer,
2579                                      struct ring_buffer_event *event,
2580                                      unsigned long flags, int pc,
2581                                      struct pt_regs *regs)
2582 {
2583         __buffer_unlock_commit(buffer, event);
2584
2585         /*
2586          * If regs is not set, then skip the necessary functions.
2587          * Note, we can still get here via blktrace, wakeup tracer
2588          * and mmiotrace, but that's ok if they lose a function or
2589          * two. They are not that meaningful.
2590          */
2591         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2592         ftrace_trace_userstack(buffer, flags, pc);
2593 }
2594
2595 /*
2596  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2597  */
2598 void
2599 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2600                                    struct ring_buffer_event *event)
2601 {
2602         __buffer_unlock_commit(buffer, event);
2603 }
2604
2605 static void
2606 trace_process_export(struct trace_export *export,
2607                struct ring_buffer_event *event)
2608 {
2609         struct trace_entry *entry;
2610         unsigned int size = 0;
2611
2612         entry = ring_buffer_event_data(event);
2613         size = ring_buffer_event_length(event);
2614         export->write(export, entry, size);
2615 }
2616
2617 static DEFINE_MUTEX(ftrace_export_lock);
2618
2619 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2620
2621 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2622
2623 static inline void ftrace_exports_enable(void)
2624 {
2625         static_branch_enable(&ftrace_exports_enabled);
2626 }
2627
2628 static inline void ftrace_exports_disable(void)
2629 {
2630         static_branch_disable(&ftrace_exports_enabled);
2631 }
2632
2633 static void ftrace_exports(struct ring_buffer_event *event)
2634 {
2635         struct trace_export *export;
2636
2637         preempt_disable_notrace();
2638
2639         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2640         while (export) {
2641                 trace_process_export(export, event);
2642                 export = rcu_dereference_raw_notrace(export->next);
2643         }
2644
2645         preempt_enable_notrace();
2646 }
2647
2648 static inline void
2649 add_trace_export(struct trace_export **list, struct trace_export *export)
2650 {
2651         rcu_assign_pointer(export->next, *list);
2652         /*
2653          * We are entering export into the list but another
2654          * CPU might be walking that list. We need to make sure
2655          * the export->next pointer is valid before another CPU sees
2656          * the export pointer included into the list.
2657          */
2658         rcu_assign_pointer(*list, export);
2659 }
2660
2661 static inline int
2662 rm_trace_export(struct trace_export **list, struct trace_export *export)
2663 {
2664         struct trace_export **p;
2665
2666         for (p = list; *p != NULL; p = &(*p)->next)
2667                 if (*p == export)
2668                         break;
2669
2670         if (*p != export)
2671                 return -1;
2672
2673         rcu_assign_pointer(*p, (*p)->next);
2674
2675         return 0;
2676 }
2677
2678 static inline void
2679 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2680 {
2681         if (*list == NULL)
2682                 ftrace_exports_enable();
2683
2684         add_trace_export(list, export);
2685 }
2686
2687 static inline int
2688 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2689 {
2690         int ret;
2691
2692         ret = rm_trace_export(list, export);
2693         if (*list == NULL)
2694                 ftrace_exports_disable();
2695
2696         return ret;
2697 }
2698
2699 int register_ftrace_export(struct trace_export *export)
2700 {
2701         if (WARN_ON_ONCE(!export->write))
2702                 return -1;
2703
2704         mutex_lock(&ftrace_export_lock);
2705
2706         add_ftrace_export(&ftrace_exports_list, export);
2707
2708         mutex_unlock(&ftrace_export_lock);
2709
2710         return 0;
2711 }
2712 EXPORT_SYMBOL_GPL(register_ftrace_export);
2713
2714 int unregister_ftrace_export(struct trace_export *export)
2715 {
2716         int ret;
2717
2718         mutex_lock(&ftrace_export_lock);
2719
2720         ret = rm_ftrace_export(&ftrace_exports_list, export);
2721
2722         mutex_unlock(&ftrace_export_lock);
2723
2724         return ret;
2725 }
2726 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2727
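/*
 * Example of a hypothetical exporter (sketch only; my_write() and
 * my_export are made-up names):
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		// forward the @size bytes at @entry to some transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 * A register_ftrace_export(&my_export) call is then paired with
 * unregister_ftrace_export(&my_export) when the exporter goes away.
 */
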
2728 void
2729 trace_function(struct trace_array *tr,
2730                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2731                int pc)
2732 {
2733         struct trace_event_call *call = &event_function;
2734         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2735         struct ring_buffer_event *event;
2736         struct ftrace_entry *entry;
2737
2738         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2739                                             flags, pc);
2740         if (!event)
2741                 return;
2742         entry   = ring_buffer_event_data(event);
2743         entry->ip                       = ip;
2744         entry->parent_ip                = parent_ip;
2745
2746         if (!call_filter_check_discard(call, entry, buffer, event)) {
2747                 if (static_branch_unlikely(&ftrace_exports_enabled))
2748                         ftrace_exports(event);
2749                 __buffer_unlock_commit(buffer, event);
2750         }
2751 }
2752
2753 #ifdef CONFIG_STACKTRACE
2754
2755 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2756 struct ftrace_stack {
2757         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2758 };
2759
2760 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2761 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2762
2763 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2764                                  unsigned long flags,
2765                                  int skip, int pc, struct pt_regs *regs)
2766 {
2767         struct trace_event_call *call = &event_kernel_stack;
2768         struct ring_buffer_event *event;
2769         struct stack_entry *entry;
2770         struct stack_trace trace;
2771         int use_stack;
2772         int size = FTRACE_STACK_ENTRIES;
2773
2774         trace.nr_entries        = 0;
2775         trace.skip              = skip;
2776
2777         /*
2778          * Add one, for this function and the call to save_stack_trace().
2779          * If regs is set, then these functions will not be in the way.
2780          */
2781 #ifndef CONFIG_UNWINDER_ORC
2782         if (!regs)
2783                 trace.skip++;
2784 #endif
2785
2786         /*
2787          * Since events can happen in NMIs there's no safe way to
2788          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2789          * or NMI comes in, it will just have to use the default
2790          * FTRACE_STACK_ENTRIES.
2791          */
2792         preempt_disable_notrace();
2793
2794         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2795         /*
2796          * We don't need any atomic variables, just a barrier.
2797          * If an interrupt comes in, we don't care, because it would
2798          * have exited and put the counter back to what we want.
2799          * We just need a barrier to keep gcc from moving things
2800          * around.
2801          */
2802         barrier();
2803         if (use_stack == 1) {
2804                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2805                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2806
2807                 if (regs)
2808                         save_stack_trace_regs(regs, &trace);
2809                 else
2810                         save_stack_trace(&trace);
2811
2812                 if (trace.nr_entries > size)
2813                         size = trace.nr_entries;
2814         } else
2815                 /* From now on, use_stack is a boolean */
2816                 use_stack = 0;
2817
2818         size *= sizeof(unsigned long);
2819
2820         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2821                                             sizeof(*entry) + size, flags, pc);
2822         if (!event)
2823                 goto out;
2824         entry = ring_buffer_event_data(event);
2825
2826         memset(&entry->caller, 0, size);
2827
2828         if (use_stack)
2829                 memcpy(&entry->caller, trace.entries,
2830                        trace.nr_entries * sizeof(unsigned long));
2831         else {
2832                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2833                 trace.entries           = entry->caller;
2834                 if (regs)
2835                         save_stack_trace_regs(regs, &trace);
2836                 else
2837                         save_stack_trace(&trace);
2838         }
2839
2840         entry->size = trace.nr_entries;
2841
2842         if (!call_filter_check_discard(call, entry, buffer, event))
2843                 __buffer_unlock_commit(buffer, event);
2844
2845  out:
2846         /* Again, don't let gcc optimize things here */
2847         barrier();
2848         __this_cpu_dec(ftrace_stack_reserve);
2849         preempt_enable_notrace();
2850
2851 }
2852
2853 static inline void ftrace_trace_stack(struct trace_array *tr,
2854                                       struct ring_buffer *buffer,
2855                                       unsigned long flags,
2856                                       int skip, int pc, struct pt_regs *regs)
2857 {
2858         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2859                 return;
2860
2861         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2862 }
2863
2864 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2865                    int pc)
2866 {
2867         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2868
2869         if (rcu_is_watching()) {
2870                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2871                 return;
2872         }
2873
2874         /*
2875          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2876          * but if the above rcu_is_watching() failed, then the NMI
2877          * triggered someplace critical, and rcu_irq_enter() should
2878          * not be called from NMI.
2879          */
2880         if (unlikely(in_nmi()))
2881                 return;
2882
2883         rcu_irq_enter_irqson();
2884         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2885         rcu_irq_exit_irqson();
2886 }
2887
2888 /**
2889  * trace_dump_stack - record a stack back trace in the trace buffer
2890  * @skip: Number of functions to skip (helper handlers)
2891  */
2892 void trace_dump_stack(int skip)
2893 {
2894         unsigned long flags;
2895
2896         if (tracing_disabled || tracing_selftest_running)
2897                 return;
2898
2899         local_save_flags(flags);
2900
2901 #ifndef CONFIG_UNWINDER_ORC
2902         /* Skip 1 to skip this function. */
2903         skip++;
2904 #endif
2905         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2906                              flags, skip, preempt_count(), NULL);
2907 }
2908 EXPORT_SYMBOL_GPL(trace_dump_stack);
2909
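/*
 * For example, a trace_dump_stack(0) dropped into a suspect code path
 * records the caller's backtrace into the trace buffer, which is
 * typically much cheaper than dumping it to the console.
 */
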
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911
2912 void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915         struct trace_event_call *call = &event_user_stack;
2916         struct ring_buffer_event *event;
2917         struct userstack_entry *entry;
2918         struct stack_trace trace;
2919
2920         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2921                 return;
2922
2923         /*
2924          * NMIs can not handle page faults, even with fixups.
2925          * Saving the user stack can (and often does) fault.
2926          */
2927         if (unlikely(in_nmi()))
2928                 return;
2929
2930         /*
2931          * prevent recursion, since the user stack tracing may
2932          * trigger other kernel events.
2933          */
2934         preempt_disable();
2935         if (__this_cpu_read(user_stack_count))
2936                 goto out;
2937
2938         __this_cpu_inc(user_stack_count);
2939
2940         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2941                                             sizeof(*entry), flags, pc);
2942         if (!event)
2943                 goto out_drop_count;
2944         entry   = ring_buffer_event_data(event);
2945
2946         entry->tgid             = current->tgid;
2947         memset(&entry->caller, 0, sizeof(entry->caller));
2948
2949         trace.nr_entries        = 0;
2950         trace.max_entries       = FTRACE_STACK_ENTRIES;
2951         trace.skip              = 0;
2952         trace.entries           = entry->caller;
2953
2954         save_stack_trace_user(&trace);
2955         if (!call_filter_check_discard(call, entry, buffer, event))
2956                 __buffer_unlock_commit(buffer, event);
2957
2958  out_drop_count:
2959         __this_cpu_dec(user_stack_count);
2960  out:
2961         preempt_enable();
2962 }
2963
2964 #ifdef UNUSED
2965 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2966 {
2967         ftrace_trace_userstack(tr, flags, preempt_count());
2968 }
2969 #endif /* UNUSED */
2970
2971 #endif /* CONFIG_STACKTRACE */
2972
2973 /* created for use with alloc_percpu */
2974 struct trace_buffer_struct {
2975         int nesting;
2976         char buffer[4][TRACE_BUF_SIZE];
2977 };
2978
2979 static struct trace_buffer_struct *trace_percpu_buffer;
2980
2981 /*
2982  * This allows for lockless recording.  If we're nested too deeply, then
2983  * this returns NULL.
2984  */
2985 static char *get_trace_buf(void)
2986 {
2987         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2988
2989         if (!buffer || buffer->nesting >= 4)
2990                 return NULL;
2991
2992         buffer->nesting++;
2993
2994         /* Interrupts must see nesting incremented before we use the buffer */
2995         barrier();
2996         return &buffer->buffer[buffer->nesting][0];
2997 }
2998
2999 static void put_trace_buf(void)
3000 {
3001         /* Don't let the decrement of nesting leak before this */
3002         barrier();
3003         this_cpu_dec(trace_percpu_buffer->nesting);
3004 }
3005
3006 static int alloc_percpu_trace_buffer(void)
3007 {
3008         struct trace_buffer_struct *buffers;
3009
3010         buffers = alloc_percpu(struct trace_buffer_struct);
3011         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3012                 return -ENOMEM;
3013
3014         trace_percpu_buffer = buffers;
3015         return 0;
3016 }
3017
3018 static int buffers_allocated;
3019
3020 void trace_printk_init_buffers(void)
3021 {
3022         if (buffers_allocated)
3023                 return;
3024
3025         if (alloc_percpu_trace_buffer())
3026                 return;
3027
3028         /* trace_printk() is for debug use only. Don't use it in production. */
3029
3030         pr_warn("\n");
3031         pr_warn("**********************************************************\n");
3032         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3033         pr_warn("**                                                      **\n");
3034         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3035         pr_warn("**                                                      **\n");
3036         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3037         pr_warn("** unsafe for production use.                           **\n");
3038         pr_warn("**                                                      **\n");
3039         pr_warn("** If you see this message and you are not debugging    **\n");
3040         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3041         pr_warn("**                                                      **\n");
3042         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3043         pr_warn("**********************************************************\n");
3044
3045         /* Expand the buffers to set size */
3046         tracing_update_buffers();
3047
3048         buffers_allocated = 1;
3049
3050         /*
3051          * trace_printk_init_buffers() can be called by modules.
3052          * If that happens, then we need to start cmdline recording
3053          * directly here. If the global_trace.buffer is already
3054          * allocated here, then this was called by module code.
3055          */
3056         if (global_trace.trace_buffer.buffer)
3057                 tracing_start_cmdline_record();
3058 }
3059
3060 void trace_printk_start_comm(void)
3061 {
3062         /* Start tracing comms if trace printk is set */
3063         if (!buffers_allocated)
3064                 return;
3065         tracing_start_cmdline_record();
3066 }
3067
3068 static void trace_printk_start_stop_comm(int enabled)
3069 {
3070         if (!buffers_allocated)
3071                 return;
3072
3073         if (enabled)
3074                 tracing_start_cmdline_record();
3075         else
3076                 tracing_stop_cmdline_record();
3077 }
3078
3079 /**
3080  * trace_vbprintk - write binary msg to tracing buffer
3081  *
3082  */
3083 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3084 {
3085         struct trace_event_call *call = &event_bprint;
3086         struct ring_buffer_event *event;
3087         struct ring_buffer *buffer;
3088         struct trace_array *tr = &global_trace;
3089         struct bprint_entry *entry;
3090         unsigned long flags;
3091         char *tbuffer;
3092         int len = 0, size, pc;
3093
3094         if (unlikely(tracing_selftest_running || tracing_disabled))
3095                 return 0;
3096
3097         /* Don't pollute graph traces with trace_vprintk internals */
3098         pause_graph_tracing();
3099
3100         pc = preempt_count();
3101         preempt_disable_notrace();
3102
3103         tbuffer = get_trace_buf();
3104         if (!tbuffer) {
3105                 len = 0;
3106                 goto out_nobuffer;
3107         }
3108
3109         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3110
3111         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3112                 goto out;
3113
3114         local_save_flags(flags);
3115         size = sizeof(*entry) + sizeof(u32) * len;
3116         buffer = tr->trace_buffer.buffer;
3117         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3118                                             flags, pc);
3119         if (!event)
3120                 goto out;
3121         entry = ring_buffer_event_data(event);
3122         entry->ip                       = ip;
3123         entry->fmt                      = fmt;
3124
3125         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3126         if (!call_filter_check_discard(call, entry, buffer, event)) {
3127                 __buffer_unlock_commit(buffer, event);
3128                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3129         }
3130
3131 out:
3132         put_trace_buf();
3133
3134 out_nobuffer:
3135         preempt_enable_notrace();
3136         unpause_graph_tracing();
3137
3138         return len;
3139 }
3140 EXPORT_SYMBOL_GPL(trace_vbprintk);
3141
3142 __printf(3, 0)
3143 static int
3144 __trace_array_vprintk(struct ring_buffer *buffer,
3145                       unsigned long ip, const char *fmt, va_list args)
3146 {
3147         struct trace_event_call *call = &event_print;
3148         struct ring_buffer_event *event;
3149         int len = 0, size, pc;
3150         struct print_entry *entry;
3151         unsigned long flags;
3152         char *tbuffer;
3153
3154         if (tracing_disabled || tracing_selftest_running)
3155                 return 0;
3156
3157         /* Don't pollute graph traces with trace_vprintk internals */
3158         pause_graph_tracing();
3159
3160         pc = preempt_count();
3161         preempt_disable_notrace();
3162
3163
3164         tbuffer = get_trace_buf();
3165         if (!tbuffer) {
3166                 len = 0;
3167                 goto out_nobuffer;
3168         }
3169
3170         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3171
3172         local_save_flags(flags);
3173         size = sizeof(*entry) + len + 1;
3174         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3175                                             flags, pc);
3176         if (!event)
3177                 goto out;
3178         entry = ring_buffer_event_data(event);
3179         entry->ip = ip;
3180
3181         memcpy(&entry->buf, tbuffer, len + 1);
3182         if (!call_filter_check_discard(call, entry, buffer, event)) {
3183                 __buffer_unlock_commit(buffer, event);
3184                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3185         }
3186
3187 out:
3188         put_trace_buf();
3189
3190 out_nobuffer:
3191         preempt_enable_notrace();
3192         unpause_graph_tracing();
3193
3194         return len;
3195 }
3196
3197 __printf(3, 0)
3198 int trace_array_vprintk(struct trace_array *tr,
3199                         unsigned long ip, const char *fmt, va_list args)
3200 {
3201         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3202 }
3203
3204 __printf(3, 0)
3205 int trace_array_printk(struct trace_array *tr,
3206                        unsigned long ip, const char *fmt, ...)
3207 {
3208         int ret;
3209         va_list ap;
3210
3211         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3212                 return 0;
3213
3214         va_start(ap, fmt);
3215         ret = trace_array_vprintk(tr, ip, fmt, ap);
3216         va_end(ap);
3217         return ret;
3218 }
3219
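/*
 * For example (sketch; "tr" is assumed to be a previously created
 * instance and "delta" a value of interest):
 *
 *	trace_array_printk(tr, _THIS_IP_, "stage took %llu ns\n", delta);
 *
 * writes only into that instance's buffer, whereas trace_printk()
 * always writes into the global buffer.
 */
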
3220 __printf(3, 4)
3221 int trace_array_printk_buf(struct ring_buffer *buffer,
3222                            unsigned long ip, const char *fmt, ...)
3223 {
3224         int ret;
3225         va_list ap;
3226
3227         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3228                 return 0;
3229
3230         va_start(ap, fmt);
3231         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3232         va_end(ap);
3233         return ret;
3234 }
3235
3236 __printf(2, 0)
3237 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3238 {
3239         return trace_array_vprintk(&global_trace, ip, fmt, args);
3240 }
3241 EXPORT_SYMBOL_GPL(trace_vprintk);
3242
3243 static void trace_iterator_increment(struct trace_iterator *iter)
3244 {
3245         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3246
3247         iter->idx++;
3248         if (buf_iter)
3249                 ring_buffer_read(buf_iter, NULL);
3250 }
3251
3252 static struct trace_entry *
3253 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3254                 unsigned long *lost_events)
3255 {
3256         struct ring_buffer_event *event;
3257         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3258
3259         if (buf_iter)
3260                 event = ring_buffer_iter_peek(buf_iter, ts);
3261         else
3262                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3263                                          lost_events);
3264
3265         if (event) {
3266                 iter->ent_size = ring_buffer_event_length(event);
3267                 return ring_buffer_event_data(event);
3268         }
3269         iter->ent_size = 0;
3270         return NULL;
3271 }
3272
3273 static struct trace_entry *
3274 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3275                   unsigned long *missing_events, u64 *ent_ts)
3276 {
3277         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3278         struct trace_entry *ent, *next = NULL;
3279         unsigned long lost_events = 0, next_lost = 0;
3280         int cpu_file = iter->cpu_file;
3281         u64 next_ts = 0, ts;
3282         int next_cpu = -1;
3283         int next_size = 0;
3284         int cpu;
3285
3286         /*
3287          * If we are in a per_cpu trace file, don't bother iterating over
3288          * all the CPUs; just peek directly at that one CPU.
3289          */
3290         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3291                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3292                         return NULL;
3293                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3294                 if (ent_cpu)
3295                         *ent_cpu = cpu_file;
3296
3297                 return ent;
3298         }
3299
3300         for_each_tracing_cpu(cpu) {
3301
3302                 if (ring_buffer_empty_cpu(buffer, cpu))
3303                         continue;
3304
3305                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3306
3307                 /*
3308                  * Pick the entry with the smallest timestamp:
3309                  */
3310                 if (ent && (!next || ts < next_ts)) {
3311                         next = ent;
3312                         next_cpu = cpu;
3313                         next_ts = ts;
3314                         next_lost = lost_events;
3315                         next_size = iter->ent_size;
3316                 }
3317         }
3318
3319         iter->ent_size = next_size;
3320
3321         if (ent_cpu)
3322                 *ent_cpu = next_cpu;
3323
3324         if (ent_ts)
3325                 *ent_ts = next_ts;
3326
3327         if (missing_events)
3328                 *missing_events = next_lost;
3329
3330         return next;
3331 }
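
/*
 * Editorial sketch, not part of the original file: the heart of the per-CPU
 * merge done by __find_next_entry() above, reduced to picking the CPU whose
 * next event has the smallest timestamp. The names used here are
 * hypothetical.
 */
static __maybe_unused int example_pick_oldest_cpu(const u64 *next_ts,
						  const bool *has_event,
						  int ncpus)
{
	int cpu, best = -1;

	for (cpu = 0; cpu < ncpus; cpu++) {
		if (!has_event[cpu])
			continue;
		if (best < 0 || next_ts[cpu] < next_ts[best])
			best = cpu;
	}

	return best;	/* -1 means every per-CPU buffer was empty */
}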
3332
3333 /* Find the next real entry, without updating the iterator itself */
3334 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3335                                           int *ent_cpu, u64 *ent_ts)
3336 {
3337         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3338 }
3339
3340 /* Find the next real entry, and increment the iterator to the next entry */
3341 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3342 {
3343         iter->ent = __find_next_entry(iter, &iter->cpu,
3344                                       &iter->lost_events, &iter->ts);
3345
3346         if (iter->ent)
3347                 trace_iterator_increment(iter);
3348
3349         return iter->ent ? iter : NULL;
3350 }
3351
3352 static void trace_consume(struct trace_iterator *iter)
3353 {
3354         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3355                             &iter->lost_events);
3356 }
3357
3358 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3359 {
3360         struct trace_iterator *iter = m->private;
3361         int i = (int)*pos;
3362         void *ent;
3363
3364         WARN_ON_ONCE(iter->leftover);
3365
3366         (*pos)++;
3367
3368         /* can't go backwards */
3369         if (iter->idx > i)
3370                 return NULL;
3371
3372         if (iter->idx < 0)
3373                 ent = trace_find_next_entry_inc(iter);
3374         else
3375                 ent = iter;
3376
3377         while (ent && iter->idx < i)
3378                 ent = trace_find_next_entry_inc(iter);
3379
3380         iter->pos = *pos;
3381
3382         return ent;
3383 }
3384
3385 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3386 {
3387         struct ring_buffer_event *event;
3388         struct ring_buffer_iter *buf_iter;
3389         unsigned long entries = 0;
3390         u64 ts;
3391
3392         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3393
3394         buf_iter = trace_buffer_iter(iter, cpu);
3395         if (!buf_iter)
3396                 return;
3397
3398         ring_buffer_iter_reset(buf_iter);
3399
3400         /*
3401          * With the max latency tracers we can have the case that a reset
3402          * never took place on a cpu. This is evident when an entry's
3403          * timestamp is before the start of the buffer.
3404          */
3405         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3406                 if (ts >= iter->trace_buffer->time_start)
3407                         break;
3408                 entries++;
3409                 ring_buffer_read(buf_iter, NULL);
3410         }
3411
3412         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3413 }
3414
3415 /*
3416  * The current tracer is copied to avoid holding a global lock
3417  * all around.
3418  */
3419 static void *s_start(struct seq_file *m, loff_t *pos)
3420 {
3421         struct trace_iterator *iter = m->private;
3422         struct trace_array *tr = iter->tr;
3423         int cpu_file = iter->cpu_file;
3424         void *p = NULL;
3425         loff_t l = 0;
3426         int cpu;
3427
3428         /*
3429          * Copy the tracer to avoid using a global lock all around.
3430          * iter->trace is a copy of current_trace; the name pointers can
3431          * be compared instead of using strcmp(), as iter->trace->name
3432          * points to the same string as current_trace->name.
3433          */
3434         mutex_lock(&trace_types_lock);
3435         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3436                 *iter->trace = *tr->current_trace;
3437         mutex_unlock(&trace_types_lock);
3438
3439 #ifdef CONFIG_TRACER_MAX_TRACE
3440         if (iter->snapshot && iter->trace->use_max_tr)
3441                 return ERR_PTR(-EBUSY);
3442 #endif
3443
3444         if (!iter->snapshot)
3445                 atomic_inc(&trace_record_taskinfo_disabled);
3446
3447         if (*pos != iter->pos) {
3448                 iter->ent = NULL;
3449                 iter->cpu = 0;
3450                 iter->idx = -1;
3451
3452                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3453                         for_each_tracing_cpu(cpu)
3454                                 tracing_iter_reset(iter, cpu);
3455                 } else
3456                         tracing_iter_reset(iter, cpu_file);
3457
3458                 iter->leftover = 0;
3459                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3460                         ;
3461
3462         } else {
3463                 /*
3464                  * If we overflowed the seq_file before, then we want
3465                  * to just reuse the trace_seq buffer again.
3466                  */
3467                 if (iter->leftover)
3468                         p = iter;
3469                 else {
3470                         l = *pos - 1;
3471                         p = s_next(m, p, &l);
3472                 }
3473         }
3474
3475         trace_event_read_lock();
3476         trace_access_lock(cpu_file);
3477         return p;
3478 }
3479
3480 static void s_stop(struct seq_file *m, void *p)
3481 {
3482         struct trace_iterator *iter = m->private;
3483
3484 #ifdef CONFIG_TRACER_MAX_TRACE
3485         if (iter->snapshot && iter->trace->use_max_tr)
3486                 return;
3487 #endif
3488
3489         if (!iter->snapshot)
3490                 atomic_dec(&trace_record_taskinfo_disabled);
3491
3492         trace_access_unlock(iter->cpu_file);
3493         trace_event_read_unlock();
3494 }
3495
3496 static void
3497 get_total_entries(struct trace_buffer *buf,
3498                   unsigned long *total, unsigned long *entries)
3499 {
3500         unsigned long count;
3501         int cpu;
3502
3503         *total = 0;
3504         *entries = 0;
3505
3506         for_each_tracing_cpu(cpu) {
3507                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3508                 /*
3509                  * If this buffer has skipped entries, then it holds all the
3510                  * entries for the trace and we need to ignore the ones
3511                  * before the time stamp.
3512                  */
3513                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3514                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3515                         /* total is the same as the entries */
3516                         *total += count;
3517                 } else
3518                         *total += count +
3519                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3520                 *entries += count;
3521         }
3522 }
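
/*
 * Editorial example, not part of the original file: how a caller could use
 * get_total_entries() above. "entries" counts the events still readable in
 * the buffer, while "total" additionally includes events lost to ring-buffer
 * overruns (unless entries were skipped, in which case the two are equal).
 * The helper name is hypothetical.
 */
static __maybe_unused void example_report_overruns(struct seq_file *m,
						   struct trace_buffer *buf)
{
	unsigned long total, entries;

	get_total_entries(buf, &total, &entries);
	/* total >= entries; the difference is what the buffer overwrote */
	seq_printf(m, "# overwritten events: %lu\n", total - entries);
}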
3523
3524 static void print_lat_help_header(struct seq_file *m)
3525 {
3526         seq_puts(m, "#                  _------=> CPU#            \n"
3527                     "#                 / _-----=> irqs-off        \n"
3528                     "#                | / _----=> need-resched    \n"
3529                     "#                || / _---=> hardirq/softirq \n"
3530                     "#                ||| / _--=> preempt-depth   \n"
3531                     "#                |||| /     delay            \n"
3532                     "#  cmd     pid   ||||| time  |   caller      \n"
3533                     "#     \\   /      |||||  \\    |   /         \n");
3534 }
3535
3536 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3537 {
3538         unsigned long total;
3539         unsigned long entries;
3540
3541         get_total_entries(buf, &total, &entries);
3542         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3543                    entries, total, num_online_cpus());
3544         seq_puts(m, "#\n");
3545 }
3546
3547 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3548                                    unsigned int flags)
3549 {
3550         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551
3552         print_event_info(buf, m);
3553
3554         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3555         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3556 }
3557
3558 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3559                                        unsigned int flags)
3560 {
3561         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3562         const char tgid_space[] = "          ";
3563         const char space[] = "  ";
3564
3565         print_event_info(buf, m);
3566
3567         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3568                    tgid ? tgid_space : space);
3569         seq_printf(m, "#                          %s / _----=> need-resched\n",
3570                    tgid ? tgid_space : space);
3571         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3572                    tgid ? tgid_space : space);
3573         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3574                    tgid ? tgid_space : space);
3575         seq_printf(m, "#                          %s||| /     delay\n",
3576                    tgid ? tgid_space : space);
3577         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3578                    tgid ? "   TGID   " : space);
3579         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3580                    tgid ? "     |    " : space);
3581 }
3582
3583 void
3584 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3585 {
3586         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3587         struct trace_buffer *buf = iter->trace_buffer;
3588         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3589         struct tracer *type = iter->trace;
3590         unsigned long entries;
3591         unsigned long total;
3592         const char *name = "preemption";
3593
3594         name = type->name;
3595
3596         get_total_entries(buf, &total, &entries);
3597
3598         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3599                    name, UTS_RELEASE);
3600         seq_puts(m, "# -----------------------------------"
3601                  "---------------------------------\n");
3602         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3603                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3604                    nsecs_to_usecs(data->saved_latency),
3605                    entries,
3606                    total,
3607                    buf->cpu,
3608 #if defined(CONFIG_PREEMPT_NONE)
3609                    "server",
3610 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3611                    "desktop",
3612 #elif defined(CONFIG_PREEMPT)
3613                    "preempt",
3614 #else
3615                    "unknown",
3616 #endif
3617                    /* These are reserved for later use */
3618                    0, 0, 0, 0);
3619 #ifdef CONFIG_SMP
3620         seq_printf(m, " #P:%d)\n", num_online_cpus());
3621 #else
3622         seq_puts(m, ")\n");
3623 #endif
3624         seq_puts(m, "#    -----------------\n");
3625         seq_printf(m, "#    | task: %.16s-%d "
3626                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3627                    data->comm, data->pid,
3628                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3629                    data->policy, data->rt_priority);
3630         seq_puts(m, "#    -----------------\n");
3631
3632         if (data->critical_start) {
3633                 seq_puts(m, "#  => started at: ");
3634                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3635                 trace_print_seq(m, &iter->seq);
3636                 seq_puts(m, "\n#  => ended at:   ");
3637                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3638                 trace_print_seq(m, &iter->seq);
3639                 seq_puts(m, "\n#\n");
3640         }
3641
3642         seq_puts(m, "#\n");
3643 }
3644
3645 static void test_cpu_buff_start(struct trace_iterator *iter)
3646 {
3647         struct trace_seq *s = &iter->seq;
3648         struct trace_array *tr = iter->tr;
3649
3650         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3651                 return;
3652
3653         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3654                 return;
3655
3656         if (cpumask_available(iter->started) &&
3657             cpumask_test_cpu(iter->cpu, iter->started))
3658                 return;
3659
3660         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3661                 return;
3662
3663         if (cpumask_available(iter->started))
3664                 cpumask_set_cpu(iter->cpu, iter->started);
3665
3666         /* Don't print started cpu buffer for the first entry of the trace */
3667         if (iter->idx > 1)
3668                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3669                                 iter->cpu);
3670 }
3671
3672 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3673 {
3674         struct trace_array *tr = iter->tr;
3675         struct trace_seq *s = &iter->seq;
3676         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3677         struct trace_entry *entry;
3678         struct trace_event *event;
3679
3680         entry = iter->ent;
3681
3682         test_cpu_buff_start(iter);
3683
3684         event = ftrace_find_event(entry->type);
3685
3686         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3687                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3688                         trace_print_lat_context(iter);
3689                 else
3690                         trace_print_context(iter);
3691         }
3692
3693         if (trace_seq_has_overflowed(s))
3694                 return TRACE_TYPE_PARTIAL_LINE;
3695
3696         if (event)
3697                 return event->funcs->trace(iter, sym_flags, event);
3698
3699         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3700
3701         return trace_handle_return(s);
3702 }
3703
3704 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3705 {
3706         struct trace_array *tr = iter->tr;
3707         struct trace_seq *s = &iter->seq;
3708         struct trace_entry *entry;
3709         struct trace_event *event;
3710
3711         entry = iter->ent;
3712
3713         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3714                 trace_seq_printf(s, "%d %d %llu ",
3715                                  entry->pid, iter->cpu, iter->ts);
3716
3717         if (trace_seq_has_overflowed(s))
3718                 return TRACE_TYPE_PARTIAL_LINE;
3719
3720         event = ftrace_find_event(entry->type);
3721         if (event)
3722                 return event->funcs->raw(iter, 0, event);
3723
3724         trace_seq_printf(s, "%d ?\n", entry->type);
3725
3726         return trace_handle_return(s);
3727 }
3728
3729 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3730 {
3731         struct trace_array *tr = iter->tr;
3732         struct trace_seq *s = &iter->seq;
3733         unsigned char newline = '\n';
3734         struct trace_entry *entry;
3735         struct trace_event *event;
3736
3737         entry = iter->ent;
3738
3739         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3740                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3741                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3742                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3743                 if (trace_seq_has_overflowed(s))
3744                         return TRACE_TYPE_PARTIAL_LINE;
3745         }
3746
3747         event = ftrace_find_event(entry->type);
3748         if (event) {
3749                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3750                 if (ret != TRACE_TYPE_HANDLED)
3751                         return ret;
3752         }
3753
3754         SEQ_PUT_FIELD(s, newline);
3755
3756         return trace_handle_return(s);
3757 }
3758
3759 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3760 {
3761         struct trace_array *tr = iter->tr;
3762         struct trace_seq *s = &iter->seq;
3763         struct trace_entry *entry;
3764         struct trace_event *event;
3765
3766         entry = iter->ent;
3767
3768         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3769                 SEQ_PUT_FIELD(s, entry->pid);
3770                 SEQ_PUT_FIELD(s, iter->cpu);
3771                 SEQ_PUT_FIELD(s, iter->ts);
3772                 if (trace_seq_has_overflowed(s))
3773                         return TRACE_TYPE_PARTIAL_LINE;
3774         }
3775
3776         event = ftrace_find_event(entry->type);
3777         return event ? event->funcs->binary(iter, 0, event) :
3778                 TRACE_TYPE_HANDLED;
3779 }
3780
3781 int trace_empty(struct trace_iterator *iter)
3782 {
3783         struct ring_buffer_iter *buf_iter;
3784         int cpu;
3785
3786         /* If we are looking at one CPU buffer, only check that one */
3787         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3788                 cpu = iter->cpu_file;
3789                 buf_iter = trace_buffer_iter(iter, cpu);
3790                 if (buf_iter) {
3791                         if (!ring_buffer_iter_empty(buf_iter))
3792                                 return 0;
3793                 } else {
3794                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3795                                 return 0;
3796                 }
3797                 return 1;
3798         }
3799
3800         for_each_tracing_cpu(cpu) {
3801                 buf_iter = trace_buffer_iter(iter, cpu);
3802                 if (buf_iter) {
3803                         if (!ring_buffer_iter_empty(buf_iter))
3804                                 return 0;
3805                 } else {
3806                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3807                                 return 0;
3808                 }
3809         }
3810
3811         return 1;
3812 }
3813
3814 /*  Called with trace_event_read_lock() held. */
3815 enum print_line_t print_trace_line(struct trace_iterator *iter)
3816 {
3817         struct trace_array *tr = iter->tr;
3818         unsigned long trace_flags = tr->trace_flags;
3819         enum print_line_t ret;
3820
3821         if (iter->lost_events) {
3822                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3823                                  iter->cpu, iter->lost_events);
3824                 if (trace_seq_has_overflowed(&iter->seq))
3825                         return TRACE_TYPE_PARTIAL_LINE;
3826         }
3827
3828         if (iter->trace && iter->trace->print_line) {
3829                 ret = iter->trace->print_line(iter);
3830                 if (ret != TRACE_TYPE_UNHANDLED)
3831                         return ret;
3832         }
3833
3834         if (iter->ent->type == TRACE_BPUTS &&
3835                         trace_flags & TRACE_ITER_PRINTK &&
3836                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3837                 return trace_print_bputs_msg_only(iter);
3838
3839         if (iter->ent->type == TRACE_BPRINT &&
3840                         trace_flags & TRACE_ITER_PRINTK &&
3841                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3842                 return trace_print_bprintk_msg_only(iter);
3843
3844         if (iter->ent->type == TRACE_PRINT &&
3845                         trace_flags & TRACE_ITER_PRINTK &&
3846                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3847                 return trace_print_printk_msg_only(iter);
3848
3849         if (trace_flags & TRACE_ITER_BIN)
3850                 return print_bin_fmt(iter);
3851
3852         if (trace_flags & TRACE_ITER_HEX)
3853                 return print_hex_fmt(iter);
3854
3855         if (trace_flags & TRACE_ITER_RAW)
3856                 return print_raw_fmt(iter);
3857
3858         return print_trace_fmt(iter);
3859 }
3860
3861 void trace_latency_header(struct seq_file *m)
3862 {
3863         struct trace_iterator *iter = m->private;
3864         struct trace_array *tr = iter->tr;
3865
3866         /* print nothing if the buffers are empty */
3867         if (trace_empty(iter))
3868                 return;
3869
3870         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3871                 print_trace_header(m, iter);
3872
3873         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3874                 print_lat_help_header(m);
3875 }
3876
3877 void trace_default_header(struct seq_file *m)
3878 {
3879         struct trace_iterator *iter = m->private;
3880         struct trace_array *tr = iter->tr;
3881         unsigned long trace_flags = tr->trace_flags;
3882
3883         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3884                 return;
3885
3886         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3887                 /* print nothing if the buffers are empty */
3888                 if (trace_empty(iter))
3889                         return;
3890                 print_trace_header(m, iter);
3891                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3892                         print_lat_help_header(m);
3893         } else {
3894                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3895                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3896                                 print_func_help_header_irq(iter->trace_buffer,
3897                                                            m, trace_flags);
3898                         else
3899                                 print_func_help_header(iter->trace_buffer, m,
3900                                                        trace_flags);
3901                 }
3902         }
3903 }
3904
3905 static void test_ftrace_alive(struct seq_file *m)
3906 {
3907         if (!ftrace_is_dead())
3908                 return;
3909         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3910                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3911 }
3912
3913 #ifdef CONFIG_TRACER_MAX_TRACE
3914 static void show_snapshot_main_help(struct seq_file *m)
3915 {
3916         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3917                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3918                     "#                      Takes a snapshot of the main buffer.\n"
3919                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3920                     "#                      (Doesn't have to be '2'; works with any number that\n"
3921                     "#                       is not a '0' or '1')\n");
3922 }
3923
3924 static void show_snapshot_percpu_help(struct seq_file *m)
3925 {
3926         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3927 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3928         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3929                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3930 #else
3931         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3932                     "#                     Must use main snapshot file to allocate.\n");
3933 #endif
3934         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3935                     "#                      (Doesn't have to be '2'; works with any number that\n"
3936                     "#                       is not a '0' or '1')\n");
3937 }
3938
3939 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3940 {
3941         if (iter->tr->allocated_snapshot)
3942                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3943         else
3944                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3945
3946         seq_puts(m, "# Snapshot commands:\n");
3947         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3948                 show_snapshot_main_help(m);
3949         else
3950                 show_snapshot_percpu_help(m);
3951 }
3952 #else
3953 /* Should never be called */
3954 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3955 #endif
3956
3957 static int s_show(struct seq_file *m, void *v)
3958 {
3959         struct trace_iterator *iter = v;
3960         int ret;
3961
3962         if (iter->ent == NULL) {
3963                 if (iter->tr) {
3964                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3965                         seq_puts(m, "#\n");
3966                         test_ftrace_alive(m);
3967                 }
3968                 if (iter->snapshot && trace_empty(iter))
3969                         print_snapshot_help(m, iter);
3970                 else if (iter->trace && iter->trace->print_header)
3971                         iter->trace->print_header(m);
3972                 else
3973                         trace_default_header(m);
3974
3975         } else if (iter->leftover) {
3976                 /*
3977                  * If we filled the seq_file buffer earlier, we
3978                  * want to just show it now.
3979                  */
3980                 ret = trace_print_seq(m, &iter->seq);
3981
3982                 /* ret should this time be zero, but you never know */
3983                 iter->leftover = ret;
3984
3985         } else {
3986                 print_trace_line(iter);
3987                 ret = trace_print_seq(m, &iter->seq);
3988                 /*
3989                  * If we overflow the seq_file buffer, then it will
3990                  * ask us for this data again at start up.
3991                  * Use that instead.
3992                  *  ret is 0 if seq_file write succeeded.
3993                  *        -1 otherwise.
3994                  */
3995                 iter->leftover = ret;
3996         }
3997
3998         return 0;
3999 }
4000
4001 /*
4002  * Should be used after trace_array_get(), trace_types_lock
4003  * ensures that i_cdev was already initialized.
4004  */
4005 static inline int tracing_get_cpu(struct inode *inode)
4006 {
4007         if (inode->i_cdev) /* See trace_create_cpu_file() */
4008                 return (long)inode->i_cdev - 1;
4009         return RING_BUFFER_ALL_CPUS;
4010 }
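
/*
 * Editorial sketch, not part of the original file: the encoding that
 * tracing_get_cpu() relies on. The create side (see trace_create_cpu_file())
 * stores "cpu + 1" in i_cdev, so that a NULL i_cdev can mean "no specific
 * CPU", i.e. RING_BUFFER_ALL_CPUS. The helper name is hypothetical.
 */
static __maybe_unused void example_stash_cpu(struct inode *inode, long cpu)
{
	/* 0 (NULL) is reserved for "all CPUs" */
	inode->i_cdev = (void *)(cpu + 1);
}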
4011
4012 static const struct seq_operations tracer_seq_ops = {
4013         .start          = s_start,
4014         .next           = s_next,
4015         .stop           = s_stop,
4016         .show           = s_show,
4017 };
4018
4019 static struct trace_iterator *
4020 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4021 {
4022         struct trace_array *tr = inode->i_private;
4023         struct trace_iterator *iter;
4024         int cpu;
4025
4026         if (tracing_disabled)
4027                 return ERR_PTR(-ENODEV);
4028
4029         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4030         if (!iter)
4031                 return ERR_PTR(-ENOMEM);
4032
4033         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4034                                     GFP_KERNEL);
4035         if (!iter->buffer_iter)
4036                 goto release;
4037
4038         /*
4039          * We make a copy of the current tracer to avoid concurrent
4040          * changes on it while we are reading.
4041          */
4042         mutex_lock(&trace_types_lock);
4043         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4044         if (!iter->trace)
4045                 goto fail;
4046
4047         *iter->trace = *tr->current_trace;
4048
4049         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4050                 goto fail;
4051
4052         iter->tr = tr;
4053
4054 #ifdef CONFIG_TRACER_MAX_TRACE
4055         /* Currently only the top directory has a snapshot */
4056         if (tr->current_trace->print_max || snapshot)
4057                 iter->trace_buffer = &tr->max_buffer;
4058         else
4059 #endif
4060                 iter->trace_buffer = &tr->trace_buffer;
4061         iter->snapshot = snapshot;
4062         iter->pos = -1;
4063         iter->cpu_file = tracing_get_cpu(inode);
4064         mutex_init(&iter->mutex);
4065
4066         /* Notify the tracer early; before we stop tracing. */
4067         if (iter->trace && iter->trace->open)
4068                 iter->trace->open(iter);
4069
4070         /* Annotate start of buffers if we had overruns */
4071         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4072                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4073
4074         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4075         if (trace_clocks[tr->clock_id].in_ns)
4076                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4077
4078         /* stop the trace while dumping if we are not opening "snapshot" */
4079         if (!iter->snapshot)
4080                 tracing_stop_tr(tr);
4081
4082         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4083                 for_each_tracing_cpu(cpu) {
4084                         iter->buffer_iter[cpu] =
4085                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4086                                                          cpu, GFP_KERNEL);
4087                 }
4088                 ring_buffer_read_prepare_sync();
4089                 for_each_tracing_cpu(cpu) {
4090                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4091                         tracing_iter_reset(iter, cpu);
4092                 }
4093         } else {
4094                 cpu = iter->cpu_file;
4095                 iter->buffer_iter[cpu] =
4096                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4097                                                  cpu, GFP_KERNEL);
4098                 ring_buffer_read_prepare_sync();
4099                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4100                 tracing_iter_reset(iter, cpu);
4101         }
4102
4103         mutex_unlock(&trace_types_lock);
4104
4105         return iter;
4106
4107  fail:
4108         mutex_unlock(&trace_types_lock);
4109         kfree(iter->trace);
4110         kfree(iter->buffer_iter);
4111 release:
4112         seq_release_private(inode, file);
4113         return ERR_PTR(-ENOMEM);
4114 }
4115
4116 int tracing_open_generic(struct inode *inode, struct file *filp)
4117 {
4118         if (tracing_disabled)
4119                 return -ENODEV;
4120
4121         filp->private_data = inode->i_private;
4122         return 0;
4123 }
4124
4125 bool tracing_is_disabled(void)
4126 {
4127         return (tracing_disabled) ? true : false;
4128 }
4129
4130 /*
4131  * Open and update trace_array ref count.
4132  * Must have the current trace_array passed to it.
4133  */
4134 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4135 {
4136         struct trace_array *tr = inode->i_private;
4137
4138         if (tracing_disabled)
4139                 return -ENODEV;
4140
4141         if (trace_array_get(tr) < 0)
4142                 return -ENODEV;
4143
4144         filp->private_data = inode->i_private;
4145
4146         return 0;
4147 }
4148
4149 static int tracing_release(struct inode *inode, struct file *file)
4150 {
4151         struct trace_array *tr = inode->i_private;
4152         struct seq_file *m = file->private_data;
4153         struct trace_iterator *iter;
4154         int cpu;
4155
4156         if (!(file->f_mode & FMODE_READ)) {
4157                 trace_array_put(tr);
4158                 return 0;
4159         }
4160
4161         /* Writes do not use seq_file */
4162         iter = m->private;
4163         mutex_lock(&trace_types_lock);
4164
4165         for_each_tracing_cpu(cpu) {
4166                 if (iter->buffer_iter[cpu])
4167                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4168         }
4169
4170         if (iter->trace && iter->trace->close)
4171                 iter->trace->close(iter);
4172
4173         if (!iter->snapshot)
4174                 /* reenable tracing if it was previously enabled */
4175                 tracing_start_tr(tr);
4176
4177         __trace_array_put(tr);
4178
4179         mutex_unlock(&trace_types_lock);
4180
4181         mutex_destroy(&iter->mutex);
4182         free_cpumask_var(iter->started);
4183         kfree(iter->trace);
4184         kfree(iter->buffer_iter);
4185         seq_release_private(inode, file);
4186
4187         return 0;
4188 }
4189
4190 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4191 {
4192         struct trace_array *tr = inode->i_private;
4193
4194         trace_array_put(tr);
4195         return 0;
4196 }
4197
4198 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4199 {
4200         struct trace_array *tr = inode->i_private;
4201
4202         trace_array_put(tr);
4203
4204         return single_release(inode, file);
4205 }
4206
4207 static int tracing_open(struct inode *inode, struct file *file)
4208 {
4209         struct trace_array *tr = inode->i_private;
4210         struct trace_iterator *iter;
4211         int ret = 0;
4212
4213         if (trace_array_get(tr) < 0)
4214                 return -ENODEV;
4215
4216         /* If this file was open for write, then erase contents */
4217         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4218                 int cpu = tracing_get_cpu(inode);
4219                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4220
4221 #ifdef CONFIG_TRACER_MAX_TRACE
4222                 if (tr->current_trace->print_max)
4223                         trace_buf = &tr->max_buffer;
4224 #endif
4225
4226                 if (cpu == RING_BUFFER_ALL_CPUS)
4227                         tracing_reset_online_cpus(trace_buf);
4228                 else
4229                         tracing_reset(trace_buf, cpu);
4230         }
4231
4232         if (file->f_mode & FMODE_READ) {
4233                 iter = __tracing_open(inode, file, false);
4234                 if (IS_ERR(iter))
4235                         ret = PTR_ERR(iter);
4236                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4237                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4238         }
4239
4240         if (ret < 0)
4241                 trace_array_put(tr);
4242
4243         return ret;
4244 }
4245
4246 /*
4247  * Some tracers are not suitable for instance buffers.
4248  * A tracer is always available for the global array (toplevel)
4249  * or if it explicitly states that it is.
4250  */
4251 static bool
4252 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4253 {
4254         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4255 }
4256
4257 /* Find the next tracer that this trace array may use */
4258 static struct tracer *
4259 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4260 {
4261         while (t && !trace_ok_for_array(t, tr))
4262                 t = t->next;
4263
4264         return t;
4265 }
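
/*
 * Editorial example, not part of the original file: a tracer opts in to
 * instance buffers by setting ->allow_instances, which is exactly what
 * trace_ok_for_array() checks above. The tracer below is hypothetical and
 * is not registered anywhere.
 */
static struct tracer example_instance_tracer __maybe_unused = {
	.name		 = "example",
	.allow_instances = true,
};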
4266
4267 static void *
4268 t_next(struct seq_file *m, void *v, loff_t *pos)
4269 {
4270         struct trace_array *tr = m->private;
4271         struct tracer *t = v;
4272
4273         (*pos)++;
4274
4275         if (t)
4276                 t = get_tracer_for_array(tr, t->next);
4277
4278         return t;
4279 }
4280
4281 static void *t_start(struct seq_file *m, loff_t *pos)
4282 {
4283         struct trace_array *tr = m->private;
4284         struct tracer *t;
4285         loff_t l = 0;
4286
4287         mutex_lock(&trace_types_lock);
4288
4289         t = get_tracer_for_array(tr, trace_types);
4290         for (; t && l < *pos; t = t_next(m, t, &l))
4291                         ;
4292                 ;
4293         return t;
4294 }
4295
4296 static void t_stop(struct seq_file *m, void *p)
4297 {
4298         mutex_unlock(&trace_types_lock);
4299 }
4300
4301 static int t_show(struct seq_file *m, void *v)
4302 {
4303         struct tracer *t = v;
4304
4305         if (!t)
4306                 return 0;
4307
4308         seq_puts(m, t->name);
4309         if (t->next)
4310                 seq_putc(m, ' ');
4311         else
4312                 seq_putc(m, '\n');
4313
4314         return 0;
4315 }
4316
4317 static const struct seq_operations show_traces_seq_ops = {
4318         .start          = t_start,
4319         .next           = t_next,
4320         .stop           = t_stop,
4321         .show           = t_show,
4322 };
4323
4324 static int show_traces_open(struct inode *inode, struct file *file)
4325 {
4326         struct trace_array *tr = inode->i_private;
4327         struct seq_file *m;
4328         int ret;
4329
4330         if (tracing_disabled)
4331                 return -ENODEV;
4332
4333         ret = seq_open(file, &show_traces_seq_ops);
4334         if (ret)
4335                 return ret;
4336
4337         m = file->private_data;
4338         m->private = tr;
4339
4340         return 0;
4341 }
4342
4343 static ssize_t
4344 tracing_write_stub(struct file *filp, const char __user *ubuf,
4345                    size_t count, loff_t *ppos)
4346 {
4347         return count;
4348 }
4349
4350 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4351 {
4352         int ret;
4353
4354         if (file->f_mode & FMODE_READ)
4355                 ret = seq_lseek(file, offset, whence);
4356         else
4357                 file->f_pos = ret = 0;
4358
4359         return ret;
4360 }
4361
4362 static const struct file_operations tracing_fops = {
4363         .open           = tracing_open,
4364         .read           = seq_read,
4365         .write          = tracing_write_stub,
4366         .llseek         = tracing_lseek,
4367         .release        = tracing_release,
4368 };
4369
4370 static const struct file_operations show_traces_fops = {
4371         .open           = show_traces_open,
4372         .read           = seq_read,
4373         .release        = seq_release,
4374         .llseek         = seq_lseek,
4375 };
4376
4377 static ssize_t
4378 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4379                      size_t count, loff_t *ppos)
4380 {
4381         struct trace_array *tr = file_inode(filp)->i_private;
4382         char *mask_str;
4383         int len;
4384
4385         len = snprintf(NULL, 0, "%*pb\n",
4386                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4387         mask_str = kmalloc(len, GFP_KERNEL);
4388         if (!mask_str)
4389                 return -ENOMEM;
4390
4391         len = snprintf(mask_str, len, "%*pb\n",
4392                        cpumask_pr_args(tr->tracing_cpumask));
4393         if (len >= count) {
4394                 count = -EINVAL;
4395                 goto out_err;
4396         }
4397         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4398
4399 out_err:
4400         kfree(mask_str);
4401
4402         return count;
4403 }
4404
4405 static ssize_t
4406 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4407                       size_t count, loff_t *ppos)
4408 {
4409         struct trace_array *tr = file_inode(filp)->i_private;
4410         cpumask_var_t tracing_cpumask_new;
4411         int err, cpu;
4412
4413         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4414                 return -ENOMEM;
4415
4416         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4417         if (err)
4418                 goto err_unlock;
4419
4420         local_irq_disable();
4421         arch_spin_lock(&tr->max_lock);
4422         for_each_tracing_cpu(cpu) {
4423                 /*
4424                  * Increase/decrease the disabled counter if we are
4425                  * about to flip a bit in the cpumask:
4426                  */
4427                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4428                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4429                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4430                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4431                 }
4432                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4433                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4434                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4435                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4436                 }
4437         }
4438         arch_spin_unlock(&tr->max_lock);
4439         local_irq_enable();
4440
4441         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4442         free_cpumask_var(tracing_cpumask_new);
4443
4444         return count;
4445
4446 err_unlock:
4447         free_cpumask_var(tracing_cpumask_new);
4448
4449         return err;
4450 }
4451
4452 static const struct file_operations tracing_cpumask_fops = {
4453         .open           = tracing_open_generic_tr,
4454         .read           = tracing_cpumask_read,
4455         .write          = tracing_cpumask_write,
4456         .release        = tracing_release_generic_tr,
4457         .llseek         = generic_file_llseek,
4458 };
4459
4460 static int tracing_trace_options_show(struct seq_file *m, void *v)
4461 {
4462         struct tracer_opt *trace_opts;
4463         struct trace_array *tr = m->private;
4464         u32 tracer_flags;
4465         int i;
4466
4467         mutex_lock(&trace_types_lock);
4468         tracer_flags = tr->current_trace->flags->val;
4469         trace_opts = tr->current_trace->flags->opts;
4470
4471         for (i = 0; trace_options[i]; i++) {
4472                 if (tr->trace_flags & (1 << i))
4473                         seq_printf(m, "%s\n", trace_options[i]);
4474                 else
4475                         seq_printf(m, "no%s\n", trace_options[i]);
4476         }
4477
4478         for (i = 0; trace_opts[i].name; i++) {
4479                 if (tracer_flags & trace_opts[i].bit)
4480                         seq_printf(m, "%s\n", trace_opts[i].name);
4481                 else
4482                         seq_printf(m, "no%s\n", trace_opts[i].name);
4483         }
4484         mutex_unlock(&trace_types_lock);
4485
4486         return 0;
4487 }
4488
4489 static int __set_tracer_option(struct trace_array *tr,
4490                                struct tracer_flags *tracer_flags,
4491                                struct tracer_opt *opts, int neg)
4492 {
4493         struct tracer *trace = tracer_flags->trace;
4494         int ret;
4495
4496         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4497         if (ret)
4498                 return ret;
4499
4500         if (neg)
4501                 tracer_flags->val &= ~opts->bit;
4502         else
4503                 tracer_flags->val |= opts->bit;
4504         return 0;
4505 }
4506
4507 /* Try to assign a tracer specific option */
4508 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4509 {
4510         struct tracer *trace = tr->current_trace;
4511         struct tracer_flags *tracer_flags = trace->flags;
4512         struct tracer_opt *opts = NULL;
4513         int i;
4514
4515         for (i = 0; tracer_flags->opts[i].name; i++) {
4516                 opts = &tracer_flags->opts[i];
4517
4518                 if (strcmp(cmp, opts->name) == 0)
4519                         return __set_tracer_option(tr, trace->flags, opts, neg);
4520         }
4521
4522         return -EINVAL;
4523 }
4524
4525 /* Some tracers require overwrite to stay enabled */
4526 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4527 {
4528         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4529                 return -1;
4530
4531         return 0;
4532 }
4533
4534 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4535 {
4536         /* do nothing if flag is already set */
4537         if (!!(tr->trace_flags & mask) == !!enabled)
4538                 return 0;
4539
4540         /* Give the tracer a chance to approve the change */
4541         if (tr->current_trace->flag_changed)
4542                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4543                         return -EINVAL;
4544
4545         if (enabled)
4546                 tr->trace_flags |= mask;
4547         else
4548                 tr->trace_flags &= ~mask;
4549
4550         if (mask == TRACE_ITER_RECORD_CMD)
4551                 trace_event_enable_cmd_record(enabled);
4552
4553         if (mask == TRACE_ITER_RECORD_TGID) {
4554                 if (!tgid_map)
4555                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4556                                            sizeof(*tgid_map),
4557                                            GFP_KERNEL);
4558                 if (!tgid_map) {
4559                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4560                         return -ENOMEM;
4561                 }
4562
4563                 trace_event_enable_tgid_record(enabled);
4564         }
4565
4566         if (mask == TRACE_ITER_EVENT_FORK)
4567                 trace_event_follow_fork(tr, enabled);
4568
4569         if (mask == TRACE_ITER_FUNC_FORK)
4570                 ftrace_pid_follow_fork(tr, enabled);
4571
4572         if (mask == TRACE_ITER_OVERWRITE) {
4573                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4574 #ifdef CONFIG_TRACER_MAX_TRACE
4575                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4576 #endif
4577         }
4578
4579         if (mask == TRACE_ITER_PRINTK) {
4580                 trace_printk_start_stop_comm(enabled);
4581                 trace_printk_control(enabled);
4582         }
4583
4584         return 0;
4585 }
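
/*
 * Editorial example, not part of the original file: how kernel code could
 * flip a trace option programmatically with set_tracer_flag(). This mirrors
 * what trace_set_options() below does after parsing user input; the helper
 * name is hypothetical.
 */
static __maybe_unused int example_set_printk_option(struct trace_array *tr,
						    int enabled)
{
	int ret;

	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, TRACE_ITER_PRINTK, enabled);
	mutex_unlock(&trace_types_lock);

	return ret;
}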
4586
4587 static int trace_set_options(struct trace_array *tr, char *option)
4588 {
4589         char *cmp;
4590         int neg = 0;
4591         int ret;
4592         size_t orig_len = strlen(option);
4593         int len;
4594
4595         cmp = strstrip(option);
4596
4597         len = str_has_prefix(cmp, "no");
4598         if (len)
4599                 neg = 1;
4600
4601         cmp += len;
4602
4603         mutex_lock(&trace_types_lock);
4604
4605         ret = match_string(trace_options, -1, cmp);
4606         /* If no option could be set, test the specific tracer options */
4607         if (ret < 0)
4608                 ret = set_tracer_option(tr, cmp, neg);
4609         else
4610                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4611
4612         mutex_unlock(&trace_types_lock);
4613
4614         /*
4615          * If the first trailing whitespace was replaced with '\0' by
4616          * strstrip, turn it back into a space.
4617          */
4618         if (orig_len > strlen(option))
4619                 option[strlen(option)] = ' ';
4620
4621         return ret;
4622 }
4623
4624 static void __init apply_trace_boot_options(void)
4625 {
4626         char *buf = trace_boot_options_buf;
4627         char *option;
4628
4629         while (true) {
4630                 option = strsep(&buf, ",");
4631
4632                 if (!option)
4633                         break;
4634
4635                 if (*option)
4636                         trace_set_options(&global_trace, option);
4637
4638                 /* Put back the comma to allow this to be called again */
4639                 if (buf)
4640                         *(buf - 1) = ',';
4641         }
4642 }
4643
4644 static ssize_t
4645 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4646                         size_t cnt, loff_t *ppos)
4647 {
4648         struct seq_file *m = filp->private_data;
4649         struct trace_array *tr = m->private;
4650         char buf[64];
4651         int ret;
4652
4653         if (cnt >= sizeof(buf))
4654                 return -EINVAL;
4655
4656         if (copy_from_user(buf, ubuf, cnt))
4657                 return -EFAULT;
4658
4659         buf[cnt] = 0;
4660
4661         ret = trace_set_options(tr, buf);
4662         if (ret < 0)
4663                 return ret;
4664
4665         *ppos += cnt;
4666
4667         return cnt;
4668 }
4669
4670 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4671 {
4672         struct trace_array *tr = inode->i_private;
4673         int ret;
4674
4675         if (tracing_disabled)
4676                 return -ENODEV;
4677
4678         if (trace_array_get(tr) < 0)
4679                 return -ENODEV;
4680
4681         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4682         if (ret < 0)
4683                 trace_array_put(tr);
4684
4685         return ret;
4686 }
4687
4688 static const struct file_operations tracing_iter_fops = {
4689         .open           = tracing_trace_options_open,
4690         .read           = seq_read,
4691         .llseek         = seq_lseek,
4692         .release        = tracing_single_release_tr,
4693         .write          = tracing_trace_options_write,
4694 };
4695
4696 static const char readme_msg[] =
4697         "tracing mini-HOWTO:\n\n"
4698         "# echo 0 > tracing_on : quick way to disable tracing\n"
4699         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4700         " Important files:\n"
4701         "  trace\t\t\t- The static contents of the buffer\n"
4702         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4703         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4704         "  current_tracer\t- function and latency tracers\n"
4705         "  available_tracers\t- list of configured tracers for current_tracer\n"
4706         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4707         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4708         "  trace_clock\t\t- change the clock used to order events\n"
4709         "       local:   Per cpu clock but may not be synced across CPUs\n"
4710         "      global:   Synced across CPUs but slows tracing down.\n"
4711         "     counter:   Not a clock, but just an increment\n"
4712         "      uptime:   Jiffy counter from time of boot\n"
4713         "        perf:   Same clock that perf events use\n"
4714 #ifdef CONFIG_X86_64
4715         "     x86-tsc:   TSC cycle counter\n"
4716 #endif
4717         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4718         "       delta:   Delta difference against a buffer-wide timestamp\n"
4719         "    absolute:   Absolute (standalone) timestamp\n"
4720         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4721         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4722         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4723         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4724         "\t\t\t  Remove sub-buffer with rmdir\n"
4725         "  trace_options\t\t- Set format or modify how tracing happens\n"
4726         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4727         "\t\t\t  option name\n"
4728         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4729 #ifdef CONFIG_DYNAMIC_FTRACE
4730         "\n  available_filter_functions - list of functions that can be filtered on\n"
4731         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4732         "\t\t\t  functions\n"
4733         "\t     accepts: func_full_name or glob-matching-pattern\n"
4734         "\t     modules: Can select a group via module\n"
4735         "\t      Format: :mod:<module-name>\n"
4736         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4737         "\t    triggers: a command to perform when function is hit\n"
4738         "\t      Format: <function>:<trigger>[:count]\n"
4739         "\t     trigger: traceon, traceoff\n"
4740         "\t\t      enable_event:<system>:<event>\n"
4741         "\t\t      disable_event:<system>:<event>\n"
4742 #ifdef CONFIG_STACKTRACE
4743         "\t\t      stacktrace\n"
4744 #endif
4745 #ifdef CONFIG_TRACER_SNAPSHOT
4746         "\t\t      snapshot\n"
4747 #endif
4748         "\t\t      dump\n"
4749         "\t\t      cpudump\n"
4750         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4751         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4752         "\t     The first one will disable tracing every time do_fault is hit\n"
4753         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4754         "\t       The first time do_trap is hit and it disables tracing, the\n"
4755         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4756         "\t       the counter will not decrement. It only decrements when the\n"
4757         "\t       trigger did work\n"
4758         "\t     To remove trigger without count:\n"
4759         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4760         "\t     To remove trigger with a count:\n"
4761         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4762         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4763         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4764         "\t    modules: Can select a group via module command :mod:\n"
4765         "\t    Does not accept triggers\n"
4766 #endif /* CONFIG_DYNAMIC_FTRACE */
4767 #ifdef CONFIG_FUNCTION_TRACER
4768         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4769         "\t\t    (function)\n"
4770 #endif
4771 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4772         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4773         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4774         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4775 #endif
4776 #ifdef CONFIG_TRACER_SNAPSHOT
4777         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4778         "\t\t\t  snapshot buffer. Read the contents for more\n"
4779         "\t\t\t  information\n"
4780 #endif
4781 #ifdef CONFIG_STACK_TRACER
4782         "  stack_trace\t\t- Shows the max stack trace when active\n"
4783         "  stack_max_size\t- Shows current max stack size that was traced\n"
4784         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4785         "\t\t\t  new trace)\n"
4786 #ifdef CONFIG_DYNAMIC_FTRACE
4787         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4788         "\t\t\t  traces\n"
4789 #endif
4790 #endif /* CONFIG_STACK_TRACER */
4791 #ifdef CONFIG_DYNAMIC_EVENTS
4792         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4793         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4794 #endif
4795 #ifdef CONFIG_KPROBE_EVENTS
4796         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4797         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4798 #endif
4799 #ifdef CONFIG_UPROBE_EVENTS
4800         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4801         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4802 #endif
4803 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4804         "\t  accepts: event-definitions (one definition per line)\n"
4805         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4806         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4807 #ifdef CONFIG_HIST_TRIGGERS
4808         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4809 #endif
4810         "\t           -:[<group>/]<event>\n"
4811 #ifdef CONFIG_KPROBE_EVENTS
4812         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4813   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4814 #endif
4815 #ifdef CONFIG_UPROBE_EVENTS
4816   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4817 #endif
4818         "\t     args: <name>=fetcharg[:type]\n"
4819         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4820 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4821         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4822 #else
4823         "\t           $stack<index>, $stack, $retval, $comm\n"
4824 #endif
4825         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4826         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4827         "\t           <type>\\[<array-size>\\]\n"
4828 #ifdef CONFIG_HIST_TRIGGERS
4829         "\t    field: <stype> <name>;\n"
4830         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4831         "\t           [unsigned] char/int/long\n"
4832 #endif
4833 #endif
4834         "  events/\t\t- Directory containing all trace event subsystems:\n"
4835         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4836         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4837         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4838         "\t\t\t  events\n"
4839         "      filter\t\t- If set, only events passing filter are traced\n"
4840         "  events/<system>/<event>/\t- Directory containing control files for\n"
4841         "\t\t\t  <event>:\n"
4842         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4843         "      filter\t\t- If set, only events passing filter are traced\n"
4844         "      trigger\t\t- If set, a command to perform when event is hit\n"
4845         "\t    Format: <trigger>[:count][if <filter>]\n"
4846         "\t   trigger: traceon, traceoff\n"
4847         "\t            enable_event:<system>:<event>\n"
4848         "\t            disable_event:<system>:<event>\n"
4849 #ifdef CONFIG_HIST_TRIGGERS
4850         "\t            enable_hist:<system>:<event>\n"
4851         "\t            disable_hist:<system>:<event>\n"
4852 #endif
4853 #ifdef CONFIG_STACKTRACE
4854         "\t\t    stacktrace\n"
4855 #endif
4856 #ifdef CONFIG_TRACER_SNAPSHOT
4857         "\t\t    snapshot\n"
4858 #endif
4859 #ifdef CONFIG_HIST_TRIGGERS
4860         "\t\t    hist (see below)\n"
4861 #endif
4862         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4863         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4864         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4865         "\t                  events/block/block_unplug/trigger\n"
4866         "\t   The first disables tracing every time block_unplug is hit.\n"
4867         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4868         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4869         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4870         "\t   Like function triggers, the counter is only decremented if it\n"
4871         "\t    enabled or disabled tracing.\n"
4872         "\t   To remove a trigger without a count:\n"
4873         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4874         "\t   To remove a trigger with a count:\n"
4875         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4876         "\t   Filters can be ignored when removing a trigger.\n"
4877 #ifdef CONFIG_HIST_TRIGGERS
4878         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4879         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4880         "\t            [:values=<field1[,field2,...]>]\n"
4881         "\t            [:sort=<field1[,field2,...]>]\n"
4882         "\t            [:size=#entries]\n"
4883         "\t            [:pause][:continue][:clear]\n"
4884         "\t            [:name=histname1]\n"
4885         "\t            [:<handler>.<action>]\n"
4886         "\t            [if <filter>]\n\n"
4887         "\t    When a matching event is hit, an entry is added to a hash\n"
4888         "\t    table using the key(s) and value(s) named, and the value of a\n"
4889         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4890         "\t    correspond to fields in the event's format description.  Keys\n"
4891         "\t    can be any field, or the special string 'stacktrace'.\n"
4892         "\t    Compound keys consisting of up to two fields can be specified\n"
4893         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4894         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4895         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4896         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4897         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4898         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4899         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4900         "\t    its histogram data will be shared with other triggers of the\n"
4901         "\t    same name, and trigger hits will update this common data.\n\n"
4902         "\t    Reading the 'hist' file for the event will dump the hash\n"
4903         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4904         "\t    triggers attached to an event, there will be a table for each\n"
4905         "\t    trigger in the output.  The table displayed for a named\n"
4906         "\t    trigger will be the same as any other instance having the\n"
4907         "\t    same name.  The default format used to display a given field\n"
4908         "\t    can be modified by appending any of the following modifiers\n"
4909         "\t    to the field name, as applicable:\n\n"
4910         "\t            .hex        display a number as a hex value\n"
4911         "\t            .sym        display an address as a symbol\n"
4912         "\t            .sym-offset display an address as a symbol and offset\n"
4913         "\t            .execname   display a common_pid as a program name\n"
4914         "\t            .syscall    display a syscall id as a syscall name\n"
4915         "\t            .log2       display log2 value rather than raw number\n"
4916         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4917         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4918         "\t    trigger or to start a hist trigger but not log any events\n"
4919         "\t    until told to do so.  'continue' can be used to start or\n"
4920         "\t    restart a paused hist trigger.\n\n"
4921         "\t    The 'clear' parameter will clear the contents of a running\n"
4922         "\t    hist trigger and leave its current paused/active state\n"
4923         "\t    unchanged.\n\n"
4924         "\t    The enable_hist and disable_hist triggers can be used to\n"
4925         "\t    have one event conditionally start and stop another event's\n"
4926         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4927         "\t    the enable_event and disable_event triggers.\n\n"
4928         "\t    Hist trigger handlers and actions are executed whenever a\n"
4929         "\t    a histogram entry is added or updated.  They take the form:\n\n"
4930         "\t        <handler>.<action>\n\n"
4931         "\t    The available handlers are:\n\n"
4932         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4933         "\t        onmax(var)               - invoke if var exceeds current max\n"
4934         "\t        onchange(var)            - invoke action if var changes\n\n"
4935         "\t    The available actions are:\n\n"
4936         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4937         "\t        save(field,...)                      - save current event fields\n"
4938 #ifdef CONFIG_TRACER_SNAPSHOT
4939         "\t        snapshot()                           - snapshot the trace buffer\n"
4940 #endif
4941 #endif
4942 ;
4943
4944 static ssize_t
4945 tracing_readme_read(struct file *filp, char __user *ubuf,
4946                        size_t cnt, loff_t *ppos)
4947 {
4948         return simple_read_from_buffer(ubuf, cnt, ppos,
4949                                         readme_msg, strlen(readme_msg));
4950 }
4951
4952 static const struct file_operations tracing_readme_fops = {
4953         .open           = tracing_open_generic,
4954         .read           = tracing_readme_read,
4955         .llseek         = generic_file_llseek,
4956 };
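
/*
 * For reference, the readme_msg text above is what user space sees when
 * reading the README file in the tracefs root (commonly mounted at
 * /sys/kernel/tracing; adjust the path for your system):
 *
 *	cat /sys/kernel/tracing/README
 */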
4957
4958 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4959 {
4960         int *ptr = v;
4961
4962         if (*pos || m->count)
4963                 ptr++;
4964
4965         (*pos)++;
4966
4967         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4968                 if (trace_find_tgid(*ptr))
4969                         return ptr;
4970         }
4971
4972         return NULL;
4973 }
4974
4975 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4976 {
4977         void *v;
4978         loff_t l = 0;
4979
4980         if (!tgid_map)
4981                 return NULL;
4982
4983         v = &tgid_map[0];
4984         while (l <= *pos) {
4985                 v = saved_tgids_next(m, v, &l);
4986                 if (!v)
4987                         return NULL;
4988         }
4989
4990         return v;
4991 }
4992
4993 static void saved_tgids_stop(struct seq_file *m, void *v)
4994 {
4995 }
4996
4997 static int saved_tgids_show(struct seq_file *m, void *v)
4998 {
4999         int pid = (int *)v - tgid_map;
5000
5001         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5002         return 0;
5003 }
5004
5005 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5006         .start          = saved_tgids_start,
5007         .stop           = saved_tgids_stop,
5008         .next           = saved_tgids_next,
5009         .show           = saved_tgids_show,
5010 };
5011
5012 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5013 {
5014         if (tracing_disabled)
5015                 return -ENODEV;
5016
5017         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5018 }
5019
5020
5021 static const struct file_operations tracing_saved_tgids_fops = {
5022         .open           = tracing_saved_tgids_open,
5023         .read           = seq_read,
5024         .llseek         = seq_lseek,
5025         .release        = seq_release,
5026 };
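
/*
 * Example of what the seq_file above produces: one "<pid> <tgid>" pair
 * per line, as printed by saved_tgids_show().  The PID values below are
 * only illustrative, and the path assumes a /sys/kernel/tracing mount:
 *
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1023 1023
 *	1067 1023
 */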
5027
5028 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5029 {
5030         unsigned int *ptr = v;
5031
5032         if (*pos || m->count)
5033                 ptr++;
5034
5035         (*pos)++;
5036
5037         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5038              ptr++) {
5039                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5040                         continue;
5041
5042                 return ptr;
5043         }
5044
5045         return NULL;
5046 }
5047
5048 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5049 {
5050         void *v;
5051         loff_t l = 0;
5052
5053         preempt_disable();
5054         arch_spin_lock(&trace_cmdline_lock);
5055
5056         v = &savedcmd->map_cmdline_to_pid[0];
5057         while (l <= *pos) {
5058                 v = saved_cmdlines_next(m, v, &l);
5059                 if (!v)
5060                         return NULL;
5061         }
5062
5063         return v;
5064 }
5065
5066 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5067 {
5068         arch_spin_unlock(&trace_cmdline_lock);
5069         preempt_enable();
5070 }
5071
5072 static int saved_cmdlines_show(struct seq_file *m, void *v)
5073 {
5074         char buf[TASK_COMM_LEN];
5075         unsigned int *pid = v;
5076
5077         __trace_find_cmdline(*pid, buf);
5078         seq_printf(m, "%d %s\n", *pid, buf);
5079         return 0;
5080 }
5081
5082 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5083         .start          = saved_cmdlines_start,
5084         .next           = saved_cmdlines_next,
5085         .stop           = saved_cmdlines_stop,
5086         .show           = saved_cmdlines_show,
5087 };
5088
5089 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5090 {
5091         if (tracing_disabled)
5092                 return -ENODEV;
5093
5094         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5095 }
5096
5097 static const struct file_operations tracing_saved_cmdlines_fops = {
5098         .open           = tracing_saved_cmdlines_open,
5099         .read           = seq_read,
5100         .llseek         = seq_lseek,
5101         .release        = seq_release,
5102 };
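
/*
 * Example output of the saved_cmdlines seq_file above: each line is
 * "<pid> <comm>" as printed by saved_cmdlines_show().  The values are
 * illustrative and the path assumes a /sys/kernel/tracing mount:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	1023 bash
 *	1067 sshd
 */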
5103
5104 static ssize_t
5105 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5106                                  size_t cnt, loff_t *ppos)
5107 {
5108         char buf[64];
5109         int r;
5110
5111         arch_spin_lock(&trace_cmdline_lock);
5112         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5113         arch_spin_unlock(&trace_cmdline_lock);
5114
5115         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5116 }
5117
5118 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5119 {
5120         kfree(s->saved_cmdlines);
5121         kfree(s->map_cmdline_to_pid);
5122         kfree(s);
5123 }
5124
5125 static int tracing_resize_saved_cmdlines(unsigned int val)
5126 {
5127         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5128
5129         s = kmalloc(sizeof(*s), GFP_KERNEL);
5130         if (!s)
5131                 return -ENOMEM;
5132
5133         if (allocate_cmdlines_buffer(val, s) < 0) {
5134                 kfree(s);
5135                 return -ENOMEM;
5136         }
5137
5138         arch_spin_lock(&trace_cmdline_lock);
5139         savedcmd_temp = savedcmd;
5140         savedcmd = s;
5141         arch_spin_unlock(&trace_cmdline_lock);
5142         free_saved_cmdlines_buffer(savedcmd_temp);
5143
5144         return 0;
5145 }
5146
5147 static ssize_t
5148 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5149                                   size_t cnt, loff_t *ppos)
5150 {
5151         unsigned long val;
5152         int ret;
5153
5154         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5155         if (ret)
5156                 return ret;
5157
5158         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5159         if (!val || val > PID_MAX_DEFAULT)
5160                 return -EINVAL;
5161
5162         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5163         if (ret < 0)
5164                 return ret;
5165
5166         *ppos += cnt;
5167
5168         return cnt;
5169 }
5170
5171 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5172         .open           = tracing_open_generic,
5173         .read           = tracing_saved_cmdlines_size_read,
5174         .write          = tracing_saved_cmdlines_size_write,
5175 };
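
/*
 * Example: resizing the saved cmdlines cache from user space.  The write
 * handler above accepts a decimal count between 1 and PID_MAX_DEFAULT
 * (the path assumes a /sys/kernel/tracing tracefs mount):
 *
 *	echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	cat /sys/kernel/tracing/saved_cmdlines_size
 */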
5176
5177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5178 static union trace_eval_map_item *
5179 update_eval_map(union trace_eval_map_item *ptr)
5180 {
5181         if (!ptr->map.eval_string) {
5182                 if (ptr->tail.next) {
5183                         ptr = ptr->tail.next;
5184                         /* Set ptr to the next real item (skip head) */
5185                         ptr++;
5186                 } else
5187                         return NULL;
5188         }
5189         return ptr;
5190 }
5191
5192 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5193 {
5194         union trace_eval_map_item *ptr = v;
5195
5196         /*
5197          * Paranoid! If ptr points to end, we don't want to increment past it.
5198          * This really should never happen.
5199          */
5200         ptr = update_eval_map(ptr);
5201         if (WARN_ON_ONCE(!ptr))
5202                 return NULL;
5203
5204         ptr++;
5205
5206         (*pos)++;
5207
5208         ptr = update_eval_map(ptr);
5209
5210         return ptr;
5211 }
5212
5213 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5214 {
5215         union trace_eval_map_item *v;
5216         loff_t l = 0;
5217
5218         mutex_lock(&trace_eval_mutex);
5219
5220         v = trace_eval_maps;
5221         if (v)
5222                 v++;
5223
5224         while (v && l < *pos) {
5225                 v = eval_map_next(m, v, &l);
5226         }
5227
5228         return v;
5229 }
5230
5231 static void eval_map_stop(struct seq_file *m, void *v)
5232 {
5233         mutex_unlock(&trace_eval_mutex);
5234 }
5235
5236 static int eval_map_show(struct seq_file *m, void *v)
5237 {
5238         union trace_eval_map_item *ptr = v;
5239
5240         seq_printf(m, "%s %ld (%s)\n",
5241                    ptr->map.eval_string, ptr->map.eval_value,
5242                    ptr->map.system);
5243
5244         return 0;
5245 }
5246
5247 static const struct seq_operations tracing_eval_map_seq_ops = {
5248         .start          = eval_map_start,
5249         .next           = eval_map_next,
5250         .stop           = eval_map_stop,
5251         .show           = eval_map_show,
5252 };
5253
5254 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5255 {
5256         if (tracing_disabled)
5257                 return -ENODEV;
5258
5259         return seq_open(filp, &tracing_eval_map_seq_ops);
5260 }
5261
5262 static const struct file_operations tracing_eval_map_fops = {
5263         .open           = tracing_eval_map_open,
5264         .read           = seq_read,
5265         .llseek         = seq_lseek,
5266         .release        = seq_release,
5267 };
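
/*
 * Example of an eval_map line as printed by eval_map_show() above,
 * in the form "<eval-string> <value> (<system>)".  The entry shown is
 * purely illustrative:
 *
 *	SOME_ENUM 42 (my_system)
 */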
5268
5269 static inline union trace_eval_map_item *
5270 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5271 {
5272         /* Return tail of array given the head */
5273         return ptr + ptr->head.length + 1;
5274 }
5275
5276 static void
5277 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5278                            int len)
5279 {
5280         struct trace_eval_map **stop;
5281         struct trace_eval_map **map;
5282         union trace_eval_map_item *map_array;
5283         union trace_eval_map_item *ptr;
5284
5285         stop = start + len;
5286
5287         /*
5288          * The trace_eval_maps contains the map plus a head and tail item,
5289          * where the head holds the module and length of array, and the
5290          * tail holds a pointer to the next list.
5291          */
5292         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5293         if (!map_array) {
5294                 pr_warn("Unable to allocate trace eval mapping\n");
5295                 return;
5296         }
5297
5298         mutex_lock(&trace_eval_mutex);
5299
5300         if (!trace_eval_maps)
5301                 trace_eval_maps = map_array;
5302         else {
5303                 ptr = trace_eval_maps;
5304                 for (;;) {
5305                         ptr = trace_eval_jmp_to_tail(ptr);
5306                         if (!ptr->tail.next)
5307                                 break;
5308                         ptr = ptr->tail.next;
5309
5310                 }
5311                 ptr->tail.next = map_array;
5312         }
5313         map_array->head.mod = mod;
5314         map_array->head.length = len;
5315         map_array++;
5316
5317         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5318                 map_array->map = **map;
5319                 map_array++;
5320         }
5321         memset(map_array, 0, sizeof(*map_array));
5322
5323         mutex_unlock(&trace_eval_mutex);
5324 }
5325
5326 static void trace_create_eval_file(struct dentry *d_tracer)
5327 {
5328         trace_create_file("eval_map", 0444, d_tracer,
5329                           NULL, &tracing_eval_map_fops);
5330 }
5331
5332 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5333 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5334 static inline void trace_insert_eval_map_file(struct module *mod,
5335                               struct trace_eval_map **start, int len) { }
5336 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5337
5338 static void trace_insert_eval_map(struct module *mod,
5339                                   struct trace_eval_map **start, int len)
5340 {
5341         struct trace_eval_map **map;
5342
5343         if (len <= 0)
5344                 return;
5345
5346         map = start;
5347
5348         trace_event_eval_update(map, len);
5349
5350         trace_insert_eval_map_file(mod, start, len);
5351 }
5352
5353 static ssize_t
5354 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5355                        size_t cnt, loff_t *ppos)
5356 {
5357         struct trace_array *tr = filp->private_data;
5358         char buf[MAX_TRACER_SIZE+2];
5359         int r;
5360
5361         mutex_lock(&trace_types_lock);
5362         r = sprintf(buf, "%s\n", tr->current_trace->name);
5363         mutex_unlock(&trace_types_lock);
5364
5365         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5366 }
5367
5368 int tracer_init(struct tracer *t, struct trace_array *tr)
5369 {
5370         tracing_reset_online_cpus(&tr->trace_buffer);
5371         return t->init(tr);
5372 }
5373
5374 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5375 {
5376         int cpu;
5377
5378         for_each_tracing_cpu(cpu)
5379                 per_cpu_ptr(buf->data, cpu)->entries = val;
5380 }
5381
5382 #ifdef CONFIG_TRACER_MAX_TRACE
5383 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5384 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5385                                         struct trace_buffer *size_buf, int cpu_id)
5386 {
5387         int cpu, ret = 0;
5388
5389         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5390                 for_each_tracing_cpu(cpu) {
5391                         ret = ring_buffer_resize(trace_buf->buffer,
5392                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5393                         if (ret < 0)
5394                                 break;
5395                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5396                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5397                 }
5398         } else {
5399                 ret = ring_buffer_resize(trace_buf->buffer,
5400                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5401                 if (ret == 0)
5402                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5403                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5404         }
5405
5406         return ret;
5407 }
5408 #endif /* CONFIG_TRACER_MAX_TRACE */
5409
5410 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5411                                         unsigned long size, int cpu)
5412 {
5413         int ret;
5414
5415         /*
5416          * If kernel or user changes the size of the ring buffer
5417          * we use the size that was given, and we can forget about
5418          * expanding it later.
5419          */
5420         ring_buffer_expanded = true;
5421
5422         /* May be called before buffers are initialized */
5423         if (!tr->trace_buffer.buffer)
5424                 return 0;
5425
5426         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5427         if (ret < 0)
5428                 return ret;
5429
5430 #ifdef CONFIG_TRACER_MAX_TRACE
5431         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5432             !tr->current_trace->use_max_tr)
5433                 goto out;
5434
5435         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5436         if (ret < 0) {
5437                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5438                                                      &tr->trace_buffer, cpu);
5439                 if (r < 0) {
5440                         /*
5441                          * AARGH! We are left with a differently
5442                          * sized max buffer!
5443                          * The max buffer is our "snapshot" buffer.
5444                          * When a tracer needs a snapshot (one of the
5445                          * latency tracers), it swaps the max buffer
5446                          * with the saved snapshot. We succeeded in
5447                          * updating the size of the main buffer, but failed
5448                          * to update the size of the max buffer. And when we
5449                          * tried to reset the main buffer to its original
5450                          * size, that failed too. This is very unlikely to
5451                          * happen, but if it does, warn and kill all
5452                          * tracing.
5453                          */
5454                         WARN_ON(1);
5455                         tracing_disabled = 1;
5456                 }
5457                 return ret;
5458         }
5459
5460         if (cpu == RING_BUFFER_ALL_CPUS)
5461                 set_buffer_entries(&tr->max_buffer, size);
5462         else
5463                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5464
5465  out:
5466 #endif /* CONFIG_TRACER_MAX_TRACE */
5467
5468         if (cpu == RING_BUFFER_ALL_CPUS)
5469                 set_buffer_entries(&tr->trace_buffer, size);
5470         else
5471                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5472
5473         return ret;
5474 }
5475
5476 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5477                                           unsigned long size, int cpu_id)
5478 {
5479         int ret = size;
5480
5481         mutex_lock(&trace_types_lock);
5482
5483         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5484                 /* make sure this cpu is enabled in the mask */
5485                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5486                         ret = -EINVAL;
5487                         goto out;
5488                 }
5489         }
5490
5491         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5492         if (ret < 0)
5493                 ret = -ENOMEM;
5494
5495 out:
5496         mutex_unlock(&trace_types_lock);
5497
5498         return ret;
5499 }
5500
5501
5502 /**
5503  * tracing_update_buffers - used by tracing facility to expand ring buffers
5504  *
5505  * To save memory on systems where tracing is configured in but never
5506  * used, the ring buffers are initially set to a minimum size. Once a
5507  * user starts to use the tracing facility, they need to grow to
5508  * their default size.
5509  *
5510  * This function is to be called when a tracer is about to be used.
5511  */
5512 int tracing_update_buffers(void)
5513 {
5514         int ret = 0;
5515
5516         mutex_lock(&trace_types_lock);
5517         if (!ring_buffer_expanded)
5518                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5519                                                 RING_BUFFER_ALL_CPUS);
5520         mutex_unlock(&trace_types_lock);
5521
5522         return ret;
5523 }
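
/*
 * Sketch of a typical call site: code that is about to enable a tracer
 * or event expands the ring buffers first and bails out on failure
 * (illustrative only, not tied to a specific caller):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */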
5524
5525 struct trace_option_dentry;
5526
5527 static void
5528 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5529
5530 /*
5531  * Used to clear out the tracer before deletion of an instance.
5532  * Must have trace_types_lock held.
5533  */
5534 static void tracing_set_nop(struct trace_array *tr)
5535 {
5536         if (tr->current_trace == &nop_trace)
5537                 return;
5538
5539         tr->current_trace->enabled--;
5540
5541         if (tr->current_trace->reset)
5542                 tr->current_trace->reset(tr);
5543
5544         tr->current_trace = &nop_trace;
5545 }
5546
5547 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5548 {
5549         /* Only enable if the directory has been created already. */
5550         if (!tr->dir)
5551                 return;
5552
5553         create_trace_option_files(tr, t);
5554 }
5555
5556 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5557 {
5558         struct tracer *t;
5559 #ifdef CONFIG_TRACER_MAX_TRACE
5560         bool had_max_tr;
5561 #endif
5562         int ret = 0;
5563
5564         mutex_lock(&trace_types_lock);
5565
5566         if (!ring_buffer_expanded) {
5567                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5568                                                 RING_BUFFER_ALL_CPUS);
5569                 if (ret < 0)
5570                         goto out;
5571                 ret = 0;
5572         }
5573
5574         for (t = trace_types; t; t = t->next) {
5575                 if (strcmp(t->name, buf) == 0)
5576                         break;
5577         }
5578         if (!t) {
5579                 ret = -EINVAL;
5580                 goto out;
5581         }
5582         if (t == tr->current_trace)
5583                 goto out;
5584
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586         if (t->use_max_tr) {
5587                 arch_spin_lock(&tr->max_lock);
5588                 if (tr->cond_snapshot)
5589                         ret = -EBUSY;
5590                 arch_spin_unlock(&tr->max_lock);
5591                 if (ret)
5592                         goto out;
5593         }
5594 #endif
5595         /* Some tracers won't work on kernel command line */
5596         if (system_state < SYSTEM_RUNNING && t->noboot) {
5597                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5598                         t->name);
5599                 goto out;
5600         }
5601
5602         /* Some tracers are only allowed for the top level buffer */
5603         if (!trace_ok_for_array(t, tr)) {
5604                 ret = -EINVAL;
5605                 goto out;
5606         }
5607
5608         /* If trace pipe files are being read, we can't change the tracer */
5609         if (tr->current_trace->ref) {
5610                 ret = -EBUSY;
5611                 goto out;
5612         }
5613
5614         trace_branch_disable();
5615
5616         tr->current_trace->enabled--;
5617
5618         if (tr->current_trace->reset)
5619                 tr->current_trace->reset(tr);
5620
5621         /* Current trace needs to be nop_trace before synchronize_rcu */
5622         tr->current_trace = &nop_trace;
5623
5624 #ifdef CONFIG_TRACER_MAX_TRACE
5625         had_max_tr = tr->allocated_snapshot;
5626
5627         if (had_max_tr && !t->use_max_tr) {
5628                 /*
5629                  * We need to make sure that the update_max_tr sees that
5630                  * current_trace changed to nop_trace to keep it from
5631                  * swapping the buffers after we resize it.
5632                  * The update_max_tr is called with interrupts disabled,
5633                  * so a synchronize_rcu() is sufficient.
5634                  */
5635                 synchronize_rcu();
5636                 free_snapshot(tr);
5637         }
5638 #endif
5639
5640 #ifdef CONFIG_TRACER_MAX_TRACE
5641         if (t->use_max_tr && !had_max_tr) {
5642                 ret = tracing_alloc_snapshot_instance(tr);
5643                 if (ret < 0)
5644                         goto out;
5645         }
5646 #endif
5647
5648         if (t->init) {
5649                 ret = tracer_init(t, tr);
5650                 if (ret)
5651                         goto out;
5652         }
5653
5654         tr->current_trace = t;
5655         tr->current_trace->enabled++;
5656         trace_branch_enable(tr);
5657  out:
5658         mutex_unlock(&trace_types_lock);
5659
5660         return ret;
5661 }
5662
5663 static ssize_t
5664 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5665                         size_t cnt, loff_t *ppos)
5666 {
5667         struct trace_array *tr = filp->private_data;
5668         char buf[MAX_TRACER_SIZE+1];
5669         int i;
5670         size_t ret;
5671         int err;
5672
5673         ret = cnt;
5674
5675         if (cnt > MAX_TRACER_SIZE)
5676                 cnt = MAX_TRACER_SIZE;
5677
5678         if (copy_from_user(buf, ubuf, cnt))
5679                 return -EFAULT;
5680
5681         buf[cnt] = 0;
5682
5683         /* strip trailing whitespace. */
5684         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5685                 buf[i] = 0;
5686
5687         err = tracing_set_tracer(tr, buf);
5688         if (err)
5689                 return err;
5690
5691         *ppos += ret;
5692
5693         return ret;
5694 }
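
/*
 * Example of driving the write handler above from user space.  The file
 * is typically exposed as current_tracer in tracefs (the path assumes a
 * /sys/kernel/tracing mount and the tracer being compiled in):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	cat /sys/kernel/tracing/current_tracer
 */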
5695
5696 static ssize_t
5697 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5698                    size_t cnt, loff_t *ppos)
5699 {
5700         char buf[64];
5701         int r;
5702
5703         r = snprintf(buf, sizeof(buf), "%ld\n",
5704                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5705         if (r > sizeof(buf))
5706                 r = sizeof(buf);
5707         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5708 }
5709
5710 static ssize_t
5711 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5712                     size_t cnt, loff_t *ppos)
5713 {
5714         unsigned long val;
5715         int ret;
5716
5717         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5718         if (ret)
5719                 return ret;
5720
5721         *ptr = val * 1000;
5722
5723         return cnt;
5724 }
5725
5726 static ssize_t
5727 tracing_thresh_read(struct file *filp, char __user *ubuf,
5728                     size_t cnt, loff_t *ppos)
5729 {
5730         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5731 }
5732
5733 static ssize_t
5734 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5735                      size_t cnt, loff_t *ppos)
5736 {
5737         struct trace_array *tr = filp->private_data;
5738         int ret;
5739
5740         mutex_lock(&trace_types_lock);
5741         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5742         if (ret < 0)
5743                 goto out;
5744
5745         if (tr->current_trace->update_thresh) {
5746                 ret = tr->current_trace->update_thresh(tr);
5747                 if (ret < 0)
5748                         goto out;
5749         }
5750
5751         ret = cnt;
5752 out:
5753         mutex_unlock(&trace_types_lock);
5754
5755         return ret;
5756 }
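
/*
 * Example: tracing_thresh is written and read in microseconds, while the
 * value is stored internally in nanoseconds (tracing_nsecs_write() above
 * multiplies by 1000).  The path assumes a /sys/kernel/tracing mount:
 *
 *	echo 100 > /sys/kernel/tracing/tracing_thresh
 */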
5757
5758 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5759
5760 static ssize_t
5761 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5762                      size_t cnt, loff_t *ppos)
5763 {
5764         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5765 }
5766
5767 static ssize_t
5768 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5769                       size_t cnt, loff_t *ppos)
5770 {
5771         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5772 }
5773
5774 #endif
5775
5776 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5777 {
5778         struct trace_array *tr = inode->i_private;
5779         struct trace_iterator *iter;
5780         int ret = 0;
5781
5782         if (tracing_disabled)
5783                 return -ENODEV;
5784
5785         if (trace_array_get(tr) < 0)
5786                 return -ENODEV;
5787
5788         mutex_lock(&trace_types_lock);
5789
5790         /* create a buffer to store the information to pass to userspace */
5791         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5792         if (!iter) {
5793                 ret = -ENOMEM;
5794                 __trace_array_put(tr);
5795                 goto out;
5796         }
5797
5798         trace_seq_init(&iter->seq);
5799         iter->trace = tr->current_trace;
5800
5801         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5802                 ret = -ENOMEM;
5803                 goto fail;
5804         }
5805
5806         /* trace pipe does not show start of buffer */
5807         cpumask_setall(iter->started);
5808
5809         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5810                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5811
5812         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5813         if (trace_clocks[tr->clock_id].in_ns)
5814                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5815
5816         iter->tr = tr;
5817         iter->trace_buffer = &tr->trace_buffer;
5818         iter->cpu_file = tracing_get_cpu(inode);
5819         mutex_init(&iter->mutex);
5820         filp->private_data = iter;
5821
5822         if (iter->trace->pipe_open)
5823                 iter->trace->pipe_open(iter);
5824
5825         nonseekable_open(inode, filp);
5826
5827         tr->current_trace->ref++;
5828 out:
5829         mutex_unlock(&trace_types_lock);
5830         return ret;
5831
5832 fail:
5833         kfree(iter);
5834         __trace_array_put(tr);
5835         mutex_unlock(&trace_types_lock);
5836         return ret;
5837 }
5838
5839 static int tracing_release_pipe(struct inode *inode, struct file *file)
5840 {
5841         struct trace_iterator *iter = file->private_data;
5842         struct trace_array *tr = inode->i_private;
5843
5844         mutex_lock(&trace_types_lock);
5845
5846         tr->current_trace->ref--;
5847
5848         if (iter->trace->pipe_close)
5849                 iter->trace->pipe_close(iter);
5850
5851         mutex_unlock(&trace_types_lock);
5852
5853         free_cpumask_var(iter->started);
5854         mutex_destroy(&iter->mutex);
5855         kfree(iter);
5856
5857         trace_array_put(tr);
5858
5859         return 0;
5860 }
5861
5862 static __poll_t
5863 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5864 {
5865         struct trace_array *tr = iter->tr;
5866
5867         /* Iterators are static; they should be either filled or empty */
5868         if (trace_buffer_iter(iter, iter->cpu_file))
5869                 return EPOLLIN | EPOLLRDNORM;
5870
5871         if (tr->trace_flags & TRACE_ITER_BLOCK)
5872                 /*
5873                  * Always select as readable when in blocking mode
5874                  */
5875                 return EPOLLIN | EPOLLRDNORM;
5876         else
5877                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5878                                              filp, poll_table);
5879 }
5880
5881 static __poll_t
5882 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5883 {
5884         struct trace_iterator *iter = filp->private_data;
5885
5886         return trace_poll(iter, filp, poll_table);
5887 }
5888
5889 /* Must be called with iter->mutex held. */
5890 static int tracing_wait_pipe(struct file *filp)
5891 {
5892         struct trace_iterator *iter = filp->private_data;
5893         int ret;
5894
5895         while (trace_empty(iter)) {
5896
5897                 if ((filp->f_flags & O_NONBLOCK)) {
5898                         return -EAGAIN;
5899                 }
5900
5901                 /*
5902                  * We block until we read something and tracing is disabled.
5903                  * We still block if tracing is disabled, but we have never
5904                  * read anything. This allows a user to cat this file, and
5905                  * then enable tracing. But after we have read something,
5906                  * we give an EOF when tracing is again disabled.
5907                  *
5908                  * iter->pos will be 0 if we haven't read anything.
5909                  */
5910                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5911                         break;
5912
5913                 mutex_unlock(&iter->mutex);
5914
5915                 ret = wait_on_pipe(iter, 0);
5916
5917                 mutex_lock(&iter->mutex);
5918
5919                 if (ret)
5920                         return ret;
5921         }
5922
5923         return 1;
5924 }
5925
5926 /*
5927  * Consumer reader.
5928  */
5929 static ssize_t
5930 tracing_read_pipe(struct file *filp, char __user *ubuf,
5931                   size_t cnt, loff_t *ppos)
5932 {
5933         struct trace_iterator *iter = filp->private_data;
5934         ssize_t sret;
5935
5936         /*
5937          * Avoid more than one consumer on a single file descriptor.
5938          * This is just a matter of trace coherency; the ring buffer itself
5939          * is protected.
5940          */
5941         mutex_lock(&iter->mutex);
5942
5943         /* return any leftover data */
5944         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5945         if (sret != -EBUSY)
5946                 goto out;
5947
5948         trace_seq_init(&iter->seq);
5949
5950         if (iter->trace->read) {
5951                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5952                 if (sret)
5953                         goto out;
5954         }
5955
5956 waitagain:
5957         sret = tracing_wait_pipe(filp);
5958         if (sret <= 0)
5959                 goto out;
5960
5961         /* stop when tracing is finished */
5962         if (trace_empty(iter)) {
5963                 sret = 0;
5964                 goto out;
5965         }
5966
5967         if (cnt >= PAGE_SIZE)
5968                 cnt = PAGE_SIZE - 1;
5969
5970         /* reset all but tr, trace, and overruns */
5971         memset(&iter->seq, 0,
5972                sizeof(struct trace_iterator) -
5973                offsetof(struct trace_iterator, seq));
5974         cpumask_clear(iter->started);
5975         iter->pos = -1;
5976
5977         trace_event_read_lock();
5978         trace_access_lock(iter->cpu_file);
5979         while (trace_find_next_entry_inc(iter) != NULL) {
5980                 enum print_line_t ret;
5981                 int save_len = iter->seq.seq.len;
5982
5983                 ret = print_trace_line(iter);
5984                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5985                         /* don't print partial lines */
5986                         iter->seq.seq.len = save_len;
5987                         break;
5988                 }
5989                 if (ret != TRACE_TYPE_NO_CONSUME)
5990                         trace_consume(iter);
5991
5992                 if (trace_seq_used(&iter->seq) >= cnt)
5993                         break;
5994
5995                 /*
5996                  * Setting the full flag means we reached the trace_seq buffer
5997                  * size, and we should have left via the partial output condition above.
5998                  * One of the trace_seq_* functions is not used properly.
5999                  */
6000                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6001                           iter->ent->type);
6002         }
6003         trace_access_unlock(iter->cpu_file);
6004         trace_event_read_unlock();
6005
6006         /* Now copy what we have to the user */
6007         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6008         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6009                 trace_seq_init(&iter->seq);
6010
6011         /*
6012          * If there was nothing to send to user, in spite of consuming trace
6013          * entries, go back to wait for more entries.
6014          */
6015         if (sret == -EBUSY)
6016                 goto waitagain;
6017
6018 out:
6019         mutex_unlock(&iter->mutex);
6020
6021         return sret;
6022 }
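
/*
 * The consumer read above is what backs the trace_pipe style files:
 * reading them consumes entries from the ring buffer and blocks until
 * data is available (the path assumes a /sys/kernel/tracing mount):
 *
 *	cat /sys/kernel/tracing/trace_pipe
 */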
6023
6024 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6025                                      unsigned int idx)
6026 {
6027         __free_page(spd->pages[idx]);
6028 }
6029
6030 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6031         .confirm                = generic_pipe_buf_confirm,
6032         .release                = generic_pipe_buf_release,
6033         .steal                  = generic_pipe_buf_steal,
6034         .get                    = generic_pipe_buf_get,
6035 };
6036
6037 static size_t
6038 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6039 {
6040         size_t count;
6041         int save_len;
6042         int ret;
6043
6044         /* Seq buffer is page-sized, exactly what we need. */
6045         for (;;) {
6046                 save_len = iter->seq.seq.len;
6047                 ret = print_trace_line(iter);
6048
6049                 if (trace_seq_has_overflowed(&iter->seq)) {
6050                         iter->seq.seq.len = save_len;
6051                         break;
6052                 }
6053
6054                 /*
6055                  * This should not be hit, because it should only
6056                  * be set if the iter->seq overflowed. But check it
6057                  * anyway to be safe.
6058                  */
6059                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6060                         iter->seq.seq.len = save_len;
6061                         break;
6062                 }
6063
6064                 count = trace_seq_used(&iter->seq) - save_len;
6065                 if (rem < count) {
6066                         rem = 0;
6067                         iter->seq.seq.len = save_len;
6068                         break;
6069                 }
6070
6071                 if (ret != TRACE_TYPE_NO_CONSUME)
6072                         trace_consume(iter);
6073                 rem -= count;
6074                 if (!trace_find_next_entry_inc(iter))   {
6075                         rem = 0;
6076                         iter->ent = NULL;
6077                         break;
6078                 }
6079         }
6080
6081         return rem;
6082 }
6083
6084 static ssize_t tracing_splice_read_pipe(struct file *filp,
6085                                         loff_t *ppos,
6086                                         struct pipe_inode_info *pipe,
6087                                         size_t len,
6088                                         unsigned int flags)
6089 {
6090         struct page *pages_def[PIPE_DEF_BUFFERS];
6091         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6092         struct trace_iterator *iter = filp->private_data;
6093         struct splice_pipe_desc spd = {
6094                 .pages          = pages_def,
6095                 .partial        = partial_def,
6096                 .nr_pages       = 0, /* This gets updated below. */
6097                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6098                 .ops            = &tracing_pipe_buf_ops,
6099                 .spd_release    = tracing_spd_release_pipe,
6100         };
6101         ssize_t ret;
6102         size_t rem;
6103         unsigned int i;
6104
6105         if (splice_grow_spd(pipe, &spd))
6106                 return -ENOMEM;
6107
6108         mutex_lock(&iter->mutex);
6109
6110         if (iter->trace->splice_read) {
6111                 ret = iter->trace->splice_read(iter, filp,
6112                                                ppos, pipe, len, flags);
6113                 if (ret)
6114                         goto out_err;
6115         }
6116
6117         ret = tracing_wait_pipe(filp);
6118         if (ret <= 0)
6119                 goto out_err;
6120
6121         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6122                 ret = -EFAULT;
6123                 goto out_err;
6124         }
6125
6126         trace_event_read_lock();
6127         trace_access_lock(iter->cpu_file);
6128
6129         /* Fill as many pages as possible. */
6130         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6131                 spd.pages[i] = alloc_page(GFP_KERNEL);
6132                 if (!spd.pages[i])
6133                         break;
6134
6135                 rem = tracing_fill_pipe_page(rem, iter);
6136
6137                 /* Copy the data into the page, so we can start over. */
6138                 ret = trace_seq_to_buffer(&iter->seq,
6139                                           page_address(spd.pages[i]),
6140                                           trace_seq_used(&iter->seq));
6141                 if (ret < 0) {
6142                         __free_page(spd.pages[i]);
6143                         break;
6144                 }
6145                 spd.partial[i].offset = 0;
6146                 spd.partial[i].len = trace_seq_used(&iter->seq);
6147
6148                 trace_seq_init(&iter->seq);
6149         }
6150
6151         trace_access_unlock(iter->cpu_file);
6152         trace_event_read_unlock();
6153         mutex_unlock(&iter->mutex);
6154
6155         spd.nr_pages = i;
6156
6157         if (i)
6158                 ret = splice_to_pipe(pipe, &spd);
6159         else
6160                 ret = 0;
6161 out:
6162         splice_shrink_spd(&spd);
6163         return ret;
6164
6165 out_err:
6166         mutex_unlock(&iter->mutex);
6167         goto out;
6168 }
6169
6170 static ssize_t
6171 tracing_entries_read(struct file *filp, char __user *ubuf,
6172                      size_t cnt, loff_t *ppos)
6173 {
6174         struct inode *inode = file_inode(filp);
6175         struct trace_array *tr = inode->i_private;
6176         int cpu = tracing_get_cpu(inode);
6177         char buf[64];
6178         int r = 0;
6179         ssize_t ret;
6180
6181         mutex_lock(&trace_types_lock);
6182
6183         if (cpu == RING_BUFFER_ALL_CPUS) {
6184                 int cpu, buf_size_same;
6185                 unsigned long size;
6186
6187                 size = 0;
6188                 buf_size_same = 1;
6189                 /* check if all cpu sizes are same */
6190                 for_each_tracing_cpu(cpu) {
6191                         /* fill in the size from first enabled cpu */
6192                         if (size == 0)
6193                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6194                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6195                                 buf_size_same = 0;
6196                                 break;
6197                         }
6198                 }
6199
6200                 if (buf_size_same) {
6201                         if (!ring_buffer_expanded)
6202                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6203                                             size >> 10,
6204                                             trace_buf_size >> 10);
6205                         else
6206                                 r = sprintf(buf, "%lu\n", size >> 10);
6207                 } else
6208                         r = sprintf(buf, "X\n");
6209         } else
6210                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6211
6212         mutex_unlock(&trace_types_lock);
6213
6214         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6215         return ret;
6216 }
6217
6218 static ssize_t
6219 tracing_entries_write(struct file *filp, const char __user *ubuf,
6220                       size_t cnt, loff_t *ppos)
6221 {
6222         struct inode *inode = file_inode(filp);
6223         struct trace_array *tr = inode->i_private;
6224         unsigned long val;
6225         int ret;
6226
6227         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6228         if (ret)
6229                 return ret;
6230
6231         /* must have at least 1 entry */
6232         if (!val)
6233                 return -EINVAL;
6234
6235         /* value is in KB */
6236         val <<= 10;
6237         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6238         if (ret < 0)
6239                 return ret;
6240
6241         *ppos += cnt;
6242
6243         return cnt;
6244 }
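
/*
 * Example: per the "value is in KB" conversion above, writing 4096 here
 * requests a 4 MB buffer per CPU.  The file is typically exposed as
 * buffer_size_kb (the path assumes a /sys/kernel/tracing mount):
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	cat /sys/kernel/tracing/buffer_size_kb
 */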
6245
6246 static ssize_t
6247 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6248                                 size_t cnt, loff_t *ppos)
6249 {
6250         struct trace_array *tr = filp->private_data;
6251         char buf[64];
6252         int r, cpu;
6253         unsigned long size = 0, expanded_size = 0;
6254
6255         mutex_lock(&trace_types_lock);
6256         for_each_tracing_cpu(cpu) {
6257                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6258                 if (!ring_buffer_expanded)
6259                         expanded_size += trace_buf_size >> 10;
6260         }
6261         if (ring_buffer_expanded)
6262                 r = sprintf(buf, "%lu\n", size);
6263         else
6264                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6265         mutex_unlock(&trace_types_lock);
6266
6267         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6268 }
6269
6270 static ssize_t
6271 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6272                           size_t cnt, loff_t *ppos)
6273 {
6274         /*
6275          * There is no need to read what the user has written. This function
6276          * is just here to make sure that "echo" does not return an error.
6277          */
6278
6279         *ppos += cnt;
6280
6281         return cnt;
6282 }
6283
6284 static int
6285 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6286 {
6287         struct trace_array *tr = inode->i_private;
6288
6289         /* disable tracing? */
6290         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6291                 tracer_tracing_off(tr);
6292         /* resize the ring buffer to 0 */
6293         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6294
6295         trace_array_put(tr);
6296
6297         return 0;
6298 }
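
/*
 * Example: closing the file whose release handler is above shrinks the
 * ring buffer to zero (and turns tracing off if the corresponding trace
 * option is set).  It is typically exposed as free_buffer (the path
 * assumes a /sys/kernel/tracing mount):
 *
 *	echo > /sys/kernel/tracing/free_buffer
 */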
6299
6300 static ssize_t
6301 tracing_mark_write(struct file *filp, const char __user *ubuf,
6302                                         size_t cnt, loff_t *fpos)
6303 {
6304         struct trace_array *tr = filp->private_data;
6305         struct ring_buffer_event *event;
6306         enum event_trigger_type tt = ETT_NONE;
6307         struct ring_buffer *buffer;
6308         struct print_entry *entry;
6309         unsigned long irq_flags;
6310         const char faulted[] = "<faulted>";
6311         ssize_t written;
6312         int size;
6313         int len;
6314
6315 /* Used in tracing_mark_raw_write() as well */
6316 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6317
6318         if (tracing_disabled)
6319                 return -EINVAL;
6320
6321         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6322                 return -EINVAL;
6323
6324         if (cnt > TRACE_BUF_SIZE)
6325                 cnt = TRACE_BUF_SIZE;
6326
6327         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6328
6329         local_save_flags(irq_flags);
6330         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6331
6332         /* If less than "<faulted>", then make sure we can still add that */
6333         if (cnt < FAULTED_SIZE)
6334                 size += FAULTED_SIZE - cnt;
6335
6336         buffer = tr->trace_buffer.buffer;
6337         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6338                                             irq_flags, preempt_count());
6339         if (unlikely(!event))
6340                 /* Ring buffer disabled, return as if not open for write */
6341                 return -EBADF;
6342
6343         entry = ring_buffer_event_data(event);
6344         entry->ip = _THIS_IP_;
6345
6346         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6347         if (len) {
6348                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6349                 cnt = FAULTED_SIZE;
6350                 written = -EFAULT;
6351         } else
6352                 written = cnt;
6353         len = cnt;
6354
6355         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6356                 /* do not add \n before testing triggers, but add \0 */
6357                 entry->buf[cnt] = '\0';
6358                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6359         }
6360
6361         if (entry->buf[cnt - 1] != '\n') {
6362                 entry->buf[cnt] = '\n';
6363                 entry->buf[cnt + 1] = '\0';
6364         } else
6365                 entry->buf[cnt] = '\0';
6366
6367         __buffer_unlock_commit(buffer, event);
6368
6369         if (tt)
6370                 event_triggers_post_call(tr->trace_marker_file, tt);
6371
6372         if (written > 0)
6373                 *fpos += written;
6374
6375         return written;
6376 }
6377
6378 /* Limit it for now to 3K (including tag) */
6379 #define RAW_DATA_MAX_SIZE (1024*3)
6380
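/*
 * Write handler for the "trace_marker_raw" file.  The payload is binary:
 * the first sizeof(int) bytes are a user-defined tag id and the rest is
 * opaque data, limited to RAW_DATA_MAX_SIZE.  A faulting copy records an
 * id of -1 together with the "<faulted>" marker.
 */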
6381 static ssize_t
6382 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6383                                         size_t cnt, loff_t *fpos)
6384 {
6385         struct trace_array *tr = filp->private_data;
6386         struct ring_buffer_event *event;
6387         struct ring_buffer *buffer;
6388         struct raw_data_entry *entry;
6389         const char faulted[] = "<faulted>";
6390         unsigned long irq_flags;
6391         ssize_t written;
6392         int size;
6393         int len;
6394
6395 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6396
6397         if (tracing_disabled)
6398                 return -EINVAL;
6399
6400         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6401                 return -EINVAL;
6402
6403         /* The marker must at least have a tag id */
6404         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6405                 return -EINVAL;
6406
6407         if (cnt > TRACE_BUF_SIZE)
6408                 cnt = TRACE_BUF_SIZE;
6409
6410         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6411
6412         local_save_flags(irq_flags);
6413         size = sizeof(*entry) + cnt;
6414         if (cnt < FAULT_SIZE_ID)
6415                 size += FAULT_SIZE_ID - cnt;
6416
6417         buffer = tr->trace_buffer.buffer;
6418         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6419                                             irq_flags, preempt_count());
6420         if (!event)
6421                 /* Ring buffer disabled, return as if not open for write */
6422                 return -EBADF;
6423
6424         entry = ring_buffer_event_data(event);
6425
6426         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6427         if (len) {
6428                 entry->id = -1;
6429                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6430                 written = -EFAULT;
6431         } else
6432                 written = cnt;
6433
6434         __buffer_unlock_commit(buffer, event);
6435
6436         if (written > 0)
6437                 *fpos += written;
6438
6439         return written;
6440 }
6441
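/*
 * The "trace_clock" file: reading lists the available clocks with the
 * current one in brackets, and writing a clock name switches to it, e.g.:
 *
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the buffers, since timestamps taken with
 * different clocks are not comparable.
 */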
6442 static int tracing_clock_show(struct seq_file *m, void *v)
6443 {
6444         struct trace_array *tr = m->private;
6445         int i;
6446
6447         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6448                 seq_printf(m,
6449                         "%s%s%s%s", i ? " " : "",
6450                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6451                         i == tr->clock_id ? "]" : "");
6452         seq_putc(m, '\n');
6453
6454         return 0;
6455 }
6456
6457 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6458 {
6459         int i;
6460
6461         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6462                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6463                         break;
6464         }
6465         if (i == ARRAY_SIZE(trace_clocks))
6466                 return -EINVAL;
6467
6468         mutex_lock(&trace_types_lock);
6469
6470         tr->clock_id = i;
6471
6472         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6473
6474         /*
6475          * New clock may not be consistent with the previous clock.
6476          * Reset the buffer so that it doesn't have incomparable timestamps.
6477          */
6478         tracing_reset_online_cpus(&tr->trace_buffer);
6479
6480 #ifdef CONFIG_TRACER_MAX_TRACE
6481         if (tr->max_buffer.buffer)
6482                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6483         tracing_reset_online_cpus(&tr->max_buffer);
6484 #endif
6485
6486         mutex_unlock(&trace_types_lock);
6487
6488         return 0;
6489 }
6490
6491 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6492                                    size_t cnt, loff_t *fpos)
6493 {
6494         struct seq_file *m = filp->private_data;
6495         struct trace_array *tr = m->private;
6496         char buf[64];
6497         const char *clockstr;
6498         int ret;
6499
6500         if (cnt >= sizeof(buf))
6501                 return -EINVAL;
6502
6503         if (copy_from_user(buf, ubuf, cnt))
6504                 return -EFAULT;
6505
6506         buf[cnt] = 0;
6507
6508         clockstr = strstrip(buf);
6509
6510         ret = tracing_set_clock(tr, clockstr);
6511         if (ret)
6512                 return ret;
6513
6514         *fpos += cnt;
6515
6516         return cnt;
6517 }
6518
6519 static int tracing_clock_open(struct inode *inode, struct file *file)
6520 {
6521         struct trace_array *tr = inode->i_private;
6522         int ret;
6523
6524         if (tracing_disabled)
6525                 return -ENODEV;
6526
6527         if (trace_array_get(tr))
6528                 return -ENODEV;
6529
6530         ret = single_open(file, tracing_clock_show, inode->i_private);
6531         if (ret < 0)
6532                 trace_array_put(tr);
6533
6534         return ret;
6535 }
6536
6537 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6538 {
6539         struct trace_array *tr = m->private;
6540
6541         mutex_lock(&trace_types_lock);
6542
6543         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6544                 seq_puts(m, "delta [absolute]\n");
6545         else
6546                 seq_puts(m, "[delta] absolute\n");
6547
6548         mutex_unlock(&trace_types_lock);
6549
6550         return 0;
6551 }
6552
6553 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6554 {
6555         struct trace_array *tr = inode->i_private;
6556         int ret;
6557
6558         if (tracing_disabled)
6559                 return -ENODEV;
6560
6561         if (trace_array_get(tr))
6562                 return -ENODEV;
6563
6564         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6565         if (ret < 0)
6566                 trace_array_put(tr);
6567
6568         return ret;
6569 }
6570
6571 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6572 {
6573         int ret = 0;
6574
6575         mutex_lock(&trace_types_lock);
6576
6577         if (abs && tr->time_stamp_abs_ref++)
6578                 goto out;
6579
6580         if (!abs) {
6581                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6582                         ret = -EINVAL;
6583                         goto out;
6584                 }
6585
6586                 if (--tr->time_stamp_abs_ref)
6587                         goto out;
6588         }
6589
6590         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6591
6592 #ifdef CONFIG_TRACER_MAX_TRACE
6593         if (tr->max_buffer.buffer)
6594                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6595 #endif
6596  out:
6597         mutex_unlock(&trace_types_lock);
6598
6599         return ret;
6600 }
6601
6602 struct ftrace_buffer_info {
6603         struct trace_iterator   iter;
6604         void                    *spare;
6605         unsigned int            spare_cpu;
6606         unsigned int            read;
6607 };
6608
6609 #ifdef CONFIG_TRACER_SNAPSHOT
6610 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6611 {
6612         struct trace_array *tr = inode->i_private;
6613         struct trace_iterator *iter;
6614         struct seq_file *m;
6615         int ret = 0;
6616
6617         if (trace_array_get(tr) < 0)
6618                 return -ENODEV;
6619
6620         if (file->f_mode & FMODE_READ) {
6621                 iter = __tracing_open(inode, file, true);
6622                 if (IS_ERR(iter))
6623                         ret = PTR_ERR(iter);
6624         } else {
6625                 /* Writes still need the seq_file to hold the private data */
6626                 ret = -ENOMEM;
6627                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6628                 if (!m)
6629                         goto out;
6630                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6631                 if (!iter) {
6632                         kfree(m);
6633                         goto out;
6634                 }
6635                 ret = 0;
6636
6637                 iter->tr = tr;
6638                 iter->trace_buffer = &tr->max_buffer;
6639                 iter->cpu_file = tracing_get_cpu(inode);
6640                 m->private = iter;
6641                 file->private_data = m;
6642         }
6643 out:
6644         if (ret < 0)
6645                 trace_array_put(tr);
6646
6647         return ret;
6648 }
6649
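/*
 * Write handler for the "snapshot" file:
 *   0  - free the snapshot buffer (not allowed on a per-CPU file)
 *   1  - allocate the snapshot buffer if needed and swap it with the
 *        live buffer (per-CPU swap only if the ring buffer supports it)
 *   >1 - clear the snapshot buffer without freeing it
 * Writes are rejected while the current tracer uses the max buffer
 * itself or a conditional snapshot is active.
 */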
6650 static ssize_t
6651 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6652                        loff_t *ppos)
6653 {
6654         struct seq_file *m = filp->private_data;
6655         struct trace_iterator *iter = m->private;
6656         struct trace_array *tr = iter->tr;
6657         unsigned long val;
6658         int ret;
6659
6660         ret = tracing_update_buffers();
6661         if (ret < 0)
6662                 return ret;
6663
6664         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6665         if (ret)
6666                 return ret;
6667
6668         mutex_lock(&trace_types_lock);
6669
6670         if (tr->current_trace->use_max_tr) {
6671                 ret = -EBUSY;
6672                 goto out;
6673         }
6674
6675         arch_spin_lock(&tr->max_lock);
6676         if (tr->cond_snapshot)
6677                 ret = -EBUSY;
6678         arch_spin_unlock(&tr->max_lock);
6679         if (ret)
6680                 goto out;
6681
6682         switch (val) {
6683         case 0:
6684                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6685                         ret = -EINVAL;
6686                         break;
6687                 }
6688                 if (tr->allocated_snapshot)
6689                         free_snapshot(tr);
6690                 break;
6691         case 1:
6692 /* Only allow per-cpu swap if the ring buffer supports it */
6693 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6694                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6695                         ret = -EINVAL;
6696                         break;
6697                 }
6698 #endif
6699                 if (!tr->allocated_snapshot) {
6700                         ret = tracing_alloc_snapshot_instance(tr);
6701                         if (ret < 0)
6702                                 break;
6703                 }
6704                 local_irq_disable();
6705                 /* Now, we're going to swap */
6706                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6707                         update_max_tr(tr, current, smp_processor_id(), NULL);
6708                 else
6709                         update_max_tr_single(tr, current, iter->cpu_file);
6710                 local_irq_enable();
6711                 break;
6712         default:
6713                 if (tr->allocated_snapshot) {
6714                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6715                                 tracing_reset_online_cpus(&tr->max_buffer);
6716                         else
6717                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6718                 }
6719                 break;
6720         }
6721
6722         if (ret >= 0) {
6723                 *ppos += cnt;
6724                 ret = cnt;
6725         }
6726 out:
6727         mutex_unlock(&trace_types_lock);
6728         return ret;
6729 }
6730
6731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6732 {
6733         struct seq_file *m = file->private_data;
6734         int ret;
6735
6736         ret = tracing_release(inode, file);
6737
6738         if (file->f_mode & FMODE_READ)
6739                 return ret;
6740
6741         /* If write only, the seq_file is just a stub */
6742         if (m)
6743                 kfree(m->private);
6744         kfree(m);
6745
6746         return 0;
6747 }
6748
6749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6751                                     size_t count, loff_t *ppos);
6752 static int tracing_buffers_release(struct inode *inode, struct file *file);
6753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6754                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6755
6756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6757 {
6758         struct ftrace_buffer_info *info;
6759         int ret;
6760
6761         ret = tracing_buffers_open(inode, filp);
6762         if (ret < 0)
6763                 return ret;
6764
6765         info = filp->private_data;
6766
6767         if (info->iter.trace->use_max_tr) {
6768                 tracing_buffers_release(inode, filp);
6769                 return -EBUSY;
6770         }
6771
6772         info->iter.snapshot = true;
6773         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6774
6775         return ret;
6776 }
6777
6778 #endif /* CONFIG_TRACER_SNAPSHOT */
6779
6780
6781 static const struct file_operations tracing_thresh_fops = {
6782         .open           = tracing_open_generic,
6783         .read           = tracing_thresh_read,
6784         .write          = tracing_thresh_write,
6785         .llseek         = generic_file_llseek,
6786 };
6787
6788 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6789 static const struct file_operations tracing_max_lat_fops = {
6790         .open           = tracing_open_generic,
6791         .read           = tracing_max_lat_read,
6792         .write          = tracing_max_lat_write,
6793         .llseek         = generic_file_llseek,
6794 };
6795 #endif
6796
6797 static const struct file_operations set_tracer_fops = {
6798         .open           = tracing_open_generic,
6799         .read           = tracing_set_trace_read,
6800         .write          = tracing_set_trace_write,
6801         .llseek         = generic_file_llseek,
6802 };
6803
6804 static const struct file_operations tracing_pipe_fops = {
6805         .open           = tracing_open_pipe,
6806         .poll           = tracing_poll_pipe,
6807         .read           = tracing_read_pipe,
6808         .splice_read    = tracing_splice_read_pipe,
6809         .release        = tracing_release_pipe,
6810         .llseek         = no_llseek,
6811 };
6812
6813 static const struct file_operations tracing_entries_fops = {
6814         .open           = tracing_open_generic_tr,
6815         .read           = tracing_entries_read,
6816         .write          = tracing_entries_write,
6817         .llseek         = generic_file_llseek,
6818         .release        = tracing_release_generic_tr,
6819 };
6820
6821 static const struct file_operations tracing_total_entries_fops = {
6822         .open           = tracing_open_generic_tr,
6823         .read           = tracing_total_entries_read,
6824         .llseek         = generic_file_llseek,
6825         .release        = tracing_release_generic_tr,
6826 };
6827
6828 static const struct file_operations tracing_free_buffer_fops = {
6829         .open           = tracing_open_generic_tr,
6830         .write          = tracing_free_buffer_write,
6831         .release        = tracing_free_buffer_release,
6832 };
6833
6834 static const struct file_operations tracing_mark_fops = {
6835         .open           = tracing_open_generic_tr,
6836         .write          = tracing_mark_write,
6837         .llseek         = generic_file_llseek,
6838         .release        = tracing_release_generic_tr,
6839 };
6840
6841 static const struct file_operations tracing_mark_raw_fops = {
6842         .open           = tracing_open_generic_tr,
6843         .write          = tracing_mark_raw_write,
6844         .llseek         = generic_file_llseek,
6845         .release        = tracing_release_generic_tr,
6846 };
6847
6848 static const struct file_operations trace_clock_fops = {
6849         .open           = tracing_clock_open,
6850         .read           = seq_read,
6851         .llseek         = seq_lseek,
6852         .release        = tracing_single_release_tr,
6853         .write          = tracing_clock_write,
6854 };
6855
6856 static const struct file_operations trace_time_stamp_mode_fops = {
6857         .open           = tracing_time_stamp_mode_open,
6858         .read           = seq_read,
6859         .llseek         = seq_lseek,
6860         .release        = tracing_single_release_tr,
6861 };
6862
6863 #ifdef CONFIG_TRACER_SNAPSHOT
6864 static const struct file_operations snapshot_fops = {
6865         .open           = tracing_snapshot_open,
6866         .read           = seq_read,
6867         .write          = tracing_snapshot_write,
6868         .llseek         = tracing_lseek,
6869         .release        = tracing_snapshot_release,
6870 };
6871
6872 static const struct file_operations snapshot_raw_fops = {
6873         .open           = snapshot_raw_open,
6874         .read           = tracing_buffers_read,
6875         .release        = tracing_buffers_release,
6876         .splice_read    = tracing_buffers_splice_read,
6877         .llseek         = no_llseek,
6878 };
6879
6880 #endif /* CONFIG_TRACER_SNAPSHOT */
6881
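/*
 * Open callback for the per-CPU "trace_pipe_raw" files (and, via
 * snapshot_raw_open() above, the per-CPU "snapshot_raw" files).  Readers
 * get raw ring-buffer pages, either copied out by read() or handed over
 * zero-copy via splice().
 */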
6882 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6883 {
6884         struct trace_array *tr = inode->i_private;
6885         struct ftrace_buffer_info *info;
6886         int ret;
6887
6888         if (tracing_disabled)
6889                 return -ENODEV;
6890
6891         if (trace_array_get(tr) < 0)
6892                 return -ENODEV;
6893
6894         info = kzalloc(sizeof(*info), GFP_KERNEL);
6895         if (!info) {
6896                 trace_array_put(tr);
6897                 return -ENOMEM;
6898         }
6899
6900         mutex_lock(&trace_types_lock);
6901
6902         info->iter.tr           = tr;
6903         info->iter.cpu_file     = tracing_get_cpu(inode);
6904         info->iter.trace        = tr->current_trace;
6905         info->iter.trace_buffer = &tr->trace_buffer;
6906         info->spare             = NULL;
6907         /* Force reading ring buffer for first read */
6908         info->read              = (unsigned int)-1;
6909
6910         filp->private_data = info;
6911
6912         tr->current_trace->ref++;
6913
6914         mutex_unlock(&trace_types_lock);
6915
6916         ret = nonseekable_open(inode, filp);
6917         if (ret < 0)
6918                 trace_array_put(tr);
6919
6920         return ret;
6921 }
6922
6923 static __poll_t
6924 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6925 {
6926         struct ftrace_buffer_info *info = filp->private_data;
6927         struct trace_iterator *iter = &info->iter;
6928
6929         return trace_poll(iter, filp, poll_table);
6930 }
6931
6932 static ssize_t
6933 tracing_buffers_read(struct file *filp, char __user *ubuf,
6934                      size_t count, loff_t *ppos)
6935 {
6936         struct ftrace_buffer_info *info = filp->private_data;
6937         struct trace_iterator *iter = &info->iter;
6938         ssize_t ret = 0;
6939         ssize_t size;
6940
6941         if (!count)
6942                 return 0;
6943
6944 #ifdef CONFIG_TRACER_MAX_TRACE
6945         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6946                 return -EBUSY;
6947 #endif
6948
6949         if (!info->spare) {
6950                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6951                                                           iter->cpu_file);
6952                 if (IS_ERR(info->spare)) {
6953                         ret = PTR_ERR(info->spare);
6954                         info->spare = NULL;
6955                 } else {
6956                         info->spare_cpu = iter->cpu_file;
6957                 }
6958         }
6959         if (!info->spare)
6960                 return ret;
6961
6962         /* Do we have previous read data to read? */
6963         if (info->read < PAGE_SIZE)
6964                 goto read;
6965
6966  again:
6967         trace_access_lock(iter->cpu_file);
6968         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6969                                     &info->spare,
6970                                     count,
6971                                     iter->cpu_file, 0);
6972         trace_access_unlock(iter->cpu_file);
6973
6974         if (ret < 0) {
6975                 if (trace_empty(iter)) {
6976                         if ((filp->f_flags & O_NONBLOCK))
6977                                 return -EAGAIN;
6978
6979                         ret = wait_on_pipe(iter, 0);
6980                         if (ret)
6981                                 return ret;
6982
6983                         goto again;
6984                 }
6985                 return 0;
6986         }
6987
6988         info->read = 0;
6989  read:
6990         size = PAGE_SIZE - info->read;
6991         if (size > count)
6992                 size = count;
6993
6994         ret = copy_to_user(ubuf, info->spare + info->read, size);
6995         if (ret == size)
6996                 return -EFAULT;
6997
6998         size -= ret;
6999
7000         *ppos += size;
7001         info->read += size;
7002
7003         return size;
7004 }
7005
7006 static int tracing_buffers_release(struct inode *inode, struct file *file)
7007 {
7008         struct ftrace_buffer_info *info = file->private_data;
7009         struct trace_iterator *iter = &info->iter;
7010
7011         mutex_lock(&trace_types_lock);
7012
7013         iter->tr->current_trace->ref--;
7014
7015         __trace_array_put(iter->tr);
7016
7017         if (info->spare)
7018                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7019                                            info->spare_cpu, info->spare);
7020         kfree(info);
7021
7022         mutex_unlock(&trace_types_lock);
7023
7024         return 0;
7025 }
7026
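/*
 * A refcounted reference to a ring-buffer read page handed to a pipe by
 * splice().  The page is only returned to the ring buffer once the last
 * pipe buffer referencing it has been released.
 */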
7027 struct buffer_ref {
7028         struct ring_buffer      *buffer;
7029         void                    *page;
7030         int                     cpu;
7031         refcount_t              refcount;
7032 };
7033
7034 static void buffer_ref_release(struct buffer_ref *ref)
7035 {
7036         if (!refcount_dec_and_test(&ref->refcount))
7037                 return;
7038         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7039         kfree(ref);
7040 }
7041
7042 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7043                                     struct pipe_buffer *buf)
7044 {
7045         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7046
7047         buffer_ref_release(ref);
7048         buf->private = 0;
7049 }
7050
7051 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7052                                 struct pipe_buffer *buf)
7053 {
7054         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7055
7056         if (refcount_read(&ref->refcount) > INT_MAX/2)
7057                 return false;
7058
7059         refcount_inc(&ref->refcount);
7060         return true;
7061 }
7062
7063 /* Pipe buffer operations for a buffer. */
7064 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7065         .confirm                = generic_pipe_buf_confirm,
7066         .release                = buffer_pipe_buf_release,
7067         .steal                  = generic_pipe_buf_nosteal,
7068         .get                    = buffer_pipe_buf_get,
7069 };
7070
7071 /*
7072  * Callback from splice_to_pipe(): release the pages held in the spd
7073  * if we errored out while filling the pipe.
7074  */
7075 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7076 {
7077         struct buffer_ref *ref =
7078                 (struct buffer_ref *)spd->partial[i].private;
7079
7080         buffer_ref_release(ref);
7081         spd->partial[i].private = 0;
7082 }
7083
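/*
 * splice() support for trace_pipe_raw: ring-buffer pages are handed to
 * the pipe without copying, each wrapped in a buffer_ref so the page is
 * freed only when the last pipe reference goes away.  The offset must be
 * page aligned; the length is rounded down to a multiple of the page
 * size, and lengths below one page are rejected.
 */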
7084 static ssize_t
7085 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7086                             struct pipe_inode_info *pipe, size_t len,
7087                             unsigned int flags)
7088 {
7089         struct ftrace_buffer_info *info = file->private_data;
7090         struct trace_iterator *iter = &info->iter;
7091         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7092         struct page *pages_def[PIPE_DEF_BUFFERS];
7093         struct splice_pipe_desc spd = {
7094                 .pages          = pages_def,
7095                 .partial        = partial_def,
7096                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7097                 .ops            = &buffer_pipe_buf_ops,
7098                 .spd_release    = buffer_spd_release,
7099         };
7100         struct buffer_ref *ref;
7101         int entries, i;
7102         ssize_t ret = 0;
7103
7104 #ifdef CONFIG_TRACER_MAX_TRACE
7105         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7106                 return -EBUSY;
7107 #endif
7108
7109         if (*ppos & (PAGE_SIZE - 1))
7110                 return -EINVAL;
7111
7112         if (len & (PAGE_SIZE - 1)) {
7113                 if (len < PAGE_SIZE)
7114                         return -EINVAL;
7115                 len &= PAGE_MASK;
7116         }
7117
7118         if (splice_grow_spd(pipe, &spd))
7119                 return -ENOMEM;
7120
7121  again:
7122         trace_access_lock(iter->cpu_file);
7123         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7124
7125         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7126                 struct page *page;
7127                 int r;
7128
7129                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7130                 if (!ref) {
7131                         ret = -ENOMEM;
7132                         break;
7133                 }
7134
7135                 refcount_set(&ref->refcount, 1);
7136                 ref->buffer = iter->trace_buffer->buffer;
7137                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7138                 if (IS_ERR(ref->page)) {
7139                         ret = PTR_ERR(ref->page);
7140                         ref->page = NULL;
7141                         kfree(ref);
7142                         break;
7143                 }
7144                 ref->cpu = iter->cpu_file;
7145
7146                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7147                                           len, iter->cpu_file, 1);
7148                 if (r < 0) {
7149                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7150                                                    ref->page);
7151                         kfree(ref);
7152                         break;
7153                 }
7154
7155                 page = virt_to_page(ref->page);
7156
7157                 spd.pages[i] = page;
7158                 spd.partial[i].len = PAGE_SIZE;
7159                 spd.partial[i].offset = 0;
7160                 spd.partial[i].private = (unsigned long)ref;
7161                 spd.nr_pages++;
7162                 *ppos += PAGE_SIZE;
7163
7164                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7165         }
7166
7167         trace_access_unlock(iter->cpu_file);
7168         spd.nr_pages = i;
7169
7170         /* did we read anything? */
7171         if (!spd.nr_pages) {
7172                 if (ret)
7173                         goto out;
7174
7175                 ret = -EAGAIN;
7176                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7177                         goto out;
7178
7179                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7180                 if (ret)
7181                         goto out;
7182
7183                 goto again;
7184         }
7185
7186         ret = splice_to_pipe(pipe, &spd);
7187 out:
7188         splice_shrink_spd(&spd);
7189
7190         return ret;
7191 }
7192
7193 static const struct file_operations tracing_buffers_fops = {
7194         .open           = tracing_buffers_open,
7195         .read           = tracing_buffers_read,
7196         .poll           = tracing_buffers_poll,
7197         .release        = tracing_buffers_release,
7198         .splice_read    = tracing_buffers_splice_read,
7199         .llseek         = no_llseek,
7200 };
7201
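/*
 * Read handler for the per-CPU "stats" files: prints entry counts,
 * overruns, byte counts, the oldest event timestamp and the current
 * timestamp (in seconds for nanosecond clocks, raw for counter/tsc
 * clocks), plus dropped and read event counts for that CPU's buffer.
 */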
7202 static ssize_t
7203 tracing_stats_read(struct file *filp, char __user *ubuf,
7204                    size_t count, loff_t *ppos)
7205 {
7206         struct inode *inode = file_inode(filp);
7207         struct trace_array *tr = inode->i_private;
7208         struct trace_buffer *trace_buf = &tr->trace_buffer;
7209         int cpu = tracing_get_cpu(inode);
7210         struct trace_seq *s;
7211         unsigned long cnt;
7212         unsigned long long t;
7213         unsigned long usec_rem;
7214
7215         s = kmalloc(sizeof(*s), GFP_KERNEL);
7216         if (!s)
7217                 return -ENOMEM;
7218
7219         trace_seq_init(s);
7220
7221         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7222         trace_seq_printf(s, "entries: %ld\n", cnt);
7223
7224         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7225         trace_seq_printf(s, "overrun: %ld\n", cnt);
7226
7227         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7228         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7229
7230         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7231         trace_seq_printf(s, "bytes: %ld\n", cnt);
7232
7233         if (trace_clocks[tr->clock_id].in_ns) {
7234                 /* local or global for trace_clock */
7235                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7236                 usec_rem = do_div(t, USEC_PER_SEC);
7237                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7238                                                                 t, usec_rem);
7239
7240                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7241                 usec_rem = do_div(t, USEC_PER_SEC);
7242                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7243         } else {
7244                 /* counter or tsc mode for trace_clock */
7245                 trace_seq_printf(s, "oldest event ts: %llu\n",
7246                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7247
7248                 trace_seq_printf(s, "now ts: %llu\n",
7249                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7250         }
7251
7252         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7253         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7254
7255         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7256         trace_seq_printf(s, "read events: %ld\n", cnt);
7257
7258         count = simple_read_from_buffer(ubuf, count, ppos,
7259                                         s->buffer, trace_seq_used(s));
7260
7261         kfree(s);
7262
7263         return count;
7264 }
7265
7266 static const struct file_operations tracing_stats_fops = {
7267         .open           = tracing_open_generic_tr,
7268         .read           = tracing_stats_read,
7269         .llseek         = generic_file_llseek,
7270         .release        = tracing_release_generic_tr,
7271 };
7272
7273 #ifdef CONFIG_DYNAMIC_FTRACE
7274
7275 static ssize_t
7276 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7277                   size_t cnt, loff_t *ppos)
7278 {
7279         unsigned long *p = filp->private_data;
7280         char buf[64]; /* Not too big for a shallow stack */
7281         int r;
7282
7283         r = scnprintf(buf, 63, "%ld", *p);
7284         buf[r++] = '\n';
7285
7286         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7287 }
7288
7289 static const struct file_operations tracing_dyn_info_fops = {
7290         .open           = tracing_open_generic,
7291         .read           = tracing_read_dyn_info,
7292         .llseek         = generic_file_llseek,
7293 };
7294 #endif /* CONFIG_DYNAMIC_FTRACE */
7295
7296 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7297 static void
7298 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7299                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7300                 void *data)
7301 {
7302         tracing_snapshot_instance(tr);
7303 }
7304
7305 static void
7306 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7307                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7308                       void *data)
7309 {
7310         struct ftrace_func_mapper *mapper = data;
7311         long *count = NULL;
7312
7313         if (mapper)
7314                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7315
7316         if (count) {
7317
7318                 if (*count <= 0)
7319                         return;
7320
7321                 (*count)--;
7322         }
7323
7324         tracing_snapshot_instance(tr);
7325 }
7326
7327 static int
7328 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7329                       struct ftrace_probe_ops *ops, void *data)
7330 {
7331         struct ftrace_func_mapper *mapper = data;
7332         long *count = NULL;
7333
7334         seq_printf(m, "%ps:", (void *)ip);
7335
7336         seq_puts(m, "snapshot");
7337
7338         if (mapper)
7339                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7340
7341         if (count)
7342                 seq_printf(m, ":count=%ld\n", *count);
7343         else
7344                 seq_puts(m, ":unlimited\n");
7345
7346         return 0;
7347 }
7348
7349 static int
7350 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7351                      unsigned long ip, void *init_data, void **data)
7352 {
7353         struct ftrace_func_mapper *mapper = *data;
7354
7355         if (!mapper) {
7356                 mapper = allocate_ftrace_func_mapper();
7357                 if (!mapper)
7358                         return -ENOMEM;
7359                 *data = mapper;
7360         }
7361
7362         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7363 }
7364
7365 static void
7366 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7367                      unsigned long ip, void *data)
7368 {
7369         struct ftrace_func_mapper *mapper = data;
7370
7371         if (!ip) {
7372                 if (!mapper)
7373                         return;
7374                 free_ftrace_func_mapper(mapper, NULL);
7375                 return;
7376         }
7377
7378         ftrace_func_mapper_remove_ip(mapper, ip);
7379 }
7380
7381 static struct ftrace_probe_ops snapshot_probe_ops = {
7382         .func                   = ftrace_snapshot,
7383         .print                  = ftrace_snapshot_print,
7384 };
7385
7386 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7387         .func                   = ftrace_count_snapshot,
7388         .print                  = ftrace_snapshot_print,
7389         .init                   = ftrace_snapshot_init,
7390         .free                   = ftrace_snapshot_free,
7391 };
7392
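/*
 * Implements the "snapshot" command of set_ftrace_filter, e.g.:
 *
 *   echo '<function>:snapshot' > set_ftrace_filter
 *   echo '<function>:snapshot:<count>' > set_ftrace_filter
 *
 * which takes a snapshot every time the function is hit, optionally
 * limited to <count> hits.  Prefixing the glob with '!' removes the
 * probe again.
 */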
7393 static int
7394 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7395                                char *glob, char *cmd, char *param, int enable)
7396 {
7397         struct ftrace_probe_ops *ops;
7398         void *count = (void *)-1;
7399         char *number;
7400         int ret;
7401
7402         if (!tr)
7403                 return -ENODEV;
7404
7405         /* hash funcs only work with set_ftrace_filter */
7406         if (!enable)
7407                 return -EINVAL;
7408
7409         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7410
7411         if (glob[0] == '!')
7412                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7413
7414         if (!param)
7415                 goto out_reg;
7416
7417         number = strsep(&param, ":");
7418
7419         if (!strlen(number))
7420                 goto out_reg;
7421
7422         /*
7423          * We use the callback data field (which is a pointer)
7424          * as our counter.
7425          */
7426         ret = kstrtoul(number, 0, (unsigned long *)&count);
7427         if (ret)
7428                 return ret;
7429
7430  out_reg:
7431         ret = tracing_alloc_snapshot_instance(tr);
7432         if (ret < 0)
7433                 goto out;
7434
7435         ret = register_ftrace_function_probe(glob, tr, ops, count);
7436
7437  out:
7438         return ret < 0 ? ret : 0;
7439 }
7440
7441 static struct ftrace_func_command ftrace_snapshot_cmd = {
7442         .name                   = "snapshot",
7443         .func                   = ftrace_trace_snapshot_callback,
7444 };
7445
7446 static __init int register_snapshot_cmd(void)
7447 {
7448         return register_ftrace_command(&ftrace_snapshot_cmd);
7449 }
7450 #else
7451 static inline __init int register_snapshot_cmd(void) { return 0; }
7452 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7453
7454 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7455 {
7456         if (WARN_ON(!tr->dir))
7457                 return ERR_PTR(-ENODEV);
7458
7459         /* Top directory uses NULL as the parent */
7460         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7461                 return NULL;
7462
7463         /* All sub buffers have a descriptor */
7464         return tr->dir;
7465 }
7466
7467 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7468 {
7469         struct dentry *d_tracer;
7470
7471         if (tr->percpu_dir)
7472                 return tr->percpu_dir;
7473
7474         d_tracer = tracing_get_dentry(tr);
7475         if (IS_ERR(d_tracer))
7476                 return NULL;
7477
7478         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7479
7480         WARN_ONCE(!tr->percpu_dir,
7481                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7482
7483         return tr->percpu_dir;
7484 }
7485
7486 static struct dentry *
7487 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7488                       void *data, long cpu, const struct file_operations *fops)
7489 {
7490         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7491
7492         if (ret) /* See tracing_get_cpu() */
7493                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7494         return ret;
7495 }
7496
7497 static void
7498 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7499 {
7500         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7501         struct dentry *d_cpu;
7502         char cpu_dir[30]; /* 30 characters should be more than enough */
7503
7504         if (!d_percpu)
7505                 return;
7506
7507         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7508         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7509         if (!d_cpu) {
7510                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7511                 return;
7512         }
7513
7514         /* per cpu trace_pipe */
7515         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7516                                 tr, cpu, &tracing_pipe_fops);
7517
7518         /* per cpu trace */
7519         trace_create_cpu_file("trace", 0644, d_cpu,
7520                                 tr, cpu, &tracing_fops);
7521
7522         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7523                                 tr, cpu, &tracing_buffers_fops);
7524
7525         trace_create_cpu_file("stats", 0444, d_cpu,
7526                                 tr, cpu, &tracing_stats_fops);
7527
7528         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7529                                 tr, cpu, &tracing_entries_fops);
7530
7531 #ifdef CONFIG_TRACER_SNAPSHOT
7532         trace_create_cpu_file("snapshot", 0644, d_cpu,
7533                                 tr, cpu, &snapshot_fops);
7534
7535         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7536                                 tr, cpu, &snapshot_raw_fops);
7537 #endif
7538 }
7539
7540 #ifdef CONFIG_FTRACE_SELFTEST
7541 /* Let selftest have access to static functions in this file */
7542 #include "trace_selftest.c"
7543 #endif
7544
7545 static ssize_t
7546 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7547                         loff_t *ppos)
7548 {
7549         struct trace_option_dentry *topt = filp->private_data;
7550         char *buf;
7551
7552         if (topt->flags->val & topt->opt->bit)
7553                 buf = "1\n";
7554         else
7555                 buf = "0\n";
7556
7557         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7558 }
7559
7560 static ssize_t
7561 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7562                          loff_t *ppos)
7563 {
7564         struct trace_option_dentry *topt = filp->private_data;
7565         unsigned long val;
7566         int ret;
7567
7568         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7569         if (ret)
7570                 return ret;
7571
7572         if (val != 0 && val != 1)
7573                 return -EINVAL;
7574
7575         if (!!(topt->flags->val & topt->opt->bit) != val) {
7576                 mutex_lock(&trace_types_lock);
7577                 ret = __set_tracer_option(topt->tr, topt->flags,
7578                                           topt->opt, !val);
7579                 mutex_unlock(&trace_types_lock);
7580                 if (ret)
7581                         return ret;
7582         }
7583
7584         *ppos += cnt;
7585
7586         return cnt;
7587 }
7588
7589
7590 static const struct file_operations trace_options_fops = {
7591         .open = tracing_open_generic,
7592         .read = trace_options_read,
7593         .write = trace_options_write,
7594         .llseek = generic_file_llseek,
7595 };
7596
7597 /*
7598  * In order to pass in both the trace_array descriptor as well as the index
7599  * to the flag that the trace option file represents, the trace_array
7600  * has a character array of trace_flags_index[], which holds the index
7601  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7602  * The address of this character array is passed to the flag option file
7603  * read/write callbacks.
7604  *
7605  * In order to extract both the index and the trace_array descriptor,
7606  * get_tr_index() uses the following algorithm.
7607  *
7608  *   idx = *ptr;
7609  *
7610  * The dereference yields the index because the pointer points directly
7611  * at that index's slot in the array (remember, index[1] == 1).
7612  *
7613  * Then, to get the trace_array descriptor, subtracting that index
7614  * from the pointer gets us back to the start of the array:
7615  *
7616  *   ptr - idx == &index[0]
7617  *
7618  * Then a simple container_of() from that pointer gets us to the
7619  * trace_array descriptor.
7620  */
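/*
 * For example, if data points at tr->trace_flags_index[3], then
 * *data == 3 and data - 3 == &tr->trace_flags_index[0], from which
 * container_of() recovers tr.
 */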
7621 static void get_tr_index(void *data, struct trace_array **ptr,
7622                          unsigned int *pindex)
7623 {
7624         *pindex = *(unsigned char *)data;
7625
7626         *ptr = container_of(data - *pindex, struct trace_array,
7627                             trace_flags_index);
7628 }
7629
7630 static ssize_t
7631 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7632                         loff_t *ppos)
7633 {
7634         void *tr_index = filp->private_data;
7635         struct trace_array *tr;
7636         unsigned int index;
7637         char *buf;
7638
7639         get_tr_index(tr_index, &tr, &index);
7640
7641         if (tr->trace_flags & (1 << index))
7642                 buf = "1\n";
7643         else
7644                 buf = "0\n";
7645
7646         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7647 }
7648
7649 static ssize_t
7650 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7651                          loff_t *ppos)
7652 {
7653         void *tr_index = filp->private_data;
7654         struct trace_array *tr;
7655         unsigned int index;
7656         unsigned long val;
7657         int ret;
7658
7659         get_tr_index(tr_index, &tr, &index);
7660
7661         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7662         if (ret)
7663                 return ret;
7664
7665         if (val != 0 && val != 1)
7666                 return -EINVAL;
7667
7668         mutex_lock(&trace_types_lock);
7669         ret = set_tracer_flag(tr, 1 << index, val);
7670         mutex_unlock(&trace_types_lock);
7671
7672         if (ret < 0)
7673                 return ret;
7674
7675         *ppos += cnt;
7676
7677         return cnt;
7678 }
7679
7680 static const struct file_operations trace_options_core_fops = {
7681         .open = tracing_open_generic,
7682         .read = trace_options_core_read,
7683         .write = trace_options_core_write,
7684         .llseek = generic_file_llseek,
7685 };
7686
7687 struct dentry *trace_create_file(const char *name,
7688                                  umode_t mode,
7689                                  struct dentry *parent,
7690                                  void *data,
7691                                  const struct file_operations *fops)
7692 {
7693         struct dentry *ret;
7694
7695         ret = tracefs_create_file(name, mode, parent, data, fops);
7696         if (!ret)
7697                 pr_warn("Could not create tracefs '%s' entry\n", name);
7698
7699         return ret;
7700 }
7701
7702
7703 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7704 {
7705         struct dentry *d_tracer;
7706
7707         if (tr->options)
7708                 return tr->options;
7709
7710         d_tracer = tracing_get_dentry(tr);
7711         if (IS_ERR(d_tracer))
7712                 return NULL;
7713
7714         tr->options = tracefs_create_dir("options", d_tracer);
7715         if (!tr->options) {
7716                 pr_warn("Could not create tracefs directory 'options'\n");
7717                 return NULL;
7718         }
7719
7720         return tr->options;
7721 }
7722
7723 static void
7724 create_trace_option_file(struct trace_array *tr,
7725                          struct trace_option_dentry *topt,
7726                          struct tracer_flags *flags,
7727                          struct tracer_opt *opt)
7728 {
7729         struct dentry *t_options;
7730
7731         t_options = trace_options_init_dentry(tr);
7732         if (!t_options)
7733                 return;
7734
7735         topt->flags = flags;
7736         topt->opt = opt;
7737         topt->tr = tr;
7738
7739         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7740                                     &trace_options_fops);
7741
7742 }
7743
7744 static void
7745 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7746 {
7747         struct trace_option_dentry *topts;
7748         struct trace_options *tr_topts;
7749         struct tracer_flags *flags;
7750         struct tracer_opt *opts;
7751         int cnt;
7752         int i;
7753
7754         if (!tracer)
7755                 return;
7756
7757         flags = tracer->flags;
7758
7759         if (!flags || !flags->opts)
7760                 return;
7761
7762         /*
7763          * If this is an instance, only create flags for tracers
7764          * the instance may have.
7765          */
7766         if (!trace_ok_for_array(tracer, tr))
7767                 return;
7768
7769         for (i = 0; i < tr->nr_topts; i++) {
7770                 /* Make sure there are no duplicate flags. */
7771                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7772                         return;
7773         }
7774
7775         opts = flags->opts;
7776
7777         for (cnt = 0; opts[cnt].name; cnt++)
7778                 ;
7779
7780         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7781         if (!topts)
7782                 return;
7783
7784         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7785                             GFP_KERNEL);
7786         if (!tr_topts) {
7787                 kfree(topts);
7788                 return;
7789         }
7790
7791         tr->topts = tr_topts;
7792         tr->topts[tr->nr_topts].tracer = tracer;
7793         tr->topts[tr->nr_topts].topts = topts;
7794         tr->nr_topts++;
7795
7796         for (cnt = 0; opts[cnt].name; cnt++) {
7797                 create_trace_option_file(tr, &topts[cnt], flags,
7798                                          &opts[cnt]);
7799                 WARN_ONCE(topts[cnt].entry == NULL,
7800                           "Failed to create trace option: %s",
7801                           opts[cnt].name);
7802         }
7803 }
7804
7805 static struct dentry *
7806 create_trace_option_core_file(struct trace_array *tr,
7807                               const char *option, long index)
7808 {
7809         struct dentry *t_options;
7810
7811         t_options = trace_options_init_dentry(tr);
7812         if (!t_options)
7813                 return NULL;
7814
7815         return trace_create_file(option, 0644, t_options,
7816                                  (void *)&tr->trace_flags_index[index],
7817                                  &trace_options_core_fops);
7818 }
7819
7820 static void create_trace_options_dir(struct trace_array *tr)
7821 {
7822         struct dentry *t_options;
7823         bool top_level = tr == &global_trace;
7824         int i;
7825
7826         t_options = trace_options_init_dentry(tr);
7827         if (!t_options)
7828                 return;
7829
7830         for (i = 0; trace_options[i]; i++) {
7831                 if (top_level ||
7832                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7833                         create_trace_option_core_file(tr, trace_options[i], i);
7834         }
7835 }
7836
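/*
 * The "tracing_on" file: reading shows whether the ring buffer is
 * recording; writing 0 or 1 turns recording off or on and invokes the
 * current tracer's stop()/start() callbacks, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */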
7837 static ssize_t
7838 rb_simple_read(struct file *filp, char __user *ubuf,
7839                size_t cnt, loff_t *ppos)
7840 {
7841         struct trace_array *tr = filp->private_data;
7842         char buf[64];
7843         int r;
7844
7845         r = tracer_tracing_is_on(tr);
7846         r = sprintf(buf, "%d\n", r);
7847
7848         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7849 }
7850
7851 static ssize_t
7852 rb_simple_write(struct file *filp, const char __user *ubuf,
7853                 size_t cnt, loff_t *ppos)
7854 {
7855         struct trace_array *tr = filp->private_data;
7856         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7857         unsigned long val;
7858         int ret;
7859
7860         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7861         if (ret)
7862                 return ret;
7863
7864         if (buffer) {
7865                 mutex_lock(&trace_types_lock);
7866                 if (!!val == tracer_tracing_is_on(tr)) {
7867                         val = 0; /* do nothing */
7868                 } else if (val) {
7869                         tracer_tracing_on(tr);
7870                         if (tr->current_trace->start)
7871                                 tr->current_trace->start(tr);
7872                 } else {
7873                         tracer_tracing_off(tr);
7874                         if (tr->current_trace->stop)
7875                                 tr->current_trace->stop(tr);
7876                 }
7877                 mutex_unlock(&trace_types_lock);
7878         }
7879
7880         (*ppos)++;
7881
7882         return cnt;
7883 }
7884
7885 static const struct file_operations rb_simple_fops = {
7886         .open           = tracing_open_generic_tr,
7887         .read           = rb_simple_read,
7888         .write          = rb_simple_write,
7889         .release        = tracing_release_generic_tr,
7890         .llseek         = default_llseek,
7891 };
7892
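/*
 * The "buffer_percent" file controls how full the ring buffer must be
 * before a blocked reader is woken (see the wait_on_pipe() call in
 * tracing_buffers_splice_read()).  Valid values are 1-100; a write of 0
 * is treated as 1.
 */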
7893 static ssize_t
7894 buffer_percent_read(struct file *filp, char __user *ubuf,
7895                     size_t cnt, loff_t *ppos)
7896 {
7897         struct trace_array *tr = filp->private_data;
7898         char buf[64];
7899         int r;
7900
7901         r = tr->buffer_percent;
7902         r = sprintf(buf, "%d\n", r);
7903
7904         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7905 }
7906
7907 static ssize_t
7908 buffer_percent_write(struct file *filp, const char __user *ubuf,
7909                      size_t cnt, loff_t *ppos)
7910 {
7911         struct trace_array *tr = filp->private_data;
7912         unsigned long val;
7913         int ret;
7914
7915         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7916         if (ret)
7917                 return ret;
7918
7919         if (val > 100)
7920                 return -EINVAL;
7921
7922         if (!val)
7923                 val = 1;
7924
7925         tr->buffer_percent = val;
7926
7927         (*ppos)++;
7928
7929         return cnt;
7930 }
7931
7932 static const struct file_operations buffer_percent_fops = {
7933         .open           = tracing_open_generic_tr,
7934         .read           = buffer_percent_read,
7935         .write          = buffer_percent_write,
7936         .release        = tracing_release_generic_tr,
7937         .llseek         = default_llseek,
7938 };
7939
7940 struct dentry *trace_instance_dir;
7941
7942 static void
7943 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7944
7945 static int
7946 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7947 {
7948         enum ring_buffer_flags rb_flags;
7949
7950         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7951
7952         buf->tr = tr;
7953
7954         buf->buffer = ring_buffer_alloc(size, rb_flags);
7955         if (!buf->buffer)
7956                 return -ENOMEM;
7957
7958         buf->data = alloc_percpu(struct trace_array_cpu);
7959         if (!buf->data) {
7960                 ring_buffer_free(buf->buffer);
7961                 buf->buffer = NULL;
7962                 return -ENOMEM;
7963         }
7964
7965         /* Allocate the first page for all buffers */
7966         set_buffer_entries(&tr->trace_buffer,
7967                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7968
7969         return 0;
7970 }
7971
7972 static int allocate_trace_buffers(struct trace_array *tr, int size)
7973 {
7974         int ret;
7975
7976         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7977         if (ret)
7978                 return ret;
7979
7980 #ifdef CONFIG_TRACER_MAX_TRACE
7981         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7982                                     allocate_snapshot ? size : 1);
7983         if (WARN_ON(ret)) {
7984                 ring_buffer_free(tr->trace_buffer.buffer);
7985                 tr->trace_buffer.buffer = NULL;
7986                 free_percpu(tr->trace_buffer.data);
7987                 tr->trace_buffer.data = NULL;
7988                 return -ENOMEM;
7989         }
7990         tr->allocated_snapshot = allocate_snapshot;
7991
7992         /*
7993          * Only the top level trace array gets its snapshot allocated
7994          * from the kernel command line.
7995          */
7996         allocate_snapshot = false;
7997 #endif
7998         return 0;
7999 }
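/*
 * Example (sketch): booting with the "alloc_snapshot" kernel command line
 * option sets allocate_snapshot, so the code above sizes the top level
 * max_buffer in full at boot instead of leaving it as a minimal
 * placeholder buffer:
 *
 *   linux ... alloc_snapshot
 */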
8000
8001 static void free_trace_buffer(struct trace_buffer *buf)
8002 {
8003         if (buf->buffer) {
8004                 ring_buffer_free(buf->buffer);
8005                 buf->buffer = NULL;
8006                 free_percpu(buf->data);
8007                 buf->data = NULL;
8008         }
8009 }
8010
8011 static void free_trace_buffers(struct trace_array *tr)
8012 {
8013         if (!tr)
8014                 return;
8015
8016         free_trace_buffer(&tr->trace_buffer);
8017
8018 #ifdef CONFIG_TRACER_MAX_TRACE
8019         free_trace_buffer(&tr->max_buffer);
8020 #endif
8021 }
8022
8023 static void init_trace_flags_index(struct trace_array *tr)
8024 {
8025         int i;
8026
8027         /* Used by the trace options files */
8028         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8029                 tr->trace_flags_index[i] = i;
8030 }
8031
8032 static void __update_tracer_options(struct trace_array *tr)
8033 {
8034         struct tracer *t;
8035
8036         for (t = trace_types; t; t = t->next)
8037                 add_tracer_options(tr, t);
8038 }
8039
8040 static void update_tracer_options(struct trace_array *tr)
8041 {
8042         mutex_lock(&trace_types_lock);
8043         __update_tracer_options(tr);
8044         mutex_unlock(&trace_types_lock);
8045 }
8046
8047 static int instance_mkdir(const char *name)
8048 {
8049         struct trace_array *tr;
8050         int ret;
8051
8052         mutex_lock(&event_mutex);
8053         mutex_lock(&trace_types_lock);
8054
8055         ret = -EEXIST;
8056         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8057                 if (tr->name && strcmp(tr->name, name) == 0)
8058                         goto out_unlock;
8059         }
8060
8061         ret = -ENOMEM;
8062         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8063         if (!tr)
8064                 goto out_unlock;
8065
8066         tr->name = kstrdup(name, GFP_KERNEL);
8067         if (!tr->name)
8068                 goto out_free_tr;
8069
8070         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8071                 goto out_free_tr;
8072
8073         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8074
8075         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8076
8077         raw_spin_lock_init(&tr->start_lock);
8078
8079         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8080
8081         tr->current_trace = &nop_trace;
8082
8083         INIT_LIST_HEAD(&tr->systems);
8084         INIT_LIST_HEAD(&tr->events);
8085         INIT_LIST_HEAD(&tr->hist_vars);
8086
8087         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8088                 goto out_free_tr;
8089
8090         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8091         if (!tr->dir)
8092                 goto out_free_tr;
8093
8094         ret = event_trace_add_tracer(tr->dir, tr);
8095         if (ret) {
8096                 tracefs_remove_recursive(tr->dir);
8097                 goto out_free_tr;
8098         }
8099
8100         ftrace_init_trace_array(tr);
8101
8102         init_tracer_tracefs(tr, tr->dir);
8103         init_trace_flags_index(tr);
8104         __update_tracer_options(tr);
8105
8106         list_add(&tr->list, &ftrace_trace_arrays);
8107
8108         mutex_unlock(&trace_types_lock);
8109         mutex_unlock(&event_mutex);
8110
8111         return 0;
8112
8113  out_free_tr:
8114         free_trace_buffers(tr);
8115         free_cpumask_var(tr->tracing_cpumask);
8116         kfree(tr->name);
8117         kfree(tr);
8118
8119  out_unlock:
8120         mutex_unlock(&trace_types_lock);
8121         mutex_unlock(&event_mutex);
8122
8123         return ret;
8124
8125 }
8126
8127 static int instance_rmdir(const char *name)
8128 {
8129         struct trace_array *tr;
8130         int found = 0;
8131         int ret;
8132         int i;
8133
8134         mutex_lock(&event_mutex);
8135         mutex_lock(&trace_types_lock);
8136
8137         ret = -ENODEV;
8138         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8139                 if (tr->name && strcmp(tr->name, name) == 0) {
8140                         found = 1;
8141                         break;
8142                 }
8143         }
8144         if (!found)
8145                 goto out_unlock;
8146
8147         ret = -EBUSY;
8148         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8149                 goto out_unlock;
8150
8151         list_del(&tr->list);
8152
8153         /* Disable all the flags that were enabled coming in */
8154         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8155                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8156                         set_tracer_flag(tr, 1 << i, 0);
8157         }
8158
8159         tracing_set_nop(tr);
8160         clear_ftrace_function_probes(tr);
8161         event_trace_del_tracer(tr);
8162         ftrace_clear_pids(tr);
8163         ftrace_destroy_function_files(tr);
8164         tracefs_remove_recursive(tr->dir);
8165         free_trace_buffers(tr);
8166
8167         for (i = 0; i < tr->nr_topts; i++) {
8168                 kfree(tr->topts[i].topts);
8169         }
8170         kfree(tr->topts);
8171
8172         free_cpumask_var(tr->tracing_cpumask);
8173         kfree(tr->name);
8174         kfree(tr);
8175
8176         ret = 0;
8177
8178  out_unlock:
8179         mutex_unlock(&trace_types_lock);
8180         mutex_unlock(&event_mutex);
8181
8182         return ret;
8183 }
8184
8185 static __init void create_trace_instances(struct dentry *d_tracer)
8186 {
8187         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8188                                                          instance_mkdir,
8189                                                          instance_rmdir);
8190         if (WARN_ON(!trace_instance_dir))
8191                 return;
8192 }
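/*
 * Example (userspace sketch): the "instances" directory registered above
 * maps mkdir/rmdir onto instance_mkdir()/instance_rmdir(), giving each
 * instance its own ring buffers and control files:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   rmdir /sys/kernel/tracing/instances/foo
 */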
8193
8194 static void
8195 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8196 {
8197         struct trace_event_file *file;
8198         int cpu;
8199
8200         trace_create_file("available_tracers", 0444, d_tracer,
8201                         tr, &show_traces_fops);
8202
8203         trace_create_file("current_tracer", 0644, d_tracer,
8204                         tr, &set_tracer_fops);
8205
8206         trace_create_file("tracing_cpumask", 0644, d_tracer,
8207                           tr, &tracing_cpumask_fops);
8208
8209         trace_create_file("trace_options", 0644, d_tracer,
8210                           tr, &tracing_iter_fops);
8211
8212         trace_create_file("trace", 0644, d_tracer,
8213                           tr, &tracing_fops);
8214
8215         trace_create_file("trace_pipe", 0444, d_tracer,
8216                           tr, &tracing_pipe_fops);
8217
8218         trace_create_file("buffer_size_kb", 0644, d_tracer,
8219                           tr, &tracing_entries_fops);
8220
8221         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8222                           tr, &tracing_total_entries_fops);
8223
8224         trace_create_file("free_buffer", 0200, d_tracer,
8225                           tr, &tracing_free_buffer_fops);
8226
8227         trace_create_file("trace_marker", 0220, d_tracer,
8228                           tr, &tracing_mark_fops);
8229
8230         file = __find_event_file(tr, "ftrace", "print");
8231         if (file && file->dir)
8232                 trace_create_file("trigger", 0644, file->dir, file,
8233                                   &event_trigger_fops);
8234         tr->trace_marker_file = file;
8235
8236         trace_create_file("trace_marker_raw", 0220, d_tracer,
8237                           tr, &tracing_mark_raw_fops);
8238
8239         trace_create_file("trace_clock", 0644, d_tracer, tr,
8240                           &trace_clock_fops);
8241
8242         trace_create_file("tracing_on", 0644, d_tracer,
8243                           tr, &rb_simple_fops);
8244
8245         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8246                           &trace_time_stamp_mode_fops);
8247
8248         tr->buffer_percent = 50;
8249
8250         trace_create_file("buffer_percent", 0644, d_tracer,
8251                         tr, &buffer_percent_fops);
8252
8253         create_trace_options_dir(tr);
8254
8255 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8256         trace_create_file("tracing_max_latency", 0644, d_tracer,
8257                         &tr->max_latency, &tracing_max_lat_fops);
8258 #endif
8259
8260         if (ftrace_create_function_files(tr, d_tracer))
8261                 WARN(1, "Could not allocate function filter files");
8262
8263 #ifdef CONFIG_TRACER_SNAPSHOT
8264         trace_create_file("snapshot", 0644, d_tracer,
8265                           tr, &snapshot_fops);
8266 #endif
8267
8268         for_each_tracing_cpu(cpu)
8269                 tracing_init_tracefs_percpu(tr, cpu);
8270
8271         ftrace_init_tracefs(tr, d_tracer);
8272 }
8273
8274 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8275 {
8276         struct vfsmount *mnt;
8277         struct file_system_type *type;
8278
8279         /*
8280          * To maintain backward compatibility for tools that mount
8281          * debugfs to get to the tracing facility, tracefs is automatically
8282          * mounted to the debugfs/tracing directory.
8283          */
8284         type = get_fs_type("tracefs");
8285         if (!type)
8286                 return NULL;
8287         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8288         put_filesystem(type);
8289         if (IS_ERR(mnt))
8290                 return NULL;
8291         mntget(mnt);
8292
8293         return mnt;
8294 }
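/*
 * Example: with the automount above in place, the legacy debugfs path and
 * the native tracefs mount point both reach the same files, e.g.
 *
 *   /sys/kernel/debug/tracing/trace
 *   /sys/kernel/tracing/trace
 */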
8295
8296 /**
8297  * tracing_init_dentry - initialize top level trace array
8298  *
8299  * This is called when creating files or directories in the tracing
8300  * directory. It is called from fs_initcall() level boot code and is
8301  * expected to return the dentry of the top level tracing directory.
8302  */
8303 struct dentry *tracing_init_dentry(void)
8304 {
8305         struct trace_array *tr = &global_trace;
8306
8307         /* The top level trace array uses NULL as parent */
8308         if (tr->dir)
8309                 return NULL;
8310
8311         if (WARN_ON(!tracefs_initialized()) ||
8312                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8313                  WARN_ON(!debugfs_initialized())))
8314                 return ERR_PTR(-ENODEV);
8315
8316         /*
8317          * As there may still be users that expect the tracing
8318          * files to exist in debugfs/tracing, we must automount
8319          * the tracefs file system there, so older tools still
8320          * work with the newer kernel.
8321          */
8322         tr->dir = debugfs_create_automount("tracing", NULL,
8323                                            trace_automount, NULL);
8324         if (!tr->dir) {
8325                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8326                 return ERR_PTR(-ENOMEM);
8327         }
8328
8329         return NULL;
8330 }
8331
8332 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8333 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8334
8335 static void __init trace_eval_init(void)
8336 {
8337         int len;
8338
8339         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8340         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8341 }
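/*
 * Example (sketch): the eval maps inserted above are generated by
 * TRACE_DEFINE_ENUM() in trace event headers, which lets an event's print
 * format show the symbolic name instead of a raw number. MY_ENUM_VALUE
 * below is a hypothetical placeholder:
 *
 *   TRACE_DEFINE_ENUM(MY_ENUM_VALUE);
 */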
8342
8343 #ifdef CONFIG_MODULES
8344 static void trace_module_add_evals(struct module *mod)
8345 {
8346         if (!mod->num_trace_evals)
8347                 return;
8348
8349         /*
8350          * Modules with bad taint do not have events created;
8351          * do not bother with enums either.
8352          */
8353         if (trace_module_has_bad_taint(mod))
8354                 return;
8355
8356         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8357 }
8358
8359 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8360 static void trace_module_remove_evals(struct module *mod)
8361 {
8362         union trace_eval_map_item *map;
8363         union trace_eval_map_item **last = &trace_eval_maps;
8364
8365         if (!mod->num_trace_evals)
8366                 return;
8367
8368         mutex_lock(&trace_eval_mutex);
8369
8370         map = trace_eval_maps;
8371
8372         while (map) {
8373                 if (map->head.mod == mod)
8374                         break;
8375                 map = trace_eval_jmp_to_tail(map);
8376                 last = &map->tail.next;
8377                 map = map->tail.next;
8378         }
8379         if (!map)
8380                 goto out;
8381
8382         *last = trace_eval_jmp_to_tail(map)->tail.next;
8383         kfree(map);
8384  out:
8385         mutex_unlock(&trace_eval_mutex);
8386 }
8387 #else
8388 static inline void trace_module_remove_evals(struct module *mod) { }
8389 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8390
8391 static int trace_module_notify(struct notifier_block *self,
8392                                unsigned long val, void *data)
8393 {
8394         struct module *mod = data;
8395
8396         switch (val) {
8397         case MODULE_STATE_COMING:
8398                 trace_module_add_evals(mod);
8399                 break;
8400         case MODULE_STATE_GOING:
8401                 trace_module_remove_evals(mod);
8402                 break;
8403         }
8404
8405         return 0;
8406 }
8407
8408 static struct notifier_block trace_module_nb = {
8409         .notifier_call = trace_module_notify,
8410         .priority = 0,
8411 };
8412 #endif /* CONFIG_MODULES */
8413
8414 static __init int tracer_init_tracefs(void)
8415 {
8416         struct dentry *d_tracer;
8417
8418         trace_access_lock_init();
8419
8420         d_tracer = tracing_init_dentry();
8421         if (IS_ERR(d_tracer))
8422                 return 0;
8423
8424         event_trace_init();
8425
8426         init_tracer_tracefs(&global_trace, d_tracer);
8427         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8428
8429         trace_create_file("tracing_thresh", 0644, d_tracer,
8430                         &global_trace, &tracing_thresh_fops);
8431
8432         trace_create_file("README", 0444, d_tracer,
8433                         NULL, &tracing_readme_fops);
8434
8435         trace_create_file("saved_cmdlines", 0444, d_tracer,
8436                         NULL, &tracing_saved_cmdlines_fops);
8437
8438         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8439                           NULL, &tracing_saved_cmdlines_size_fops);
8440
8441         trace_create_file("saved_tgids", 0444, d_tracer,
8442                         NULL, &tracing_saved_tgids_fops);
8443
8444         trace_eval_init();
8445
8446         trace_create_eval_file(d_tracer);
8447
8448 #ifdef CONFIG_MODULES
8449         register_module_notifier(&trace_module_nb);
8450 #endif
8451
8452 #ifdef CONFIG_DYNAMIC_FTRACE
8453         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8454                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8455 #endif
8456
8457         create_trace_instances(d_tracer);
8458
8459         update_tracer_options(&global_trace);
8460
8461         return 0;
8462 }
8463
8464 static int trace_panic_handler(struct notifier_block *this,
8465                                unsigned long event, void *unused)
8466 {
8467         if (ftrace_dump_on_oops)
8468                 ftrace_dump(ftrace_dump_on_oops);
8469         return NOTIFY_OK;
8470 }
8471
8472 static struct notifier_block trace_panic_notifier = {
8473         .notifier_call  = trace_panic_handler,
8474         .next           = NULL,
8475         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8476 };
8477
8478 static int trace_die_handler(struct notifier_block *self,
8479                              unsigned long val,
8480                              void *data)
8481 {
8482         switch (val) {
8483         case DIE_OOPS:
8484                 if (ftrace_dump_on_oops)
8485                         ftrace_dump(ftrace_dump_on_oops);
8486                 break;
8487         default:
8488                 break;
8489         }
8490         return NOTIFY_OK;
8491 }
8492
8493 static struct notifier_block trace_die_notifier = {
8494         .notifier_call = trace_die_handler,
8495         .priority = 200
8496 };
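/*
 * Example (sketch): both notifiers above only dump the buffers when
 * ftrace_dump_on_oops is set, typically via the "ftrace_dump_on_oops"
 * kernel command line option or at run time through the sysctl:
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */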
8497
8498 /*
8499  * printk is limited to a maximum of 1024 characters; we really don't
8500  * need it that big. Nothing should be printing 1000 characters anyway.
8501  */
8502 #define TRACE_MAX_PRINT         1000
8503
8504 /*
8505  * Define here KERN_TRACE so that we have one place to modify
8506  * it if we decide to change what log level the ftrace dump
8507  * should be at.
8508  */
8509 #define KERN_TRACE              KERN_EMERG
8510
8511 void
8512 trace_printk_seq(struct trace_seq *s)
8513 {
8514         /* Probably should print a warning here. */
8515         if (s->seq.len >= TRACE_MAX_PRINT)
8516                 s->seq.len = TRACE_MAX_PRINT;
8517
8518         /*
8519          * More paranoid code. Although the buffer size is set to
8520          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8521          * an extra layer of protection.
8522          */
8523         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8524                 s->seq.len = s->seq.size - 1;
8525
8526         /* should already be NUL terminated, but we are paranoid. */
8527         s->buffer[s->seq.len] = 0;
8528
8529         printk(KERN_TRACE "%s", s->buffer);
8530
8531         trace_seq_init(s);
8532 }
8533
8534 void trace_init_global_iter(struct trace_iterator *iter)
8535 {
8536         iter->tr = &global_trace;
8537         iter->trace = iter->tr->current_trace;
8538         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8539         iter->trace_buffer = &global_trace.trace_buffer;
8540
8541         if (iter->trace && iter->trace->open)
8542                 iter->trace->open(iter);
8543
8544         /* Annotate start of buffers if we had overruns */
8545         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8546                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8547
8548         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8549         if (trace_clocks[iter->tr->clock_id].in_ns)
8550                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8551 }
8552
8553 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8554 {
8555         /* use static because iter can be a bit big for the stack */
8556         static struct trace_iterator iter;
8557         static atomic_t dump_running;
8558         struct trace_array *tr = &global_trace;
8559         unsigned int old_userobj;
8560         unsigned long flags;
8561         int cnt = 0, cpu;
8562
8563         /* Only allow one dump user at a time. */
8564         if (atomic_inc_return(&dump_running) != 1) {
8565                 atomic_dec(&dump_running);
8566                 return;
8567         }
8568
8569         /*
8570          * Always turn off tracing when we dump.
8571          * We don't need to show trace output of what happens
8572          * between multiple crashes.
8573          *
8574          * If the user does a sysrq-z, then they can re-enable
8575          * tracing with echo 1 > tracing_on.
8576          */
8577         tracing_off();
8578
8579         local_irq_save(flags);
8580         printk_nmi_direct_enter();
8581
8582         /* Simulate the iterator */
8583         trace_init_global_iter(&iter);
8584
8585         for_each_tracing_cpu(cpu) {
8586                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8587         }
8588
8589         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8590
8591         /* don't look at user memory in panic mode */
8592         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8593
8594         switch (oops_dump_mode) {
8595         case DUMP_ALL:
8596                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8597                 break;
8598         case DUMP_ORIG:
8599                 iter.cpu_file = raw_smp_processor_id();
8600                 break;
8601         case DUMP_NONE:
8602                 goto out_enable;
8603         default:
8604                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8605                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8606         }
8607
8608         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8609
8610         /* Did function tracer already get disabled? */
8611         if (ftrace_is_dead()) {
8612                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8613                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8614         }
8615
8616         /*
8617          * We need to stop all tracing on all CPUs to read
8618          * the next buffer. This is a bit expensive, but is
8619          * not done often. We print all that we can read,
8620          * and then release the locks again.
8621          */
8622
8623         while (!trace_empty(&iter)) {
8624
8625                 if (!cnt)
8626                         printk(KERN_TRACE "---------------------------------\n");
8627
8628                 cnt++;
8629
8630                 /* reset all but tr, trace, and overruns */
8631                 memset(&iter.seq, 0,
8632                        sizeof(struct trace_iterator) -
8633                        offsetof(struct trace_iterator, seq));
8634                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8635                 iter.pos = -1;
8636
8637                 if (trace_find_next_entry_inc(&iter) != NULL) {
8638                         int ret;
8639
8640                         ret = print_trace_line(&iter);
8641                         if (ret != TRACE_TYPE_NO_CONSUME)
8642                                 trace_consume(&iter);
8643                 }
8644                 touch_nmi_watchdog();
8645
8646                 trace_printk_seq(&iter.seq);
8647         }
8648
8649         if (!cnt)
8650                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8651         else
8652                 printk(KERN_TRACE "---------------------------------\n");
8653
8654  out_enable:
8655         tr->trace_flags |= old_userobj;
8656
8657         for_each_tracing_cpu(cpu) {
8658                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8659         }
8660         atomic_dec(&dump_running);
8661         printk_nmi_direct_exit();
8662         local_irq_restore(flags);
8663 }
8664 EXPORT_SYMBOL_GPL(ftrace_dump);
8665
8666 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8667 {
8668         char **argv;
8669         int argc, ret;
8670
8671         argc = 0;
8672         ret = 0;
8673         argv = argv_split(GFP_KERNEL, buf, &argc);
8674         if (!argv)
8675                 return -ENOMEM;
8676
8677         if (argc)
8678                 ret = createfn(argc, argv);
8679
8680         argv_free(argv);
8681
8682         return ret;
8683 }
8684
8685 #define WRITE_BUFSIZE  4096
8686
8687 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8688                                 size_t count, loff_t *ppos,
8689                                 int (*createfn)(int, char **))
8690 {
8691         char *kbuf, *buf, *tmp;
8692         int ret = 0;
8693         size_t done = 0;
8694         size_t size;
8695
8696         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8697         if (!kbuf)
8698                 return -ENOMEM;
8699
8700         while (done < count) {
8701                 size = count - done;
8702
8703                 if (size >= WRITE_BUFSIZE)
8704                         size = WRITE_BUFSIZE - 1;
8705
8706                 if (copy_from_user(kbuf, buffer + done, size)) {
8707                         ret = -EFAULT;
8708                         goto out;
8709                 }
8710                 kbuf[size] = '\0';
8711                 buf = kbuf;
8712                 do {
8713                         tmp = strchr(buf, '\n');
8714                         if (tmp) {
8715                                 *tmp = '\0';
8716                                 size = tmp - buf + 1;
8717                         } else {
8718                                 size = strlen(buf);
8719                                 if (done + size < count) {
8720                                         if (buf != kbuf)
8721                                                 break;
8722                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8723                                         pr_warn("Line length is too long: Should be less than %d\n",
8724                                                 WRITE_BUFSIZE - 2);
8725                                         ret = -EINVAL;
8726                                         goto out;
8727                                 }
8728                         }
8729                         done += size;
8730
8731                         /* Remove comments */
8732                         tmp = strchr(buf, '#');
8733
8734                         if (tmp)
8735                                 *tmp = '\0';
8736
8737                         ret = trace_run_command(buf, createfn);
8738                         if (ret)
8739                                 goto out;
8740                         buf += size;
8741
8742                 } while (done < count);
8743         }
8744         ret = done;
8745
8746 out:
8747         kfree(kbuf);
8748
8749         return ret;
8750 }
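/*
 * Example (sketch): trace_parse_run_command() is the backend for dynamic
 * event interfaces such as kprobe_events; each newline terminated line is
 * split into argv words and handed to createfn(), with '#' starting a
 * comment. A typical use from userspace ("myopen" is an arbitrary probe
 * name):
 *
 *   echo 'p:myopen do_sys_open' > /sys/kernel/tracing/kprobe_events
 *   echo '-:myopen' > /sys/kernel/tracing/kprobe_events
 */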
8751
8752 __init static int tracer_alloc_buffers(void)
8753 {
8754         int ring_buf_size;
8755         int ret = -ENOMEM;
8756
8757         /*
8758          * Make sure we don't accidentally add more trace options
8759          * than we have bits for.
8760          */
8761         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8762
8763         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8764                 goto out;
8765
8766         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8767                 goto out_free_buffer_mask;
8768
8769         /* Only allocate trace_printk buffers if a trace_printk exists */
8770         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8771                 /* Must be called before global_trace.buffer is allocated */
8772                 trace_printk_init_buffers();
8773
8774         /* To save memory, keep the ring buffer size to its minimum */
8775         if (ring_buffer_expanded)
8776                 ring_buf_size = trace_buf_size;
8777         else
8778                 ring_buf_size = 1;
8779
8780         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8781         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8782
8783         raw_spin_lock_init(&global_trace.start_lock);
8784
8785         /*
8786          * The prepare callback allocates some memory for the ring buffer. We
8787          * don't free the buffer if the CPU goes down. If we were to free
8788          * the buffer, then the user would lose any trace that was in the
8789          * buffer. The memory will be removed once the "instance" is removed.
8790          */
8791         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8792                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8793                                       NULL);
8794         if (ret < 0)
8795                 goto out_free_cpumask;
8796         /* Used for event triggers */
8797         ret = -ENOMEM;
8798         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8799         if (!temp_buffer)
8800                 goto out_rm_hp_state;
8801
8802         if (trace_create_savedcmd() < 0)
8803                 goto out_free_temp_buffer;
8804
8805         /* TODO: make the number of buffers hot pluggable with CPUS */
8806         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8807                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8808                 WARN_ON(1);
8809                 goto out_free_savedcmd;
8810         }
8811
8812         if (global_trace.buffer_disabled)
8813                 tracing_off();
8814
8815         if (trace_boot_clock) {
8816                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8817                 if (ret < 0)
8818                         pr_warn("Trace clock %s not defined, going back to default\n",
8819                                 trace_boot_clock);
8820         }
8821
8822         /*
8823          * register_tracer() might reference current_trace, so it
8824          * needs to be set before we register anything. This is
8825          * just a bootstrap of current_trace anyway.
8826          */
8827         global_trace.current_trace = &nop_trace;
8828
8829         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8830
8831         ftrace_init_global_array_ops(&global_trace);
8832
8833         init_trace_flags_index(&global_trace);
8834
8835         register_tracer(&nop_trace);
8836
8837         /* Function tracing may start here (via kernel command line) */
8838         init_function_trace();
8839
8840         /* All seems OK, enable tracing */
8841         tracing_disabled = 0;
8842
8843         atomic_notifier_chain_register(&panic_notifier_list,
8844                                        &trace_panic_notifier);
8845
8846         register_die_notifier(&trace_die_notifier);
8847
8848         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8849
8850         INIT_LIST_HEAD(&global_trace.systems);
8851         INIT_LIST_HEAD(&global_trace.events);
8852         INIT_LIST_HEAD(&global_trace.hist_vars);
8853         list_add(&global_trace.list, &ftrace_trace_arrays);
8854
8855         apply_trace_boot_options();
8856
8857         register_snapshot_cmd();
8858
8859         return 0;
8860
8861 out_free_savedcmd:
8862         free_saved_cmdlines_buffer(savedcmd);
8863 out_free_temp_buffer:
8864         ring_buffer_free(temp_buffer);
8865 out_rm_hp_state:
8866         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8867 out_free_cpumask:
8868         free_cpumask_var(global_trace.tracing_cpumask);
8869 out_free_buffer_mask:
8870         free_cpumask_var(tracing_buffer_mask);
8871 out:
8872         return ret;
8873 }
8874
8875 void __init early_trace_init(void)
8876 {
8877         if (tracepoint_printk) {
8878                 tracepoint_print_iter =
8879                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8880                 if (WARN_ON(!tracepoint_print_iter))
8881                         tracepoint_printk = 0;
8882                 else
8883                         static_key_enable(&tracepoint_printk_key.key);
8884         }
8885         tracer_alloc_buffers();
8886 }
8887
8888 void __init trace_init(void)
8889 {
8890         trace_event_init();
8891 }
8892
8893 __init static int clear_boot_tracer(void)
8894 {
8895         /*
8896          * The default bootup tracer name lives in an init section.
8897          * This function is called from a late initcall. If we did not
8898          * find the boot tracer, then clear it out, to prevent
8899          * later registration from accessing the buffer that is
8900          * about to be freed.
8901          */
8902         if (!default_bootup_tracer)
8903                 return 0;
8904
8905         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8906                default_bootup_tracer);
8907         default_bootup_tracer = NULL;
8908
8909         return 0;
8910 }
8911
8912 fs_initcall(tracer_init_tracefs);
8913 late_initcall_sync(clear_boot_tracer);
8914
8915 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8916 __init static int tracing_set_default_clock(void)
8917 {
8918         /* sched_clock_stable() is determined in late_initcall */
8919         if (!trace_boot_clock && !sched_clock_stable()) {
8920                 printk(KERN_WARNING
8921                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8922                        "If you want to keep using the local clock, then add:\n"
8923                        "  \"trace_clock=local\"\n"
8924                        "on the kernel command line\n");
8925                 tracing_set_clock(&global_trace, "global");
8926         }
8927
8928         return 0;
8929 }
8930 late_initcall_sync(tracing_set_default_clock);
8931 #endif