1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although some concurrent
61  * insertions into the ring-buffer, such as trace_printk, could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurs.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * from "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
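/*
 * For illustration (a sketch based on the description above, not taken from
 * the original file), a saved array holding N maps is laid out as:
 *
 *   item[0]        .head   { .length = N, .mod = owning module or NULL }
 *   item[1..N]     .map    the saved trace_eval_map entries
 *   item[N + 1]    .tail   { .next = pointer to the next saved array }
 */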
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
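/*
 * For illustration: booting with "ftrace_dump_on_oops" alone selects DUMP_ALL
 * (dump every CPU's buffer), while "ftrace_dump_on_oops=orig_cpu" selects
 * DUMP_ORIG (dump only the buffer of the CPU that triggered the oops).
 */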
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
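/*
 * For illustration: the +500 rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */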
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
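/*
 * For illustration: with the +1 encoding used by trace_pid_start() and
 * trace_pid_next(), a pid_list holding pids 0 and 25 is iterated as the
 * cookies 1 and 26, trace_pid_show() prints "0" and "25", and a NULL
 * cookie terminates the iteration.
 */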
472
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always recreate a new array. The write is an all-or-nothing
494          * operation: a fresh array is built for the pids written by
495          * the user, and if the operation fails the current list is
496          * left unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522
523         while (cnt > 0) {
524
525                 pos = 0;
526
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
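/*
 * For illustration (hypothetical values): writing "1 2 3\n" on top of an
 * existing filter containing pid 10 builds a fresh list holding {1, 2, 3, 10}.
 * *new_pid_list is only updated on success, so a failed parse leaves the
 * caller's current list untouched.
 */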
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so that it can be used in fast paths,
592  * such as by the irqsoff tracer. But it may be inaccurate due to races.
593  * If you need to know the accurate state, use tracing_is_on(), which is
594  * a little slower but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384.
613  * If a dump on oops happens, it is much appreciated not to have
614  * to wait for all that output. In any case, this is configurable
615  * at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be rewritten
640  *      by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
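/*
 * Typical pairing, as a sketch (not a snippet from this file): a reader of a
 * single cpu buffer brackets its accesses with
 *
 *     trace_access_lock(cpu);
 *     ... consume events from that cpu's ring buffer ...
 *     trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS takes the whole-buffer lock exclusively.
 */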
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff) that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled while we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
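/*
 * For illustration: callers normally reach these two helpers through the
 * trace_puts() macro, e.g.
 *
 *     trace_puts("reached the slow path\n");
 *
 * which resolves to __trace_bputs() for a built-in constant string and to
 * __trace_puts() otherwise.
 */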
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id(), cond_data);
924         local_irq_restore(flags);
925 }
926
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929         tracing_snapshot_instance_cond(tr, NULL);
930 }
931
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot with either
940  * a tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948         struct trace_array *tr = &global_trace;
949
950         tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:         The tracing instance to snapshot
957  * @cond_data:  The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969         tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:         The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989         void *cond_data = NULL;
990
991         arch_spin_lock(&tr->max_lock);
992
993         if (tr->cond_snapshot)
994                 cond_data = tr->cond_snapshot->cond_data;
995
996         arch_spin_unlock(&tr->max_lock);
997
998         return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003                                         struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008         int ret;
1009
1010         if (!tr->allocated_snapshot) {
1011
1012                 /* allocate spare buffer */
1013                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015                 if (ret < 0)
1016                         return ret;
1017
1018                 tr->allocated_snapshot = true;
1019         }
1020
1021         return 0;
1022 }
1023
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026         /*
1027          * We don't free the ring buffer; instead, we resize it because
1028          * the max_tr ring buffer has some state (e.g. ring->clock) and
1029          * we want to preserve it.
1030          */
1031         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032         set_buffer_entries(&tr->max_buffer, 1);
1033         tracing_reset_online_cpus(&tr->max_buffer);
1034         tr->allocated_snapshot = false;
1035 }
1036
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049         struct trace_array *tr = &global_trace;
1050         int ret;
1051
1052         ret = tracing_alloc_snapshot_instance(tr);
1053         WARN_ON(ret < 0);
1054
1055         return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072         int ret;
1073
1074         ret = tracing_alloc_snapshot();
1075         if (ret < 0)
1076                 return;
1077
1078         tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:         The tracing instance
1085  * @cond_data:  User data to associate with the snapshot
1086  * @update:     Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096                                  cond_update_fn_t update)
1097 {
1098         struct cond_snapshot *cond_snapshot;
1099         int ret = 0;
1100
1101         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102         if (!cond_snapshot)
1103                 return -ENOMEM;
1104
1105         cond_snapshot->cond_data = cond_data;
1106         cond_snapshot->update = update;
1107
1108         mutex_lock(&trace_types_lock);
1109
1110         ret = tracing_alloc_snapshot_instance(tr);
1111         if (ret)
1112                 goto fail_unlock;
1113
1114         if (tr->current_trace->use_max_tr) {
1115                 ret = -EBUSY;
1116                 goto fail_unlock;
1117         }
1118
1119         /*
1120          * The cond_snapshot can only change to NULL without the
1121          * trace_types_lock. We don't care if we race with it going
1122          * to NULL, but we want to make sure that it's not set to
1123          * something other than NULL when we get here, which we can
1124          * do safely with only holding the trace_types_lock and not
1125          * having to take the max_lock.
1126          */
1127         if (tr->cond_snapshot) {
1128                 ret = -EBUSY;
1129                 goto fail_unlock;
1130         }
1131
1132         arch_spin_lock(&tr->max_lock);
1133         tr->cond_snapshot = cond_snapshot;
1134         arch_spin_unlock(&tr->max_lock);
1135
1136         mutex_unlock(&trace_types_lock);
1137
1138         return ret;
1139
1140  fail_unlock:
1141         mutex_unlock(&trace_types_lock);
1142         kfree(cond_snapshot);
1143         return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
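/*
 * Usage sketch (hypothetical names: my_update(), my_data; not from this file):
 *
 *     static bool my_update(struct trace_array *tr, void *cond_data)
 *     {
 *             struct my_data *d = cond_data;
 *
 *             return d->hit_threshold;
 *     }
 *
 *     tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *     ...
 *     tracing_snapshot_cond(tr, &my_data);
 *     ...
 *     tracing_snapshot_cond_disable(tr);
 *
 * The snapshot in tracing_snapshot_cond() is only taken when my_update()
 * returns true for the cond_data passed in.
 */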
1146
1147 /**
1148  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149  * @tr:         The tracing instance
1150  *
1151  * Check whether the conditional snapshot for the given instance is
1152  * enabled; if so, free the cond_snapshot associated with it,
1153  * otherwise return -EINVAL.
1154  *
1155  * Returns 0 if successful, error otherwise.
1156  */
1157 int tracing_snapshot_cond_disable(struct trace_array *tr)
1158 {
1159         int ret = 0;
1160
1161         arch_spin_lock(&tr->max_lock);
1162
1163         if (!tr->cond_snapshot)
1164                 ret = -EINVAL;
1165         else {
1166                 kfree(tr->cond_snapshot);
1167                 tr->cond_snapshot = NULL;
1168         }
1169
1170         arch_spin_unlock(&tr->max_lock);
1171
1172         return ret;
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175 #else
1176 void tracing_snapshot(void)
1177 {
1178         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182 {
1183         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186 int tracing_alloc_snapshot(void)
1187 {
1188         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189         return -ENODEV;
1190 }
1191 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192 void tracing_snapshot_alloc(void)
1193 {
1194         /* Give warning */
1195         tracing_snapshot();
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         return NULL;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204 {
1205         return -ENODEV;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208 int tracing_snapshot_cond_disable(struct trace_array *tr)
1209 {
1210         return false;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213 #endif /* CONFIG_TRACER_SNAPSHOT */
1214
1215 void tracer_tracing_off(struct trace_array *tr)
1216 {
1217         if (tr->trace_buffer.buffer)
1218                 ring_buffer_record_off(tr->trace_buffer.buffer);
1219         /*
1220          * This flag is looked at when buffers haven't been allocated
1221          * yet, or by some tracers (like irqsoff) that just want to
1222          * know if the ring buffer has been disabled, but it can handle
1223          * races where it gets disabled while we still do a record.
1224          * As the check is in the fast path of the tracers, it is more
1225          * important to be fast than accurate.
1226          */
1227         tr->buffer_disabled = 1;
1228         /* Make the flag seen by readers */
1229         smp_wmb();
1230 }
1231
1232 /**
1233  * tracing_off - turn off tracing buffers
1234  *
1235  * This function stops the tracing buffers from recording data.
1236  * It does not disable any overhead the tracers themselves may
1237  * be causing. This function simply causes all recording to
1238  * the ring buffers to fail.
1239  */
1240 void tracing_off(void)
1241 {
1242         tracer_tracing_off(&global_trace);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_off);
1245
1246 void disable_trace_on_warning(void)
1247 {
1248         if (__disable_trace_on_warning)
1249                 tracing_off();
1250 }
1251
1252 /**
1253  * tracer_tracing_is_on - show real state of ring buffer enabled
1254  * @tr : the trace array to know if ring buffer is enabled
1255  *
1256  * Shows real state of the ring buffer if it is enabled or not.
1257  */
1258 bool tracer_tracing_is_on(struct trace_array *tr)
1259 {
1260         if (tr->trace_buffer.buffer)
1261                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262         return !tr->buffer_disabled;
1263 }
1264
1265 /**
1266  * tracing_is_on - show state of ring buffers enabled
1267  */
1268 int tracing_is_on(void)
1269 {
1270         return tracer_tracing_is_on(&global_trace);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_is_on);
1273
1274 static int __init set_buf_size(char *str)
1275 {
1276         unsigned long buf_size;
1277
1278         if (!str)
1279                 return 0;
1280         buf_size = memparse(str, &str);
1281         /* nr_entries can not be zero */
1282         if (buf_size == 0)
1283                 return 0;
1284         trace_buf_size = buf_size;
1285         return 1;
1286 }
1287 __setup("trace_buf_size=", set_buf_size);
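/*
 * For illustration: memparse() accepts the usual size suffixes, so booting
 * with e.g. "trace_buf_size=4m" requests a 4 MiB buffer (rounded to page
 * size, as noted above).
 */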
1288
1289 static int __init set_tracing_thresh(char *str)
1290 {
1291         unsigned long threshold;
1292         int ret;
1293
1294         if (!str)
1295                 return 0;
1296         ret = kstrtoul(str, 0, &threshold);
1297         if (ret < 0)
1298                 return 0;
1299         tracing_thresh = threshold * 1000;
1300         return 1;
1301 }
1302 __setup("tracing_thresh=", set_tracing_thresh);
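/*
 * For illustration: the value is given in microseconds and stored in
 * nanoseconds, so "tracing_thresh=100" sets tracing_thresh to 100000 ns.
 */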
1303
1304 unsigned long nsecs_to_usecs(unsigned long nsecs)
1305 {
1306         return nsecs / 1000;
1307 }
1308
1309 /*
1310  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313  * of strings in the order that the evals (enum) were defined.
1314  */
1315 #undef C
1316 #define C(a, b) b
1317
1318 /* These must match the bit positions in trace_iterator_flags */
1319 static const char *trace_options[] = {
1320         TRACE_FLAGS
1321         NULL
1322 };
1323
1324 static struct {
1325         u64 (*func)(void);
1326         const char *name;
1327         int in_ns;              /* is this clock in nanoseconds? */
1328 } trace_clocks[] = {
1329         { trace_clock_local,            "local",        1 },
1330         { trace_clock_global,           "global",       1 },
1331         { trace_clock_counter,          "counter",      0 },
1332         { trace_clock_jiffies,          "uptime",       0 },
1333         { trace_clock,                  "perf",         1 },
1334         { ktime_get_mono_fast_ns,       "mono",         1 },
1335         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1336         { ktime_get_boot_fast_ns,       "boot",         1 },
1337         ARCH_TRACE_CLOCKS
1338 };
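/*
 * For illustration: the names above ("local", "global", "counter", ...) are
 * what the "trace_clock=" boot parameter and the tracefs trace_clock file
 * accept, e.g. booting with "trace_clock=global" selects trace_clock_global().
 */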
1339
1340 bool trace_clock_in_ns(struct trace_array *tr)
1341 {
1342         if (trace_clocks[tr->clock_id].in_ns)
1343                 return true;
1344
1345         return false;
1346 }
1347
1348 /*
1349  * trace_parser_get_init - gets the buffer for trace parser
1350  */
1351 int trace_parser_get_init(struct trace_parser *parser, int size)
1352 {
1353         memset(parser, 0, sizeof(*parser));
1354
1355         parser->buffer = kmalloc(size, GFP_KERNEL);
1356         if (!parser->buffer)
1357                 return 1;
1358
1359         parser->size = size;
1360         return 0;
1361 }
1362
1363 /*
1364  * trace_parser_put - frees the buffer for trace parser
1365  */
1366 void trace_parser_put(struct trace_parser *parser)
1367 {
1368         kfree(parser->buffer);
1369         parser->buffer = NULL;
1370 }
1371
1372 /*
1373  * trace_get_user - reads the user input string separated by space
1374  * (matched by isspace(ch))
1375  *
1376  * For each string found the 'struct trace_parser' is updated,
1377  * and the function returns.
1378  *
1379  * Returns number of bytes read.
1380  *
1381  * See kernel/trace/trace.h for 'struct trace_parser' details.
1382  */
1383 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384         size_t cnt, loff_t *ppos)
1385 {
1386         char ch;
1387         size_t read = 0;
1388         ssize_t ret;
1389
1390         if (!*ppos)
1391                 trace_parser_clear(parser);
1392
1393         ret = get_user(ch, ubuf++);
1394         if (ret)
1395                 goto out;
1396
1397         read++;
1398         cnt--;
1399
1400         /*
1401          * The parser is not finished with the last write,
1402          * continue reading the user input without skipping spaces.
1403          */
1404         if (!parser->cont) {
1405                 /* skip white space */
1406                 while (cnt && isspace(ch)) {
1407                         ret = get_user(ch, ubuf++);
1408                         if (ret)
1409                                 goto out;
1410                         read++;
1411                         cnt--;
1412                 }
1413
1414                 parser->idx = 0;
1415
1416                 /* only spaces were written */
1417                 if (isspace(ch) || !ch) {
1418                         *ppos += read;
1419                         ret = read;
1420                         goto out;
1421                 }
1422         }
1423
1424         /* read the non-space input */
1425         while (cnt && !isspace(ch) && ch) {
1426                 if (parser->idx < parser->size - 1)
1427                         parser->buffer[parser->idx++] = ch;
1428                 else {
1429                         ret = -EINVAL;
1430                         goto out;
1431                 }
1432                 ret = get_user(ch, ubuf++);
1433                 if (ret)
1434                         goto out;
1435                 read++;
1436                 cnt--;
1437         }
1438
1439         /* We either got finished input or we have to wait for another call. */
1440         if (isspace(ch) || !ch) {
1441                 parser->buffer[parser->idx] = 0;
1442                 parser->cont = false;
1443         } else if (parser->idx < parser->size - 1) {
1444                 parser->cont = true;
1445                 parser->buffer[parser->idx++] = ch;
1446                 /* Make sure the parsed string always terminates with '\0'. */
1447                 parser->buffer[parser->idx] = 0;
1448         } else {
1449                 ret = -EINVAL;
1450                 goto out;
1451         }
1452
1453         *ppos += read;
1454         ret = read;
1455
1456 out:
1457         return ret;
1458 }
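/*
 * For illustration: a user write of "foo bar\n" is consumed over successive
 * calls; the first call returns with parser->buffer = "foo" (cont == false),
 * the next with "bar". If a token is cut off by the end of the write,
 * parser->cont is set so a follow-up call (with a non-zero *ppos) continues
 * that token instead of skipping leading spaces.
 */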
1459
1460 /* TODO add a seq_buf_to_buffer() */
1461 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462 {
1463         int len;
1464
1465         if (trace_seq_used(s) <= s->seq.readpos)
1466                 return -EBUSY;
1467
1468         len = trace_seq_used(s) - s->seq.readpos;
1469         if (cnt > len)
1470                 cnt = len;
1471         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472
1473         s->seq.readpos += cnt;
1474         return cnt;
1475 }
1476
1477 unsigned long __read_mostly     tracing_thresh;
1478
1479 #ifdef CONFIG_TRACER_MAX_TRACE
1480 /*
1481  * Copy the new maximum trace into the separate maximum-trace
1482  * structure. (this way the maximum trace is permanently saved,
1483  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484  */
1485 static void
1486 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487 {
1488         struct trace_buffer *trace_buf = &tr->trace_buffer;
1489         struct trace_buffer *max_buf = &tr->max_buffer;
1490         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492
1493         max_buf->cpu = cpu;
1494         max_buf->time_start = data->preempt_timestamp;
1495
1496         max_data->saved_latency = tr->max_latency;
1497         max_data->critical_start = data->critical_start;
1498         max_data->critical_end = data->critical_end;
1499
1500         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501         max_data->pid = tsk->pid;
1502         /*
1503          * If tsk == current, then use current_uid(), as that does not use
1504          * RCU. The irq tracer can be called out of RCU scope.
1505          */
1506         if (tsk == current)
1507                 max_data->uid = current_uid();
1508         else
1509                 max_data->uid = task_uid(tsk);
1510
1511         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512         max_data->policy = tsk->policy;
1513         max_data->rt_priority = tsk->rt_priority;
1514
1515         /* record this task's comm */
1516         tracing_record_cmdline(tsk);
1517 }
1518
1519 /**
1520  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521  * @tr: tracer
1522  * @tsk: the task with the latency
1523  * @cpu: The cpu that initiated the trace.
1524  * @cond_data: User data associated with a conditional snapshot
1525  *
1526  * Flip the buffers between the @tr and the max_tr and record information
1527  * about which task was the cause of this latency.
1528  */
1529 void
1530 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531               void *cond_data)
1532 {
1533         if (tr->stop_count)
1534                 return;
1535
1536         WARN_ON_ONCE(!irqs_disabled());
1537
1538         if (!tr->allocated_snapshot) {
1539                 /* Only the nop tracer should hit this when disabling */
1540                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541                 return;
1542         }
1543
1544         arch_spin_lock(&tr->max_lock);
1545
1546         /* Inherit the recordable setting from trace_buffer */
1547         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548                 ring_buffer_record_on(tr->max_buffer.buffer);
1549         else
1550                 ring_buffer_record_off(tr->max_buffer.buffer);
1551
1552 #ifdef CONFIG_TRACER_SNAPSHOT
1553         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554                 goto out_unlock;
1555 #endif
1556         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557
1558         __update_max_tr(tr, tsk, cpu);
1559
1560  out_unlock:
1561         arch_spin_unlock(&tr->max_lock);
1562 }
1563
1564 /**
1565  * update_max_tr_single - only copy one trace over, and reset the rest
1566  * @tr: tracer
1567  * @tsk: task with the latency
1568  * @cpu: the cpu of the buffer to copy.
1569  *
1570  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571  */
1572 void
1573 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574 {
1575         int ret;
1576
1577         if (tr->stop_count)
1578                 return;
1579
1580         WARN_ON_ONCE(!irqs_disabled());
1581         if (!tr->allocated_snapshot) {
1582                 /* Only the nop tracer should hit this when disabling */
1583                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584                 return;
1585         }
1586
1587         arch_spin_lock(&tr->max_lock);
1588
1589         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590
1591         if (ret == -EBUSY) {
1592                 /*
1593                  * We failed to swap the buffer due to a commit taking
1594                  * place on this CPU. We fail to record, but we reset
1595                  * the max trace buffer (no one writes directly to it)
1596                  * and flag that it failed.
1597                  */
1598                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599                         "Failed to swap buffers due to commit in progress\n");
1600         }
1601
1602         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603
1604         __update_max_tr(tr, tsk, cpu);
1605         arch_spin_unlock(&tr->max_lock);
1606 }
1607 #endif /* CONFIG_TRACER_MAX_TRACE */
1608
1609 static int wait_on_pipe(struct trace_iterator *iter, int full)
1610 {
1611         /* Iterators are static, they should be filled or empty */
1612         if (trace_buffer_iter(iter, iter->cpu_file))
1613                 return 0;
1614
1615         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616                                 full);
1617 }
1618
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static bool selftests_can_run;
1621
1622 struct trace_selftests {
1623         struct list_head                list;
1624         struct tracer                   *type;
1625 };
1626
1627 static LIST_HEAD(postponed_selftests);
1628
1629 static int save_selftest(struct tracer *type)
1630 {
1631         struct trace_selftests *selftest;
1632
1633         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634         if (!selftest)
1635                 return -ENOMEM;
1636
1637         selftest->type = type;
1638         list_add(&selftest->list, &postponed_selftests);
1639         return 0;
1640 }
1641
1642 static int run_tracer_selftest(struct tracer *type)
1643 {
1644         struct trace_array *tr = &global_trace;
1645         struct tracer *saved_tracer = tr->current_trace;
1646         int ret;
1647
1648         if (!type->selftest || tracing_selftest_disabled)
1649                 return 0;
1650
1651         /*
1652          * If a tracer registers early in boot up (before scheduling is
1653          * initialized and such), then do not run its selftests yet.
1654          * Instead, run it a little later in the boot process.
1655          */
1656         if (!selftests_can_run)
1657                 return save_selftest(type);
1658
1659         /*
1660          * Run a selftest on this tracer.
1661          * Here we reset the trace buffer, and set the current
1662          * tracer to be this tracer. The tracer can then run some
1663          * internal tracing to verify that everything is in order.
1664          * If we fail, we do not register this tracer.
1665          */
1666         tracing_reset_online_cpus(&tr->trace_buffer);
1667
1668         tr->current_trace = type;
1669
1670 #ifdef CONFIG_TRACER_MAX_TRACE
1671         if (type->use_max_tr) {
1672                 /* If we expanded the buffers, make sure the max is expanded too */
1673                 if (ring_buffer_expanded)
1674                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675                                            RING_BUFFER_ALL_CPUS);
1676                 tr->allocated_snapshot = true;
1677         }
1678 #endif
1679
1680         /* the test is responsible for initializing and enabling */
1681         pr_info("Testing tracer %s: ", type->name);
1682         ret = type->selftest(type, tr);
1683         /* the test is responsible for resetting too */
1684         tr->current_trace = saved_tracer;
1685         if (ret) {
1686                 printk(KERN_CONT "FAILED!\n");
1687                 /* Add the warning after printing 'FAILED' */
1688                 WARN_ON(1);
1689                 return -1;
1690         }
1691         /* Only reset on passing, to avoid touching corrupted buffers */
1692         tracing_reset_online_cpus(&tr->trace_buffer);
1693
1694 #ifdef CONFIG_TRACER_MAX_TRACE
1695         if (type->use_max_tr) {
1696                 tr->allocated_snapshot = false;
1697
1698                 /* Shrink the max buffer again */
1699                 if (ring_buffer_expanded)
1700                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1701                                            RING_BUFFER_ALL_CPUS);
1702         }
1703 #endif
1704
1705         printk(KERN_CONT "PASSED\n");
1706         return 0;
1707 }
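/*
 * Illustrative sketch, not part of the original file: the ->selftest()
 * hook invoked above as type->selftest(type, tr) has the shape below.
 * A real selftest starts the tracer, generates some events, checks the
 * ring buffer contents and resets tracing; a non-zero return keeps the
 * tracer off the trace_types list.  "demo_trace_selftest" is a
 * hypothetical name used only for this sketch.
 */
static int __maybe_unused
demo_trace_selftest(struct tracer *trace, struct trace_array *tr)
{
        return 0;       /* report success */
}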
1708
1709 static __init int init_trace_selftests(void)
1710 {
1711         struct trace_selftests *p, *n;
1712         struct tracer *t, **last;
1713         int ret;
1714
1715         selftests_can_run = true;
1716
1717         mutex_lock(&trace_types_lock);
1718
1719         if (list_empty(&postponed_selftests))
1720                 goto out;
1721
1722         pr_info("Running postponed tracer tests:\n");
1723
1724         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725                 ret = run_tracer_selftest(p->type);
1726                 /* If the test fails, then warn and remove from available_tracers */
1727                 if (ret < 0) {
1728                         WARN(1, "tracer: %s failed selftest, disabling\n",
1729                              p->type->name);
1730                         last = &trace_types;
1731                         for (t = trace_types; t; t = t->next) {
1732                                 if (t == p->type) {
1733                                         *last = t->next;
1734                                         break;
1735                                 }
1736                                 last = &t->next;
1737                         }
1738                 }
1739                 list_del(&p->list);
1740                 kfree(p);
1741         }
1742
1743  out:
1744         mutex_unlock(&trace_types_lock);
1745
1746         return 0;
1747 }
1748 core_initcall(init_trace_selftests);
1749 #else
1750 static inline int run_tracer_selftest(struct tracer *type)
1751 {
1752         return 0;
1753 }
1754 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1755
1756 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757
1758 static void __init apply_trace_boot_options(void);
1759
1760 /**
1761  * register_tracer - register a tracer with the ftrace system.
1762  * @type: the plugin for the tracer
1763  *
1764  * Register a new plugin tracer.
1765  */
1766 int __init register_tracer(struct tracer *type)
1767 {
1768         struct tracer *t;
1769         int ret = 0;
1770
1771         if (!type->name) {
1772                 pr_info("Tracer must have a name\n");
1773                 return -1;
1774         }
1775
1776         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778                 return -1;
1779         }
1780
1781         mutex_lock(&trace_types_lock);
1782
1783         tracing_selftest_running = true;
1784
1785         for (t = trace_types; t; t = t->next) {
1786                 if (strcmp(type->name, t->name) == 0) {
1787                         /* already found */
1788                         pr_info("Tracer %s already registered\n",
1789                                 type->name);
1790                         ret = -1;
1791                         goto out;
1792                 }
1793         }
1794
1795         if (!type->set_flag)
1796                 type->set_flag = &dummy_set_flag;
1797         if (!type->flags) {
1798                 /* allocate a dummy tracer_flags */
1799                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800                 if (!type->flags) {
1801                         ret = -ENOMEM;
1802                         goto out;
1803                 }
1804                 type->flags->val = 0;
1805                 type->flags->opts = dummy_tracer_opt;
1806         } else
1807                 if (!type->flags->opts)
1808                         type->flags->opts = dummy_tracer_opt;
1809
1810         /* store the tracer for __set_tracer_option */
1811         type->flags->trace = type;
1812
1813         ret = run_tracer_selftest(type);
1814         if (ret < 0)
1815                 goto out;
1816
1817         type->next = trace_types;
1818         trace_types = type;
1819         add_tracer_options(&global_trace, type);
1820
1821  out:
1822         tracing_selftest_running = false;
1823         mutex_unlock(&trace_types_lock);
1824
1825         if (ret || !default_bootup_tracer)
1826                 goto out_unlock;
1827
1828         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829                 goto out_unlock;
1830
1831         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832         /* Do we want this tracer to start on bootup? */
1833         tracing_set_tracer(&global_trace, type->name);
1834         default_bootup_tracer = NULL;
1835
1836         apply_trace_boot_options();
1837
1838         /* Disable other selftests, since running this tracer will break them. */
1839         tracing_selftest_disabled = true;
1840 #ifdef CONFIG_FTRACE_STARTUP_TEST
1841         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842                type->name);
1843 #endif
1844
1845  out_unlock:
1846         return ret;
1847 }
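/*
 * Illustrative sketch, not part of the original file: built-in tracers
 * typically register themselves from an initcall.  register_tracer() is
 * __init, so the caller must be __init as well.  The "demo" names are
 * hypothetical and only .name is filled in here.
 */
static struct tracer demo_tracer __read_mostly = {
        .name           = "demo",
};

static __init int demo_tracer_register(void)
{
        return register_tracer(&demo_tracer);
}
core_initcall(demo_tracer_register);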
1848
1849 void tracing_reset(struct trace_buffer *buf, int cpu)
1850 {
1851         struct ring_buffer *buffer = buf->buffer;
1852
1853         if (!buffer)
1854                 return;
1855
1856         ring_buffer_record_disable(buffer);
1857
1858         /* Make sure all commits have finished */
1859         synchronize_rcu();
1860         ring_buffer_reset_cpu(buffer, cpu);
1861
1862         ring_buffer_record_enable(buffer);
1863 }
1864
1865 void tracing_reset_online_cpus(struct trace_buffer *buf)
1866 {
1867         struct ring_buffer *buffer = buf->buffer;
1868         int cpu;
1869
1870         if (!buffer)
1871                 return;
1872
1873         ring_buffer_record_disable(buffer);
1874
1875         /* Make sure all commits have finished */
1876         synchronize_rcu();
1877
1878         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879
1880         for_each_online_cpu(cpu)
1881                 ring_buffer_reset_cpu(buffer, cpu);
1882
1883         ring_buffer_record_enable(buffer);
1884 }
1885
1886 /* Must have trace_types_lock held */
1887 void tracing_reset_all_online_cpus(void)
1888 {
1889         struct trace_array *tr;
1890
1891         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892                 if (!tr->clear_trace)
1893                         continue;
1894                 tr->clear_trace = false;
1895                 tracing_reset_online_cpus(&tr->trace_buffer);
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897                 tracing_reset_online_cpus(&tr->max_buffer);
1898 #endif
1899         }
1900 }
1901
1902 static int *tgid_map;
1903
1904 #define SAVED_CMDLINES_DEFAULT 128
1905 #define NO_CMDLINE_MAP UINT_MAX
1906 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907 struct saved_cmdlines_buffer {
1908         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909         unsigned *map_cmdline_to_pid;
1910         unsigned cmdline_num;
1911         int cmdline_idx;
1912         char *saved_cmdlines;
1913 };
1914 static struct saved_cmdlines_buffer *savedcmd;
1915
1916 /* temporary disable recording */
1917 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918
1919 static inline char *get_saved_cmdlines(int idx)
1920 {
1921         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922 }
1923
1924 static inline void set_cmdline(int idx, const char *cmdline)
1925 {
1926         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927 }
1928
1929 static int allocate_cmdlines_buffer(unsigned int val,
1930                                     struct saved_cmdlines_buffer *s)
1931 {
1932         s->map_cmdline_to_pid = kmalloc_array(val,
1933                                               sizeof(*s->map_cmdline_to_pid),
1934                                               GFP_KERNEL);
1935         if (!s->map_cmdline_to_pid)
1936                 return -ENOMEM;
1937
1938         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939         if (!s->saved_cmdlines) {
1940                 kfree(s->map_cmdline_to_pid);
1941                 return -ENOMEM;
1942         }
1943
1944         s->cmdline_idx = 0;
1945         s->cmdline_num = val;
1946         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947                sizeof(s->map_pid_to_cmdline));
1948         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949                val * sizeof(*s->map_cmdline_to_pid));
1950
1951         return 0;
1952 }
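/*
 * Illustrative sketch, not part of the original file: the memset()s above
 * depend on NO_CMDLINE_MAP being UINT_MAX.  memset() only keeps the low
 * byte of its value argument (0xff here), so a byte-wise fill still makes
 * every 32-bit slot read back as NO_CMDLINE_MAP.  "demo_fill_no_cmdline_map"
 * is a hypothetical name.
 */
static void __maybe_unused demo_fill_no_cmdline_map(unsigned *map, unsigned num)
{
        memset(map, 0xff, num * sizeof(*map));
        WARN_ON_ONCE(map[0] != NO_CMDLINE_MAP); /* each entry is UINT_MAX */
}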
1953
1954 static int trace_create_savedcmd(void)
1955 {
1956         int ret;
1957
1958         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959         if (!savedcmd)
1960                 return -ENOMEM;
1961
1962         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963         if (ret < 0) {
1964                 kfree(savedcmd);
1965                 savedcmd = NULL;
1966                 return -ENOMEM;
1967         }
1968
1969         return 0;
1970 }
1971
1972 int is_tracing_stopped(void)
1973 {
1974         return global_trace.stop_count;
1975 }
1976
1977 /**
1978  * tracing_start - quick start of the tracer
1979  *
1980  * If tracing is enabled but was stopped by tracing_stop,
1981  * this will start the tracer back up.
1982  */
1983 void tracing_start(void)
1984 {
1985         struct ring_buffer *buffer;
1986         unsigned long flags;
1987
1988         if (tracing_disabled)
1989                 return;
1990
1991         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992         if (--global_trace.stop_count) {
1993                 if (global_trace.stop_count < 0) {
1994                         /* Someone screwed up their debugging */
1995                         WARN_ON_ONCE(1);
1996                         global_trace.stop_count = 0;
1997                 }
1998                 goto out;
1999         }
2000
2001         /* Prevent the buffers from switching */
2002         arch_spin_lock(&global_trace.max_lock);
2003
2004         buffer = global_trace.trace_buffer.buffer;
2005         if (buffer)
2006                 ring_buffer_record_enable(buffer);
2007
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009         buffer = global_trace.max_buffer.buffer;
2010         if (buffer)
2011                 ring_buffer_record_enable(buffer);
2012 #endif
2013
2014         arch_spin_unlock(&global_trace.max_lock);
2015
2016  out:
2017         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018 }
2019
2020 static void tracing_start_tr(struct trace_array *tr)
2021 {
2022         struct ring_buffer *buffer;
2023         unsigned long flags;
2024
2025         if (tracing_disabled)
2026                 return;
2027
2028         /* If global, we need to also start the max tracer */
2029         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030                 return tracing_start();
2031
2032         raw_spin_lock_irqsave(&tr->start_lock, flags);
2033
2034         if (--tr->stop_count) {
2035                 if (tr->stop_count < 0) {
2036                         /* Someone screwed up their debugging */
2037                         WARN_ON_ONCE(1);
2038                         tr->stop_count = 0;
2039                 }
2040                 goto out;
2041         }
2042
2043         buffer = tr->trace_buffer.buffer;
2044         if (buffer)
2045                 ring_buffer_record_enable(buffer);
2046
2047  out:
2048         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049 }
2050
2051 /**
2052  * tracing_stop - quick stop of the tracer
2053  *
2054  * Light weight way to stop tracing. Use in conjunction with
2055  * tracing_start.
2056  */
2057 void tracing_stop(void)
2058 {
2059         struct ring_buffer *buffer;
2060         unsigned long flags;
2061
2062         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063         if (global_trace.stop_count++)
2064                 goto out;
2065
2066         /* Prevent the buffers from switching */
2067         arch_spin_lock(&global_trace.max_lock);
2068
2069         buffer = global_trace.trace_buffer.buffer;
2070         if (buffer)
2071                 ring_buffer_record_disable(buffer);
2072
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074         buffer = global_trace.max_buffer.buffer;
2075         if (buffer)
2076                 ring_buffer_record_disable(buffer);
2077 #endif
2078
2079         arch_spin_unlock(&global_trace.max_lock);
2080
2081  out:
2082         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
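/*
 * Illustrative sketch, not part of the original file: tracing_stop() and
 * tracing_start() nest through stop_count, so a section whose activity
 * should not end up in the global trace can be bracketed like this.
 * "demo_quiet_section" is a hypothetical name.
 */
static void __maybe_unused demo_quiet_section(void)
{
        tracing_stop();
        /* ... work that should not be recorded ... */
        tracing_start();
}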
2084
2085 static void tracing_stop_tr(struct trace_array *tr)
2086 {
2087         struct ring_buffer *buffer;
2088         unsigned long flags;
2089
2090         /* If global, we need to also stop the max tracer */
2091         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092                 return tracing_stop();
2093
2094         raw_spin_lock_irqsave(&tr->start_lock, flags);
2095         if (tr->stop_count++)
2096                 goto out;
2097
2098         buffer = tr->trace_buffer.buffer;
2099         if (buffer)
2100                 ring_buffer_record_disable(buffer);
2101
2102  out:
2103         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104 }
2105
2106 static int trace_save_cmdline(struct task_struct *tsk)
2107 {
2108         unsigned pid, idx;
2109
2110         /* treat recording of idle task as a success */
2111         if (!tsk->pid)
2112                 return 1;
2113
2114         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115                 return 0;
2116
2117         /*
2118          * It's not the end of the world if we don't get
2119          * the lock, but we also don't want to spin
2120          * nor do we want to disable interrupts,
2121          * so if we miss here, then better luck next time.
2122          */
2123         if (!arch_spin_trylock(&trace_cmdline_lock))
2124                 return 0;
2125
2126         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127         if (idx == NO_CMDLINE_MAP) {
2128                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129
2130                 /*
2131                  * Check whether the cmdline buffer at idx has a pid
2132                  * mapped. We are going to overwrite that entry so we
2133                  * need to clear the map_pid_to_cmdline. Otherwise we
2134                  * would read the new comm for the old pid.
2135                  */
2136                 pid = savedcmd->map_cmdline_to_pid[idx];
2137                 if (pid != NO_CMDLINE_MAP)
2138                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139
2140                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142
2143                 savedcmd->cmdline_idx = idx;
2144         }
2145
2146         set_cmdline(idx, tsk->comm);
2147
2148         arch_spin_unlock(&trace_cmdline_lock);
2149
2150         return 1;
2151 }
2152
2153 static void __trace_find_cmdline(int pid, char comm[])
2154 {
2155         unsigned map;
2156
2157         if (!pid) {
2158                 strcpy(comm, "<idle>");
2159                 return;
2160         }
2161
2162         if (WARN_ON_ONCE(pid < 0)) {
2163                 strcpy(comm, "<XXX>");
2164                 return;
2165         }
2166
2167         if (pid > PID_MAX_DEFAULT) {
2168                 strcpy(comm, "<...>");
2169                 return;
2170         }
2171
2172         map = savedcmd->map_pid_to_cmdline[pid];
2173         if (map != NO_CMDLINE_MAP)
2174                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175         else
2176                 strcpy(comm, "<...>");
2177 }
2178
2179 void trace_find_cmdline(int pid, char comm[])
2180 {
2181         preempt_disable();
2182         arch_spin_lock(&trace_cmdline_lock);
2183
2184         __trace_find_cmdline(pid, comm);
2185
2186         arch_spin_unlock(&trace_cmdline_lock);
2187         preempt_enable();
2188 }
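/*
 * Illustrative sketch, not part of the original file: the save/find pair
 * above is a bounded, best-effort pid -> comm cache.  Saving may fail when
 * the cmdline lock is contended, and a later lookup prints "<...>" once
 * the slot has been reused.  "demo_cmdline_roundtrip" is a hypothetical name.
 */
static void __maybe_unused demo_cmdline_roundtrip(void)
{
        char comm[TASK_COMM_LEN];

        trace_save_cmdline(current);            /* returns 0 if the lock was busy */
        trace_find_cmdline(current->pid, comm); /* comm, or "<...>" if evicted */
}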
2189
2190 int trace_find_tgid(int pid)
2191 {
2192         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193                 return 0;
2194
2195         return tgid_map[pid];
2196 }
2197
2198 static int trace_save_tgid(struct task_struct *tsk)
2199 {
2200         /* treat recording of idle task as a success */
2201         if (!tsk->pid)
2202                 return 1;
2203
2204         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205                 return 0;
2206
2207         tgid_map[tsk->pid] = tsk->tgid;
2208         return 1;
2209 }
2210
2211 static bool tracing_record_taskinfo_skip(int flags)
2212 {
2213         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214                 return true;
2215         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216                 return true;
2217         if (!__this_cpu_read(trace_taskinfo_save))
2218                 return true;
2219         return false;
2220 }
2221
2222 /**
2223  * tracing_record_taskinfo - record the task info of a task
2224  *
2225  * @task: task to record
2226  * @flags: TRACE_RECORD_CMDLINE for recording comm
2227  *         TRACE_RECORD_TGID for recording tgid
2228  */
2229 void tracing_record_taskinfo(struct task_struct *task, int flags)
2230 {
2231         bool done;
2232
2233         if (tracing_record_taskinfo_skip(flags))
2234                 return;
2235
2236         /*
2237          * Record as much task information as possible. If some fail, continue
2238          * to try to record the others.
2239          */
2240         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242
2243         /* If recording any information failed, retry again soon. */
2244         if (!done)
2245                 return;
2246
2247         __this_cpu_write(trace_taskinfo_save, false);
2248 }
2249
2250 /**
2251  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252  *
2253  * @prev: previous task during sched_switch
2254  * @next: next task during sched_switch
2255  * @flags: TRACE_RECORD_CMDLINE for recording comm
2256  *         TRACE_RECORD_TGID for recording tgid
2257  */
2258 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259                                           struct task_struct *next, int flags)
2260 {
2261         bool done;
2262
2263         if (tracing_record_taskinfo_skip(flags))
2264                 return;
2265
2266         /*
2267          * Record as much task information as possible. If some fail, continue
2268          * to try to record the others.
2269          */
2270         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274
2275         /* If recording any information failed, retry again soon. */
2276         if (!done)
2277                 return;
2278
2279         __this_cpu_write(trace_taskinfo_save, false);
2280 }
2281
2282 /* Helpers to record a specific task information */
2283 void tracing_record_cmdline(struct task_struct *task)
2284 {
2285         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286 }
2287
2288 void tracing_record_tgid(struct task_struct *task)
2289 {
2290         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291 }
2292
2293 /*
2294  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296  * simplifies those functions and keeps them in sync.
2297  */
2298 enum print_line_t trace_handle_return(struct trace_seq *s)
2299 {
2300         return trace_seq_has_overflowed(s) ?
2301                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302 }
2303 EXPORT_SYMBOL_GPL(trace_handle_return);
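/*
 * Illustrative sketch, not part of the original file: an event's output
 * callback writes into iter->seq and lets trace_handle_return() map a
 * possible overflow to TRACE_TYPE_PARTIAL_LINE.  "demo_event_print" is a
 * hypothetical name.
 */
static enum print_line_t __maybe_unused
demo_event_print(struct trace_iterator *iter)
{
        trace_seq_puts(&iter->seq, "demo event\n");
        return trace_handle_return(&iter->seq);
}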
2304
2305 void
2306 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307                              int pc)
2308 {
2309         struct task_struct *tsk = current;
2310
2311         entry->preempt_count            = pc & 0xff;
2312         entry->pid                      = (tsk) ? tsk->pid : 0;
2313         entry->flags =
2314 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316 #else
2317                 TRACE_FLAG_IRQS_NOSUPPORT |
2318 #endif
2319                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2320                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324 }
2325 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2326
2327 struct ring_buffer_event *
2328 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329                           int type,
2330                           unsigned long len,
2331                           unsigned long flags, int pc)
2332 {
2333         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334 }
2335
2336 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338 static int trace_buffered_event_ref;
2339
2340 /**
2341  * trace_buffered_event_enable - enable buffering events
2342  *
2343  * When events are being filtered, it is quicker to use a temporary
2344  * buffer to write the event data into if there's a likely chance
2345  * that it will not be committed. The discard of the ring buffer
2346  * is not as fast as committing, and is much slower than copying
2347  * a commit.
2348  *
2349  * When an event is to be filtered, allocate per cpu buffers to
2350  * write the event data into, and if the event is filtered and discarded
2351  * it is simply dropped, otherwise, the entire data is to be committed
2352  * in one shot.
2353  */
2354 void trace_buffered_event_enable(void)
2355 {
2356         struct ring_buffer_event *event;
2357         struct page *page;
2358         int cpu;
2359
2360         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361
2362         if (trace_buffered_event_ref++)
2363                 return;
2364
2365         for_each_tracing_cpu(cpu) {
2366                 page = alloc_pages_node(cpu_to_node(cpu),
2367                                         GFP_KERNEL | __GFP_NORETRY, 0);
2368                 if (!page)
2369                         goto failed;
2370
2371                 event = page_address(page);
2372                 memset(event, 0, sizeof(*event));
2373
2374                 per_cpu(trace_buffered_event, cpu) = event;
2375
2376                 preempt_disable();
2377                 if (cpu == smp_processor_id() &&
2378                     this_cpu_read(trace_buffered_event) !=
2379                     per_cpu(trace_buffered_event, cpu))
2380                         WARN_ON_ONCE(1);
2381                 preempt_enable();
2382         }
2383
2384         return;
2385  failed:
2386         trace_buffered_event_disable();
2387 }
2388
2389 static void enable_trace_buffered_event(void *data)
2390 {
2391         /* Probably not needed, but do it anyway */
2392         smp_rmb();
2393         this_cpu_dec(trace_buffered_event_cnt);
2394 }
2395
2396 static void disable_trace_buffered_event(void *data)
2397 {
2398         this_cpu_inc(trace_buffered_event_cnt);
2399 }
2400
2401 /**
2402  * trace_buffered_event_disable - disable buffering events
2403  *
2404  * When a filter is removed, it is faster to not use the buffered
2405  * events, and to commit directly into the ring buffer. Free up
2406  * the temp buffers when there are no more users. This requires
2407  * special synchronization with current events.
2408  */
2409 void trace_buffered_event_disable(void)
2410 {
2411         int cpu;
2412
2413         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414
2415         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416                 return;
2417
2418         if (--trace_buffered_event_ref)
2419                 return;
2420
2421         preempt_disable();
2422         /* For each CPU, set the buffer as used. */
2423         smp_call_function_many(tracing_buffer_mask,
2424                                disable_trace_buffered_event, NULL, 1);
2425         preempt_enable();
2426
2427         /* Wait for all current users to finish */
2428         synchronize_rcu();
2429
2430         for_each_tracing_cpu(cpu) {
2431                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432                 per_cpu(trace_buffered_event, cpu) = NULL;
2433         }
2434         /*
2435          * Make sure trace_buffered_event is NULL before clearing
2436          * trace_buffered_event_cnt.
2437          */
2438         smp_wmb();
2439
2440         preempt_disable();
2441         /* Do the work on each cpu */
2442         smp_call_function_many(tracing_buffer_mask,
2443                                enable_trace_buffered_event, NULL, 1);
2444         preempt_enable();
2445 }
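/*
 * Illustrative sketch, not part of the original file: the two calls above
 * are reference counted and must be paired under event_mutex, typically
 * around the lifetime of an event filter.  "demo_filter_attach" is a
 * hypothetical name.
 */
static void __maybe_unused demo_filter_attach(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();  /* while at least one filter exists */
        /* ... install the filter ... */
        mutex_unlock(&event_mutex);

        /* and later, when the filter is removed: */
        mutex_lock(&event_mutex);
        trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}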
2446
2447 static struct ring_buffer *temp_buffer;
2448
2449 struct ring_buffer_event *
2450 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451                           struct trace_event_file *trace_file,
2452                           int type, unsigned long len,
2453                           unsigned long flags, int pc)
2454 {
2455         struct ring_buffer_event *entry;
2456         int val;
2457
2458         *current_rb = trace_file->tr->trace_buffer.buffer;
2459
2460         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462             (entry = this_cpu_read(trace_buffered_event))) {
2463                 /* Try to use the per cpu buffer first */
2464                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2465                 if (val == 1) {
2466                         trace_event_setup(entry, type, flags, pc);
2467                         entry->array[0] = len;
2468                         return entry;
2469                 }
2470                 this_cpu_dec(trace_buffered_event_cnt);
2471         }
2472
2473         entry = __trace_buffer_lock_reserve(*current_rb,
2474                                             type, len, flags, pc);
2475         /*
2476          * If tracing is off, but we have triggers enabled
2477          * we still need to look at the event data. Use the temp_buffer
2478          * to store the trace event for the tigger to use. It's recusive
2479          * safe and will not be recorded anywhere.
2480          */
2481         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482                 *current_rb = temp_buffer;
2483                 entry = __trace_buffer_lock_reserve(*current_rb,
2484                                                     type, len, flags, pc);
2485         }
2486         return entry;
2487 }
2488 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2489
2490 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491 static DEFINE_MUTEX(tracepoint_printk_mutex);
2492
2493 static void output_printk(struct trace_event_buffer *fbuffer)
2494 {
2495         struct trace_event_call *event_call;
2496         struct trace_event *event;
2497         unsigned long flags;
2498         struct trace_iterator *iter = tracepoint_print_iter;
2499
2500         /* We should never get here if iter is NULL */
2501         if (WARN_ON_ONCE(!iter))
2502                 return;
2503
2504         event_call = fbuffer->trace_file->event_call;
2505         if (!event_call || !event_call->event.funcs ||
2506             !event_call->event.funcs->trace)
2507                 return;
2508
2509         event = &fbuffer->trace_file->event_call->event;
2510
2511         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512         trace_seq_init(&iter->seq);
2513         iter->ent = fbuffer->entry;
2514         event_call->event.funcs->trace(iter, 0, event);
2515         trace_seq_putc(&iter->seq, 0);
2516         printk("%s", iter->seq.buffer);
2517
2518         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519 }
2520
2521 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522                              void __user *buffer, size_t *lenp,
2523                              loff_t *ppos)
2524 {
2525         int save_tracepoint_printk;
2526         int ret;
2527
2528         mutex_lock(&tracepoint_printk_mutex);
2529         save_tracepoint_printk = tracepoint_printk;
2530
2531         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532
2533         /*
2534          * This will force exiting early, as tracepoint_printk
2535          * is always zero when tracepoint_print_iter is not allocated
2536          */
2537         if (!tracepoint_print_iter)
2538                 tracepoint_printk = 0;
2539
2540         if (save_tracepoint_printk == tracepoint_printk)
2541                 goto out;
2542
2543         if (tracepoint_printk)
2544                 static_key_enable(&tracepoint_printk_key.key);
2545         else
2546                 static_key_disable(&tracepoint_printk_key.key);
2547
2548  out:
2549         mutex_unlock(&tracepoint_printk_mutex);
2550
2551         return ret;
2552 }
2553
2554 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555 {
2556         if (static_key_false(&tracepoint_printk_key.key))
2557                 output_printk(fbuffer);
2558
2559         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560                                     fbuffer->event, fbuffer->entry,
2561                                     fbuffer->flags, fbuffer->pc);
2562 }
2563 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564
2565 /*
2566  * Skip 3:
2567  *
2568  *   trace_buffer_unlock_commit_regs()
2569  *   trace_event_buffer_commit()
2570  *   trace_event_raw_event_xxx()
2571  */
2572 # define STACK_SKIP 3
2573
2574 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575                                      struct ring_buffer *buffer,
2576                                      struct ring_buffer_event *event,
2577                                      unsigned long flags, int pc,
2578                                      struct pt_regs *regs)
2579 {
2580         __buffer_unlock_commit(buffer, event);
2581
2582         /*
2583          * If regs is not set, then skip the necessary functions.
2584          * Note, we can still get here via blktrace, wakeup tracer
2585          * and mmiotrace, but that's ok if they lose a function or
2586          * two. They are not that meaningful.
2587          */
2588         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589         ftrace_trace_userstack(buffer, flags, pc);
2590 }
2591
2592 /*
2593  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2594  */
2595 void
2596 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597                                    struct ring_buffer_event *event)
2598 {
2599         __buffer_unlock_commit(buffer, event);
2600 }
2601
2602 static void
2603 trace_process_export(struct trace_export *export,
2604                struct ring_buffer_event *event)
2605 {
2606         struct trace_entry *entry;
2607         unsigned int size = 0;
2608
2609         entry = ring_buffer_event_data(event);
2610         size = ring_buffer_event_length(event);
2611         export->write(export, entry, size);
2612 }
2613
2614 static DEFINE_MUTEX(ftrace_export_lock);
2615
2616 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617
2618 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619
2620 static inline void ftrace_exports_enable(void)
2621 {
2622         static_branch_enable(&ftrace_exports_enabled);
2623 }
2624
2625 static inline void ftrace_exports_disable(void)
2626 {
2627         static_branch_disable(&ftrace_exports_enabled);
2628 }
2629
2630 static void ftrace_exports(struct ring_buffer_event *event)
2631 {
2632         struct trace_export *export;
2633
2634         preempt_disable_notrace();
2635
2636         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637         while (export) {
2638                 trace_process_export(export, event);
2639                 export = rcu_dereference_raw_notrace(export->next);
2640         }
2641
2642         preempt_enable_notrace();
2643 }
2644
2645 static inline void
2646 add_trace_export(struct trace_export **list, struct trace_export *export)
2647 {
2648         rcu_assign_pointer(export->next, *list);
2649         /*
2650          * We are entering export into the list but another
2651          * CPU might be walking that list. We need to make sure
2652          * the export->next pointer is valid before another CPU sees
2653          * the export pointer included in the list.
2654          */
2655         rcu_assign_pointer(*list, export);
2656 }
2657
2658 static inline int
2659 rm_trace_export(struct trace_export **list, struct trace_export *export)
2660 {
2661         struct trace_export **p;
2662
2663         for (p = list; *p != NULL; p = &(*p)->next)
2664                 if (*p == export)
2665                         break;
2666
2667         if (*p != export)
2668                 return -1;
2669
2670         rcu_assign_pointer(*p, (*p)->next);
2671
2672         return 0;
2673 }
2674
2675 static inline void
2676 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677 {
2678         if (*list == NULL)
2679                 ftrace_exports_enable();
2680
2681         add_trace_export(list, export);
2682 }
2683
2684 static inline int
2685 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687         int ret;
2688
2689         ret = rm_trace_export(list, export);
2690         if (*list == NULL)
2691                 ftrace_exports_disable();
2692
2693         return ret;
2694 }
2695
2696 int register_ftrace_export(struct trace_export *export)
2697 {
2698         if (WARN_ON_ONCE(!export->write))
2699                 return -1;
2700
2701         mutex_lock(&ftrace_export_lock);
2702
2703         add_ftrace_export(&ftrace_exports_list, export);
2704
2705         mutex_unlock(&ftrace_export_lock);
2706
2707         return 0;
2708 }
2709 EXPORT_SYMBOL_GPL(register_ftrace_export);
2710
2711 int unregister_ftrace_export(struct trace_export *export)
2712 {
2713         int ret;
2714
2715         mutex_lock(&ftrace_export_lock);
2716
2717         ret = rm_ftrace_export(&ftrace_exports_list, export);
2718
2719         mutex_unlock(&ftrace_export_lock);
2720
2721         return ret;
2722 }
2723 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
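/*
 * Illustrative sketch, not part of the original file: an exporter supplies
 * a ->write() callback that receives the raw event bytes from
 * trace_process_export() above.  The prototype is inferred from that call
 * site and the "demo" names are hypothetical.
 */
static void demo_export_write(struct trace_export *export,
                              const void *entry, unsigned int size)
{
        /* Push the event bytes to some out-of-band channel. */
}

static struct trace_export demo_export __maybe_unused = {
        .write  = demo_export_write,
};

/* register_ftrace_export(&demo_export); ... unregister_ftrace_export(&demo_export); */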
2724
2725 void
2726 trace_function(struct trace_array *tr,
2727                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728                int pc)
2729 {
2730         struct trace_event_call *call = &event_function;
2731         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732         struct ring_buffer_event *event;
2733         struct ftrace_entry *entry;
2734
2735         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736                                             flags, pc);
2737         if (!event)
2738                 return;
2739         entry   = ring_buffer_event_data(event);
2740         entry->ip                       = ip;
2741         entry->parent_ip                = parent_ip;
2742
2743         if (!call_filter_check_discard(call, entry, buffer, event)) {
2744                 if (static_branch_unlikely(&ftrace_exports_enabled))
2745                         ftrace_exports(event);
2746                 __buffer_unlock_commit(buffer, event);
2747         }
2748 }
2749
2750 #ifdef CONFIG_STACKTRACE
2751
2752 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753 struct ftrace_stack {
2754         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2755 };
2756
2757 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759
2760 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761                                  unsigned long flags,
2762                                  int skip, int pc, struct pt_regs *regs)
2763 {
2764         struct trace_event_call *call = &event_kernel_stack;
2765         struct ring_buffer_event *event;
2766         struct stack_entry *entry;
2767         struct stack_trace trace;
2768         int use_stack;
2769         int size = FTRACE_STACK_ENTRIES;
2770
2771         trace.nr_entries        = 0;
2772         trace.skip              = skip;
2773
2774         /*
2775          * Add one, for this function and the call to save_stack_trace()
2776          * If regs is set, then these functions will not be in the way.
2777          */
2778 #ifndef CONFIG_UNWINDER_ORC
2779         if (!regs)
2780                 trace.skip++;
2781 #endif
2782
2783         /*
2784          * Since events can happen in NMIs, there's no safe way to
2785          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
2786          * or NMI comes in while it is held, that context falls back to
2787          * the default FTRACE_STACK_ENTRIES-entry stack in the event itself.
2788          */
2789         preempt_disable_notrace();
2790
2791         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792         /*
2793          * We don't need any atomic variables, just a barrier.
2794          * If an interrupt comes in, we don't care, because it would
2795          * have exited and put the counter back to what we want.
2796          * We just need a barrier to keep gcc from moving things
2797          * around.
2798          */
2799         barrier();
2800         if (use_stack == 1) {
2801                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2802                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2803
2804                 if (regs)
2805                         save_stack_trace_regs(regs, &trace);
2806                 else
2807                         save_stack_trace(&trace);
2808
2809                 if (trace.nr_entries > size)
2810                         size = trace.nr_entries;
2811         } else
2812                 /* From now on, use_stack is a boolean */
2813                 use_stack = 0;
2814
2815         size *= sizeof(unsigned long);
2816
2817         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818                                             sizeof(*entry) + size, flags, pc);
2819         if (!event)
2820                 goto out;
2821         entry = ring_buffer_event_data(event);
2822
2823         memset(&entry->caller, 0, size);
2824
2825         if (use_stack)
2826                 memcpy(&entry->caller, trace.entries,
2827                        trace.nr_entries * sizeof(unsigned long));
2828         else {
2829                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2830                 trace.entries           = entry->caller;
2831                 if (regs)
2832                         save_stack_trace_regs(regs, &trace);
2833                 else
2834                         save_stack_trace(&trace);
2835         }
2836
2837         entry->size = trace.nr_entries;
2838
2839         if (!call_filter_check_discard(call, entry, buffer, event))
2840                 __buffer_unlock_commit(buffer, event);
2841
2842  out:
2843         /* Again, don't let gcc optimize things here */
2844         barrier();
2845         __this_cpu_dec(ftrace_stack_reserve);
2846         preempt_enable_notrace();
2847
2848 }
2849
2850 static inline void ftrace_trace_stack(struct trace_array *tr,
2851                                       struct ring_buffer *buffer,
2852                                       unsigned long flags,
2853                                       int skip, int pc, struct pt_regs *regs)
2854 {
2855         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856                 return;
2857
2858         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859 }
2860
2861 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862                    int pc)
2863 {
2864         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865
2866         if (rcu_is_watching()) {
2867                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868                 return;
2869         }
2870
2871         /*
2872          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873          * but if the above rcu_is_watching() failed, then the NMI
2874          * triggered someplace critical, and rcu_irq_enter() should
2875          * not be called from NMI.
2876          */
2877         if (unlikely(in_nmi()))
2878                 return;
2879
2880         rcu_irq_enter_irqson();
2881         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882         rcu_irq_exit_irqson();
2883 }
2884
2885 /**
2886  * trace_dump_stack - record a stack back trace in the trace buffer
2887  * @skip: Number of functions to skip (helper handlers)
2888  */
2889 void trace_dump_stack(int skip)
2890 {
2891         unsigned long flags;
2892
2893         if (tracing_disabled || tracing_selftest_running)
2894                 return;
2895
2896         local_save_flags(flags);
2897
2898 #ifndef CONFIG_UNWINDER_ORC
2899         /* Skip 1 to skip this function. */
2900         skip++;
2901 #endif
2902         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903                              flags, skip, preempt_count(), NULL);
2904 }
2905 EXPORT_SYMBOL_GPL(trace_dump_stack);
2906
2907 static DEFINE_PER_CPU(int, user_stack_count);
2908
2909 void
2910 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911 {
2912         struct trace_event_call *call = &event_user_stack;
2913         struct ring_buffer_event *event;
2914         struct userstack_entry *entry;
2915         struct stack_trace trace;
2916
2917         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918                 return;
2919
2920         /*
2921          * NMIs cannot handle page faults, even with fixups.
2922          * Saving the user stack can (and often does) fault.
2923          */
2924         if (unlikely(in_nmi()))
2925                 return;
2926
2927         /*
2928          * prevent recursion, since the user stack tracing may
2929          * trigger other kernel events.
2930          */
2931         preempt_disable();
2932         if (__this_cpu_read(user_stack_count))
2933                 goto out;
2934
2935         __this_cpu_inc(user_stack_count);
2936
2937         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938                                             sizeof(*entry), flags, pc);
2939         if (!event)
2940                 goto out_drop_count;
2941         entry   = ring_buffer_event_data(event);
2942
2943         entry->tgid             = current->tgid;
2944         memset(&entry->caller, 0, sizeof(entry->caller));
2945
2946         trace.nr_entries        = 0;
2947         trace.max_entries       = FTRACE_STACK_ENTRIES;
2948         trace.skip              = 0;
2949         trace.entries           = entry->caller;
2950
2951         save_stack_trace_user(&trace);
2952         if (!call_filter_check_discard(call, entry, buffer, event))
2953                 __buffer_unlock_commit(buffer, event);
2954
2955  out_drop_count:
2956         __this_cpu_dec(user_stack_count);
2957  out:
2958         preempt_enable();
2959 }
2960
2961 #ifdef UNUSED
2962 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963 {
2964         ftrace_trace_userstack(tr, flags, preempt_count());
2965 }
2966 #endif /* UNUSED */
2967
2968 #endif /* CONFIG_STACKTRACE */
2969
2970 /* created for use with alloc_percpu */
2971 struct trace_buffer_struct {
2972         int nesting;
2973         char buffer[4][TRACE_BUF_SIZE];
2974 };
2975
2976 static struct trace_buffer_struct *trace_percpu_buffer;
2977
2978 /*
2979  * This allows for lockless recording.  If we're nested too deeply, then
2980  * this returns NULL.
2981  */
2982 static char *get_trace_buf(void)
2983 {
2984         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985
2986         if (!buffer || buffer->nesting >= 4)
2987                 return NULL;
2988
2989         buffer->nesting++;
2990
2991         /* Interrupts must see nesting incremented before we use the buffer */
2992         barrier();
2993         return &buffer->buffer[buffer->nesting][0];
2994 }
2995
2996 static void put_trace_buf(void)
2997 {
2998         /* Don't let the decrement of nesting leak before this */
2999         barrier();
3000         this_cpu_dec(trace_percpu_buffer->nesting);
3001 }
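/*
 * Illustrative sketch, not part of the original file: get_trace_buf() and
 * put_trace_buf() nest up to four levels (task, softirq, irq, NMI).  A
 * caller must run with preemption disabled, tolerate a NULL return and
 * always pair the two calls.  "demo_scratch_user" is a hypothetical name.
 */
static void __maybe_unused demo_scratch_user(void)
{
        char *buf;

        preempt_disable_notrace();
        buf = get_trace_buf();
        if (buf) {
                snprintf(buf, TRACE_BUF_SIZE, "scratch text");
                put_trace_buf();
        }
        preempt_enable_notrace();
}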
3002
3003 static int alloc_percpu_trace_buffer(void)
3004 {
3005         struct trace_buffer_struct *buffers;
3006
3007         buffers = alloc_percpu(struct trace_buffer_struct);
3008         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009                 return -ENOMEM;
3010
3011         trace_percpu_buffer = buffers;
3012         return 0;
3013 }
3014
3015 static int buffers_allocated;
3016
3017 void trace_printk_init_buffers(void)
3018 {
3019         if (buffers_allocated)
3020                 return;
3021
3022         if (alloc_percpu_trace_buffer())
3023                 return;
3024
3025         /* trace_printk() is for debug use only. Don't use it in production. */
3026
3027         pr_warn("\n");
3028         pr_warn("**********************************************************\n");
3029         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3030         pr_warn("**                                                      **\n");
3031         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3032         pr_warn("**                                                      **\n");
3033         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3034         pr_warn("** unsafe for production use.                           **\n");
3035         pr_warn("**                                                      **\n");
3036         pr_warn("** If you see this message and you are not debugging    **\n");
3037         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3038         pr_warn("**                                                      **\n");
3039         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3040         pr_warn("**********************************************************\n");
3041
3042         /* Expand the buffers to set size */
3043         tracing_update_buffers();
3044
3045         buffers_allocated = 1;
3046
3047         /*
3048          * trace_printk_init_buffers() can be called by modules.
3049          * If that happens, then we need to start cmdline recording
3050          * directly here. If the global_trace.buffer is already
3051          * allocated here, then this was called by module code.
3052          */
3053         if (global_trace.trace_buffer.buffer)
3054                 tracing_start_cmdline_record();
3055 }
3056
3057 void trace_printk_start_comm(void)
3058 {
3059         /* Start tracing comms if trace printk is set */
3060         if (!buffers_allocated)
3061                 return;
3062         tracing_start_cmdline_record();
3063 }
3064
3065 static void trace_printk_start_stop_comm(int enabled)
3066 {
3067         if (!buffers_allocated)
3068                 return;
3069
3070         if (enabled)
3071                 tracing_start_cmdline_record();
3072         else
3073                 tracing_stop_cmdline_record();
3074 }
3075
3076 /**
3077  * trace_vbprintk - write a binary message to the tracing buffer
3078  *
3079  */
3080 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082         struct trace_event_call *call = &event_bprint;
3083         struct ring_buffer_event *event;
3084         struct ring_buffer *buffer;
3085         struct trace_array *tr = &global_trace;
3086         struct bprint_entry *entry;
3087         unsigned long flags;
3088         char *tbuffer;
3089         int len = 0, size, pc;
3090
3091         if (unlikely(tracing_selftest_running || tracing_disabled))
3092                 return 0;
3093
3094         /* Don't pollute graph traces with trace_vprintk internals */
3095         pause_graph_tracing();
3096
3097         pc = preempt_count();
3098         preempt_disable_notrace();
3099
3100         tbuffer = get_trace_buf();
3101         if (!tbuffer) {
3102                 len = 0;
3103                 goto out_nobuffer;
3104         }
3105
3106         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3107
3108         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3109                 goto out;
3110
3111         local_save_flags(flags);
3112         size = sizeof(*entry) + sizeof(u32) * len;
3113         buffer = tr->trace_buffer.buffer;
3114         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3115                                             flags, pc);
3116         if (!event)
3117                 goto out;
3118         entry = ring_buffer_event_data(event);
3119         entry->ip                       = ip;
3120         entry->fmt                      = fmt;
3121
3122         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3123         if (!call_filter_check_discard(call, entry, buffer, event)) {
3124                 __buffer_unlock_commit(buffer, event);
3125                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3126         }
3127
3128 out:
3129         put_trace_buf();
3130
3131 out_nobuffer:
3132         preempt_enable_notrace();
3133         unpause_graph_tracing();
3134
3135         return len;
3136 }
3137 EXPORT_SYMBOL_GPL(trace_vbprintk);
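/*
 * Illustrative sketch, not part of the original file: trace_printk() with
 * a compile-time constant format is routed through the bprint path and
 * ends up in trace_vbprintk() above, storing only the format pointer and
 * the binary arguments.  "demo_trace_printk_user" is a hypothetical name.
 */
static void __maybe_unused demo_trace_printk_user(int value)
{
        trace_printk("demo value: %d\n", value);
}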
3138
3139 __printf(3, 0)
3140 static int
3141 __trace_array_vprintk(struct ring_buffer *buffer,
3142                       unsigned long ip, const char *fmt, va_list args)
3143 {
3144         struct trace_event_call *call = &event_print;
3145         struct ring_buffer_event *event;
3146         int len = 0, size, pc;
3147         struct print_entry *entry;
3148         unsigned long flags;
3149         char *tbuffer;
3150
3151         if (tracing_disabled || tracing_selftest_running)
3152                 return 0;
3153
3154         /* Don't pollute graph traces with trace_vprintk internals */
3155         pause_graph_tracing();
3156
3157         pc = preempt_count();
3158         preempt_disable_notrace();
3159
3161         tbuffer = get_trace_buf();
3162         if (!tbuffer) {
3163                 len = 0;
3164                 goto out_nobuffer;
3165         }
3166
3167         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3168
3169         local_save_flags(flags);
3170         size = sizeof(*entry) + len + 1;
3171         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3172                                             flags, pc);
3173         if (!event)
3174                 goto out;
3175         entry = ring_buffer_event_data(event);
3176         entry->ip = ip;
3177
3178         memcpy(&entry->buf, tbuffer, len + 1);
3179         if (!call_filter_check_discard(call, entry, buffer, event)) {
3180                 __buffer_unlock_commit(buffer, event);
3181                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3182         }
3183
3184 out:
3185         put_trace_buf();
3186
3187 out_nobuffer:
3188         preempt_enable_notrace();
3189         unpause_graph_tracing();
3190
3191         return len;
3192 }
3193
3194 __printf(3, 0)
3195 int trace_array_vprintk(struct trace_array *tr,
3196                         unsigned long ip, const char *fmt, va_list args)
3197 {
3198         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3199 }
3200
3201 __printf(3, 0)
3202 int trace_array_printk(struct trace_array *tr,
3203                        unsigned long ip, const char *fmt, ...)
3204 {
3205         int ret;
3206         va_list ap;
3207
3208         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3209                 return 0;
3210
3211         va_start(ap, fmt);
3212         ret = trace_array_vprintk(tr, ip, fmt, ap);
3213         va_end(ap);
3214         return ret;
3215 }
3216
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219                            unsigned long ip, const char *fmt, ...)
3220 {
3221         int ret;
3222         va_list ap;
3223
3224         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225                 return 0;
3226
3227         va_start(ap, fmt);
3228         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229         va_end(ap);
3230         return ret;
3231 }
3232
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236         return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243
3244         iter->idx++;
3245         if (buf_iter)
3246                 ring_buffer_read(buf_iter, NULL);
3247 }
3248
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251                 unsigned long *lost_events)
3252 {
3253         struct ring_buffer_event *event;
3254         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255
3256         if (buf_iter)
3257                 event = ring_buffer_iter_peek(buf_iter, ts);
3258         else
3259                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260                                          lost_events);
3261
3262         if (event) {
3263                 iter->ent_size = ring_buffer_event_length(event);
3264                 return ring_buffer_event_data(event);
3265         }
3266         iter->ent_size = 0;
3267         return NULL;
3268 }
3269
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272                   unsigned long *missing_events, u64 *ent_ts)
3273 {
3274         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275         struct trace_entry *ent, *next = NULL;
3276         unsigned long lost_events = 0, next_lost = 0;
3277         int cpu_file = iter->cpu_file;
3278         u64 next_ts = 0, ts;
3279         int next_cpu = -1;
3280         int next_size = 0;
3281         int cpu;
3282
3283         /*
3284          * If we are in a per_cpu trace file, don't bother iterating over
3285          * all CPUs; peek at that CPU directly.
3286          */
3287         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3289                         return NULL;
3290                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291                 if (ent_cpu)
3292                         *ent_cpu = cpu_file;
3293
3294                 return ent;
3295         }
3296
3297         for_each_tracing_cpu(cpu) {
3298
3299                 if (ring_buffer_empty_cpu(buffer, cpu))
3300                         continue;
3301
3302                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303
3304                 /*
3305                  * Pick the entry with the smallest timestamp:
3306                  */
3307                 if (ent && (!next || ts < next_ts)) {
3308                         next = ent;
3309                         next_cpu = cpu;
3310                         next_ts = ts;
3311                         next_lost = lost_events;
3312                         next_size = iter->ent_size;
3313                 }
3314         }
3315
3316         iter->ent_size = next_size;
3317
3318         if (ent_cpu)
3319                 *ent_cpu = next_cpu;
3320
3321         if (ent_ts)
3322                 *ent_ts = next_ts;
3323
3324         if (missing_events)
3325                 *missing_events = next_lost;
3326
3327         return next;
3328 }
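/*
 * Illustrative sketch, not part of the original file: the loop above is one
 * step of a k-way merge -- peek at every non-empty per-CPU stream and take
 * the entry with the smallest timestamp.  A stand-alone, user-space analogue
 * of that selection step (struct sample and all names here are made up):
 */
#if 0
#include <stdio.h>

struct sample {
        unsigned long long ts;          /* timestamp, like the ring buffer's ts */
        int payload;
};

/* Return the index of the stream whose head has the smallest timestamp. */
static int pick_next_stream(struct sample *heads[], int nr_streams)
{
        unsigned long long next_ts = 0;
        int next = -1;
        int i;

        for (i = 0; i < nr_streams; i++) {
                if (!heads[i])                  /* empty stream == empty CPU buffer */
                        continue;
                if (next < 0 || heads[i]->ts < next_ts) {
                        next = i;
                        next_ts = heads[i]->ts;
                }
        }
        return next;
}

int main(void)
{
        struct sample a = { .ts = 30, .payload = 1 };
        struct sample b = { .ts = 10, .payload = 2 };
        struct sample *heads[] = { &a, &b };

        printf("next stream: %d\n", pick_next_stream(heads, 2));        /* prints 1 */
        return 0;
}
#endif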
3329
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332                                           int *ent_cpu, u64 *ent_ts)
3333 {
3334         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340         iter->ent = __find_next_entry(iter, &iter->cpu,
3341                                       &iter->lost_events, &iter->ts);
3342
3343         if (iter->ent)
3344                 trace_iterator_increment(iter);
3345
3346         return iter->ent ? iter : NULL;
3347 }
3348
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352                             &iter->lost_events);
3353 }
3354
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357         struct trace_iterator *iter = m->private;
3358         int i = (int)*pos;
3359         void *ent;
3360
3361         WARN_ON_ONCE(iter->leftover);
3362
3363         (*pos)++;
3364
3365         /* can't go backwards */
3366         if (iter->idx > i)
3367                 return NULL;
3368
3369         if (iter->idx < 0)
3370                 ent = trace_find_next_entry_inc(iter);
3371         else
3372                 ent = iter;
3373
3374         while (ent && iter->idx < i)
3375                 ent = trace_find_next_entry_inc(iter);
3376
3377         iter->pos = *pos;
3378
3379         return ent;
3380 }
3381
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384         struct ring_buffer_event *event;
3385         struct ring_buffer_iter *buf_iter;
3386         unsigned long entries = 0;
3387         u64 ts;
3388
3389         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390
3391         buf_iter = trace_buffer_iter(iter, cpu);
3392         if (!buf_iter)
3393                 return;
3394
3395         ring_buffer_iter_reset(buf_iter);
3396
3397         /*
3398          * With the max latency tracers it is possible that a reset
3399          * never took place on a cpu. This shows up as entries with
3400          * timestamps before the start of the buffer; skip and count them.
3401          */
3402         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403                 if (ts >= iter->trace_buffer->time_start)
3404                         break;
3405                 entries++;
3406                 ring_buffer_read(buf_iter, NULL);
3407         }
3408
3409         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411
3412 /*
3413  * The current tracer is copied to avoid taking a global lock
3414  * all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418         struct trace_iterator *iter = m->private;
3419         struct trace_array *tr = iter->tr;
3420         int cpu_file = iter->cpu_file;
3421         void *p = NULL;
3422         loff_t l = 0;
3423         int cpu;
3424
3425         /*
3426          * Copy the tracer to avoid using a global lock all around.
3427          * iter->trace is a copy of current_trace; the name pointer can
3428          * be compared instead of using strcmp(), as iter->trace->name
3429          * points to the same string as current_trace->name.
3430          */
3431         mutex_lock(&trace_types_lock);
3432         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433                 *iter->trace = *tr->current_trace;
3434         mutex_unlock(&trace_types_lock);
3435
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437         if (iter->snapshot && iter->trace->use_max_tr)
3438                 return ERR_PTR(-EBUSY);
3439 #endif
3440
3441         if (!iter->snapshot)
3442                 atomic_inc(&trace_record_taskinfo_disabled);
3443
3444         if (*pos != iter->pos) {
3445                 iter->ent = NULL;
3446                 iter->cpu = 0;
3447                 iter->idx = -1;
3448
3449                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450                         for_each_tracing_cpu(cpu)
3451                                 tracing_iter_reset(iter, cpu);
3452                 } else
3453                         tracing_iter_reset(iter, cpu_file);
3454
3455                 iter->leftover = 0;
3456                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457                         ;
3458
3459         } else {
3460                 /*
3461                  * If we overflowed the seq_file before, then we want
3462                  * to just reuse the trace_seq buffer again.
3463                  */
3464                 if (iter->leftover)
3465                         p = iter;
3466                 else {
3467                         l = *pos - 1;
3468                         p = s_next(m, p, &l);
3469                 }
3470         }
3471
3472         trace_event_read_lock();
3473         trace_access_lock(cpu_file);
3474         return p;
3475 }
3476
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479         struct trace_iterator *iter = m->private;
3480
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482         if (iter->snapshot && iter->trace->use_max_tr)
3483                 return;
3484 #endif
3485
3486         if (!iter->snapshot)
3487                 atomic_dec(&trace_record_taskinfo_disabled);
3488
3489         trace_access_unlock(iter->cpu_file);
3490         trace_event_read_unlock();
3491 }
3492
3493 static void
3494 get_total_entries(struct trace_buffer *buf,
3495                   unsigned long *total, unsigned long *entries)
3496 {
3497         unsigned long count;
3498         int cpu;
3499
3500         *total = 0;
3501         *entries = 0;
3502
3503         for_each_tracing_cpu(cpu) {
3504                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3505                 /*
3506                  * If this buffer has skipped entries, then we hold all
3507                  * entries for the trace and need to ignore the ones
3508                  * before the buffer's start timestamp.
3509                  */
3510                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3511                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3512                         /* total is the same as the entries */
3513                         *total += count;
3514                 } else
3515                         *total += count +
3516                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3517                 *entries += count;
3518         }
3519 }
3520
3521 static void print_lat_help_header(struct seq_file *m)
3522 {
3523         seq_puts(m, "#                  _------=> CPU#            \n"
3524                     "#                 / _-----=> irqs-off        \n"
3525                     "#                | / _----=> need-resched    \n"
3526                     "#                || / _---=> hardirq/softirq \n"
3527                     "#                ||| / _--=> preempt-depth   \n"
3528                     "#                |||| /     delay            \n"
3529                     "#  cmd     pid   ||||| time  |   caller      \n"
3530                     "#     \\   /      |||||  \\    |   /         \n");
3531 }
3532
3533 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3534 {
3535         unsigned long total;
3536         unsigned long entries;
3537
3538         get_total_entries(buf, &total, &entries);
3539         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3540                    entries, total, num_online_cpus());
3541         seq_puts(m, "#\n");
3542 }
3543
3544 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3545                                    unsigned int flags)
3546 {
3547         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3548
3549         print_event_info(buf, m);
3550
3551         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3552         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3553 }
3554
3555 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3556                                        unsigned int flags)
3557 {
3558         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3559         const char tgid_space[] = "          ";
3560         const char space[] = "  ";
3561
3562         print_event_info(buf, m);
3563
3564         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3565                    tgid ? tgid_space : space);
3566         seq_printf(m, "#                          %s / _----=> need-resched\n",
3567                    tgid ? tgid_space : space);
3568         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3569                    tgid ? tgid_space : space);
3570         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3571                    tgid ? tgid_space : space);
3572         seq_printf(m, "#                          %s||| /     delay\n",
3573                    tgid ? tgid_space : space);
3574         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3575                    tgid ? "   TGID   " : space);
3576         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3577                    tgid ? "     |    " : space);
3578 }
3579
3580 void
3581 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3582 {
3583         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3584         struct trace_buffer *buf = iter->trace_buffer;
3585         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3586         struct tracer *type = iter->trace;
3587         unsigned long entries;
3588         unsigned long total;
3589         const char *name;
3590
3591         name = type->name;
3592
3593         get_total_entries(buf, &total, &entries);
3594
3595         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3596                    name, UTS_RELEASE);
3597         seq_puts(m, "# -----------------------------------"
3598                  "---------------------------------\n");
3599         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3600                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3601                    nsecs_to_usecs(data->saved_latency),
3602                    entries,
3603                    total,
3604                    buf->cpu,
3605 #if defined(CONFIG_PREEMPT_NONE)
3606                    "server",
3607 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3608                    "desktop",
3609 #elif defined(CONFIG_PREEMPT)
3610                    "preempt",
3611 #else
3612                    "unknown",
3613 #endif
3614                    /* These are reserved for later use */
3615                    0, 0, 0, 0);
3616 #ifdef CONFIG_SMP
3617         seq_printf(m, " #P:%d)\n", num_online_cpus());
3618 #else
3619         seq_puts(m, ")\n");
3620 #endif
3621         seq_puts(m, "#    -----------------\n");
3622         seq_printf(m, "#    | task: %.16s-%d "
3623                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3624                    data->comm, data->pid,
3625                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3626                    data->policy, data->rt_priority);
3627         seq_puts(m, "#    -----------------\n");
3628
3629         if (data->critical_start) {
3630                 seq_puts(m, "#  => started at: ");
3631                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3632                 trace_print_seq(m, &iter->seq);
3633                 seq_puts(m, "\n#  => ended at:   ");
3634                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3635                 trace_print_seq(m, &iter->seq);
3636                 seq_puts(m, "\n#\n");
3637         }
3638
3639         seq_puts(m, "#\n");
3640 }
3641
3642 static void test_cpu_buff_start(struct trace_iterator *iter)
3643 {
3644         struct trace_seq *s = &iter->seq;
3645         struct trace_array *tr = iter->tr;
3646
3647         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3648                 return;
3649
3650         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3651                 return;
3652
3653         if (cpumask_available(iter->started) &&
3654             cpumask_test_cpu(iter->cpu, iter->started))
3655                 return;
3656
3657         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3658                 return;
3659
3660         if (cpumask_available(iter->started))
3661                 cpumask_set_cpu(iter->cpu, iter->started);
3662
3663         /* Don't print started cpu buffer for the first entry of the trace */
3664         if (iter->idx > 1)
3665                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3666                                 iter->cpu);
3667 }
3668
3669 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3670 {
3671         struct trace_array *tr = iter->tr;
3672         struct trace_seq *s = &iter->seq;
3673         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3674         struct trace_entry *entry;
3675         struct trace_event *event;
3676
3677         entry = iter->ent;
3678
3679         test_cpu_buff_start(iter);
3680
3681         event = ftrace_find_event(entry->type);
3682
3683         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3684                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3685                         trace_print_lat_context(iter);
3686                 else
3687                         trace_print_context(iter);
3688         }
3689
3690         if (trace_seq_has_overflowed(s))
3691                 return TRACE_TYPE_PARTIAL_LINE;
3692
3693         if (event)
3694                 return event->funcs->trace(iter, sym_flags, event);
3695
3696         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3697
3698         return trace_handle_return(s);
3699 }
3700
3701 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3702 {
3703         struct trace_array *tr = iter->tr;
3704         struct trace_seq *s = &iter->seq;
3705         struct trace_entry *entry;
3706         struct trace_event *event;
3707
3708         entry = iter->ent;
3709
3710         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3711                 trace_seq_printf(s, "%d %d %llu ",
3712                                  entry->pid, iter->cpu, iter->ts);
3713
3714         if (trace_seq_has_overflowed(s))
3715                 return TRACE_TYPE_PARTIAL_LINE;
3716
3717         event = ftrace_find_event(entry->type);
3718         if (event)
3719                 return event->funcs->raw(iter, 0, event);
3720
3721         trace_seq_printf(s, "%d ?\n", entry->type);
3722
3723         return trace_handle_return(s);
3724 }
3725
3726 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3727 {
3728         struct trace_array *tr = iter->tr;
3729         struct trace_seq *s = &iter->seq;
3730         unsigned char newline = '\n';
3731         struct trace_entry *entry;
3732         struct trace_event *event;
3733
3734         entry = iter->ent;
3735
3736         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3737                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3738                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3739                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3740                 if (trace_seq_has_overflowed(s))
3741                         return TRACE_TYPE_PARTIAL_LINE;
3742         }
3743
3744         event = ftrace_find_event(entry->type);
3745         if (event) {
3746                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3747                 if (ret != TRACE_TYPE_HANDLED)
3748                         return ret;
3749         }
3750
3751         SEQ_PUT_FIELD(s, newline);
3752
3753         return trace_handle_return(s);
3754 }
3755
3756 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3757 {
3758         struct trace_array *tr = iter->tr;
3759         struct trace_seq *s = &iter->seq;
3760         struct trace_entry *entry;
3761         struct trace_event *event;
3762
3763         entry = iter->ent;
3764
3765         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3766                 SEQ_PUT_FIELD(s, entry->pid);
3767                 SEQ_PUT_FIELD(s, iter->cpu);
3768                 SEQ_PUT_FIELD(s, iter->ts);
3769                 if (trace_seq_has_overflowed(s))
3770                         return TRACE_TYPE_PARTIAL_LINE;
3771         }
3772
3773         event = ftrace_find_event(entry->type);
3774         return event ? event->funcs->binary(iter, 0, event) :
3775                 TRACE_TYPE_HANDLED;
3776 }
3777
3778 int trace_empty(struct trace_iterator *iter)
3779 {
3780         struct ring_buffer_iter *buf_iter;
3781         int cpu;
3782
3783         /* If we are looking at one CPU buffer, only check that one */
3784         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3785                 cpu = iter->cpu_file;
3786                 buf_iter = trace_buffer_iter(iter, cpu);
3787                 if (buf_iter) {
3788                         if (!ring_buffer_iter_empty(buf_iter))
3789                                 return 0;
3790                 } else {
3791                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3792                                 return 0;
3793                 }
3794                 return 1;
3795         }
3796
3797         for_each_tracing_cpu(cpu) {
3798                 buf_iter = trace_buffer_iter(iter, cpu);
3799                 if (buf_iter) {
3800                         if (!ring_buffer_iter_empty(buf_iter))
3801                                 return 0;
3802                 } else {
3803                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3804                                 return 0;
3805                 }
3806         }
3807
3808         return 1;
3809 }
3810
3811 /*  Called with trace_event_read_lock() held. */
3812 enum print_line_t print_trace_line(struct trace_iterator *iter)
3813 {
3814         struct trace_array *tr = iter->tr;
3815         unsigned long trace_flags = tr->trace_flags;
3816         enum print_line_t ret;
3817
3818         if (iter->lost_events) {
3819                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3820                                  iter->cpu, iter->lost_events);
3821                 if (trace_seq_has_overflowed(&iter->seq))
3822                         return TRACE_TYPE_PARTIAL_LINE;
3823         }
3824
3825         if (iter->trace && iter->trace->print_line) {
3826                 ret = iter->trace->print_line(iter);
3827                 if (ret != TRACE_TYPE_UNHANDLED)
3828                         return ret;
3829         }
3830
3831         if (iter->ent->type == TRACE_BPUTS &&
3832                         trace_flags & TRACE_ITER_PRINTK &&
3833                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834                 return trace_print_bputs_msg_only(iter);
3835
3836         if (iter->ent->type == TRACE_BPRINT &&
3837                         trace_flags & TRACE_ITER_PRINTK &&
3838                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3839                 return trace_print_bprintk_msg_only(iter);
3840
3841         if (iter->ent->type == TRACE_PRINT &&
3842                         trace_flags & TRACE_ITER_PRINTK &&
3843                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3844                 return trace_print_printk_msg_only(iter);
3845
3846         if (trace_flags & TRACE_ITER_BIN)
3847                 return print_bin_fmt(iter);
3848
3849         if (trace_flags & TRACE_ITER_HEX)
3850                 return print_hex_fmt(iter);
3851
3852         if (trace_flags & TRACE_ITER_RAW)
3853                 return print_raw_fmt(iter);
3854
3855         return print_trace_fmt(iter);
3856 }
3857
3858 void trace_latency_header(struct seq_file *m)
3859 {
3860         struct trace_iterator *iter = m->private;
3861         struct trace_array *tr = iter->tr;
3862
3863         /* print nothing if the buffers are empty */
3864         if (trace_empty(iter))
3865                 return;
3866
3867         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3868                 print_trace_header(m, iter);
3869
3870         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3871                 print_lat_help_header(m);
3872 }
3873
3874 void trace_default_header(struct seq_file *m)
3875 {
3876         struct trace_iterator *iter = m->private;
3877         struct trace_array *tr = iter->tr;
3878         unsigned long trace_flags = tr->trace_flags;
3879
3880         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3881                 return;
3882
3883         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3884                 /* print nothing if the buffers are empty */
3885                 if (trace_empty(iter))
3886                         return;
3887                 print_trace_header(m, iter);
3888                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3889                         print_lat_help_header(m);
3890         } else {
3891                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3892                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3893                                 print_func_help_header_irq(iter->trace_buffer,
3894                                                            m, trace_flags);
3895                         else
3896                                 print_func_help_header(iter->trace_buffer, m,
3897                                                        trace_flags);
3898                 }
3899         }
3900 }
3901
3902 static void test_ftrace_alive(struct seq_file *m)
3903 {
3904         if (!ftrace_is_dead())
3905                 return;
3906         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3907                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3908 }
3909
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 static void show_snapshot_main_help(struct seq_file *m)
3912 {
3913         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3914                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3915                     "#                      Takes a snapshot of the main buffer.\n"
3916                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3917                     "#                      (Doesn't have to be '2'; works with any number that\n"
3918                     "#                       is not a '0' or '1')\n");
3919 }
3920
3921 static void show_snapshot_percpu_help(struct seq_file *m)
3922 {
3923         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3924 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3925         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3926                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3927 #else
3928         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3929                     "#                     Must use main snapshot file to allocate.\n");
3930 #endif
3931         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3932                     "#                      (Doesn't have to be '2'; works with any number that\n"
3933                     "#                       is not a '0' or '1')\n");
3934 }
3935
3936 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3937 {
3938         if (iter->tr->allocated_snapshot)
3939                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3940         else
3941                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3942
3943         seq_puts(m, "# Snapshot commands:\n");
3944         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3945                 show_snapshot_main_help(m);
3946         else
3947                 show_snapshot_percpu_help(m);
3948 }
3949 #else
3950 /* Should never be called */
3951 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3952 #endif
3953
3954 static int s_show(struct seq_file *m, void *v)
3955 {
3956         struct trace_iterator *iter = v;
3957         int ret;
3958
3959         if (iter->ent == NULL) {
3960                 if (iter->tr) {
3961                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3962                         seq_puts(m, "#\n");
3963                         test_ftrace_alive(m);
3964                 }
3965                 if (iter->snapshot && trace_empty(iter))
3966                         print_snapshot_help(m, iter);
3967                 else if (iter->trace && iter->trace->print_header)
3968                         iter->trace->print_header(m);
3969                 else
3970                         trace_default_header(m);
3971
3972         } else if (iter->leftover) {
3973                 /*
3974                  * If we filled the seq_file buffer earlier, we
3975                  * want to just show it now.
3976                  */
3977                 ret = trace_print_seq(m, &iter->seq);
3978
3979                 /* ret should this time be zero, but you never know */
3980                 iter->leftover = ret;
3981
3982         } else {
3983                 print_trace_line(iter);
3984                 ret = trace_print_seq(m, &iter->seq);
3985                 /*
3986                  * If we overflow the seq_file buffer, then it will
3987                  * ask us for this data again at start up.
3988                  * Use that instead.
3989                  *  ret is 0 if seq_file write succeeded.
3990                  *        -1 otherwise.
3991                  */
3992                 iter->leftover = ret;
3993         }
3994
3995         return 0;
3996 }
3997
3998 /*
3999  * Should be used after trace_array_get(); trace_types_lock
4000  * ensures that i_cdev was already initialized.
4001  */
4002 static inline int tracing_get_cpu(struct inode *inode)
4003 {
4004         if (inode->i_cdev) /* See trace_create_cpu_file() */
4005                 return (long)inode->i_cdev - 1;
4006         return RING_BUFFER_ALL_CPUS;
4007 }
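/*
 * Illustrative sketch, not part of the original file: the decode above works
 * because the per-cpu file creation code stores "cpu + 1" in i_cdev, so a
 * NULL i_cdev (the default) can mean "all CPUs".  A hypothetical encode
 * helper showing the round trip (relies on headers already included here):
 */
#if 0
static inline void demo_tracing_set_cpu(struct inode *inode, int cpu)
{
        /* store cpu + 1 so that cpu 0 is distinguishable from "not set" */
        inode->i_cdev = (void *)(long)(cpu + 1);
}

/* tracing_get_cpu(inode) then returns cpu, or RING_BUFFER_ALL_CPUS if unset */
#endif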
4008
4009 static const struct seq_operations tracer_seq_ops = {
4010         .start          = s_start,
4011         .next           = s_next,
4012         .stop           = s_stop,
4013         .show           = s_show,
4014 };
4015
4016 static struct trace_iterator *
4017 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4018 {
4019         struct trace_array *tr = inode->i_private;
4020         struct trace_iterator *iter;
4021         int cpu;
4022
4023         if (tracing_disabled)
4024                 return ERR_PTR(-ENODEV);
4025
4026         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4027         if (!iter)
4028                 return ERR_PTR(-ENOMEM);
4029
4030         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4031                                     GFP_KERNEL);
4032         if (!iter->buffer_iter)
4033                 goto release;
4034
4035         /*
4036          * We make a copy of the current tracer to avoid concurrent
4037          * changes to it while we are reading.
4038          */
4039         mutex_lock(&trace_types_lock);
4040         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4041         if (!iter->trace)
4042                 goto fail;
4043
4044         *iter->trace = *tr->current_trace;
4045
4046         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4047                 goto fail;
4048
4049         iter->tr = tr;
4050
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052         /* Currently only the top directory has a snapshot */
4053         if (tr->current_trace->print_max || snapshot)
4054                 iter->trace_buffer = &tr->max_buffer;
4055         else
4056 #endif
4057                 iter->trace_buffer = &tr->trace_buffer;
4058         iter->snapshot = snapshot;
4059         iter->pos = -1;
4060         iter->cpu_file = tracing_get_cpu(inode);
4061         mutex_init(&iter->mutex);
4062
4063         /* Notify the tracer early; before we stop tracing. */
4064         if (iter->trace && iter->trace->open)
4065                 iter->trace->open(iter);
4066
4067         /* Annotate start of buffers if we had overruns */
4068         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4069                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4070
4071         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4072         if (trace_clocks[tr->clock_id].in_ns)
4073                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4074
4075         /* stop the trace while dumping if we are not opening "snapshot" */
4076         if (!iter->snapshot)
4077                 tracing_stop_tr(tr);
4078
4079         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4080                 for_each_tracing_cpu(cpu) {
4081                         iter->buffer_iter[cpu] =
4082                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4083                                                          cpu, GFP_KERNEL);
4084                 }
4085                 ring_buffer_read_prepare_sync();
4086                 for_each_tracing_cpu(cpu) {
4087                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4088                         tracing_iter_reset(iter, cpu);
4089                 }
4090         } else {
4091                 cpu = iter->cpu_file;
4092                 iter->buffer_iter[cpu] =
4093                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4094                                                  cpu, GFP_KERNEL);
4095                 ring_buffer_read_prepare_sync();
4096                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4097                 tracing_iter_reset(iter, cpu);
4098         }
4099
4100         mutex_unlock(&trace_types_lock);
4101
4102         return iter;
4103
4104  fail:
4105         mutex_unlock(&trace_types_lock);
4106         kfree(iter->trace);
4107         kfree(iter->buffer_iter);
4108 release:
4109         seq_release_private(inode, file);
4110         return ERR_PTR(-ENOMEM);
4111 }
4112
4113 int tracing_open_generic(struct inode *inode, struct file *filp)
4114 {
4115         if (tracing_disabled)
4116                 return -ENODEV;
4117
4118         filp->private_data = inode->i_private;
4119         return 0;
4120 }
4121
4122 bool tracing_is_disabled(void)
4123 {
4124         return tracing_disabled ? true : false;
4125 }
4126
4127 /*
4128  * Open and update trace_array ref count.
4129  * Must have the current trace_array passed to it.
4130  */
4131 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4132 {
4133         struct trace_array *tr = inode->i_private;
4134
4135         if (tracing_disabled)
4136                 return -ENODEV;
4137
4138         if (trace_array_get(tr) < 0)
4139                 return -ENODEV;
4140
4141         filp->private_data = inode->i_private;
4142
4143         return 0;
4144 }
4145
4146 static int tracing_release(struct inode *inode, struct file *file)
4147 {
4148         struct trace_array *tr = inode->i_private;
4149         struct seq_file *m = file->private_data;
4150         struct trace_iterator *iter;
4151         int cpu;
4152
4153         if (!(file->f_mode & FMODE_READ)) {
4154                 trace_array_put(tr);
4155                 return 0;
4156         }
4157
4158         /* Writes do not use seq_file */
4159         iter = m->private;
4160         mutex_lock(&trace_types_lock);
4161
4162         for_each_tracing_cpu(cpu) {
4163                 if (iter->buffer_iter[cpu])
4164                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4165         }
4166
4167         if (iter->trace && iter->trace->close)
4168                 iter->trace->close(iter);
4169
4170         if (!iter->snapshot)
4171                 /* reenable tracing if it was previously enabled */
4172                 tracing_start_tr(tr);
4173
4174         __trace_array_put(tr);
4175
4176         mutex_unlock(&trace_types_lock);
4177
4178         mutex_destroy(&iter->mutex);
4179         free_cpumask_var(iter->started);
4180         kfree(iter->trace);
4181         kfree(iter->buffer_iter);
4182         seq_release_private(inode, file);
4183
4184         return 0;
4185 }
4186
4187 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4188 {
4189         struct trace_array *tr = inode->i_private;
4190
4191         trace_array_put(tr);
4192         return 0;
4193 }
4194
4195 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4196 {
4197         struct trace_array *tr = inode->i_private;
4198
4199         trace_array_put(tr);
4200
4201         return single_release(inode, file);
4202 }
4203
4204 static int tracing_open(struct inode *inode, struct file *file)
4205 {
4206         struct trace_array *tr = inode->i_private;
4207         struct trace_iterator *iter;
4208         int ret = 0;
4209
4210         if (trace_array_get(tr) < 0)
4211                 return -ENODEV;
4212
4213         /* If this file was open for write, then erase contents */
4214         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4215                 int cpu = tracing_get_cpu(inode);
4216                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4217
4218 #ifdef CONFIG_TRACER_MAX_TRACE
4219                 if (tr->current_trace->print_max)
4220                         trace_buf = &tr->max_buffer;
4221 #endif
4222
4223                 if (cpu == RING_BUFFER_ALL_CPUS)
4224                         tracing_reset_online_cpus(trace_buf);
4225                 else
4226                         tracing_reset(trace_buf, cpu);
4227         }
4228
4229         if (file->f_mode & FMODE_READ) {
4230                 iter = __tracing_open(inode, file, false);
4231                 if (IS_ERR(iter))
4232                         ret = PTR_ERR(iter);
4233                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4234                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4235         }
4236
4237         if (ret < 0)
4238                 trace_array_put(tr);
4239
4240         return ret;
4241 }
4242
4243 /*
4244  * Some tracers are not suitable for instance buffers.
4245  * A tracer is always available for the global array (toplevel)
4246  * or if it explicitly states that it is.
4247  */
4248 static bool
4249 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4250 {
4251         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4252 }
4253
4254 /* Find the next tracer that this trace array may use */
4255 static struct tracer *
4256 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4257 {
4258         while (t && !trace_ok_for_array(t, tr))
4259                 t = t->next;
4260
4261         return t;
4262 }
4263
4264 static void *
4265 t_next(struct seq_file *m, void *v, loff_t *pos)
4266 {
4267         struct trace_array *tr = m->private;
4268         struct tracer *t = v;
4269
4270         (*pos)++;
4271
4272         if (t)
4273                 t = get_tracer_for_array(tr, t->next);
4274
4275         return t;
4276 }
4277
4278 static void *t_start(struct seq_file *m, loff_t *pos)
4279 {
4280         struct trace_array *tr = m->private;
4281         struct tracer *t;
4282         loff_t l = 0;
4283
4284         mutex_lock(&trace_types_lock);
4285
4286         t = get_tracer_for_array(tr, trace_types);
4287         for (; t && l < *pos; t = t_next(m, t, &l))
4288                 ;
4289
4290         return t;
4291 }
4292
4293 static void t_stop(struct seq_file *m, void *p)
4294 {
4295         mutex_unlock(&trace_types_lock);
4296 }
4297
4298 static int t_show(struct seq_file *m, void *v)
4299 {
4300         struct tracer *t = v;
4301
4302         if (!t)
4303                 return 0;
4304
4305         seq_puts(m, t->name);
4306         if (t->next)
4307                 seq_putc(m, ' ');
4308         else
4309                 seq_putc(m, '\n');
4310
4311         return 0;
4312 }
4313
4314 static const struct seq_operations show_traces_seq_ops = {
4315         .start          = t_start,
4316         .next           = t_next,
4317         .stop           = t_stop,
4318         .show           = t_show,
4319 };
4320
4321 static int show_traces_open(struct inode *inode, struct file *file)
4322 {
4323         struct trace_array *tr = inode->i_private;
4324         struct seq_file *m;
4325         int ret;
4326
4327         if (tracing_disabled)
4328                 return -ENODEV;
4329
4330         ret = seq_open(file, &show_traces_seq_ops);
4331         if (ret)
4332                 return ret;
4333
4334         m = file->private_data;
4335         m->private = tr;
4336
4337         return 0;
4338 }
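/*
 * Illustrative sketch, not part of the original file: t_start/t_next/t_stop/
 * t_show above follow the standard seq_file iterator pattern.  A minimal
 * stand-alone variant walking a fixed array (all demo_* names are made up):
 */
#if 0
#include <linux/kernel.h>
#include <linux/seq_file.h>

static const char *demo_items[] = { "nop", "function", "function_graph" };

static void *demo_start(struct seq_file *m, loff_t *pos)
{
        /* return a cursor for element *pos, or NULL when iteration is done */
        return *pos < ARRAY_SIZE(demo_items) ? (void *)&demo_items[*pos] : NULL;
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
        (*pos)++;
        return demo_start(m, pos);
}

static void demo_stop(struct seq_file *m, void *v)
{
        /* nothing to unlock in this sketch */
}

static int demo_show(struct seq_file *m, void *v)
{
        seq_printf(m, "%s\n", *(const char **)v);
        return 0;
}

static const struct seq_operations demo_seq_ops = {
        .start          = demo_start,
        .next           = demo_next,
        .stop           = demo_stop,
        .show           = demo_show,
};
#endif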
4339
4340 static ssize_t
4341 tracing_write_stub(struct file *filp, const char __user *ubuf,
4342                    size_t count, loff_t *ppos)
4343 {
4344         return count;
4345 }
4346
4347 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4348 {
4349         int ret;
4350
4351         if (file->f_mode & FMODE_READ)
4352                 ret = seq_lseek(file, offset, whence);
4353         else
4354                 file->f_pos = ret = 0;
4355
4356         return ret;
4357 }
4358
4359 static const struct file_operations tracing_fops = {
4360         .open           = tracing_open,
4361         .read           = seq_read,
4362         .write          = tracing_write_stub,
4363         .llseek         = tracing_lseek,
4364         .release        = tracing_release,
4365 };
4366
4367 static const struct file_operations show_traces_fops = {
4368         .open           = show_traces_open,
4369         .read           = seq_read,
4370         .release        = seq_release,
4371         .llseek         = seq_lseek,
4372 };
4373
4374 static ssize_t
4375 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4376                      size_t count, loff_t *ppos)
4377 {
4378         struct trace_array *tr = file_inode(filp)->i_private;
4379         char *mask_str;
4380         int len;
4381
4382         len = snprintf(NULL, 0, "%*pb\n",
4383                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4384         mask_str = kmalloc(len, GFP_KERNEL);
4385         if (!mask_str)
4386                 return -ENOMEM;
4387
4388         len = snprintf(mask_str, len, "%*pb\n",
4389                        cpumask_pr_args(tr->tracing_cpumask));
4390         if (len >= count) {
4391                 count = -EINVAL;
4392                 goto out_err;
4393         }
4394         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4395
4396 out_err:
4397         kfree(mask_str);
4398
4399         return count;
4400 }
4401
4402 static ssize_t
4403 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4404                       size_t count, loff_t *ppos)
4405 {
4406         struct trace_array *tr = file_inode(filp)->i_private;
4407         cpumask_var_t tracing_cpumask_new;
4408         int err, cpu;
4409
4410         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4411                 return -ENOMEM;
4412
4413         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4414         if (err)
4415                 goto err_unlock;
4416
4417         local_irq_disable();
4418         arch_spin_lock(&tr->max_lock);
4419         for_each_tracing_cpu(cpu) {
4420                 /*
4421                  * Increase/decrease the disabled counter if we are
4422                  * about to flip a bit in the cpumask:
4423                  */
4424                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4425                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4426                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4427                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4428                 }
4429                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4430                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4431                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4432                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4433                 }
4434         }
4435         arch_spin_unlock(&tr->max_lock);
4436         local_irq_enable();
4437
4438         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4439         free_cpumask_var(tracing_cpumask_new);
4440
4441         return count;
4442
4443 err_unlock:
4444         free_cpumask_var(tracing_cpumask_new);
4445
4446         return err;
4447 }
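/*
 * Illustrative sketch, not part of the original file: "%*pb" together with
 * cpumask_pr_args() is the generic way to format a cpumask, mirroring the
 * cpumask_parse_user() call above.  A hypothetical helper that formats a
 * mask into a freshly allocated buffer (the demo name is made up):
 */
#if 0
#include <linux/cpumask.h>
#include <linux/slab.h>

static char *demo_format_cpumask(const struct cpumask *mask)
{
        int len = snprintf(NULL, 0, "%*pb\n", cpumask_pr_args(mask)) + 1;
        char *buf = kmalloc(len, GFP_KERNEL);

        if (buf)
                snprintf(buf, len, "%*pb\n", cpumask_pr_args(mask));
        return buf;             /* caller must kfree() it */
}
#endif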
4448
4449 static const struct file_operations tracing_cpumask_fops = {
4450         .open           = tracing_open_generic_tr,
4451         .read           = tracing_cpumask_read,
4452         .write          = tracing_cpumask_write,
4453         .release        = tracing_release_generic_tr,
4454         .llseek         = generic_file_llseek,
4455 };
4456
4457 static int tracing_trace_options_show(struct seq_file *m, void *v)
4458 {
4459         struct tracer_opt *trace_opts;
4460         struct trace_array *tr = m->private;
4461         u32 tracer_flags;
4462         int i;
4463
4464         mutex_lock(&trace_types_lock);
4465         tracer_flags = tr->current_trace->flags->val;
4466         trace_opts = tr->current_trace->flags->opts;
4467
4468         for (i = 0; trace_options[i]; i++) {
4469                 if (tr->trace_flags & (1 << i))
4470                         seq_printf(m, "%s\n", trace_options[i]);
4471                 else
4472                         seq_printf(m, "no%s\n", trace_options[i]);
4473         }
4474
4475         for (i = 0; trace_opts[i].name; i++) {
4476                 if (tracer_flags & trace_opts[i].bit)
4477                         seq_printf(m, "%s\n", trace_opts[i].name);
4478                 else
4479                         seq_printf(m, "no%s\n", trace_opts[i].name);
4480         }
4481         mutex_unlock(&trace_types_lock);
4482
4483         return 0;
4484 }
4485
4486 static int __set_tracer_option(struct trace_array *tr,
4487                                struct tracer_flags *tracer_flags,
4488                                struct tracer_opt *opts, int neg)
4489 {
4490         struct tracer *trace = tracer_flags->trace;
4491         int ret;
4492
4493         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4494         if (ret)
4495                 return ret;
4496
4497         if (neg)
4498                 tracer_flags->val &= ~opts->bit;
4499         else
4500                 tracer_flags->val |= opts->bit;
4501         return 0;
4502 }
4503
4504 /* Try to assign a tracer specific option */
4505 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4506 {
4507         struct tracer *trace = tr->current_trace;
4508         struct tracer_flags *tracer_flags = trace->flags;
4509         struct tracer_opt *opts = NULL;
4510         int i;
4511
4512         for (i = 0; tracer_flags->opts[i].name; i++) {
4513                 opts = &tracer_flags->opts[i];
4514
4515                 if (strcmp(cmp, opts->name) == 0)
4516                         return __set_tracer_option(tr, trace->flags, opts, neg);
4517         }
4518
4519         return -EINVAL;
4520 }
4521
4522 /* Some tracers require overwrite to stay enabled */
4523 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4524 {
4525         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4526                 return -1;
4527
4528         return 0;
4529 }
4530
4531 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4532 {
4533         /* do nothing if flag is already set */
4534         if (!!(tr->trace_flags & mask) == !!enabled)
4535                 return 0;
4536
4537         /* Give the tracer a chance to approve the change */
4538         if (tr->current_trace->flag_changed)
4539                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4540                         return -EINVAL;
4541
4542         if (enabled)
4543                 tr->trace_flags |= mask;
4544         else
4545                 tr->trace_flags &= ~mask;
4546
4547         if (mask == TRACE_ITER_RECORD_CMD)
4548                 trace_event_enable_cmd_record(enabled);
4549
4550         if (mask == TRACE_ITER_RECORD_TGID) {
4551                 if (!tgid_map)
4552                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4553                                            sizeof(*tgid_map),
4554                                            GFP_KERNEL);
4555                 if (!tgid_map) {
4556                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4557                         return -ENOMEM;
4558                 }
4559
4560                 trace_event_enable_tgid_record(enabled);
4561         }
4562
4563         if (mask == TRACE_ITER_EVENT_FORK)
4564                 trace_event_follow_fork(tr, enabled);
4565
4566         if (mask == TRACE_ITER_FUNC_FORK)
4567                 ftrace_pid_follow_fork(tr, enabled);
4568
4569         if (mask == TRACE_ITER_OVERWRITE) {
4570                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4571 #ifdef CONFIG_TRACER_MAX_TRACE
4572                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4573 #endif
4574         }
4575
4576         if (mask == TRACE_ITER_PRINTK) {
4577                 trace_printk_start_stop_comm(enabled);
4578                 trace_printk_control(enabled);
4579         }
4580
4581         return 0;
4582 }
4583
4584 static int trace_set_options(struct trace_array *tr, char *option)
4585 {
4586         char *cmp;
4587         int neg = 0;
4588         int ret;
4589         size_t orig_len = strlen(option);
4590         int len;
4591
4592         cmp = strstrip(option);
4593
4594         len = str_has_prefix(cmp, "no");
4595         if (len)
4596                 neg = 1;
4597
4598         cmp += len;
4599
4600         mutex_lock(&trace_types_lock);
4601
4602         ret = match_string(trace_options, -1, cmp);
4603         /* If no option could be set, test the specific tracer options */
4604         if (ret < 0)
4605                 ret = set_tracer_option(tr, cmp, neg);
4606         else
4607                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4608
4609         mutex_unlock(&trace_types_lock);
4610
4611         /*
4612          * If the first trailing whitespace is replaced with '\0' by strstrip,
4613          * turn it back into a space.
4614          */
4615         if (orig_len > strlen(option))
4616                 option[strlen(option)] = ' ';
4617
4618         return ret;
4619 }
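/*
 * Illustrative sketch, not part of the original file: the parsing above
 * strips an optional "no" prefix with str_has_prefix() and then looks the
 * name up in a NULL-terminated table with match_string().  The same shape on
 * a made-up table (all demo_* names are assumptions):
 */
#if 0
#include <linux/string.h>
#include <linux/types.h>

static const char * const demo_options[] = { "print-parent", "sym-offset", NULL };

static int demo_parse_option(const char *cmp, bool *neg)
{
        int len = str_has_prefix(cmp, "no");

        *neg = len != 0;
        cmp += len;

        /* index into demo_options on success, -EINVAL if not found */
        return match_string(demo_options, -1, cmp);
}
#endif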
4620
4621 static void __init apply_trace_boot_options(void)
4622 {
4623         char *buf = trace_boot_options_buf;
4624         char *option;
4625
4626         while (true) {
4627                 option = strsep(&buf, ",");
4628
4629                 if (!option)
4630                         break;
4631
4632                 if (*option)
4633                         trace_set_options(&global_trace, option);
4634
4635                 /* Put back the comma to allow this to be called again */
4636                 if (buf)
4637                         *(buf - 1) = ',';
4638         }
4639 }
4640
4641 static ssize_t
4642 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4643                         size_t cnt, loff_t *ppos)
4644 {
4645         struct seq_file *m = filp->private_data;
4646         struct trace_array *tr = m->private;
4647         char buf[64];
4648         int ret;
4649
4650         if (cnt >= sizeof(buf))
4651                 return -EINVAL;
4652
4653         if (copy_from_user(buf, ubuf, cnt))
4654                 return -EFAULT;
4655
4656         buf[cnt] = 0;
4657
4658         ret = trace_set_options(tr, buf);
4659         if (ret < 0)
4660                 return ret;
4661
4662         *ppos += cnt;
4663
4664         return cnt;
4665 }
4666
4667 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4668 {
4669         struct trace_array *tr = inode->i_private;
4670         int ret;
4671
4672         if (tracing_disabled)
4673                 return -ENODEV;
4674
4675         if (trace_array_get(tr) < 0)
4676                 return -ENODEV;
4677
4678         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4679         if (ret < 0)
4680                 trace_array_put(tr);
4681
4682         return ret;
4683 }
4684
4685 static const struct file_operations tracing_iter_fops = {
4686         .open           = tracing_trace_options_open,
4687         .read           = seq_read,
4688         .llseek         = seq_lseek,
4689         .release        = tracing_single_release_tr,
4690         .write          = tracing_trace_options_write,
4691 };
4692
4693 static const char readme_msg[] =
4694         "tracing mini-HOWTO:\n\n"
4695         "# echo 0 > tracing_on : quick way to disable tracing\n"
4696         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4697         " Important files:\n"
4698         "  trace\t\t\t- The static contents of the buffer\n"
4699         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4700         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4701         "  current_tracer\t- function and latency tracers\n"
4702         "  available_tracers\t- list of configured tracers for current_tracer\n"
4703         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4704         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4705         "  trace_clock\t\t- change the clock used to order events\n"
4706         "       local:   Per cpu clock but may not be synced across CPUs\n"
4707         "      global:   Synced across CPUs but slows tracing down.\n"
4708         "     counter:   Not a clock, but just an increment\n"
4709         "      uptime:   Jiffy counter from time of boot\n"
4710         "        perf:   Same clock that perf events use\n"
4711 #ifdef CONFIG_X86_64
4712         "     x86-tsc:   TSC cycle counter\n"
4713 #endif
4714         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4715         "       delta:   Delta difference against a buffer-wide timestamp\n"
4716         "    absolute:   Absolute (standalone) timestamp\n"
4717         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4718         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4719         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4720         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4721         "\t\t\t  Remove sub-buffer with rmdir\n"
4722         "  trace_options\t\t- Set format or modify how tracing happens\n"
4723         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4724         "\t\t\t  option name\n"
4725         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4726 #ifdef CONFIG_DYNAMIC_FTRACE
4727         "\n  available_filter_functions - list of functions that can be filtered on\n"
4728         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4729         "\t\t\t  functions\n"
4730         "\t     accepts: func_full_name or glob-matching-pattern\n"
4731         "\t     modules: Can select a group via module\n"
4732         "\t      Format: :mod:<module-name>\n"
4733         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4734         "\t    triggers: a command to perform when function is hit\n"
4735         "\t      Format: <function>:<trigger>[:count]\n"
4736         "\t     trigger: traceon, traceoff\n"
4737         "\t\t      enable_event:<system>:<event>\n"
4738         "\t\t      disable_event:<system>:<event>\n"
4739 #ifdef CONFIG_STACKTRACE
4740         "\t\t      stacktrace\n"
4741 #endif
4742 #ifdef CONFIG_TRACER_SNAPSHOT
4743         "\t\t      snapshot\n"
4744 #endif
4745         "\t\t      dump\n"
4746         "\t\t      cpudump\n"
4747         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4748         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4749         "\t     The first one will disable tracing every time do_fault is hit\n"
4750         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4751         "\t       The first time do_trap is hit and it disables tracing, the\n"
4752         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4753         "\t       the counter will not decrement. It only decrements when the\n"
4754         "\t       trigger actually fired\n"
4755         "\t     To remove trigger without count:\n"
4756         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4757         "\t     To remove trigger with a count:\n"
4758         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4759         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4760         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4761         "\t    modules: Can select a group via module command :mod:\n"
4762         "\t    Does not accept triggers\n"
4763 #endif /* CONFIG_DYNAMIC_FTRACE */
4764 #ifdef CONFIG_FUNCTION_TRACER
4765         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4766         "\t\t    (function)\n"
4767 #endif
4768 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4769         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4770         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4771         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4772 #endif
4773 #ifdef CONFIG_TRACER_SNAPSHOT
4774         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4775         "\t\t\t  snapshot buffer. Read the contents for more\n"
4776         "\t\t\t  information\n"
4777 #endif
4778 #ifdef CONFIG_STACK_TRACER
4779         "  stack_trace\t\t- Shows the max stack trace when active\n"
4780         "  stack_max_size\t- Shows current max stack size that was traced\n"
4781         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4782         "\t\t\t  new trace)\n"
4783 #ifdef CONFIG_DYNAMIC_FTRACE
4784         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4785         "\t\t\t  traces\n"
4786 #endif
4787 #endif /* CONFIG_STACK_TRACER */
4788 #ifdef CONFIG_DYNAMIC_EVENTS
4789         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4790         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4791 #endif
4792 #ifdef CONFIG_KPROBE_EVENTS
4793         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4794         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4795 #endif
4796 #ifdef CONFIG_UPROBE_EVENTS
4797         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4798         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4799 #endif
4800 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4801         "\t  accepts: event-definitions (one definition per line)\n"
4802         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4803         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4804 #ifdef CONFIG_HIST_TRIGGERS
4805         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4806 #endif
4807         "\t           -:[<group>/]<event>\n"
4808 #ifdef CONFIG_KPROBE_EVENTS
4809         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4810   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4811 #endif
4812 #ifdef CONFIG_UPROBE_EVENTS
4813   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4814 #endif
4815         "\t     args: <name>=fetcharg[:type]\n"
4816         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4817 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4818         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4819 #else
4820         "\t           $stack<index>, $stack, $retval, $comm\n"
4821 #endif
4822         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4823         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4824         "\t           <type>\\[<array-size>\\]\n"
4825 #ifdef CONFIG_HIST_TRIGGERS
4826         "\t    field: <stype> <name>;\n"
4827         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4828         "\t           [unsigned] char/int/long\n"
4829 #endif
4830 #endif
4831         "  events/\t\t- Directory containing all trace event subsystems:\n"
4832         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4833         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4834         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4835         "\t\t\t  events\n"
4836         "      filter\t\t- If set, only events passing filter are traced\n"
4837         "  events/<system>/<event>/\t- Directory containing control files for\n"
4838         "\t\t\t  <event>:\n"
4839         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4840         "      filter\t\t- If set, only events passing filter are traced\n"
4841         "      trigger\t\t- If set, a command to perform when event is hit\n"
4842         "\t    Format: <trigger>[:count][if <filter>]\n"
4843         "\t   trigger: traceon, traceoff\n"
4844         "\t            enable_event:<system>:<event>\n"
4845         "\t            disable_event:<system>:<event>\n"
4846 #ifdef CONFIG_HIST_TRIGGERS
4847         "\t            enable_hist:<system>:<event>\n"
4848         "\t            disable_hist:<system>:<event>\n"
4849 #endif
4850 #ifdef CONFIG_STACKTRACE
4851         "\t\t    stacktrace\n"
4852 #endif
4853 #ifdef CONFIG_TRACER_SNAPSHOT
4854         "\t\t    snapshot\n"
4855 #endif
4856 #ifdef CONFIG_HIST_TRIGGERS
4857         "\t\t    hist (see below)\n"
4858 #endif
4859         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4860         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4861         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4862         "\t                  events/block/block_unplug/trigger\n"
4863         "\t   The first disables tracing every time block_unplug is hit.\n"
4864         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4865         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4866         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4867         "\t   Like function triggers, the counter is only decremented if it\n"
4868         "\t    enabled or disabled tracing.\n"
4869         "\t   To remove a trigger without a count:\n"
4870         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4871         "\t   To remove a trigger with a count:\n"
4872         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4873         "\t   Filters can be ignored when removing a trigger.\n"
4874 #ifdef CONFIG_HIST_TRIGGERS
4875         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4876         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4877         "\t            [:values=<field1[,field2,...]>]\n"
4878         "\t            [:sort=<field1[,field2,...]>]\n"
4879         "\t            [:size=#entries]\n"
4880         "\t            [:pause][:continue][:clear]\n"
4881         "\t            [:name=histname1]\n"
4882         "\t            [:<handler>.<action>]\n"
4883         "\t            [if <filter>]\n\n"
4884         "\t    When a matching event is hit, an entry is added to a hash\n"
4885         "\t    table using the key(s) and value(s) named, and the value of a\n"
4886         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4887         "\t    correspond to fields in the event's format description.  Keys\n"
4888         "\t    can be any field, or the special string 'stacktrace'.\n"
4889         "\t    Compound keys consisting of up to two fields can be specified\n"
4890         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4891         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4892         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4893         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4894         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4895         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4896         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4897         "\t    its histogram data will be shared with other triggers of the\n"
4898         "\t    same name, and trigger hits will update this common data.\n\n"
4899         "\t    Reading the 'hist' file for the event will dump the hash\n"
4900         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4901         "\t    triggers attached to an event, there will be a table for each\n"
4902         "\t    trigger in the output.  The table displayed for a named\n"
4903         "\t    trigger will be the same as any other instance having the\n"
4904         "\t    same name.  The default format used to display a given field\n"
4905         "\t    can be modified by appending any of the following modifiers\n"
4906         "\t    to the field name, as applicable:\n\n"
4907         "\t            .hex        display a number as a hex value\n"
4908         "\t            .sym        display an address as a symbol\n"
4909         "\t            .sym-offset display an address as a symbol and offset\n"
4910         "\t            .execname   display a common_pid as a program name\n"
4911         "\t            .syscall    display a syscall id as a syscall name\n"
4912         "\t            .log2       display log2 value rather than raw number\n"
4913         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4914         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4915         "\t    trigger or to start a hist trigger but not log any events\n"
4916         "\t    until told to do so.  'continue' can be used to start or\n"
4917         "\t    restart a paused hist trigger.\n\n"
4918         "\t    The 'clear' parameter will clear the contents of a running\n"
4919         "\t    hist trigger and leave its current paused/active state\n"
4920         "\t    unchanged.\n\n"
4921         "\t    The enable_hist and disable_hist triggers can be used to\n"
4922         "\t    have one event conditionally start and stop another event's\n"
4923         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4924         "\t    the enable_event and disable_event triggers.\n\n"
4925         "\t    Hist trigger handlers and actions are executed whenever a\n"
4926         "\t    histogram entry is added or updated.  They take the form:\n\n"
4927         "\t        <handler>.<action>\n\n"
4928         "\t    The available handlers are:\n\n"
4929         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4930         "\t        onmax(var)               - invoke if var exceeds current max\n"
4931         "\t        onchange(var)            - invoke action if var changes\n\n"
4932         "\t    The available actions are:\n\n"
4933         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4934         "\t        save(field,...)                      - save current event fields\n"
4935 #ifdef CONFIG_TRACER_SNAPSHOT
4936         "\t        snapshot()                           - snapshot the trace buffer\n"
4937 #endif
4938 #endif
4939 ;
4940
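/*
 * The readme_msg text above is exposed through the "README" file in
 * tracefs (typically /sys/kernel/tracing/README, or
 * /sys/kernel/debug/tracing/README on older setups).
 */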
4941 static ssize_t
4942 tracing_readme_read(struct file *filp, char __user *ubuf,
4943                        size_t cnt, loff_t *ppos)
4944 {
4945         return simple_read_from_buffer(ubuf, cnt, ppos,
4946                                         readme_msg, strlen(readme_msg));
4947 }
4948
4949 static const struct file_operations tracing_readme_fops = {
4950         .open           = tracing_open_generic,
4951         .read           = tracing_readme_read,
4952         .llseek         = generic_file_llseek,
4953 };
4954
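/*
 * seq_file iterator for the "saved_tgids" file: walk the pid-indexed
 * tgid_map[] array and emit one "<pid> <tgid>" line per pid that has a
 * tgid recorded.
 */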
4955 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4956 {
4957         int *ptr = v;
4958
4959         if (*pos || m->count)
4960                 ptr++;
4961
4962         (*pos)++;
4963
4964         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4965                 if (trace_find_tgid(*ptr))
4966                         return ptr;
4967         }
4968
4969         return NULL;
4970 }
4971
4972 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4973 {
4974         void *v;
4975         loff_t l = 0;
4976
4977         if (!tgid_map)
4978                 return NULL;
4979
4980         v = &tgid_map[0];
4981         while (l <= *pos) {
4982                 v = saved_tgids_next(m, v, &l);
4983                 if (!v)
4984                         return NULL;
4985         }
4986
4987         return v;
4988 }
4989
4990 static void saved_tgids_stop(struct seq_file *m, void *v)
4991 {
4992 }
4993
4994 static int saved_tgids_show(struct seq_file *m, void *v)
4995 {
4996         int pid = (int *)v - tgid_map;
4997
4998         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4999         return 0;
5000 }
5001
5002 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5003         .start          = saved_tgids_start,
5004         .stop           = saved_tgids_stop,
5005         .next           = saved_tgids_next,
5006         .show           = saved_tgids_show,
5007 };
5008
5009 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5010 {
5011         if (tracing_disabled)
5012                 return -ENODEV;
5013
5014         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5015 }
5016
5017
5018 static const struct file_operations tracing_saved_tgids_fops = {
5019         .open           = tracing_saved_tgids_open,
5020         .read           = seq_read,
5021         .llseek         = seq_lseek,
5022         .release        = seq_release,
5023 };
5024
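/*
 * seq_file iterator for the "saved_cmdlines" file: walk
 * savedcmd->map_cmdline_to_pid[] under trace_cmdline_lock and emit one
 * "<pid> <comm>" line per cached entry.
 */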
5025 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5026 {
5027         unsigned int *ptr = v;
5028
5029         if (*pos || m->count)
5030                 ptr++;
5031
5032         (*pos)++;
5033
5034         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5035              ptr++) {
5036                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5037                         continue;
5038
5039                 return ptr;
5040         }
5041
5042         return NULL;
5043 }
5044
5045 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5046 {
5047         void *v;
5048         loff_t l = 0;
5049
5050         preempt_disable();
5051         arch_spin_lock(&trace_cmdline_lock);
5052
5053         v = &savedcmd->map_cmdline_to_pid[0];
5054         while (l <= *pos) {
5055                 v = saved_cmdlines_next(m, v, &l);
5056                 if (!v)
5057                         return NULL;
5058         }
5059
5060         return v;
5061 }
5062
5063 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5064 {
5065         arch_spin_unlock(&trace_cmdline_lock);
5066         preempt_enable();
5067 }
5068
5069 static int saved_cmdlines_show(struct seq_file *m, void *v)
5070 {
5071         char buf[TASK_COMM_LEN];
5072         unsigned int *pid = v;
5073
5074         __trace_find_cmdline(*pid, buf);
5075         seq_printf(m, "%d %s\n", *pid, buf);
5076         return 0;
5077 }
5078
5079 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5080         .start          = saved_cmdlines_start,
5081         .next           = saved_cmdlines_next,
5082         .stop           = saved_cmdlines_stop,
5083         .show           = saved_cmdlines_show,
5084 };
5085
5086 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5087 {
5088         if (tracing_disabled)
5089                 return -ENODEV;
5090
5091         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5092 }
5093
5094 static const struct file_operations tracing_saved_cmdlines_fops = {
5095         .open           = tracing_saved_cmdlines_open,
5096         .read           = seq_read,
5097         .llseek         = seq_lseek,
5098         .release        = seq_release,
5099 };
5100
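/*
 * "saved_cmdlines_size": reading reports how many comm entries the
 * cmdline cache can hold; writing a value between 1 and PID_MAX_DEFAULT
 * resizes the cache (see tracing_resize_saved_cmdlines() below).
 */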
5101 static ssize_t
5102 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5103                                  size_t cnt, loff_t *ppos)
5104 {
5105         char buf[64];
5106         int r;
5107
5108         arch_spin_lock(&trace_cmdline_lock);
5109         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5110         arch_spin_unlock(&trace_cmdline_lock);
5111
5112         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5113 }
5114
5115 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5116 {
5117         kfree(s->saved_cmdlines);
5118         kfree(s->map_cmdline_to_pid);
5119         kfree(s);
5120 }
5121
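/*
 * Replace the cmdline cache with a freshly allocated one of @val entries.
 * The swap is done under trace_cmdline_lock; previously cached comms are
 * discarded along with the old buffer.
 */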
5122 static int tracing_resize_saved_cmdlines(unsigned int val)
5123 {
5124         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5125
5126         s = kmalloc(sizeof(*s), GFP_KERNEL);
5127         if (!s)
5128                 return -ENOMEM;
5129
5130         if (allocate_cmdlines_buffer(val, s) < 0) {
5131                 kfree(s);
5132                 return -ENOMEM;
5133         }
5134
5135         arch_spin_lock(&trace_cmdline_lock);
5136         savedcmd_temp = savedcmd;
5137         savedcmd = s;
5138         arch_spin_unlock(&trace_cmdline_lock);
5139         free_saved_cmdlines_buffer(savedcmd_temp);
5140
5141         return 0;
5142 }
5143
5144 static ssize_t
5145 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5146                                   size_t cnt, loff_t *ppos)
5147 {
5148         unsigned long val;
5149         int ret;
5150
5151         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5152         if (ret)
5153                 return ret;
5154
5155         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5156         if (!val || val > PID_MAX_DEFAULT)
5157                 return -EINVAL;
5158
5159         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5160         if (ret < 0)
5161                 return ret;
5162
5163         *ppos += cnt;
5164
5165         return cnt;
5166 }
5167
5168 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5169         .open           = tracing_open_generic,
5170         .read           = tracing_saved_cmdlines_size_read,
5171         .write          = tracing_saved_cmdlines_size_write,
5172 };
5173
5174 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
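/*
 * The "eval_map" file lists the enum/sizeof() string-to-number
 * conversions that have been applied to the event format files, one
 * "<string> <value> (<system>)" line per mapping.  The maps are stored
 * as chained arrays (head item, map items, tail item) and are walked
 * under trace_eval_mutex.
 */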
5175 static union trace_eval_map_item *
5176 update_eval_map(union trace_eval_map_item *ptr)
5177 {
5178         if (!ptr->map.eval_string) {
5179                 if (ptr->tail.next) {
5180                         ptr = ptr->tail.next;
5181                         /* Set ptr to the next real item (skip head) */
5182                         ptr++;
5183                 } else
5184                         return NULL;
5185         }
5186         return ptr;
5187 }
5188
5189 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5190 {
5191         union trace_eval_map_item *ptr = v;
5192
5193         /*
5194          * Paranoid! If ptr points to end, we don't want to increment past it.
5195          * This really should never happen.
5196          */
5197         ptr = update_eval_map(ptr);
5198         if (WARN_ON_ONCE(!ptr))
5199                 return NULL;
5200
5201         ptr++;
5202
5203         (*pos)++;
5204
5205         ptr = update_eval_map(ptr);
5206
5207         return ptr;
5208 }
5209
5210 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5211 {
5212         union trace_eval_map_item *v;
5213         loff_t l = 0;
5214
5215         mutex_lock(&trace_eval_mutex);
5216
5217         v = trace_eval_maps;
5218         if (v)
5219                 v++;
5220
5221         while (v && l < *pos) {
5222                 v = eval_map_next(m, v, &l);
5223         }
5224
5225         return v;
5226 }
5227
5228 static void eval_map_stop(struct seq_file *m, void *v)
5229 {
5230         mutex_unlock(&trace_eval_mutex);
5231 }
5232
5233 static int eval_map_show(struct seq_file *m, void *v)
5234 {
5235         union trace_eval_map_item *ptr = v;
5236
5237         seq_printf(m, "%s %ld (%s)\n",
5238                    ptr->map.eval_string, ptr->map.eval_value,
5239                    ptr->map.system);
5240
5241         return 0;
5242 }
5243
5244 static const struct seq_operations tracing_eval_map_seq_ops = {
5245         .start          = eval_map_start,
5246         .next           = eval_map_next,
5247         .stop           = eval_map_stop,
5248         .show           = eval_map_show,
5249 };
5250
5251 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5252 {
5253         if (tracing_disabled)
5254                 return -ENODEV;
5255
5256         return seq_open(filp, &tracing_eval_map_seq_ops);
5257 }
5258
5259 static const struct file_operations tracing_eval_map_fops = {
5260         .open           = tracing_eval_map_open,
5261         .read           = seq_read,
5262         .llseek         = seq_lseek,
5263         .release        = seq_release,
5264 };
5265
5266 static inline union trace_eval_map_item *
5267 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5268 {
5269         /* Return tail of array given the head */
5270         return ptr + ptr->head.length + 1;
5271 }
5272
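/*
 * Layout of the array built below for a module with N maps:
 *
 *   map_array[0]      head { mod, length = N }
 *   map_array[1..N]   copies of the module's trace_eval_map entries
 *   map_array[N+1]    zeroed tail; tail.next chains to the next array
 */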
5273 static void
5274 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5275                            int len)
5276 {
5277         struct trace_eval_map **stop;
5278         struct trace_eval_map **map;
5279         union trace_eval_map_item *map_array;
5280         union trace_eval_map_item *ptr;
5281
5282         stop = start + len;
5283
5284         /*
5285          * The trace_eval_maps contains the map plus a head and tail item,
5286          * where the head holds the module and length of array, and the
5287          * tail holds a pointer to the next list.
5288          */
5289         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5290         if (!map_array) {
5291                 pr_warn("Unable to allocate trace eval mapping\n");
5292                 return;
5293         }
5294
5295         mutex_lock(&trace_eval_mutex);
5296
5297         if (!trace_eval_maps)
5298                 trace_eval_maps = map_array;
5299         else {
5300                 ptr = trace_eval_maps;
5301                 for (;;) {
5302                         ptr = trace_eval_jmp_to_tail(ptr);
5303                         if (!ptr->tail.next)
5304                                 break;
5305                         ptr = ptr->tail.next;
5306
5307                 }
5308                 ptr->tail.next = map_array;
5309         }
5310         map_array->head.mod = mod;
5311         map_array->head.length = len;
5312         map_array++;
5313
5314         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5315                 map_array->map = **map;
5316                 map_array++;
5317         }
5318         memset(map_array, 0, sizeof(*map_array));
5319
5320         mutex_unlock(&trace_eval_mutex);
5321 }
5322
5323 static void trace_create_eval_file(struct dentry *d_tracer)
5324 {
5325         trace_create_file("eval_map", 0444, d_tracer,
5326                           NULL, &tracing_eval_map_fops);
5327 }
5328
5329 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5330 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5331 static inline void trace_insert_eval_map_file(struct module *mod,
5332                               struct trace_eval_map **start, int len) { }
5333 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5334
5335 static void trace_insert_eval_map(struct module *mod,
5336                                   struct trace_eval_map **start, int len)
5337 {
5338         struct trace_eval_map **map;
5339
5340         if (len <= 0)
5341                 return;
5342
5343         map = start;
5344
5345         trace_event_eval_update(map, len);
5346
5347         trace_insert_eval_map_file(mod, start, len);
5348 }
5349
5350 static ssize_t
5351 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5352                        size_t cnt, loff_t *ppos)
5353 {
5354         struct trace_array *tr = filp->private_data;
5355         char buf[MAX_TRACER_SIZE+2];
5356         int r;
5357
5358         mutex_lock(&trace_types_lock);
5359         r = sprintf(buf, "%s\n", tr->current_trace->name);
5360         mutex_unlock(&trace_types_lock);
5361
5362         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5363 }
5364
5365 int tracer_init(struct tracer *t, struct trace_array *tr)
5366 {
5367         tracing_reset_online_cpus(&tr->trace_buffer);
5368         return t->init(tr);
5369 }
5370
5371 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5372 {
5373         int cpu;
5374
5375         for_each_tracing_cpu(cpu)
5376                 per_cpu_ptr(buf->data, cpu)->entries = val;
5377 }
5378
5379 #ifdef CONFIG_TRACER_MAX_TRACE
5380 /* resize @trace_buf's per-cpu entries to match those of @size_buf */
5381 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5382                                         struct trace_buffer *size_buf, int cpu_id)
5383 {
5384         int cpu, ret = 0;
5385
5386         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5387                 for_each_tracing_cpu(cpu) {
5388                         ret = ring_buffer_resize(trace_buf->buffer,
5389                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5390                         if (ret < 0)
5391                                 break;
5392                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5393                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5394                 }
5395         } else {
5396                 ret = ring_buffer_resize(trace_buf->buffer,
5397                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5398                 if (ret == 0)
5399                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5400                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5401         }
5402
5403         return ret;
5404 }
5405 #endif /* CONFIG_TRACER_MAX_TRACE */
5406
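/*
 * Resize @tr's trace buffer to @size bytes for @cpu (or for every CPU
 * when @cpu is RING_BUFFER_ALL_CPUS).  When the current tracer uses the
 * max (snapshot) buffer on the global array, that buffer is resized to
 * match; if that fails, the main buffer is restored to its old size.
 */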
5407 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5408                                         unsigned long size, int cpu)
5409 {
5410         int ret;
5411
5412         /*
5413          * If kernel or user changes the size of the ring buffer
5414          * we use the size that was given, and we can forget about
5415          * expanding it later.
5416          */
5417         ring_buffer_expanded = true;
5418
5419         /* May be called before buffers are initialized */
5420         if (!tr->trace_buffer.buffer)
5421                 return 0;
5422
5423         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5424         if (ret < 0)
5425                 return ret;
5426
5427 #ifdef CONFIG_TRACER_MAX_TRACE
5428         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5429             !tr->current_trace->use_max_tr)
5430                 goto out;
5431
5432         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5433         if (ret < 0) {
5434                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5435                                                      &tr->trace_buffer, cpu);
5436                 if (r < 0) {
5437                         /*
5438                          * AARGH! We are left with different
5439                          * size max buffer!!!!
5440                          * The max buffer is our "snapshot" buffer.
5441                          * When a tracer needs a snapshot (one of the
5442                          * latency tracers), it swaps the max buffer
5443                          * with the saved snapshot. We succeeded in updating
5444                          * the size of the main buffer, but failed to update
5445                          * the size of the max buffer. But when we tried
5446                          * to reset the main buffer to the original size, we
5447                          * failed there too. This is very unlikely to
5448                          * happen, but if it does, warn and kill all
5449                          * tracing.
5450                          */
5451                         WARN_ON(1);
5452                         tracing_disabled = 1;
5453                 }
5454                 return ret;
5455         }
5456
5457         if (cpu == RING_BUFFER_ALL_CPUS)
5458                 set_buffer_entries(&tr->max_buffer, size);
5459         else
5460                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5461
5462  out:
5463 #endif /* CONFIG_TRACER_MAX_TRACE */
5464
5465         if (cpu == RING_BUFFER_ALL_CPUS)
5466                 set_buffer_entries(&tr->trace_buffer, size);
5467         else
5468                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5469
5470         return ret;
5471 }
5472
5473 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5474                                           unsigned long size, int cpu_id)
5475 {
5476         int ret = size;
5477
5478         mutex_lock(&trace_types_lock);
5479
5480         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5481                 /* make sure this cpu is enabled in the mask */
5482                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5483                         ret = -EINVAL;
5484                         goto out;
5485                 }
5486         }
5487
5488         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5489         if (ret < 0)
5490                 ret = -ENOMEM;
5491
5492 out:
5493         mutex_unlock(&trace_types_lock);
5494
5495         return ret;
5496 }
5497
5498
5499 /**
5500  * tracing_update_buffers - used by tracing facility to expand ring buffers
5501  *
5502  * To save memory when tracing is never used on a system that has it
5503  * configured in, the ring buffers are set to a minimum size. Once a
5504  * user starts to use the tracing facility, they need to grow to
5505  * their default size.
5506  *
5507  * This function is to be called when a tracer is about to be used.
5508  */
5509 int tracing_update_buffers(void)
5510 {
5511         int ret = 0;
5512
5513         mutex_lock(&trace_types_lock);
5514         if (!ring_buffer_expanded)
5515                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5516                                                 RING_BUFFER_ALL_CPUS);
5517         mutex_unlock(&trace_types_lock);
5518
5519         return ret;
5520 }
5521
5522 struct trace_option_dentry;
5523
5524 static void
5525 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5526
5527 /*
5528  * Used to clear out the tracer before deletion of an instance.
5529  * Must have trace_types_lock held.
5530  */
5531 static void tracing_set_nop(struct trace_array *tr)
5532 {
5533         if (tr->current_trace == &nop_trace)
5534                 return;
5535
5536         tr->current_trace->enabled--;
5537
5538         if (tr->current_trace->reset)
5539                 tr->current_trace->reset(tr);
5540
5541         tr->current_trace = &nop_trace;
5542 }
5543
5544 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5545 {
5546         /* Only enable if the directory has been created already. */
5547         if (!tr->dir)
5548                 return;
5549
5550         create_trace_option_files(tr, t);
5551 }
5552
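/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, look the tracer up by name, and refuse
 * the switch if a conditional snapshot is in use, the tracer is not
 * allowed for this buffer or on the kernel command line, or trace_pipe
 * readers still hold a reference.  The old tracer is reset (current_trace
 * set to nop_trace first), the snapshot buffer is freed or allocated as
 * needed, and the new tracer's init() is called.
 */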
5553 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5554 {
5555         struct tracer *t;
5556 #ifdef CONFIG_TRACER_MAX_TRACE
5557         bool had_max_tr;
5558 #endif
5559         int ret = 0;
5560
5561         mutex_lock(&trace_types_lock);
5562
5563         if (!ring_buffer_expanded) {
5564                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5565                                                 RING_BUFFER_ALL_CPUS);
5566                 if (ret < 0)
5567                         goto out;
5568                 ret = 0;
5569         }
5570
5571         for (t = trace_types; t; t = t->next) {
5572                 if (strcmp(t->name, buf) == 0)
5573                         break;
5574         }
5575         if (!t) {
5576                 ret = -EINVAL;
5577                 goto out;
5578         }
5579         if (t == tr->current_trace)
5580                 goto out;
5581
5582 #ifdef CONFIG_TRACER_SNAPSHOT
5583         if (t->use_max_tr) {
5584                 arch_spin_lock(&tr->max_lock);
5585                 if (tr->cond_snapshot)
5586                         ret = -EBUSY;
5587                 arch_spin_unlock(&tr->max_lock);
5588                 if (ret)
5589                         goto out;
5590         }
5591 #endif
5592         /* Some tracers won't work on kernel command line */
5593         if (system_state < SYSTEM_RUNNING && t->noboot) {
5594                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5595                         t->name);
5596                 goto out;
5597         }
5598
5599         /* Some tracers are only allowed for the top level buffer */
5600         if (!trace_ok_for_array(t, tr)) {
5601                 ret = -EINVAL;
5602                 goto out;
5603         }
5604
5605         /* If trace pipe files are being read, we can't change the tracer */
5606         if (tr->current_trace->ref) {
5607                 ret = -EBUSY;
5608                 goto out;
5609         }
5610
5611         trace_branch_disable();
5612
5613         tr->current_trace->enabled--;
5614
5615         if (tr->current_trace->reset)
5616                 tr->current_trace->reset(tr);
5617
5618         /* Current trace needs to be nop_trace before synchronize_rcu */
5619         tr->current_trace = &nop_trace;
5620
5621 #ifdef CONFIG_TRACER_MAX_TRACE
5622         had_max_tr = tr->allocated_snapshot;
5623
5624         if (had_max_tr && !t->use_max_tr) {
5625                 /*
5626                  * We need to make sure that the update_max_tr sees that
5627                  * current_trace changed to nop_trace to keep it from
5628                  * swapping the buffers after we resize it.
5629                  * The update_max_tr is called with interrupts disabled,
5630                  * so a synchronize_rcu() is sufficient.
5631                  */
5632                 synchronize_rcu();
5633                 free_snapshot(tr);
5634         }
5635 #endif
5636
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638         if (t->use_max_tr && !had_max_tr) {
5639                 ret = tracing_alloc_snapshot_instance(tr);
5640                 if (ret < 0)
5641                         goto out;
5642         }
5643 #endif
5644
5645         if (t->init) {
5646                 ret = tracer_init(t, tr);
5647                 if (ret)
5648                         goto out;
5649         }
5650
5651         tr->current_trace = t;
5652         tr->current_trace->enabled++;
5653         trace_branch_enable(tr);
5654  out:
5655         mutex_unlock(&trace_types_lock);
5656
5657         return ret;
5658 }
5659
5660 static ssize_t
5661 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5662                         size_t cnt, loff_t *ppos)
5663 {
5664         struct trace_array *tr = filp->private_data;
5665         char buf[MAX_TRACER_SIZE+1];
5666         int i;
5667         size_t ret;
5668         int err;
5669
5670         ret = cnt;
5671
5672         if (cnt > MAX_TRACER_SIZE)
5673                 cnt = MAX_TRACER_SIZE;
5674
5675         if (copy_from_user(buf, ubuf, cnt))
5676                 return -EFAULT;
5677
5678         buf[cnt] = 0;
5679
5680         /* strip ending whitespace. */
5681         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5682                 buf[i] = 0;
5683
5684         err = tracing_set_tracer(tr, buf);
5685         if (err)
5686                 return err;
5687
5688         *ppos += ret;
5689
5690         return ret;
5691 }
5692
5693 static ssize_t
5694 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5695                    size_t cnt, loff_t *ppos)
5696 {
5697         char buf[64];
5698         int r;
5699
5700         r = snprintf(buf, sizeof(buf), "%ld\n",
5701                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5702         if (r > sizeof(buf))
5703                 r = sizeof(buf);
5704         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5705 }
5706
5707 static ssize_t
5708 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5709                     size_t cnt, loff_t *ppos)
5710 {
5711         unsigned long val;
5712         int ret;
5713
5714         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5715         if (ret)
5716                 return ret;
5717
5718         *ptr = val * 1000;
5719
5720         return cnt;
5721 }
5722
5723 static ssize_t
5724 tracing_thresh_read(struct file *filp, char __user *ubuf,
5725                     size_t cnt, loff_t *ppos)
5726 {
5727         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5728 }
5729
5730 static ssize_t
5731 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5732                      size_t cnt, loff_t *ppos)
5733 {
5734         struct trace_array *tr = filp->private_data;
5735         int ret;
5736
5737         mutex_lock(&trace_types_lock);
5738         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5739         if (ret < 0)
5740                 goto out;
5741
5742         if (tr->current_trace->update_thresh) {
5743                 ret = tr->current_trace->update_thresh(tr);
5744                 if (ret < 0)
5745                         goto out;
5746         }
5747
5748         ret = cnt;
5749 out:
5750         mutex_unlock(&trace_types_lock);
5751
5752         return ret;
5753 }
5754
5755 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5756
5757 static ssize_t
5758 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5759                      size_t cnt, loff_t *ppos)
5760 {
5761         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5762 }
5763
5764 static ssize_t
5765 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5766                       size_t cnt, loff_t *ppos)
5767 {
5768         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5769 }
5770
5771 #endif
5772
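/*
 * Opening "trace_pipe" creates a consumer iterator: reads remove entries
 * from the ring buffer as they are returned.  The current tracer is
 * pinned (current_trace->ref) while the pipe is open, so it cannot be
 * switched until the last reader goes away.
 */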
5773 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5774 {
5775         struct trace_array *tr = inode->i_private;
5776         struct trace_iterator *iter;
5777         int ret = 0;
5778
5779         if (tracing_disabled)
5780                 return -ENODEV;
5781
5782         if (trace_array_get(tr) < 0)
5783                 return -ENODEV;
5784
5785         mutex_lock(&trace_types_lock);
5786
5787         /* create a buffer to store the information to pass to userspace */
5788         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5789         if (!iter) {
5790                 ret = -ENOMEM;
5791                 __trace_array_put(tr);
5792                 goto out;
5793         }
5794
5795         trace_seq_init(&iter->seq);
5796         iter->trace = tr->current_trace;
5797
5798         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5799                 ret = -ENOMEM;
5800                 goto fail;
5801         }
5802
5803         /* trace pipe does not show start of buffer */
5804         cpumask_setall(iter->started);
5805
5806         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5807                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5808
5809         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5810         if (trace_clocks[tr->clock_id].in_ns)
5811                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5812
5813         iter->tr = tr;
5814         iter->trace_buffer = &tr->trace_buffer;
5815         iter->cpu_file = tracing_get_cpu(inode);
5816         mutex_init(&iter->mutex);
5817         filp->private_data = iter;
5818
5819         if (iter->trace->pipe_open)
5820                 iter->trace->pipe_open(iter);
5821
5822         nonseekable_open(inode, filp);
5823
5824         tr->current_trace->ref++;
5825 out:
5826         mutex_unlock(&trace_types_lock);
5827         return ret;
5828
5829 fail:
5830         kfree(iter);
5831         __trace_array_put(tr);
5832         mutex_unlock(&trace_types_lock);
5833         return ret;
5834 }
5835
5836 static int tracing_release_pipe(struct inode *inode, struct file *file)
5837 {
5838         struct trace_iterator *iter = file->private_data;
5839         struct trace_array *tr = inode->i_private;
5840
5841         mutex_lock(&trace_types_lock);
5842
5843         tr->current_trace->ref--;
5844
5845         if (iter->trace->pipe_close)
5846                 iter->trace->pipe_close(iter);
5847
5848         mutex_unlock(&trace_types_lock);
5849
5850         free_cpumask_var(iter->started);
5851         mutex_destroy(&iter->mutex);
5852         kfree(iter);
5853
5854         trace_array_put(tr);
5855
5856         return 0;
5857 }
5858
5859 static __poll_t
5860 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5861 {
5862         struct trace_array *tr = iter->tr;
5863
5864         /* Iterators are static, they should be filled or empty */
5865         if (trace_buffer_iter(iter, iter->cpu_file))
5866                 return EPOLLIN | EPOLLRDNORM;
5867
5868         if (tr->trace_flags & TRACE_ITER_BLOCK)
5869                 /*
5870                  * Always select as readable when in blocking mode
5871                  */
5872                 return EPOLLIN | EPOLLRDNORM;
5873         else
5874                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5875                                              filp, poll_table);
5876 }
5877
5878 static __poll_t
5879 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5880 {
5881         struct trace_iterator *iter = filp->private_data;
5882
5883         return trace_poll(iter, filp, poll_table);
5884 }
5885
5886 /* Must be called with iter->mutex held. */
5887 static int tracing_wait_pipe(struct file *filp)
5888 {
5889         struct trace_iterator *iter = filp->private_data;
5890         int ret;
5891
5892         while (trace_empty(iter)) {
5893
5894                 if (filp->f_flags & O_NONBLOCK) {
5895                         return -EAGAIN;
5896                 }
5897
5898                 /*
5899                  * We block until we read something and tracing is disabled.
5900                  * We still block if tracing is disabled, but we have never
5901                  * read anything. This allows a user to cat this file, and
5902                  * then enable tracing. But after we have read something,
5903                  * we give an EOF when tracing is again disabled.
5904                  *
5905                  * iter->pos will be 0 if we haven't read anything.
5906                  */
5907                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5908                         break;
5909
5910                 mutex_unlock(&iter->mutex);
5911
5912                 ret = wait_on_pipe(iter, 0);
5913
5914                 mutex_lock(&iter->mutex);
5915
5916                 if (ret)
5917                         return ret;
5918         }
5919
5920         return 1;
5921 }
5922
5923 /*
5924  * Consumer reader.
5925  */
5926 static ssize_t
5927 tracing_read_pipe(struct file *filp, char __user *ubuf,
5928                   size_t cnt, loff_t *ppos)
5929 {
5930         struct trace_iterator *iter = filp->private_data;
5931         ssize_t sret;
5932
5933         /*
5934          * Avoid more than one consumer on a single file descriptor.
5935          * This is just a matter of trace coherency; the ring buffer itself
5936          * is protected.
5937          */
5938         mutex_lock(&iter->mutex);
5939
5940         /* return any leftover data */
5941         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5942         if (sret != -EBUSY)
5943                 goto out;
5944
5945         trace_seq_init(&iter->seq);
5946
5947         if (iter->trace->read) {
5948                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5949                 if (sret)
5950                         goto out;
5951         }
5952
5953 waitagain:
5954         sret = tracing_wait_pipe(filp);
5955         if (sret <= 0)
5956                 goto out;
5957
5958         /* stop when tracing is finished */
5959         if (trace_empty(iter)) {
5960                 sret = 0;
5961                 goto out;
5962         }
5963
5964         if (cnt >= PAGE_SIZE)
5965                 cnt = PAGE_SIZE - 1;
5966
5967         /* reset all but tr, trace, and overruns */
5968         memset(&iter->seq, 0,
5969                sizeof(struct trace_iterator) -
5970                offsetof(struct trace_iterator, seq));
5971         cpumask_clear(iter->started);
5972         iter->pos = -1;
5973
5974         trace_event_read_lock();
5975         trace_access_lock(iter->cpu_file);
5976         while (trace_find_next_entry_inc(iter) != NULL) {
5977                 enum print_line_t ret;
5978                 int save_len = iter->seq.seq.len;
5979
5980                 ret = print_trace_line(iter);
5981                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5982                         /* don't print partial lines */
5983                         iter->seq.seq.len = save_len;
5984                         break;
5985                 }
5986                 if (ret != TRACE_TYPE_NO_CONSUME)
5987                         trace_consume(iter);
5988
5989                 if (trace_seq_used(&iter->seq) >= cnt)
5990                         break;
5991
5992                 /*
5993                  * Setting the full flag means we reached the trace_seq buffer
5994                  * size and we should have left via the partial-output condition above.
5995                  * One of the trace_seq_* functions is not used properly.
5996                  */
5997                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5998                           iter->ent->type);
5999         }
6000         trace_access_unlock(iter->cpu_file);
6001         trace_event_read_unlock();
6002
6003         /* Now copy what we have to the user */
6004         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6005         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6006                 trace_seq_init(&iter->seq);
6007
6008         /*
6009          * If there was nothing to send to user, in spite of consuming trace
6010          * entries, go back to wait for more entries.
6011          */
6012         if (sret == -EBUSY)
6013                 goto waitagain;
6014
6015 out:
6016         mutex_unlock(&iter->mutex);
6017
6018         return sret;
6019 }
6020
6021 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6022                                      unsigned int idx)
6023 {
6024         __free_page(spd->pages[idx]);
6025 }
6026
6027 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6028         .confirm                = generic_pipe_buf_confirm,
6029         .release                = generic_pipe_buf_release,
6030         .steal                  = generic_pipe_buf_steal,
6031         .get                    = generic_pipe_buf_get,
6032 };
6033
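/*
 * Fill one page worth of formatted trace output into iter->seq for the
 * splice path, consuming entries as they are printed.  @rem is the
 * remaining splice byte budget; the amount still unused is returned.
 */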
6034 static size_t
6035 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6036 {
6037         size_t count;
6038         int save_len;
6039         int ret;
6040
6041         /* Seq buffer is page-sized, exactly what we need. */
6042         for (;;) {
6043                 save_len = iter->seq.seq.len;
6044                 ret = print_trace_line(iter);
6045
6046                 if (trace_seq_has_overflowed(&iter->seq)) {
6047                         iter->seq.seq.len = save_len;
6048                         break;
6049                 }
6050
6051                 /*
6052                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6053                  * only be returned if the iter->seq overflowed. But check it
6054                  * anyway to be safe.
6055                  */
6056                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6057                         iter->seq.seq.len = save_len;
6058                         break;
6059                 }
6060
6061                 count = trace_seq_used(&iter->seq) - save_len;
6062                 if (rem < count) {
6063                         rem = 0;
6064                         iter->seq.seq.len = save_len;
6065                         break;
6066                 }
6067
6068                 if (ret != TRACE_TYPE_NO_CONSUME)
6069                         trace_consume(iter);
6070                 rem -= count;
6071                 if (!trace_find_next_entry_inc(iter))   {
6072                         rem = 0;
6073                         iter->ent = NULL;
6074                         break;
6075                 }
6076         }
6077
6078         return rem;
6079 }
6080
6081 static ssize_t tracing_splice_read_pipe(struct file *filp,
6082                                         loff_t *ppos,
6083                                         struct pipe_inode_info *pipe,
6084                                         size_t len,
6085                                         unsigned int flags)
6086 {
6087         struct page *pages_def[PIPE_DEF_BUFFERS];
6088         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6089         struct trace_iterator *iter = filp->private_data;
6090         struct splice_pipe_desc spd = {
6091                 .pages          = pages_def,
6092                 .partial        = partial_def,
6093                 .nr_pages       = 0, /* This gets updated below. */
6094                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6095                 .ops            = &tracing_pipe_buf_ops,
6096                 .spd_release    = tracing_spd_release_pipe,
6097         };
6098         ssize_t ret;
6099         size_t rem;
6100         unsigned int i;
6101
6102         if (splice_grow_spd(pipe, &spd))
6103                 return -ENOMEM;
6104
6105         mutex_lock(&iter->mutex);
6106
6107         if (iter->trace->splice_read) {
6108                 ret = iter->trace->splice_read(iter, filp,
6109                                                ppos, pipe, len, flags);
6110                 if (ret)
6111                         goto out_err;
6112         }
6113
6114         ret = tracing_wait_pipe(filp);
6115         if (ret <= 0)
6116                 goto out_err;
6117
6118         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6119                 ret = -EFAULT;
6120                 goto out_err;
6121         }
6122
6123         trace_event_read_lock();
6124         trace_access_lock(iter->cpu_file);
6125
6126         /* Fill as many pages as possible. */
6127         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6128                 spd.pages[i] = alloc_page(GFP_KERNEL);
6129                 if (!spd.pages[i])
6130                         break;
6131
6132                 rem = tracing_fill_pipe_page(rem, iter);
6133
6134                 /* Copy the data into the page, so we can start over. */
6135                 ret = trace_seq_to_buffer(&iter->seq,
6136                                           page_address(spd.pages[i]),
6137                                           trace_seq_used(&iter->seq));
6138                 if (ret < 0) {
6139                         __free_page(spd.pages[i]);
6140                         break;
6141                 }
6142                 spd.partial[i].offset = 0;
6143                 spd.partial[i].len = trace_seq_used(&iter->seq);
6144
6145                 trace_seq_init(&iter->seq);
6146         }
6147
6148         trace_access_unlock(iter->cpu_file);
6149         trace_event_read_unlock();
6150         mutex_unlock(&iter->mutex);
6151
6152         spd.nr_pages = i;
6153
6154         if (i)
6155                 ret = splice_to_pipe(pipe, &spd);
6156         else
6157                 ret = 0;
6158 out:
6159         splice_shrink_spd(&spd);
6160         return ret;
6161
6162 out_err:
6163         mutex_unlock(&iter->mutex);
6164         goto out;
6165 }
6166
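/*
 * "buffer_size_kb": per-cpu instances report that CPU's ring buffer size
 * in KB.  The top-level file reports the common size, "X" when the
 * per-cpu sizes differ, and "(expanded: N)" while the buffer is still at
 * its boot-time minimum.  Writing a KB value resizes the buffer.
 */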
6167 static ssize_t
6168 tracing_entries_read(struct file *filp, char __user *ubuf,
6169                      size_t cnt, loff_t *ppos)
6170 {
6171         struct inode *inode = file_inode(filp);
6172         struct trace_array *tr = inode->i_private;
6173         int cpu = tracing_get_cpu(inode);
6174         char buf[64];
6175         int r = 0;
6176         ssize_t ret;
6177
6178         mutex_lock(&trace_types_lock);
6179
6180         if (cpu == RING_BUFFER_ALL_CPUS) {
6181                 int cpu, buf_size_same;
6182                 unsigned long size;
6183
6184                 size = 0;
6185                 buf_size_same = 1;
6186                 /* check if all cpu sizes are same */
6187                 for_each_tracing_cpu(cpu) {
6188                         /* fill in the size from first enabled cpu */
6189                         if (size == 0)
6190                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6191                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6192                                 buf_size_same = 0;
6193                                 break;
6194                         }
6195                 }
6196
6197                 if (buf_size_same) {
6198                         if (!ring_buffer_expanded)
6199                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6200                                             size >> 10,
6201                                             trace_buf_size >> 10);
6202                         else
6203                                 r = sprintf(buf, "%lu\n", size >> 10);
6204                 } else
6205                         r = sprintf(buf, "X\n");
6206         } else
6207                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6208
6209         mutex_unlock(&trace_types_lock);
6210
6211         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6212         return ret;
6213 }
6214
6215 static ssize_t
6216 tracing_entries_write(struct file *filp, const char __user *ubuf,
6217                       size_t cnt, loff_t *ppos)
6218 {
6219         struct inode *inode = file_inode(filp);
6220         struct trace_array *tr = inode->i_private;
6221         unsigned long val;
6222         int ret;
6223
6224         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6225         if (ret)
6226                 return ret;
6227
6228         /* must have at least 1 entry */
6229         if (!val)
6230                 return -EINVAL;
6231
6232         /* value is in KB */
6233         val <<= 10;
6234         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6235         if (ret < 0)
6236                 return ret;
6237
6238         *ppos += cnt;
6239
6240         return cnt;
6241 }
6242
6243 static ssize_t
6244 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6245                                 size_t cnt, loff_t *ppos)
6246 {
6247         struct trace_array *tr = filp->private_data;
6248         char buf[64];
6249         int r, cpu;
6250         unsigned long size = 0, expanded_size = 0;
6251
6252         mutex_lock(&trace_types_lock);
6253         for_each_tracing_cpu(cpu) {
6254                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6255                 if (!ring_buffer_expanded)
6256                         expanded_size += trace_buf_size >> 10;
6257         }
6258         if (ring_buffer_expanded)
6259                 r = sprintf(buf, "%lu\n", size);
6260         else
6261                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6262         mutex_unlock(&trace_types_lock);
6263
6264         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6265 }
6266
6267 static ssize_t
6268 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6269                           size_t cnt, loff_t *ppos)
6270 {
6271         /*
6272          * There is no need to read what the user has written; this function
6273          * only exists so that an "echo" into this file does not return an error.
6274          */
6275
6276         *ppos += cnt;
6277
6278         return cnt;
6279 }
6280
6281 static int
6282 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6283 {
6284         struct trace_array *tr = inode->i_private;
6285
6286         /* disable tracing ? */
6287         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6288                 tracer_tracing_off(tr);
6289         /* resize the ring buffer to 0 */
6290         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6291
6292         trace_array_put(tr);
6293
6294         return 0;
6295 }
6296
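/*
 * "trace_marker": writing text here (e.g. "echo hello > trace_marker")
 * injects a TRACE_PRINT event into the ring buffer, which is handy for
 * correlating user-space activity with kernel events.  Writes are capped
 * at TRACE_BUF_SIZE, a trailing newline is added if one is missing, and
 * any triggers attached to the trace_marker event are run as well.
 */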
6297 static ssize_t
6298 tracing_mark_write(struct file *filp, const char __user *ubuf,
6299                                         size_t cnt, loff_t *fpos)
6300 {
6301         struct trace_array *tr = filp->private_data;
6302         struct ring_buffer_event *event;
6303         enum event_trigger_type tt = ETT_NONE;
6304         struct ring_buffer *buffer;
6305         struct print_entry *entry;
6306         unsigned long irq_flags;
6307         const char faulted[] = "<faulted>";
6308         ssize_t written;
6309         int size;
6310         int len;
6311
6312 /* Used in tracing_mark_raw_write() as well */
6313 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6314
6315         if (tracing_disabled)
6316                 return -EINVAL;
6317
6318         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6319                 return -EINVAL;
6320
6321         if (cnt > TRACE_BUF_SIZE)
6322                 cnt = TRACE_BUF_SIZE;
6323
6324         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6325
6326         local_save_flags(irq_flags);
6327         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6328
6329         /* If less than "<faulted>", then make sure we can still add that */
6330         if (cnt < FAULTED_SIZE)
6331                 size += FAULTED_SIZE - cnt;
6332
6333         buffer = tr->trace_buffer.buffer;
6334         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6335                                             irq_flags, preempt_count());
6336         if (unlikely(!event))
6337                 /* Ring buffer disabled, return as if not open for write */
6338                 return -EBADF;
6339
6340         entry = ring_buffer_event_data(event);
6341         entry->ip = _THIS_IP_;
6342
6343         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6344         if (len) {
6345                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6346                 cnt = FAULTED_SIZE;
6347                 written = -EFAULT;
6348         } else
6349                 written = cnt;
6350         len = cnt;
6351
6352         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6353                 /* do not add \n before testing triggers, but add \0 */
6354                 entry->buf[cnt] = '\0';
6355                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6356         }
6357
6358         if (entry->buf[cnt - 1] != '\n') {
6359                 entry->buf[cnt] = '\n';
6360                 entry->buf[cnt + 1] = '\0';
6361         } else
6362                 entry->buf[cnt] = '\0';
6363
6364         __buffer_unlock_commit(buffer, event);
6365
6366         if (tt)
6367                 event_triggers_post_call(tr->trace_marker_file, tt);
6368
6369         if (written > 0)
6370                 *fpos += written;
6371
6372         return written;
6373 }
6374
6375 /* Limit it for now to 3K (including tag) */
6376 #define RAW_DATA_MAX_SIZE (1024*3)
6377
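/*
 * Like tracing_mark_write(), but for the "trace_marker_raw" file: the
 * payload is binary rather than text and must begin with an unsigned int
 * tag id that identifies the format of the data that follows.
 */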
6378 static ssize_t
6379 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6380                                         size_t cnt, loff_t *fpos)
6381 {
6382         struct trace_array *tr = filp->private_data;
6383         struct ring_buffer_event *event;
6384         struct ring_buffer *buffer;
6385         struct raw_data_entry *entry;
6386         const char faulted[] = "<faulted>";
6387         unsigned long irq_flags;
6388         ssize_t written;
6389         int size;
6390         int len;
6391
6392 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6393
6394         if (tracing_disabled)
6395                 return -EINVAL;
6396
6397         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6398                 return -EINVAL;
6399
6400         /* The marker must at least have a tag id */
6401         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6402                 return -EINVAL;
6403
6404         if (cnt > TRACE_BUF_SIZE)
6405                 cnt = TRACE_BUF_SIZE;
6406
6407         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6408
6409         local_save_flags(irq_flags);
6410         size = sizeof(*entry) + cnt;
6411         if (cnt < FAULT_SIZE_ID)
6412                 size += FAULT_SIZE_ID - cnt;
6413
6414         buffer = tr->trace_buffer.buffer;
6415         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6416                                             irq_flags, preempt_count());
6417         if (!event)
6418                 /* Ring buffer disabled, return as if not open for write */
6419                 return -EBADF;
6420
6421         entry = ring_buffer_event_data(event);
6422
6423         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6424         if (len) {
6425                 entry->id = -1;
6426                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6427                 written = -EFAULT;
6428         } else
6429                 written = cnt;
6430
6431         __buffer_unlock_commit(buffer, event);
6432
6433         if (written > 0)
6434                 *fpos += written;
6435
6436         return written;
6437 }
6438
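/*
 * Show the clocks the ring buffer can use for timestamps, with the clock
 * currently selected for this instance in brackets, e.g. "[local] global
 * counter ...".  This backs the "trace_clock" file.
 */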
6439 static int tracing_clock_show(struct seq_file *m, void *v)
6440 {
6441         struct trace_array *tr = m->private;
6442         int i;
6443
6444         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6445                 seq_printf(m,
6446                         "%s%s%s%s", i ? " " : "",
6447                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6448                         i == tr->clock_id ? "]" : "");
6449         seq_putc(m, '\n');
6450
6451         return 0;
6452 }
6453
6454 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6455 {
6456         int i;
6457
6458         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6459                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6460                         break;
6461         }
6462         if (i == ARRAY_SIZE(trace_clocks))
6463                 return -EINVAL;
6464
6465         mutex_lock(&trace_types_lock);
6466
6467         tr->clock_id = i;
6468
6469         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6470
6471         /*
6472          * The new clock may not be consistent with the previous clock, so
6473          * reset the buffers so that they don't contain incomparable timestamps.
6474          */
6475         tracing_reset_online_cpus(&tr->trace_buffer);
6476
6477 #ifdef CONFIG_TRACER_MAX_TRACE
6478         if (tr->max_buffer.buffer)
6479                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6480         tracing_reset_online_cpus(&tr->max_buffer);
6481 #endif
6482
6483         mutex_unlock(&trace_types_lock);
6484
6485         return 0;
6486 }
6487
6488 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6489                                    size_t cnt, loff_t *fpos)
6490 {
6491         struct seq_file *m = filp->private_data;
6492         struct trace_array *tr = m->private;
6493         char buf[64];
6494         const char *clockstr;
6495         int ret;
6496
6497         if (cnt >= sizeof(buf))
6498                 return -EINVAL;
6499
6500         if (copy_from_user(buf, ubuf, cnt))
6501                 return -EFAULT;
6502
6503         buf[cnt] = 0;
6504
6505         clockstr = strstrip(buf);
6506
6507         ret = tracing_set_clock(tr, clockstr);
6508         if (ret)
6509                 return ret;
6510
6511         *fpos += cnt;
6512
6513         return cnt;
6514 }
6515
6516 static int tracing_clock_open(struct inode *inode, struct file *file)
6517 {
6518         struct trace_array *tr = inode->i_private;
6519         int ret;
6520
6521         if (tracing_disabled)
6522                 return -ENODEV;
6523
6524         if (trace_array_get(tr))
6525                 return -ENODEV;
6526
6527         ret = single_open(file, tracing_clock_show, inode->i_private);
6528         if (ret < 0)
6529                 trace_array_put(tr);
6530
6531         return ret;
6532 }
6533
6534 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6535 {
6536         struct trace_array *tr = m->private;
6537
6538         mutex_lock(&trace_types_lock);
6539
6540         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6541                 seq_puts(m, "delta [absolute]\n");
6542         else
6543                 seq_puts(m, "[delta] absolute\n");
6544
6545         mutex_unlock(&trace_types_lock);
6546
6547         return 0;
6548 }
6549
6550 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6551 {
6552         struct trace_array *tr = inode->i_private;
6553         int ret;
6554
6555         if (tracing_disabled)
6556                 return -ENODEV;
6557
6558         if (trace_array_get(tr))
6559                 return -ENODEV;
6560
6561         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6562         if (ret < 0)
6563                 trace_array_put(tr);
6564
6565         return ret;
6566 }
6567
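/*
 * Switch the ring buffer between delta and absolute timestamps.  The switch
 * is reference counted so that several users (hist triggers, for instance)
 * can request absolute timestamps without interfering with each other; the
 * buffer drops back to delta mode only when the last user is gone.
 */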
6568 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6569 {
6570         int ret = 0;
6571
6572         mutex_lock(&trace_types_lock);
6573
6574         if (abs && tr->time_stamp_abs_ref++)
6575                 goto out;
6576
6577         if (!abs) {
6578                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6579                         ret = -EINVAL;
6580                         goto out;
6581                 }
6582
6583                 if (--tr->time_stamp_abs_ref)
6584                         goto out;
6585         }
6586
6587         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6588
6589 #ifdef CONFIG_TRACER_MAX_TRACE
6590         if (tr->max_buffer.buffer)
6591                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6592 #endif
6593  out:
6594         mutex_unlock(&trace_types_lock);
6595
6596         return ret;
6597 }
6598
6599 struct ftrace_buffer_info {
6600         struct trace_iterator   iter;
6601         void                    *spare;
6602         unsigned int            spare_cpu;
6603         unsigned int            read;
6604 };
6605
6606 #ifdef CONFIG_TRACER_SNAPSHOT
6607 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6608 {
6609         struct trace_array *tr = inode->i_private;
6610         struct trace_iterator *iter;
6611         struct seq_file *m;
6612         int ret = 0;
6613
6614         if (trace_array_get(tr) < 0)
6615                 return -ENODEV;
6616
6617         if (file->f_mode & FMODE_READ) {
6618                 iter = __tracing_open(inode, file, true);
6619                 if (IS_ERR(iter))
6620                         ret = PTR_ERR(iter);
6621         } else {
6622                 /* Writes still need the seq_file to hold the private data */
6623                 ret = -ENOMEM;
6624                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6625                 if (!m)
6626                         goto out;
6627                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6628                 if (!iter) {
6629                         kfree(m);
6630                         goto out;
6631                 }
6632                 ret = 0;
6633
6634                 iter->tr = tr;
6635                 iter->trace_buffer = &tr->max_buffer;
6636                 iter->cpu_file = tracing_get_cpu(inode);
6637                 m->private = iter;
6638                 file->private_data = m;
6639         }
6640 out:
6641         if (ret < 0)
6642                 trace_array_put(tr);
6643
6644         return ret;
6645 }
6646
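/*
 * Handle writes to the "snapshot" file:
 *
 *   echo 0 > snapshot - free the snapshot buffer (only allowed through the
 *                       main, all-CPU snapshot file)
 *   echo 1 > snapshot - allocate the snapshot buffer if needed and take a
 *                       snapshot by swapping it with the live buffer
 *   other values      - erase the snapshot buffer's contents without
 *                       freeing it
 *
 * The write is refused while a conditional snapshot is in use.
 */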
6647 static ssize_t
6648 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6649                        loff_t *ppos)
6650 {
6651         struct seq_file *m = filp->private_data;
6652         struct trace_iterator *iter = m->private;
6653         struct trace_array *tr = iter->tr;
6654         unsigned long val;
6655         int ret;
6656
6657         ret = tracing_update_buffers();
6658         if (ret < 0)
6659                 return ret;
6660
6661         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6662         if (ret)
6663                 return ret;
6664
6665         mutex_lock(&trace_types_lock);
6666
6667         if (tr->current_trace->use_max_tr) {
6668                 ret = -EBUSY;
6669                 goto out;
6670         }
6671
6672         arch_spin_lock(&tr->max_lock);
6673         if (tr->cond_snapshot)
6674                 ret = -EBUSY;
6675         arch_spin_unlock(&tr->max_lock);
6676         if (ret)
6677                 goto out;
6678
6679         switch (val) {
6680         case 0:
6681                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6682                         ret = -EINVAL;
6683                         break;
6684                 }
6685                 if (tr->allocated_snapshot)
6686                         free_snapshot(tr);
6687                 break;
6688         case 1:
6689 /* Only allow per-cpu swap if the ring buffer supports it */
6690 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6691                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6692                         ret = -EINVAL;
6693                         break;
6694                 }
6695 #endif
6696                 if (!tr->allocated_snapshot) {
6697                         ret = tracing_alloc_snapshot_instance(tr);
6698                         if (ret < 0)
6699                                 break;
6700                 }
6701                 local_irq_disable();
6702                 /* Now, we're going to swap */
6703                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6704                         update_max_tr(tr, current, smp_processor_id(), NULL);
6705                 else
6706                         update_max_tr_single(tr, current, iter->cpu_file);
6707                 local_irq_enable();
6708                 break;
6709         default:
6710                 if (tr->allocated_snapshot) {
6711                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6712                                 tracing_reset_online_cpus(&tr->max_buffer);
6713                         else
6714                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6715                 }
6716                 break;
6717         }
6718
6719         if (ret >= 0) {
6720                 *ppos += cnt;
6721                 ret = cnt;
6722         }
6723 out:
6724         mutex_unlock(&trace_types_lock);
6725         return ret;
6726 }
6727
6728 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6729 {
6730         struct seq_file *m = file->private_data;
6731         int ret;
6732
6733         ret = tracing_release(inode, file);
6734
6735         if (file->f_mode & FMODE_READ)
6736                 return ret;
6737
6738         /* If write only, the seq_file is just a stub */
6739         if (m)
6740                 kfree(m->private);
6741         kfree(m);
6742
6743         return 0;
6744 }
6745
6746 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6747 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6748                                     size_t count, loff_t *ppos);
6749 static int tracing_buffers_release(struct inode *inode, struct file *file);
6750 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6751                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6752
6753 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6754 {
6755         struct ftrace_buffer_info *info;
6756         int ret;
6757
6758         ret = tracing_buffers_open(inode, filp);
6759         if (ret < 0)
6760                 return ret;
6761
6762         info = filp->private_data;
6763
6764         if (info->iter.trace->use_max_tr) {
6765                 tracing_buffers_release(inode, filp);
6766                 return -EBUSY;
6767         }
6768
6769         info->iter.snapshot = true;
6770         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6771
6772         return ret;
6773 }
6774
6775 #endif /* CONFIG_TRACER_SNAPSHOT */
6776
6777
6778 static const struct file_operations tracing_thresh_fops = {
6779         .open           = tracing_open_generic,
6780         .read           = tracing_thresh_read,
6781         .write          = tracing_thresh_write,
6782         .llseek         = generic_file_llseek,
6783 };
6784
6785 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6786 static const struct file_operations tracing_max_lat_fops = {
6787         .open           = tracing_open_generic,
6788         .read           = tracing_max_lat_read,
6789         .write          = tracing_max_lat_write,
6790         .llseek         = generic_file_llseek,
6791 };
6792 #endif
6793
6794 static const struct file_operations set_tracer_fops = {
6795         .open           = tracing_open_generic,
6796         .read           = tracing_set_trace_read,
6797         .write          = tracing_set_trace_write,
6798         .llseek         = generic_file_llseek,
6799 };
6800
6801 static const struct file_operations tracing_pipe_fops = {
6802         .open           = tracing_open_pipe,
6803         .poll           = tracing_poll_pipe,
6804         .read           = tracing_read_pipe,
6805         .splice_read    = tracing_splice_read_pipe,
6806         .release        = tracing_release_pipe,
6807         .llseek         = no_llseek,
6808 };
6809
6810 static const struct file_operations tracing_entries_fops = {
6811         .open           = tracing_open_generic_tr,
6812         .read           = tracing_entries_read,
6813         .write          = tracing_entries_write,
6814         .llseek         = generic_file_llseek,
6815         .release        = tracing_release_generic_tr,
6816 };
6817
6818 static const struct file_operations tracing_total_entries_fops = {
6819         .open           = tracing_open_generic_tr,
6820         .read           = tracing_total_entries_read,
6821         .llseek         = generic_file_llseek,
6822         .release        = tracing_release_generic_tr,
6823 };
6824
6825 static const struct file_operations tracing_free_buffer_fops = {
6826         .open           = tracing_open_generic_tr,
6827         .write          = tracing_free_buffer_write,
6828         .release        = tracing_free_buffer_release,
6829 };
6830
6831 static const struct file_operations tracing_mark_fops = {
6832         .open           = tracing_open_generic_tr,
6833         .write          = tracing_mark_write,
6834         .llseek         = generic_file_llseek,
6835         .release        = tracing_release_generic_tr,
6836 };
6837
6838 static const struct file_operations tracing_mark_raw_fops = {
6839         .open           = tracing_open_generic_tr,
6840         .write          = tracing_mark_raw_write,
6841         .llseek         = generic_file_llseek,
6842         .release        = tracing_release_generic_tr,
6843 };
6844
6845 static const struct file_operations trace_clock_fops = {
6846         .open           = tracing_clock_open,
6847         .read           = seq_read,
6848         .llseek         = seq_lseek,
6849         .release        = tracing_single_release_tr,
6850         .write          = tracing_clock_write,
6851 };
6852
6853 static const struct file_operations trace_time_stamp_mode_fops = {
6854         .open           = tracing_time_stamp_mode_open,
6855         .read           = seq_read,
6856         .llseek         = seq_lseek,
6857         .release        = tracing_single_release_tr,
6858 };
6859
6860 #ifdef CONFIG_TRACER_SNAPSHOT
6861 static const struct file_operations snapshot_fops = {
6862         .open           = tracing_snapshot_open,
6863         .read           = seq_read,
6864         .write          = tracing_snapshot_write,
6865         .llseek         = tracing_lseek,
6866         .release        = tracing_snapshot_release,
6867 };
6868
6869 static const struct file_operations snapshot_raw_fops = {
6870         .open           = snapshot_raw_open,
6871         .read           = tracing_buffers_read,
6872         .release        = tracing_buffers_release,
6873         .splice_read    = tracing_buffers_splice_read,
6874         .llseek         = no_llseek,
6875 };
6876
6877 #endif /* CONFIG_TRACER_SNAPSHOT */
6878
6879 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6880 {
6881         struct trace_array *tr = inode->i_private;
6882         struct ftrace_buffer_info *info;
6883         int ret;
6884
6885         if (tracing_disabled)
6886                 return -ENODEV;
6887
6888         if (trace_array_get(tr) < 0)
6889                 return -ENODEV;
6890
6891         info = kzalloc(sizeof(*info), GFP_KERNEL);
6892         if (!info) {
6893                 trace_array_put(tr);
6894                 return -ENOMEM;
6895         }
6896
6897         mutex_lock(&trace_types_lock);
6898
6899         info->iter.tr           = tr;
6900         info->iter.cpu_file     = tracing_get_cpu(inode);
6901         info->iter.trace        = tr->current_trace;
6902         info->iter.trace_buffer = &tr->trace_buffer;
6903         info->spare             = NULL;
6904         /* Force reading ring buffer for first read */
6905         info->read              = (unsigned int)-1;
6906
6907         filp->private_data = info;
6908
6909         tr->current_trace->ref++;
6910
6911         mutex_unlock(&trace_types_lock);
6912
6913         ret = nonseekable_open(inode, filp);
6914         if (ret < 0)
6915                 trace_array_put(tr);
6916
6917         return ret;
6918 }
6919
6920 static __poll_t
6921 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6922 {
6923         struct ftrace_buffer_info *info = filp->private_data;
6924         struct trace_iterator *iter = &info->iter;
6925
6926         return trace_poll(iter, filp, poll_table);
6927 }
6928
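/*
 * Read binary ring buffer pages from the per-cpu "trace_pipe_raw" file.
 * A spare page is borrowed from the ring buffer, filled by
 * ring_buffer_read_page(), and then copied out to user space in page-sized
 * (or smaller) chunks.
 */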
6929 static ssize_t
6930 tracing_buffers_read(struct file *filp, char __user *ubuf,
6931                      size_t count, loff_t *ppos)
6932 {
6933         struct ftrace_buffer_info *info = filp->private_data;
6934         struct trace_iterator *iter = &info->iter;
6935         ssize_t ret = 0;
6936         ssize_t size;
6937
6938         if (!count)
6939                 return 0;
6940
6941 #ifdef CONFIG_TRACER_MAX_TRACE
6942         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6943                 return -EBUSY;
6944 #endif
6945
6946         if (!info->spare) {
6947                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6948                                                           iter->cpu_file);
6949                 if (IS_ERR(info->spare)) {
6950                         ret = PTR_ERR(info->spare);
6951                         info->spare = NULL;
6952                 } else {
6953                         info->spare_cpu = iter->cpu_file;
6954                 }
6955         }
6956         if (!info->spare)
6957                 return ret;
6958
6959         /* Is there leftover data from a previous read? */
6960         if (info->read < PAGE_SIZE)
6961                 goto read;
6962
6963  again:
6964         trace_access_lock(iter->cpu_file);
6965         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6966                                     &info->spare,
6967                                     count,
6968                                     iter->cpu_file, 0);
6969         trace_access_unlock(iter->cpu_file);
6970
6971         if (ret < 0) {
6972                 if (trace_empty(iter)) {
6973                         if ((filp->f_flags & O_NONBLOCK))
6974                                 return -EAGAIN;
6975
6976                         ret = wait_on_pipe(iter, 0);
6977                         if (ret)
6978                                 return ret;
6979
6980                         goto again;
6981                 }
6982                 return 0;
6983         }
6984
6985         info->read = 0;
6986  read:
6987         size = PAGE_SIZE - info->read;
6988         if (size > count)
6989                 size = count;
6990
6991         ret = copy_to_user(ubuf, info->spare + info->read, size);
6992         if (ret == size)
6993                 return -EFAULT;
6994
6995         size -= ret;
6996
6997         *ppos += size;
6998         info->read += size;
6999
7000         return size;
7001 }
7002
7003 static int tracing_buffers_release(struct inode *inode, struct file *file)
7004 {
7005         struct ftrace_buffer_info *info = file->private_data;
7006         struct trace_iterator *iter = &info->iter;
7007
7008         mutex_lock(&trace_types_lock);
7009
7010         iter->tr->current_trace->ref--;
7011
7012         __trace_array_put(iter->tr);
7013
7014         if (info->spare)
7015                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7016                                            info->spare_cpu, info->spare);
7017         kfree(info);
7018
7019         mutex_unlock(&trace_types_lock);
7020
7021         return 0;
7022 }
7023
7024 struct buffer_ref {
7025         struct ring_buffer      *buffer;
7026         void                    *page;
7027         int                     cpu;
7028         int                     ref;
7029 };
7030
7031 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7032                                     struct pipe_buffer *buf)
7033 {
7034         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7035
7036         if (--ref->ref)
7037                 return;
7038
7039         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7040         kfree(ref);
7041         buf->private = 0;
7042 }
7043
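/*
 * Refuse to take another reference once the count gets anywhere near
 * overflowing: a pipe reader that keeps duplicating the buffer could
 * otherwise wrap the counter and cause the page to be freed while still
 * in use.
 */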
7044 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7045                                 struct pipe_buffer *buf)
7046 {
7047         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7048
7049         if (ref->ref > INT_MAX/2)
7050                 return false;
7051
7052         ref->ref++;
7053         return true;
7054 }
7055
7056 /* Pipe buffer operations for a ring buffer page handed to splice(). */
7057 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7058         .confirm                = generic_pipe_buf_confirm,
7059         .release                = buffer_pipe_buf_release,
7060         .steal                  = generic_pipe_buf_steal,
7061         .get                    = buffer_pipe_buf_get,
7062 };
7063
7064 /*
7065  * Callback from splice_to_pipe(): release any pages left in the spd
7066  * if we errored out while filling the pipe.
7067  */
7068 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7069 {
7070         struct buffer_ref *ref =
7071                 (struct buffer_ref *)spd->partial[i].private;
7072
7073         if (--ref->ref)
7074                 return;
7075
7076         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7077         kfree(ref);
7078         spd->partial[i].private = 0;
7079 }
7080
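/*
 * splice() support for "trace_pipe_raw": complete ring buffer pages are
 * handed to the pipe without copying.  Each page is wrapped in a buffer_ref
 * so that it is returned to the ring buffer only after every pipe reader
 * has released it.
 */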
7081 static ssize_t
7082 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7083                             struct pipe_inode_info *pipe, size_t len,
7084                             unsigned int flags)
7085 {
7086         struct ftrace_buffer_info *info = file->private_data;
7087         struct trace_iterator *iter = &info->iter;
7088         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7089         struct page *pages_def[PIPE_DEF_BUFFERS];
7090         struct splice_pipe_desc spd = {
7091                 .pages          = pages_def,
7092                 .partial        = partial_def,
7093                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7094                 .ops            = &buffer_pipe_buf_ops,
7095                 .spd_release    = buffer_spd_release,
7096         };
7097         struct buffer_ref *ref;
7098         int entries, i;
7099         ssize_t ret = 0;
7100
7101 #ifdef CONFIG_TRACER_MAX_TRACE
7102         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7103                 return -EBUSY;
7104 #endif
7105
7106         if (*ppos & (PAGE_SIZE - 1))
7107                 return -EINVAL;
7108
7109         if (len & (PAGE_SIZE - 1)) {
7110                 if (len < PAGE_SIZE)
7111                         return -EINVAL;
7112                 len &= PAGE_MASK;
7113         }
7114
7115         if (splice_grow_spd(pipe, &spd))
7116                 return -ENOMEM;
7117
7118  again:
7119         trace_access_lock(iter->cpu_file);
7120         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7121
7122         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7123                 struct page *page;
7124                 int r;
7125
7126                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7127                 if (!ref) {
7128                         ret = -ENOMEM;
7129                         break;
7130                 }
7131
7132                 ref->ref = 1;
7133                 ref->buffer = iter->trace_buffer->buffer;
7134                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7135                 if (IS_ERR(ref->page)) {
7136                         ret = PTR_ERR(ref->page);
7137                         ref->page = NULL;
7138                         kfree(ref);
7139                         break;
7140                 }
7141                 ref->cpu = iter->cpu_file;
7142
7143                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7144                                           len, iter->cpu_file, 1);
7145                 if (r < 0) {
7146                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7147                                                    ref->page);
7148                         kfree(ref);
7149                         break;
7150                 }
7151
7152                 page = virt_to_page(ref->page);
7153
7154                 spd.pages[i] = page;
7155                 spd.partial[i].len = PAGE_SIZE;
7156                 spd.partial[i].offset = 0;
7157                 spd.partial[i].private = (unsigned long)ref;
7158                 spd.nr_pages++;
7159                 *ppos += PAGE_SIZE;
7160
7161                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7162         }
7163
7164         trace_access_unlock(iter->cpu_file);
7165         spd.nr_pages = i;
7166
7167         /* did we read anything? */
7168         if (!spd.nr_pages) {
7169                 if (ret)
7170                         goto out;
7171
7172                 ret = -EAGAIN;
7173                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7174                         goto out;
7175
7176                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7177                 if (ret)
7178                         goto out;
7179
7180                 goto again;
7181         }
7182
7183         ret = splice_to_pipe(pipe, &spd);
7184 out:
7185         splice_shrink_spd(&spd);
7186
7187         return ret;
7188 }
7189
7190 static const struct file_operations tracing_buffers_fops = {
7191         .open           = tracing_buffers_open,
7192         .read           = tracing_buffers_read,
7193         .poll           = tracing_buffers_poll,
7194         .release        = tracing_buffers_release,
7195         .splice_read    = tracing_buffers_splice_read,
7196         .llseek         = no_llseek,
7197 };
7198
7199 static ssize_t
7200 tracing_stats_read(struct file *filp, char __user *ubuf,
7201                    size_t count, loff_t *ppos)
7202 {
7203         struct inode *inode = file_inode(filp);
7204         struct trace_array *tr = inode->i_private;
7205         struct trace_buffer *trace_buf = &tr->trace_buffer;
7206         int cpu = tracing_get_cpu(inode);
7207         struct trace_seq *s;
7208         unsigned long cnt;
7209         unsigned long long t;
7210         unsigned long usec_rem;
7211
7212         s = kmalloc(sizeof(*s), GFP_KERNEL);
7213         if (!s)
7214                 return -ENOMEM;
7215
7216         trace_seq_init(s);
7217
7218         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7219         trace_seq_printf(s, "entries: %ld\n", cnt);
7220
7221         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7222         trace_seq_printf(s, "overrun: %ld\n", cnt);
7223
7224         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7225         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7226
7227         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7228         trace_seq_printf(s, "bytes: %ld\n", cnt);
7229
7230         if (trace_clocks[tr->clock_id].in_ns) {
7231                 /* local or global for trace_clock */
7232                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7233                 usec_rem = do_div(t, USEC_PER_SEC);
7234                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7235                                                                 t, usec_rem);
7236
7237                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7238                 usec_rem = do_div(t, USEC_PER_SEC);
7239                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7240         } else {
7241                 /* counter or tsc mode for trace_clock */
7242                 trace_seq_printf(s, "oldest event ts: %llu\n",
7243                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7244
7245                 trace_seq_printf(s, "now ts: %llu\n",
7246                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7247         }
7248
7249         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7250         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7251
7252         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7253         trace_seq_printf(s, "read events: %ld\n", cnt);
7254
7255         count = simple_read_from_buffer(ubuf, count, ppos,
7256                                         s->buffer, trace_seq_used(s));
7257
7258         kfree(s);
7259
7260         return count;
7261 }
7262
7263 static const struct file_operations tracing_stats_fops = {
7264         .open           = tracing_open_generic_tr,
7265         .read           = tracing_stats_read,
7266         .llseek         = generic_file_llseek,
7267         .release        = tracing_release_generic_tr,
7268 };
7269
7270 #ifdef CONFIG_DYNAMIC_FTRACE
7271
7272 static ssize_t
7273 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7274                   size_t cnt, loff_t *ppos)
7275 {
7276         unsigned long *p = filp->private_data;
7277         char buf[64]; /* Not too big for a shallow stack */
7278         int r;
7279
7280         r = scnprintf(buf, 63, "%ld", *p);
7281         buf[r++] = '\n';
7282
7283         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7284 }
7285
7286 static const struct file_operations tracing_dyn_info_fops = {
7287         .open           = tracing_open_generic,
7288         .read           = tracing_read_dyn_info,
7289         .llseek         = generic_file_llseek,
7290 };
7291 #endif /* CONFIG_DYNAMIC_FTRACE */
7292
7293 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7294 static void
7295 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7296                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7297                 void *data)
7298 {
7299         tracing_snapshot_instance(tr);
7300 }
7301
7302 static void
7303 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7304                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7305                       void *data)
7306 {
7307         struct ftrace_func_mapper *mapper = data;
7308         long *count = NULL;
7309
7310         if (mapper)
7311                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7312
7313         if (count) {
7314
7315                 if (*count <= 0)
7316                         return;
7317
7318                 (*count)--;
7319         }
7320
7321         tracing_snapshot_instance(tr);
7322 }
7323
7324 static int
7325 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7326                       struct ftrace_probe_ops *ops, void *data)
7327 {
7328         struct ftrace_func_mapper *mapper = data;
7329         long *count = NULL;
7330
7331         seq_printf(m, "%ps:", (void *)ip);
7332
7333         seq_puts(m, "snapshot");
7334
7335         if (mapper)
7336                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7337
7338         if (count)
7339                 seq_printf(m, ":count=%ld\n", *count);
7340         else
7341                 seq_puts(m, ":unlimited\n");
7342
7343         return 0;
7344 }
7345
7346 static int
7347 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7348                      unsigned long ip, void *init_data, void **data)
7349 {
7350         struct ftrace_func_mapper *mapper = *data;
7351
7352         if (!mapper) {
7353                 mapper = allocate_ftrace_func_mapper();
7354                 if (!mapper)
7355                         return -ENOMEM;
7356                 *data = mapper;
7357         }
7358
7359         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7360 }
7361
7362 static void
7363 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7364                      unsigned long ip, void *data)
7365 {
7366         struct ftrace_func_mapper *mapper = data;
7367
7368         if (!ip) {
7369                 if (!mapper)
7370                         return;
7371                 free_ftrace_func_mapper(mapper, NULL);
7372                 return;
7373         }
7374
7375         ftrace_func_mapper_remove_ip(mapper, ip);
7376 }
7377
7378 static struct ftrace_probe_ops snapshot_probe_ops = {
7379         .func                   = ftrace_snapshot,
7380         .print                  = ftrace_snapshot_print,
7381 };
7382
7383 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7384         .func                   = ftrace_count_snapshot,
7385         .print                  = ftrace_snapshot_print,
7386         .init                   = ftrace_snapshot_init,
7387         .free                   = ftrace_snapshot_free,
7388 };
7389
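/*
 * Implements the "snapshot" command of set_ftrace_filter.  For example,
 * with some_function standing in for any traced function:
 *
 *   echo 'some_function:snapshot' > set_ftrace_filter
 *   echo 'some_function:snapshot:3' > set_ftrace_filter
 *
 * takes a snapshot every time some_function is hit, the second form at most
 * three times.  Prefixing the expression with '!' removes the probe again.
 */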
7390 static int
7391 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7392                                char *glob, char *cmd, char *param, int enable)
7393 {
7394         struct ftrace_probe_ops *ops;
7395         void *count = (void *)-1;
7396         char *number;
7397         int ret;
7398
7399         if (!tr)
7400                 return -ENODEV;
7401
7402         /* hash funcs only work with set_ftrace_filter */
7403         if (!enable)
7404                 return -EINVAL;
7405
7406         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7407
7408         if (glob[0] == '!')
7409                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7410
7411         if (!param)
7412                 goto out_reg;
7413
7414         number = strsep(&param, ":");
7415
7416         if (!strlen(number))
7417                 goto out_reg;
7418
7419         /*
7420          * We use the callback data field (which is a pointer)
7421          * as our counter.
7422          */
7423         ret = kstrtoul(number, 0, (unsigned long *)&count);
7424         if (ret)
7425                 return ret;
7426
7427  out_reg:
7428         ret = tracing_alloc_snapshot_instance(tr);
7429         if (ret < 0)
7430                 goto out;
7431
7432         ret = register_ftrace_function_probe(glob, tr, ops, count);
7433
7434  out:
7435         return ret < 0 ? ret : 0;
7436 }
7437
7438 static struct ftrace_func_command ftrace_snapshot_cmd = {
7439         .name                   = "snapshot",
7440         .func                   = ftrace_trace_snapshot_callback,
7441 };
7442
7443 static __init int register_snapshot_cmd(void)
7444 {
7445         return register_ftrace_command(&ftrace_snapshot_cmd);
7446 }
7447 #else
7448 static inline __init int register_snapshot_cmd(void) { return 0; }
7449 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7450
7451 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7452 {
7453         if (WARN_ON(!tr->dir))
7454                 return ERR_PTR(-ENODEV);
7455
7456         /* Top directory uses NULL as the parent */
7457         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7458                 return NULL;
7459
7460         /* All sub buffers have a descriptor */
7461         return tr->dir;
7462 }
7463
7464 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7465 {
7466         struct dentry *d_tracer;
7467
7468         if (tr->percpu_dir)
7469                 return tr->percpu_dir;
7470
7471         d_tracer = tracing_get_dentry(tr);
7472         if (IS_ERR(d_tracer))
7473                 return NULL;
7474
7475         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7476
7477         WARN_ONCE(!tr->percpu_dir,
7478                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7479
7480         return tr->percpu_dir;
7481 }
7482
7483 static struct dentry *
7484 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7485                       void *data, long cpu, const struct file_operations *fops)
7486 {
7487         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7488
7489         if (ret) /* See tracing_get_cpu() */
7490                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7491         return ret;
7492 }
7493
7494 static void
7495 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7496 {
7497         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7498         struct dentry *d_cpu;
7499         char cpu_dir[30]; /* 30 characters should be more than enough */
7500
7501         if (!d_percpu)
7502                 return;
7503
7504         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7505         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7506         if (!d_cpu) {
7507                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7508                 return;
7509         }
7510
7511         /* per cpu trace_pipe */
7512         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7513                                 tr, cpu, &tracing_pipe_fops);
7514
7515         /* per cpu trace */
7516         trace_create_cpu_file("trace", 0644, d_cpu,
7517                                 tr, cpu, &tracing_fops);
7518
7519         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7520                                 tr, cpu, &tracing_buffers_fops);
7521
7522         trace_create_cpu_file("stats", 0444, d_cpu,
7523                                 tr, cpu, &tracing_stats_fops);
7524
7525         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7526                                 tr, cpu, &tracing_entries_fops);
7527
7528 #ifdef CONFIG_TRACER_SNAPSHOT
7529         trace_create_cpu_file("snapshot", 0644, d_cpu,
7530                                 tr, cpu, &snapshot_fops);
7531
7532         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7533                                 tr, cpu, &snapshot_raw_fops);
7534 #endif
7535 }
7536
7537 #ifdef CONFIG_FTRACE_SELFTEST
7538 /* Let selftest have access to static functions in this file */
7539 #include "trace_selftest.c"
7540 #endif
7541
7542 static ssize_t
7543 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7544                         loff_t *ppos)
7545 {
7546         struct trace_option_dentry *topt = filp->private_data;
7547         char *buf;
7548
7549         if (topt->flags->val & topt->opt->bit)
7550                 buf = "1\n";
7551         else
7552                 buf = "0\n";
7553
7554         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7555 }
7556
7557 static ssize_t
7558 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7559                          loff_t *ppos)
7560 {
7561         struct trace_option_dentry *topt = filp->private_data;
7562         unsigned long val;
7563         int ret;
7564
7565         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7566         if (ret)
7567                 return ret;
7568
7569         if (val != 0 && val != 1)
7570                 return -EINVAL;
7571
7572         if (!!(topt->flags->val & topt->opt->bit) != val) {
7573                 mutex_lock(&trace_types_lock);
7574                 ret = __set_tracer_option(topt->tr, topt->flags,
7575                                           topt->opt, !val);
7576                 mutex_unlock(&trace_types_lock);
7577                 if (ret)
7578                         return ret;
7579         }
7580
7581         *ppos += cnt;
7582
7583         return cnt;
7584 }
7585
7586
7587 static const struct file_operations trace_options_fops = {
7588         .open = tracing_open_generic,
7589         .read = trace_options_read,
7590         .write = trace_options_write,
7591         .llseek = generic_file_llseek,
7592 };
7593
7594 /*
7595  * A trace option file needs to identify both the trace_array it belongs
7596  * to and the index of the flag it represents.  To allow this, the
7597  * trace_array holds a character array, trace_flags_index[], in which
7598  * each element stores its own index: index[0] == 0, index[1] == 1, etc.
7599  * The address of the element for a given flag is what gets passed to
7600  * that flag's option file read/write callbacks.
7601  *
7602  * To extract both the index and the trace_array descriptor,
7603  * get_tr_index() first reads the index:
7604  *
7605  *   idx = *ptr;
7606  *
7607  * which works because the pointer points at the element whose value is
7608  * its own position in the array (remember index[1] == 1).
7609  *
7610  * Subtracting that index from the pointer then lands on the start of
7611  * the array itself:
7612  *
7613  *   ptr - idx == &index[0]
7614  *
7615  * and a simple container_of() on that address gets us to the
7616  * trace_array descriptor.
7617  */
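/*
 * For example, if a core option file's private data is
 * &tr->trace_flags_index[3], then *data == 3, data - 3 points at
 * &tr->trace_flags_index[0], and container_of() on that address recovers tr.
 */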
7618 static void get_tr_index(void *data, struct trace_array **ptr,
7619                          unsigned int *pindex)
7620 {
7621         *pindex = *(unsigned char *)data;
7622
7623         *ptr = container_of(data - *pindex, struct trace_array,
7624                             trace_flags_index);
7625 }
7626
7627 static ssize_t
7628 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7629                         loff_t *ppos)
7630 {
7631         void *tr_index = filp->private_data;
7632         struct trace_array *tr;
7633         unsigned int index;
7634         char *buf;
7635
7636         get_tr_index(tr_index, &tr, &index);
7637
7638         if (tr->trace_flags & (1 << index))
7639                 buf = "1\n";
7640         else
7641                 buf = "0\n";
7642
7643         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7644 }
7645
7646 static ssize_t
7647 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7648                          loff_t *ppos)
7649 {
7650         void *tr_index = filp->private_data;
7651         struct trace_array *tr;
7652         unsigned int index;
7653         unsigned long val;
7654         int ret;
7655
7656         get_tr_index(tr_index, &tr, &index);
7657
7658         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7659         if (ret)
7660                 return ret;
7661
7662         if (val != 0 && val != 1)
7663                 return -EINVAL;
7664
7665         mutex_lock(&trace_types_lock);
7666         ret = set_tracer_flag(tr, 1 << index, val);
7667         mutex_unlock(&trace_types_lock);
7668
7669         if (ret < 0)
7670                 return ret;
7671
7672         *ppos += cnt;
7673
7674         return cnt;
7675 }
7676
7677 static const struct file_operations trace_options_core_fops = {
7678         .open = tracing_open_generic,
7679         .read = trace_options_core_read,
7680         .write = trace_options_core_write,
7681         .llseek = generic_file_llseek,
7682 };
7683
7684 struct dentry *trace_create_file(const char *name,
7685                                  umode_t mode,
7686                                  struct dentry *parent,
7687                                  void *data,
7688                                  const struct file_operations *fops)
7689 {
7690         struct dentry *ret;
7691
7692         ret = tracefs_create_file(name, mode, parent, data, fops);
7693         if (!ret)
7694                 pr_warn("Could not create tracefs '%s' entry\n", name);
7695
7696         return ret;
7697 }
7698
7699
7700 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7701 {
7702         struct dentry *d_tracer;
7703
7704         if (tr->options)
7705                 return tr->options;
7706
7707         d_tracer = tracing_get_dentry(tr);
7708         if (IS_ERR(d_tracer))
7709                 return NULL;
7710
7711         tr->options = tracefs_create_dir("options", d_tracer);
7712         if (!tr->options) {
7713                 pr_warn("Could not create tracefs directory 'options'\n");
7714                 return NULL;
7715         }
7716
7717         return tr->options;
7718 }
7719
7720 static void
7721 create_trace_option_file(struct trace_array *tr,
7722                          struct trace_option_dentry *topt,
7723                          struct tracer_flags *flags,
7724                          struct tracer_opt *opt)
7725 {
7726         struct dentry *t_options;
7727
7728         t_options = trace_options_init_dentry(tr);
7729         if (!t_options)
7730                 return;
7731
7732         topt->flags = flags;
7733         topt->opt = opt;
7734         topt->tr = tr;
7735
7736         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7737                                     &trace_options_fops);
7738
7739 }
7740
7741 static void
7742 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7743 {
7744         struct trace_option_dentry *topts;
7745         struct trace_options *tr_topts;
7746         struct tracer_flags *flags;
7747         struct tracer_opt *opts;
7748         int cnt;
7749         int i;
7750
7751         if (!tracer)
7752                 return;
7753
7754         flags = tracer->flags;
7755
7756         if (!flags || !flags->opts)
7757                 return;
7758
7759         /*
7760          * If this is an instance, only create flags for tracers
7761          * the instance may have.
7762          */
7763         if (!trace_ok_for_array(tracer, tr))
7764                 return;
7765
7766         for (i = 0; i < tr->nr_topts; i++) {
7767                 /* Make sure there are no duplicate flags. */
7768                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7769                         return;
7770         }
7771
7772         opts = flags->opts;
7773
7774         for (cnt = 0; opts[cnt].name; cnt++)
7775                 ;
7776
7777         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7778         if (!topts)
7779                 return;
7780
7781         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7782                             GFP_KERNEL);
7783         if (!tr_topts) {
7784                 kfree(topts);
7785                 return;
7786         }
7787
7788         tr->topts = tr_topts;
7789         tr->topts[tr->nr_topts].tracer = tracer;
7790         tr->topts[tr->nr_topts].topts = topts;
7791         tr->nr_topts++;
7792
7793         for (cnt = 0; opts[cnt].name; cnt++) {
7794                 create_trace_option_file(tr, &topts[cnt], flags,
7795                                          &opts[cnt]);
7796                 WARN_ONCE(topts[cnt].entry == NULL,
7797                           "Failed to create trace option: %s",
7798                           opts[cnt].name);
7799         }
7800 }
7801
7802 static struct dentry *
7803 create_trace_option_core_file(struct trace_array *tr,
7804                               const char *option, long index)
7805 {
7806         struct dentry *t_options;
7807
7808         t_options = trace_options_init_dentry(tr);
7809         if (!t_options)
7810                 return NULL;
7811
7812         return trace_create_file(option, 0644, t_options,
7813                                  (void *)&tr->trace_flags_index[index],
7814                                  &trace_options_core_fops);
7815 }
7816
7817 static void create_trace_options_dir(struct trace_array *tr)
7818 {
7819         struct dentry *t_options;
7820         bool top_level = tr == &global_trace;
7821         int i;
7822
7823         t_options = trace_options_init_dentry(tr);
7824         if (!t_options)
7825                 return;
7826
7827         for (i = 0; trace_options[i]; i++) {
7828                 if (top_level ||
7829                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7830                         create_trace_option_core_file(tr, trace_options[i], i);
7831         }
7832 }
7833
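/*
 * "tracing_on" file support: reading reports whether the ring buffer is
 * currently recording, and writing 0 or 1 turns recording off or on,
 * invoking the current tracer's stop/start callbacks when the state
 * actually changes.
 */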
7834 static ssize_t
7835 rb_simple_read(struct file *filp, char __user *ubuf,
7836                size_t cnt, loff_t *ppos)
7837 {
7838         struct trace_array *tr = filp->private_data;
7839         char buf[64];
7840         int r;
7841
7842         r = tracer_tracing_is_on(tr);
7843         r = sprintf(buf, "%d\n", r);
7844
7845         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7846 }
7847
7848 static ssize_t
7849 rb_simple_write(struct file *filp, const char __user *ubuf,
7850                 size_t cnt, loff_t *ppos)
7851 {
7852         struct trace_array *tr = filp->private_data;
7853         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7854         unsigned long val;
7855         int ret;
7856
7857         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7858         if (ret)
7859                 return ret;
7860
7861         if (buffer) {
7862                 mutex_lock(&trace_types_lock);
7863                 if (!!val == tracer_tracing_is_on(tr)) {
7864                         val = 0; /* do nothing */
7865                 } else if (val) {
7866                         tracer_tracing_on(tr);
7867                         if (tr->current_trace->start)
7868                                 tr->current_trace->start(tr);
7869                 } else {
7870                         tracer_tracing_off(tr);
7871                         if (tr->current_trace->stop)
7872                                 tr->current_trace->stop(tr);
7873                 }
7874                 mutex_unlock(&trace_types_lock);
7875         }
7876
7877         (*ppos)++;
7878
7879         return cnt;
7880 }
7881
7882 static const struct file_operations rb_simple_fops = {
7883         .open           = tracing_open_generic_tr,
7884         .read           = rb_simple_read,
7885         .write          = rb_simple_write,
7886         .release        = tracing_release_generic_tr,
7887         .llseek         = default_llseek,
7888 };
7889
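/*
 * "buffer_percent" controls how full a per-cpu ring buffer must be before
 * blocked readers are woken up (see the wait_on_pipe() call in
 * tracing_buffers_splice_read()).  Values up to 100 are accepted; a write
 * of 0 is stored as 1.
 */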
7890 static ssize_t
7891 buffer_percent_read(struct file *filp, char __user *ubuf,
7892                     size_t cnt, loff_t *ppos)
7893 {
7894         struct trace_array *tr = filp->private_data;
7895         char buf[64];
7896         int r;
7897
7898         r = tr->buffer_percent;
7899         r = sprintf(buf, "%d\n", r);
7900
7901         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7902 }
7903
7904 static ssize_t
7905 buffer_percent_write(struct file *filp, const char __user *ubuf,
7906                      size_t cnt, loff_t *ppos)
7907 {
7908         struct trace_array *tr = filp->private_data;
7909         unsigned long val;
7910         int ret;
7911
7912         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7913         if (ret)
7914                 return ret;
7915
7916         if (val > 100)
7917                 return -EINVAL;
7918
7919         if (!val)
7920                 val = 1;
7921
7922         tr->buffer_percent = val;
7923
7924         (*ppos)++;
7925
7926         return cnt;
7927 }
7928
7929 static const struct file_operations buffer_percent_fops = {
7930         .open           = tracing_open_generic_tr,
7931         .read           = buffer_percent_read,
7932         .write          = buffer_percent_write,
7933         .release        = tracing_release_generic_tr,
7934         .llseek         = default_llseek,
7935 };
7936
7937 struct dentry *trace_instance_dir;
7938
7939 static void
7940 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7941
7942 static int
7943 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7944 {
7945         enum ring_buffer_flags rb_flags;
7946
7947         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7948
7949         buf->tr = tr;
7950
7951         buf->buffer = ring_buffer_alloc(size, rb_flags);
7952         if (!buf->buffer)
7953                 return -ENOMEM;
7954
7955         buf->data = alloc_percpu(struct trace_array_cpu);
7956         if (!buf->data) {
7957                 ring_buffer_free(buf->buffer);
7958                 buf->buffer = NULL;
7959                 return -ENOMEM;
7960         }
7961
7962         /* Record the allocated size for the per-cpu size bookkeeping */
7963         set_buffer_entries(&tr->trace_buffer,
7964                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7965
7966         return 0;
7967 }
7968
7969 static int allocate_trace_buffers(struct trace_array *tr, int size)
7970 {
7971         int ret;
7972
7973         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7974         if (ret)
7975                 return ret;
7976
7977 #ifdef CONFIG_TRACER_MAX_TRACE
7978         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7979                                     allocate_snapshot ? size : 1);
7980         if (WARN_ON(ret)) {
7981                 ring_buffer_free(tr->trace_buffer.buffer);
7982                 tr->trace_buffer.buffer = NULL;
7983                 free_percpu(tr->trace_buffer.data);
7984                 tr->trace_buffer.data = NULL;
7985                 return -ENOMEM;
7986         }
7987         tr->allocated_snapshot = allocate_snapshot;
7988
7989         /*
7990          * Only the top level trace array gets its snapshot allocated
7991          * from the kernel command line.
7992          */
7993         allocate_snapshot = false;
7994 #endif
7995         return 0;
7996 }
7997
7998 static void free_trace_buffer(struct trace_buffer *buf)
7999 {
8000         if (buf->buffer) {
8001                 ring_buffer_free(buf->buffer);
8002                 buf->buffer = NULL;
8003                 free_percpu(buf->data);
8004                 buf->data = NULL;
8005         }
8006 }
8007
8008 static void free_trace_buffers(struct trace_array *tr)
8009 {
8010         if (!tr)
8011                 return;
8012
8013         free_trace_buffer(&tr->trace_buffer);
8014
8015 #ifdef CONFIG_TRACER_MAX_TRACE
8016         free_trace_buffer(&tr->max_buffer);
8017 #endif
8018 }
8019
8020 static void init_trace_flags_index(struct trace_array *tr)
8021 {
8022         int i;
8023
8024         /* Used by the trace options files */
8025         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8026                 tr->trace_flags_index[i] = i;
8027 }
8028
8029 static void __update_tracer_options(struct trace_array *tr)
8030 {
8031         struct tracer *t;
8032
8033         for (t = trace_types; t; t = t->next)
8034                 add_tracer_options(tr, t);
8035 }
8036
8037 static void update_tracer_options(struct trace_array *tr)
8038 {
8039         mutex_lock(&trace_types_lock);
8040         __update_tracer_options(tr);
8041         mutex_unlock(&trace_types_lock);
8042 }
8043
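/*
 * Back end for creating a new tracing instance, e.g. (mount point may
 * differ):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *
 * Allocates a new trace_array with its own buffers, tracefs directory
 * and event files, and links it into ftrace_trace_arrays.
 */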
8044 static int instance_mkdir(const char *name)
8045 {
8046         struct trace_array *tr;
8047         int ret;
8048
8049         mutex_lock(&event_mutex);
8050         mutex_lock(&trace_types_lock);
8051
8052         ret = -EEXIST;
8053         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8054                 if (tr->name && strcmp(tr->name, name) == 0)
8055                         goto out_unlock;
8056         }
8057
8058         ret = -ENOMEM;
8059         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8060         if (!tr)
8061                 goto out_unlock;
8062
8063         tr->name = kstrdup(name, GFP_KERNEL);
8064         if (!tr->name)
8065                 goto out_free_tr;
8066
8067         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8068                 goto out_free_tr;
8069
8070         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8071
8072         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8073
8074         raw_spin_lock_init(&tr->start_lock);
8075
8076         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8077
8078         tr->current_trace = &nop_trace;
8079
8080         INIT_LIST_HEAD(&tr->systems);
8081         INIT_LIST_HEAD(&tr->events);
8082         INIT_LIST_HEAD(&tr->hist_vars);
8083
8084         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8085                 goto out_free_tr;
8086
8087         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8088         if (!tr->dir)
8089                 goto out_free_tr;
8090
8091         ret = event_trace_add_tracer(tr->dir, tr);
8092         if (ret) {
8093                 tracefs_remove_recursive(tr->dir);
8094                 goto out_free_tr;
8095         }
8096
8097         ftrace_init_trace_array(tr);
8098
8099         init_tracer_tracefs(tr, tr->dir);
8100         init_trace_flags_index(tr);
8101         __update_tracer_options(tr);
8102
8103         list_add(&tr->list, &ftrace_trace_arrays);
8104
8105         mutex_unlock(&trace_types_lock);
8106         mutex_unlock(&event_mutex);
8107
8108         return 0;
8109
8110  out_free_tr:
8111         free_trace_buffers(tr);
8112         free_cpumask_var(tr->tracing_cpumask);
8113         kfree(tr->name);
8114         kfree(tr);
8115
8116  out_unlock:
8117         mutex_unlock(&trace_types_lock);
8118         mutex_unlock(&event_mutex);
8119
8120         return ret;
8121
8122 }
8123
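/*
 * Back end for removing a tracing instance (rmdir of an instances/
 * directory).  Returns -ENODEV if no instance of that name exists and
 * -EBUSY while something still holds a reference on the instance.
 */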
8124 static int instance_rmdir(const char *name)
8125 {
8126         struct trace_array *tr;
8127         int found = 0;
8128         int ret;
8129         int i;
8130
8131         mutex_lock(&event_mutex);
8132         mutex_lock(&trace_types_lock);
8133
8134         ret = -ENODEV;
8135         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8136                 if (tr->name && strcmp(tr->name, name) == 0) {
8137                         found = 1;
8138                         break;
8139                 }
8140         }
8141         if (!found)
8142                 goto out_unlock;
8143
8144         ret = -EBUSY;
8145         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8146                 goto out_unlock;
8147
8148         list_del(&tr->list);
8149
8150         /* Disable all the flags that were enabled coming in */
8151         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8152                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8153                         set_tracer_flag(tr, 1 << i, 0);
8154         }
8155
8156         tracing_set_nop(tr);
8157         clear_ftrace_function_probes(tr);
8158         event_trace_del_tracer(tr);
8159         ftrace_clear_pids(tr);
8160         ftrace_destroy_function_files(tr);
8161         tracefs_remove_recursive(tr->dir);
8162         free_trace_buffers(tr);
8163
8164         for (i = 0; i < tr->nr_topts; i++) {
8165                 kfree(tr->topts[i].topts);
8166         }
8167         kfree(tr->topts);
8168
8169         free_cpumask_var(tr->tracing_cpumask);
8170         kfree(tr->name);
8171         kfree(tr);
8172
8173         ret = 0;
8174
8175  out_unlock:
8176         mutex_unlock(&trace_types_lock);
8177         mutex_unlock(&event_mutex);
8178
8179         return ret;
8180 }
8181
8182 static __init void create_trace_instances(struct dentry *d_tracer)
8183 {
8184         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8185                                                          instance_mkdir,
8186                                                          instance_rmdir);
8187         if (WARN_ON(!trace_instance_dir))
8188                 return;
8189 }
8190
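/*
 * Create the per-instance tracefs control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, trace_marker, ...) under d_tracer.  Used
 * both for the top level tracing directory and for each instance.
 */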
8191 static void
8192 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8193 {
8194         struct trace_event_file *file;
8195         int cpu;
8196
8197         trace_create_file("available_tracers", 0444, d_tracer,
8198                         tr, &show_traces_fops);
8199
8200         trace_create_file("current_tracer", 0644, d_tracer,
8201                         tr, &set_tracer_fops);
8202
8203         trace_create_file("tracing_cpumask", 0644, d_tracer,
8204                           tr, &tracing_cpumask_fops);
8205
8206         trace_create_file("trace_options", 0644, d_tracer,
8207                           tr, &tracing_iter_fops);
8208
8209         trace_create_file("trace", 0644, d_tracer,
8210                           tr, &tracing_fops);
8211
8212         trace_create_file("trace_pipe", 0444, d_tracer,
8213                           tr, &tracing_pipe_fops);
8214
8215         trace_create_file("buffer_size_kb", 0644, d_tracer,
8216                           tr, &tracing_entries_fops);
8217
8218         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8219                           tr, &tracing_total_entries_fops);
8220
8221         trace_create_file("free_buffer", 0200, d_tracer,
8222                           tr, &tracing_free_buffer_fops);
8223
8224         trace_create_file("trace_marker", 0220, d_tracer,
8225                           tr, &tracing_mark_fops);
8226
8227         file = __find_event_file(tr, "ftrace", "print");
8228         if (file && file->dir)
8229                 trace_create_file("trigger", 0644, file->dir, file,
8230                                   &event_trigger_fops);
8231         tr->trace_marker_file = file;
8232
8233         trace_create_file("trace_marker_raw", 0220, d_tracer,
8234                           tr, &tracing_mark_raw_fops);
8235
8236         trace_create_file("trace_clock", 0644, d_tracer, tr,
8237                           &trace_clock_fops);
8238
8239         trace_create_file("tracing_on", 0644, d_tracer,
8240                           tr, &rb_simple_fops);
8241
8242         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8243                           &trace_time_stamp_mode_fops);
8244
8245         tr->buffer_percent = 50;
8246
8247         trace_create_file("buffer_percent", 0444, d_tracer,
8248                         tr, &buffer_percent_fops);
8249
8250         create_trace_options_dir(tr);
8251
8252 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8253         trace_create_file("tracing_max_latency", 0644, d_tracer,
8254                         &tr->max_latency, &tracing_max_lat_fops);
8255 #endif
8256
8257         if (ftrace_create_function_files(tr, d_tracer))
8258                 WARN(1, "Could not allocate function filter files");
8259
8260 #ifdef CONFIG_TRACER_SNAPSHOT
8261         trace_create_file("snapshot", 0644, d_tracer,
8262                           tr, &snapshot_fops);
8263 #endif
8264
8265         for_each_tracing_cpu(cpu)
8266                 tracing_init_tracefs_percpu(tr, cpu);
8267
8268         ftrace_init_tracefs(tr, d_tracer);
8269 }
8270
8271 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8272 {
8273         struct vfsmount *mnt;
8274         struct file_system_type *type;
8275
8276         /*
8277          * To maintain backward compatibility for tools that mount
8278          * debugfs to get to the tracing facility, tracefs is automatically
8279          * mounted to the debugfs/tracing directory.
8280          */
8281         type = get_fs_type("tracefs");
8282         if (!type)
8283                 return NULL;
8284         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8285         put_filesystem(type);
8286         if (IS_ERR(mnt))
8287                 return NULL;
8288         mntget(mnt);
8289
8290         return mnt;
8291 }
8292
8293 /**
8294  * tracing_init_dentry - initialize top level trace array
8295  *
8296  * This is called when creating files or directories in the tracing
8297  * directory. It is called from fs_initcall() boot-up code and is
8298  * expected to return the dentry of the top level tracing directory.
8299  */
8300 struct dentry *tracing_init_dentry(void)
8301 {
8302         struct trace_array *tr = &global_trace;
8303
8304         /* The top level trace array uses NULL as parent */
8305         if (tr->dir)
8306                 return NULL;
8307
8308         if (WARN_ON(!tracefs_initialized()) ||
8309                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8310                  WARN_ON(!debugfs_initialized())))
8311                 return ERR_PTR(-ENODEV);
8312
8313         /*
8314          * As there may still be users that expect the tracing
8315          * files to exist in debugfs/tracing, we must automount
8316          * the tracefs file system there, so older tools still
8317          * work with the newer kernel.
8318          */
8319         tr->dir = debugfs_create_automount("tracing", NULL,
8320                                            trace_automount, NULL);
8321         if (!tr->dir) {
8322                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8323                 return ERR_PTR(-ENOMEM);
8324         }
8325
8326         return NULL;
8327 }
8328
8329 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8330 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8331
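/*
 * Register the eval maps built into the core kernel.  The section
 * boundaries above delimit the maps collected at link time.
 */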
8332 static void __init trace_eval_init(void)
8333 {
8334         int len;
8335
8336         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8337         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8338 }
8339
8340 #ifdef CONFIG_MODULES
8341 static void trace_module_add_evals(struct module *mod)
8342 {
8343         if (!mod->num_trace_evals)
8344                 return;
8345
8346         /*
8347          * Modules with bad taint do not have events created, so do
8348          * not bother with their eval maps (enums) either.
8349          */
8350         if (trace_module_has_bad_taint(mod))
8351                 return;
8352
8353         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8354 }
8355
8356 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8357 static void trace_module_remove_evals(struct module *mod)
8358 {
8359         union trace_eval_map_item *map;
8360         union trace_eval_map_item **last = &trace_eval_maps;
8361
8362         if (!mod->num_trace_evals)
8363                 return;
8364
8365         mutex_lock(&trace_eval_mutex);
8366
8367         map = trace_eval_maps;
8368
8369         while (map) {
8370                 if (map->head.mod == mod)
8371                         break;
8372                 map = trace_eval_jmp_to_tail(map);
8373                 last = &map->tail.next;
8374                 map = map->tail.next;
8375         }
8376         if (!map)
8377                 goto out;
8378
8379         *last = trace_eval_jmp_to_tail(map)->tail.next;
8380         kfree(map);
8381  out:
8382         mutex_unlock(&trace_eval_mutex);
8383 }
8384 #else
8385 static inline void trace_module_remove_evals(struct module *mod) { }
8386 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8387
8388 static int trace_module_notify(struct notifier_block *self,
8389                                unsigned long val, void *data)
8390 {
8391         struct module *mod = data;
8392
8393         switch (val) {
8394         case MODULE_STATE_COMING:
8395                 trace_module_add_evals(mod);
8396                 break;
8397         case MODULE_STATE_GOING:
8398                 trace_module_remove_evals(mod);
8399                 break;
8400         }
8401
8402         return 0;
8403 }
8404
8405 static struct notifier_block trace_module_nb = {
8406         .notifier_call = trace_module_notify,
8407         .priority = 0,
8408 };
8409 #endif /* CONFIG_MODULES */
8410
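/*
 * fs_initcall: create the top level tracing directory and populate it
 * with the global control files (README, saved_cmdlines, tracing_thresh,
 * the "instances" directory, ...), then add the per-tracer option files.
 */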
8411 static __init int tracer_init_tracefs(void)
8412 {
8413         struct dentry *d_tracer;
8414
8415         trace_access_lock_init();
8416
8417         d_tracer = tracing_init_dentry();
8418         if (IS_ERR(d_tracer))
8419                 return 0;
8420
8421         event_trace_init();
8422
8423         init_tracer_tracefs(&global_trace, d_tracer);
8424         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8425
8426         trace_create_file("tracing_thresh", 0644, d_tracer,
8427                         &global_trace, &tracing_thresh_fops);
8428
8429         trace_create_file("README", 0444, d_tracer,
8430                         NULL, &tracing_readme_fops);
8431
8432         trace_create_file("saved_cmdlines", 0444, d_tracer,
8433                         NULL, &tracing_saved_cmdlines_fops);
8434
8435         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8436                           NULL, &tracing_saved_cmdlines_size_fops);
8437
8438         trace_create_file("saved_tgids", 0444, d_tracer,
8439                         NULL, &tracing_saved_tgids_fops);
8440
8441         trace_eval_init();
8442
8443         trace_create_eval_file(d_tracer);
8444
8445 #ifdef CONFIG_MODULES
8446         register_module_notifier(&trace_module_nb);
8447 #endif
8448
8449 #ifdef CONFIG_DYNAMIC_FTRACE
8450         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8451                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8452 #endif
8453
8454         create_trace_instances(d_tracer);
8455
8456         update_tracer_options(&global_trace);
8457
8458         return 0;
8459 }
8460
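/*
 * Panic and die notifiers: if ftrace_dump_on_oops is set, dump the
 * ftrace ring buffer contents to the console when the kernel panics
 * or oopses.
 */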
8461 static int trace_panic_handler(struct notifier_block *this,
8462                                unsigned long event, void *unused)
8463 {
8464         if (ftrace_dump_on_oops)
8465                 ftrace_dump(ftrace_dump_on_oops);
8466         return NOTIFY_OK;
8467 }
8468
8469 static struct notifier_block trace_panic_notifier = {
8470         .notifier_call  = trace_panic_handler,
8471         .next           = NULL,
8472         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8473 };
8474
8475 static int trace_die_handler(struct notifier_block *self,
8476                              unsigned long val,
8477                              void *data)
8478 {
8479         switch (val) {
8480         case DIE_OOPS:
8481                 if (ftrace_dump_on_oops)
8482                         ftrace_dump(ftrace_dump_on_oops);
8483                 break;
8484         default:
8485                 break;
8486         }
8487         return NOTIFY_OK;
8488 }
8489
8490 static struct notifier_block trace_die_notifier = {
8491         .notifier_call = trace_die_handler,
8492         .priority = 200
8493 };
8494
8495 /*
8496  * printk is set to a max of 1024, but we really don't need it that big.
8497  * Nothing should be printing 1000 characters anyway.
8498  */
8499 #define TRACE_MAX_PRINT         1000
8500
8501 /*
8502  * Define here KERN_TRACE so that we have one place to modify
8503  * it if we decide to change what log level the ftrace dump
8504  * should be at.
8505  */
8506 #define KERN_TRACE              KERN_EMERG
8507
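/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level,
 * clamping the length defensively, then reinitialize the seq for reuse.
 * Used by ftrace_dump() below.
 */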
8508 void
8509 trace_printk_seq(struct trace_seq *s)
8510 {
8511         /* Probably should print a warning here. */
8512         if (s->seq.len >= TRACE_MAX_PRINT)
8513                 s->seq.len = TRACE_MAX_PRINT;
8514
8515         /*
8516          * More paranoid code. Although the buffer size is set to
8517          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8518          * an extra layer of protection.
8519          */
8520         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8521                 s->seq.len = s->seq.size - 1;
8522
8523         /* should be NUL-terminated already, but we are paranoid. */
8524         s->buffer[s->seq.len] = 0;
8525
8526         printk(KERN_TRACE "%s", s->buffer);
8527
8528         trace_seq_init(s);
8529 }
8530
8531 void trace_init_global_iter(struct trace_iterator *iter)
8532 {
8533         iter->tr = &global_trace;
8534         iter->trace = iter->tr->current_trace;
8535         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8536         iter->trace_buffer = &global_trace.trace_buffer;
8537
8538         if (iter->trace && iter->trace->open)
8539                 iter->trace->open(iter);
8540
8541         /* Annotate start of buffers if we had overruns */
8542         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8543                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8544
8545         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8546         if (trace_clocks[iter->tr->clock_id].in_ns)
8547                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8548 }
8549
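/*
 * ftrace_dump - dump the ring buffer(s) to the console
 *
 * Tracing is turned off, per-CPU recording is disabled, and the buffer
 * is drained line by line through trace_printk_seq().  Only a single
 * dump may be in progress at a time (see dump_running).
 */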
8550 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8551 {
8552         /* use static because iter can be a bit big for the stack */
8553         static struct trace_iterator iter;
8554         static atomic_t dump_running;
8555         struct trace_array *tr = &global_trace;
8556         unsigned int old_userobj;
8557         unsigned long flags;
8558         int cnt = 0, cpu;
8559
8560         /* Only allow one dump user at a time. */
8561         if (atomic_inc_return(&dump_running) != 1) {
8562                 atomic_dec(&dump_running);
8563                 return;
8564         }
8565
8566         /*
8567          * Always turn off tracing when we dump.
8568          * We don't need to show trace output of what happens
8569          * between multiple crashes.
8570          *
8571          * If the user does a sysrq-z, then they can re-enable
8572          * tracing with echo 1 > tracing_on.
8573          */
8574         tracing_off();
8575
8576         local_irq_save(flags);
8577         printk_nmi_direct_enter();
8578
8579         /* Simulate the iterator */
8580         trace_init_global_iter(&iter);
8581
8582         for_each_tracing_cpu(cpu) {
8583                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8584         }
8585
8586         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8587
8588         /* don't look at user memory in panic mode */
8589         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8590
8591         switch (oops_dump_mode) {
8592         case DUMP_ALL:
8593                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8594                 break;
8595         case DUMP_ORIG:
8596                 iter.cpu_file = raw_smp_processor_id();
8597                 break;
8598         case DUMP_NONE:
8599                 goto out_enable;
8600         default:
8601                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8602                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8603         }
8604
8605         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8606
8607         /* Did function tracer already get disabled? */
8608         if (ftrace_is_dead()) {
8609                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8610                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8611         }
8612
8613         /*
8614          * We need to stop all tracing on all CPUs to read
8615          * the next buffer. This is a bit expensive, but is
8616          * not done often. We read everything we can,
8617          * and then release the locks again.
8618          */
8619
8620         while (!trace_empty(&iter)) {
8621
8622                 if (!cnt)
8623                         printk(KERN_TRACE "---------------------------------\n");
8624
8625                 cnt++;
8626
8627                 /* reset all but tr, trace, and overruns */
8628                 memset(&iter.seq, 0,
8629                        sizeof(struct trace_iterator) -
8630                        offsetof(struct trace_iterator, seq));
8631                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8632                 iter.pos = -1;
8633
8634                 if (trace_find_next_entry_inc(&iter) != NULL) {
8635                         int ret;
8636
8637                         ret = print_trace_line(&iter);
8638                         if (ret != TRACE_TYPE_NO_CONSUME)
8639                                 trace_consume(&iter);
8640                 }
8641                 touch_nmi_watchdog();
8642
8643                 trace_printk_seq(&iter.seq);
8644         }
8645
8646         if (!cnt)
8647                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8648         else
8649                 printk(KERN_TRACE "---------------------------------\n");
8650
8651  out_enable:
8652         tr->trace_flags |= old_userobj;
8653
8654         for_each_tracing_cpu(cpu) {
8655                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8656         }
8657         atomic_dec(&dump_running);
8658         printk_nmi_direct_exit();
8659         local_irq_restore(flags);
8660 }
8661 EXPORT_SYMBOL_GPL(ftrace_dump);
8662
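/*
 * Split "buf" into whitespace separated arguments and hand them to
 * "createfn".  Used by trace_parse_run_command() below (and by other
 * command parsers).
 */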
8663 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8664 {
8665         char **argv;
8666         int argc, ret;
8667
8668         argc = 0;
8669         ret = 0;
8670         argv = argv_split(GFP_KERNEL, buf, &argc);
8671         if (!argv)
8672                 return -ENOMEM;
8673
8674         if (argc)
8675                 ret = createfn(argc, argv);
8676
8677         argv_free(argv);
8678
8679         return ret;
8680 }
8681
8682 #define WRITE_BUFSIZE  4096
8683
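/*
 * Read command text from user space, split it on newlines, strip '#'
 * comments, and feed each complete line to "createfn" via
 * trace_run_command().  Lines longer than WRITE_BUFSIZE - 2 characters
 * are rejected with -EINVAL.
 */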
8684 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8685                                 size_t count, loff_t *ppos,
8686                                 int (*createfn)(int, char **))
8687 {
8688         char *kbuf, *buf, *tmp;
8689         int ret = 0;
8690         size_t done = 0;
8691         size_t size;
8692
8693         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8694         if (!kbuf)
8695                 return -ENOMEM;
8696
8697         while (done < count) {
8698                 size = count - done;
8699
8700                 if (size >= WRITE_BUFSIZE)
8701                         size = WRITE_BUFSIZE - 1;
8702
8703                 if (copy_from_user(kbuf, buffer + done, size)) {
8704                         ret = -EFAULT;
8705                         goto out;
8706                 }
8707                 kbuf[size] = '\0';
8708                 buf = kbuf;
8709                 do {
8710                         tmp = strchr(buf, '\n');
8711                         if (tmp) {
8712                                 *tmp = '\0';
8713                                 size = tmp - buf + 1;
8714                         } else {
8715                                 size = strlen(buf);
8716                                 if (done + size < count) {
8717                                         if (buf != kbuf)
8718                                                 break;
8719                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8720                                         pr_warn("Line length is too long: Should be less than %d\n",
8721                                                 WRITE_BUFSIZE - 2);
8722                                         ret = -EINVAL;
8723                                         goto out;
8724                                 }
8725                         }
8726                         done += size;
8727
8728                         /* Remove comments */
8729                         tmp = strchr(buf, '#');
8730
8731                         if (tmp)
8732                                 *tmp = '\0';
8733
8734                         ret = trace_run_command(buf, createfn);
8735                         if (ret)
8736                                 goto out;
8737                         buf += size;
8738
8739                 } while (done < count);
8740         }
8741         ret = done;
8742
8743 out:
8744         kfree(kbuf);
8745
8746         return ret;
8747 }
8748
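/*
 * Main tracing bring-up: allocate the cpumasks, the global ring buffer
 * and the temp_buffer used by event triggers, register the nop tracer,
 * and hook up the panic/die notifiers.  Called from early_trace_init().
 */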
8749 __init static int tracer_alloc_buffers(void)
8750 {
8751         int ring_buf_size;
8752         int ret = -ENOMEM;
8753
8754         /*
8755          * Make sure we don't accidentally add more trace options
8756          * than we have bits for.
8757          */
8758         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8759
8760         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8761                 goto out;
8762
8763         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8764                 goto out_free_buffer_mask;
8765
8766         /* Only allocate trace_printk buffers if a trace_printk exists */
8767         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8768                 /* Must be called before global_trace.buffer is allocated */
8769                 trace_printk_init_buffers();
8770
8771         /* To save memory, keep the ring buffer size to its minimum */
8772         if (ring_buffer_expanded)
8773                 ring_buf_size = trace_buf_size;
8774         else
8775                 ring_buf_size = 1;
8776
8777         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8778         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8779
8780         raw_spin_lock_init(&global_trace.start_lock);
8781
8782         /*
8783          * The prepare callback allocates some memory for the ring buffer. We
8784          * don't free the buffer if the CPU goes down. If we were to free
8785          * the buffer, then the user would lose any trace that was in the
8786          * buffer. The memory will be removed once the "instance" is removed.
8787          */
8788         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8789                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8790                                       NULL);
8791         if (ret < 0)
8792                 goto out_free_cpumask;
8793         /* Used for event triggers */
8794         ret = -ENOMEM;
8795         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8796         if (!temp_buffer)
8797                 goto out_rm_hp_state;
8798
8799         if (trace_create_savedcmd() < 0)
8800                 goto out_free_temp_buffer;
8801
8802         /* TODO: make the number of buffers hot pluggable with CPUs */
8803         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8804                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8805                 WARN_ON(1);
8806                 goto out_free_savedcmd;
8807         }
8808
8809         if (global_trace.buffer_disabled)
8810                 tracing_off();
8811
8812         if (trace_boot_clock) {
8813                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8814                 if (ret < 0)
8815                         pr_warn("Trace clock %s not defined, going back to default\n",
8816                                 trace_boot_clock);
8817         }
8818
8819         /*
8820          * register_tracer() might reference current_trace, so it
8821          * needs to be set before we register anything. This is
8822          * just a bootstrap of current_trace anyway.
8823          */
8824         global_trace.current_trace = &nop_trace;
8825
8826         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8827
8828         ftrace_init_global_array_ops(&global_trace);
8829
8830         init_trace_flags_index(&global_trace);
8831
8832         register_tracer(&nop_trace);
8833
8834         /* Function tracing may start here (via kernel command line) */
8835         init_function_trace();
8836
8837         /* All seems OK, enable tracing */
8838         tracing_disabled = 0;
8839
8840         atomic_notifier_chain_register(&panic_notifier_list,
8841                                        &trace_panic_notifier);
8842
8843         register_die_notifier(&trace_die_notifier);
8844
8845         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8846
8847         INIT_LIST_HEAD(&global_trace.systems);
8848         INIT_LIST_HEAD(&global_trace.events);
8849         INIT_LIST_HEAD(&global_trace.hist_vars);
8850         list_add(&global_trace.list, &ftrace_trace_arrays);
8851
8852         apply_trace_boot_options();
8853
8854         register_snapshot_cmd();
8855
8856         return 0;
8857
8858 out_free_savedcmd:
8859         free_saved_cmdlines_buffer(savedcmd);
8860 out_free_temp_buffer:
8861         ring_buffer_free(temp_buffer);
8862 out_rm_hp_state:
8863         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8864 out_free_cpumask:
8865         free_cpumask_var(global_trace.tracing_cpumask);
8866 out_free_buffer_mask:
8867         free_cpumask_var(tracing_buffer_mask);
8868 out:
8869         return ret;
8870 }
8871
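/*
 * Boot ordering: early_trace_init() runs early in boot (from
 * start_kernel()) so that trace_printk() and boot-time tracers have
 * buffers available; trace_init() follows once the trace event
 * infrastructure can be set up.
 */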
8872 void __init early_trace_init(void)
8873 {
8874         if (tracepoint_printk) {
8875                 tracepoint_print_iter =
8876                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8877                 if (WARN_ON(!tracepoint_print_iter))
8878                         tracepoint_printk = 0;
8879                 else
8880                         static_key_enable(&tracepoint_printk_key.key);
8881         }
8882         tracer_alloc_buffers();
8883 }
8884
8885 void __init trace_init(void)
8886 {
8887         trace_event_init();
8888 }
8889
8890 __init static int clear_boot_tracer(void)
8891 {
8892         /*
8893          * The default bootup tracer string lives in an init section.
8894          * This function is called from a late initcall. If we did not
8895          * find the boot tracer, then clear it out, to prevent
8896          * later registration from accessing the buffer that is
8897          * about to be freed.
8898          */
8899         if (!default_bootup_tracer)
8900                 return 0;
8901
8902         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8903                default_bootup_tracer);
8904         default_bootup_tracer = NULL;
8905
8906         return 0;
8907 }
8908
8909 fs_initcall(tracer_init_tracefs);
8910 late_initcall_sync(clear_boot_tracer);
8911
8912 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8913 __init static int tracing_set_default_clock(void)
8914 {
8915         /* sched_clock_stable() is determined in late_initcall */
8916         if (!trace_boot_clock && !sched_clock_stable()) {
8917                 printk(KERN_WARNING
8918                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8919                        "If you want to keep using the local clock, then add:\n"
8920                        "  \"trace_clock=local\"\n"
8921                        "on the kernel command line\n");
8922                 tracing_set_clock(&global_trace, "global");
8923         }
8924
8925         return 0;
8926 }
8927 late_initcall_sync(tracing_set_default_clock);
8928 #endif