1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
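
/*
 * Illustrative sketch (not part of the kernel, helper name is
 * hypothetical): walking the saved eval maps with the layout described
 * above: a head item, then head.length map items, then a tail item
 * whose tail.next points at the head of the next saved array (or NULL).
 * A real walker would hold trace_eval_mutex while doing this.
 */
static __maybe_unused void trace_eval_maps_walk_example(void)
{
        union trace_eval_map_item *ptr = trace_eval_maps;

        while (ptr) {
                unsigned long i, len = ptr->head.length;

                /* The map items follow directly after the head item */
                for (i = 0; i < len; i++) {
                        struct trace_eval_map *map = &ptr[1 + i].map;

                        pr_info("%s: %s = %lu\n", map->system,
                                map->eval_string, map->eval_value);
                }
                /* The tail item links to the next saved array, if any */
                ptr = ptr[1 + len].tail.next;
        }
}
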
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct ring_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
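
/*
 * Worked example (illustrative only): ns2usecs(1499) == 1 and
 * ns2usecs(1500) == 2. The "+ 500" makes the division by 1000 round
 * to the nearest microsecond instead of truncating.
 */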
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
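
/*
 * Illustrative sketch (hypothetical callbacks, not part of the kernel):
 * the usual pairing of the two helpers above in a tracefs file. The
 * open callback checks lockdown/tracing_disabled and takes a reference
 * on the trace array; the release callback drops that reference again.
 */
static __maybe_unused int example_open_tr(struct inode *inode, struct file *filp)
{
        struct trace_array *tr = inode->i_private;
        int ret;

        ret = tracing_check_open_get_tr(tr);
        if (ret)
                return ret;

        filp->private_data = tr;
        return 0;
}

static __maybe_unused int example_release_tr(struct inode *inode, struct file *filp)
{
        struct trace_array *tr = inode->i_private;

        trace_array_put(tr);
        return 0;
}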
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct ring_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
390 {
391         /*
392          * Return false, because if filtered_pids does not exist,
393          * all pids are good to trace.
394          */
395         if (!filtered_pids)
396                 return false;
397
398         return !trace_find_filtered_pid(filtered_pids, task->pid);
399 }
400
401 /**
402  * trace_filter_add_remove_task - Add or remove a task from a pid_list
403  * @pid_list: The list to modify
404  * @self: The current task for fork or NULL for exit
405  * @task: The task to add or remove
406  *
407  * If adding a task, if @self is defined, the task is only added if @self
408  * is also included in @pid_list. This happens on fork and tasks should
409  * only be added when the parent is listed. If @self is NULL, then the
410  * @task pid will be removed from the list, which would happen on exit
411  * of a task.
412  */
413 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
414                                   struct task_struct *self,
415                                   struct task_struct *task)
416 {
417         if (!pid_list)
418                 return;
419
420         /* For forks, we only add if the forking task is listed */
421         if (self) {
422                 if (!trace_find_filtered_pid(pid_list, self->pid))
423                         return;
424         }
425
426         /* Sorry, but we don't support pid_max changing after setting */
427         if (task->pid >= pid_list->pid_max)
428                 return;
429
430         /* "self" is set for forks, and NULL for exits */
431         if (self)
432                 set_bit(task->pid, pid_list->pids);
433         else
434                 clear_bit(task->pid, pid_list->pids);
435 }
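
/*
 * Illustrative sketch (hypothetical wrappers): how the helper above is
 * typically driven from fork and exit hooks. On fork, the child is
 * added only if the parent (@self) is already in the list; on exit,
 * passing a NULL @self removes the task from the list.
 */
static __maybe_unused void
example_pid_filter_fork(struct trace_pid_list *pid_list,
                        struct task_struct *parent,
                        struct task_struct *child)
{
        /* The child inherits the filter only if the parent is filtered */
        trace_filter_add_remove_task(pid_list, parent, child);
}

static __maybe_unused void
example_pid_filter_exit(struct trace_pid_list *pid_list,
                        struct task_struct *task)
{
        /* A NULL @self means: remove @task from the list */
        trace_filter_add_remove_task(pid_list, NULL, task);
}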
436
437 /**
438  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
439  * @pid_list: The pid list to show
440  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
441  * @pos: The position of the file
442  *
443  * This is used by the seq_file "next" operation to iterate the pids
444  * listed in a trace_pid_list structure.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
450 {
451         unsigned long pid = (unsigned long)v;
452
453         (*pos)++;
454
455         /* pid already is +1 of the actual previous bit */
456         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
457
458         /* Return pid + 1 to allow zero to be represented */
459         if (pid < pid_list->pid_max)
460                 return (void *)(pid + 1);
461
462         return NULL;
463 }
464
465 /**
466  * trace_pid_start - Used for seq_file to start reading pid lists
467  * @pid_list: The pid list to show
468  * @pos: The position of the file
469  *
470  * This is used by seq_file "start" operation to start the iteration
471  * of listing pids.
472  *
473  * Returns the pid+1 as we want to display pid of zero, but NULL would
474  * stop the iteration.
475  */
476 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
477 {
478         unsigned long pid;
479         loff_t l = 0;
480
481         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
482         if (pid >= pid_list->pid_max)
483                 return NULL;
484
485         /* Return pid + 1 so that zero can be the exit value */
486         for (pid++; pid && l < *pos;
487              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
488                 ;
489         return (void *)pid;
490 }
491
492 /**
493  * trace_pid_show - show the current pid in seq_file processing
494  * @m: The seq_file structure to write into
495  * @v: A void pointer of the pid (+1) value to display
496  *
497  * Can be directly used by seq_file operations to display the current
498  * pid value.
499  */
500 int trace_pid_show(struct seq_file *m, void *v)
501 {
502         unsigned long pid = (unsigned long)v - 1;
503
504         seq_printf(m, "%lu\n", pid);
505         return 0;
506 }
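
/*
 * Illustrative sketch (hypothetical names): wiring the three helpers
 * above into seq_file operations. Real users stash whatever is needed
 * to find the pid list in m->private from their open callback, and do
 * the locking around the list that is omitted here.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops __maybe_unused = {
        .start  = example_pid_seq_start,
        .next   = example_pid_seq_next,
        .stop   = example_pid_seq_stop,
        .show   = trace_pid_show,
};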
507
508 /* 128 should be much more than enough */
509 #define PID_BUF_SIZE            127
510
511 int trace_pid_write(struct trace_pid_list *filtered_pids,
512                     struct trace_pid_list **new_pid_list,
513                     const char __user *ubuf, size_t cnt)
514 {
515         struct trace_pid_list *pid_list;
516         struct trace_parser parser;
517         unsigned long val;
518         int nr_pids = 0;
519         ssize_t read = 0;
520         ssize_t ret = 0;
521         loff_t pos;
522         pid_t pid;
523
524         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
525                 return -ENOMEM;
526
527         /*
528          * Always recreate a new array. The write is an all-or-nothing
529          * operation: a new array is always created when the user adds
530          * new pids, and if the operation fails, the current list is
531          * left unmodified.
532          */
533         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
534         if (!pid_list) {
535                 trace_parser_put(&parser);
536                 return -ENOMEM;
537         }
538
539         pid_list->pid_max = READ_ONCE(pid_max);
540
541         /* Only truncating will shrink pid_max */
542         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
543                 pid_list->pid_max = filtered_pids->pid_max;
544
545         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
546         if (!pid_list->pids) {
547                 trace_parser_put(&parser);
548                 kfree(pid_list);
549                 return -ENOMEM;
550         }
551
552         if (filtered_pids) {
553                 /* copy the current bits to the new max */
554                 for_each_set_bit(pid, filtered_pids->pids,
555                                  filtered_pids->pid_max) {
556                         set_bit(pid, pid_list->pids);
557                         nr_pids++;
558                 }
559         }
560
561         while (cnt > 0) {
562
563                 pos = 0;
564
565                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
566                 if (ret < 0 || !trace_parser_loaded(&parser))
567                         break;
568
569                 read += ret;
570                 ubuf += ret;
571                 cnt -= ret;
572
573                 ret = -EINVAL;
574                 if (kstrtoul(parser.buffer, 0, &val))
575                         break;
576                 if (val >= pid_list->pid_max)
577                         break;
578
579                 pid = (pid_t)val;
580
581                 set_bit(pid, pid_list->pids);
582                 nr_pids++;
583
584                 trace_parser_clear(&parser);
585                 ret = 0;
586         }
587         trace_parser_put(&parser);
588
589         if (ret < 0) {
590                 trace_free_pid_list(pid_list);
591                 return ret;
592         }
593
594         if (!nr_pids) {
595                 /* Cleared the list of pids */
596                 trace_free_pid_list(pid_list);
597                 read = ret;
598                 pid_list = NULL;
599         }
600
601         *new_pid_list = pid_list;
602
603         return read;
604 }
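
/*
 * Illustrative sketch (hypothetical, simplified): the calling pattern
 * for trace_pid_write() from a tracefs ->write() handler. The real
 * users keep the filtered list behind RCU and publish the new list
 * with rcu_assign_pointer(), waiting with synchronize_rcu() before
 * freeing the old one; that part is only hinted at in the comments.
 */
static __maybe_unused ssize_t
example_pid_list_write(struct trace_pid_list **filtered_pids,
                       const char __user *ubuf, size_t cnt)
{
        struct trace_pid_list *old_list = *filtered_pids;
        struct trace_pid_list *new_list = NULL;
        ssize_t ret;

        ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
        if (ret < 0)
                return ret;

        /* Publish the new list (RCU publication omitted here) ... */
        *filtered_pids = new_list;

        /* ... and free the old one (after synchronize_rcu() in real code) */
        if (old_list)
                trace_free_pid_list(old_list);

        return ret;
}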
605
606 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
607 {
608         u64 ts;
609
610         /* Early boot up does not have a buffer yet */
611         if (!buf->buffer)
612                 return trace_clock_local();
613
614         ts = ring_buffer_time_stamp(buf->buffer, cpu);
615         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
616
617         return ts;
618 }
619
620 u64 ftrace_now(int cpu)
621 {
622         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
623 }
624
625 /**
626  * tracing_is_enabled - Show if global_trace has been enabled
627  *
628  * Shows if the global trace has been enabled or not. It uses the
629  * mirror flag "buffer_disabled" to be used in fast paths such as for
630  * the irqsoff tracer. But it may be inaccurate due to races. If you
631  * need to know the accurate state, use tracing_is_on() which is a little
632  * slower, but accurate.
633  */
634 int tracing_is_enabled(void)
635 {
636         /*
637          * For quick access (irqsoff uses this in fast path), just
638          * return the mirror variable of the state of the ring buffer.
639          * It's a little racy, but we don't really care.
640          */
641         smp_rmb();
642         return !global_trace.buffer_disabled;
643 }
644
645 /*
646  * trace_buf_size is the size in bytes that is allocated
647  * for a buffer. Note, the number of bytes is always rounded
648  * to page size.
649  *
650  * This number is purposely set to a low number of 16384.
651  * If a dump on oops happens, it is much appreciated not to have
652  * to wait for all that output. In any case, this is configurable
653  * at both boot time and run time.
654  */
655 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
656
657 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
658
659 /* trace_types holds a link list of available tracers. */
660 static struct tracer            *trace_types __read_mostly;
661
662 /*
663  * trace_types_lock is used to protect the trace_types list.
664  */
665 DEFINE_MUTEX(trace_types_lock);
666
667 /*
668  * Serialize access to the ring buffer.
669  *
670  * The ring buffer serializes readers, but that is only low-level protection.
671  * The validity of the events (returned by ring_buffer_peek(), etc.)
672  * is not protected by the ring buffer.
673  *
674  * The content of events may become garbage if we allow other processes to
675  * consume these events concurrently:
676  *   A) the page holding the consumed events may become a normal page
677  *      (not a reader page) in the ring buffer, and this page will be
678  *      rewritten by the event producer.
679  *   B) the page holding the consumed events may become a page for
680  *      splice_read, and this page will be returned to the system.
681  *
682  * These primitives allow multiple processes to access different per-CPU
683  * ring buffers concurrently (see the usage sketch after these helpers).
684  *
685  * These primitives don't distinguish read-only and read-consume access.
686  * Multiple read-only accesses are also serialized.
687  */
688
689 #ifdef CONFIG_SMP
690 static DECLARE_RWSEM(all_cpu_access_lock);
691 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         if (cpu == RING_BUFFER_ALL_CPUS) {
696                 /* gain it for accessing the whole ring buffer. */
697                 down_write(&all_cpu_access_lock);
698         } else {
699                 /* gain it for accessing a cpu ring buffer. */
700
701                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
702                 down_read(&all_cpu_access_lock);
703
704                 /* Secondly block other access to this @cpu ring buffer. */
705                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
706         }
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         if (cpu == RING_BUFFER_ALL_CPUS) {
712                 up_write(&all_cpu_access_lock);
713         } else {
714                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
715                 up_read(&all_cpu_access_lock);
716         }
717 }
718
719 static inline void trace_access_lock_init(void)
720 {
721         int cpu;
722
723         for_each_possible_cpu(cpu)
724                 mutex_init(&per_cpu(cpu_access_lock, cpu));
725 }
726
727 #else
728
729 static DEFINE_MUTEX(access_lock);
730
731 static inline void trace_access_lock(int cpu)
732 {
733         (void)cpu;
734         mutex_lock(&access_lock);
735 }
736
737 static inline void trace_access_unlock(int cpu)
738 {
739         (void)cpu;
740         mutex_unlock(&access_lock);
741 }
742
743 static inline void trace_access_lock_init(void)
744 {
745 }
746
747 #endif
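
/*
 * Illustrative sketch (hypothetical): the pattern consuming readers
 * follow with the helpers above. Reading a single CPU buffer takes the
 * shared side plus that CPU's mutex; passing RING_BUFFER_ALL_CPUS
 * instead takes the exclusive side for whole-buffer access.
 */
static __maybe_unused void example_read_cpu_buffer(int cpu)
{
        trace_access_lock(cpu);
        /*
         * ... consume events from the @cpu ring buffer here, e.g. via
         * ring_buffer_consume() or the splice path ...
         */
        trace_access_unlock(cpu);
}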
748
749 #ifdef CONFIG_STACKTRACE
750 static void __ftrace_trace_stack(struct ring_buffer *buffer,
751                                  unsigned long flags,
752                                  int skip, int pc, struct pt_regs *regs);
753 static inline void ftrace_trace_stack(struct trace_array *tr,
754                                       struct ring_buffer *buffer,
755                                       unsigned long flags,
756                                       int skip, int pc, struct pt_regs *regs);
757
758 #else
759 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
760                                         unsigned long flags,
761                                         int skip, int pc, struct pt_regs *regs)
762 {
763 }
764 static inline void ftrace_trace_stack(struct trace_array *tr,
765                                       struct ring_buffer *buffer,
766                                       unsigned long flags,
767                                       int skip, int pc, struct pt_regs *regs)
768 {
769 }
770
771 #endif
772
773 static __always_inline void
774 trace_event_setup(struct ring_buffer_event *event,
775                   int type, unsigned long flags, int pc)
776 {
777         struct trace_entry *ent = ring_buffer_event_data(event);
778
779         tracing_generic_entry_update(ent, type, flags, pc);
780 }
781
782 static __always_inline struct ring_buffer_event *
783 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
784                           int type,
785                           unsigned long len,
786                           unsigned long flags, int pc)
787 {
788         struct ring_buffer_event *event;
789
790         event = ring_buffer_lock_reserve(buffer, len);
791         if (event != NULL)
792                 trace_event_setup(event, type, flags, pc);
793
794         return event;
795 }
796
797 void tracer_tracing_on(struct trace_array *tr)
798 {
799         if (tr->trace_buffer.buffer)
800                 ring_buffer_record_on(tr->trace_buffer.buffer);
801         /*
802          * This flag is looked at when buffers haven't been allocated
803          * yet, or by some tracers (like irqsoff), that just want to
804          * know if the ring buffer has been disabled, but it can handle
805          * races of where it gets disabled but we still do a record.
806          * As the check is in the fast path of the tracers, it is more
807          * important to be fast than accurate.
808          */
809         tr->buffer_disabled = 0;
810         /* Make the flag seen by readers */
811         smp_wmb();
812 }
813
814 /**
815  * tracing_on - enable tracing buffers
816  *
817  * This function enables tracing buffers that may have been
818  * disabled with tracing_off.
819  */
820 void tracing_on(void)
821 {
822         tracer_tracing_on(&global_trace);
823 }
824 EXPORT_SYMBOL_GPL(tracing_on);
825
826
827 static __always_inline void
828 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
829 {
830         __this_cpu_write(trace_taskinfo_save, true);
831
832         /* If this is the temp buffer, we need to commit fully */
833         if (this_cpu_read(trace_buffered_event) == event) {
834                 /* Length is in event->array[0] */
835                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
836                 /* Release the temp buffer */
837                 this_cpu_dec(trace_buffered_event_cnt);
838         } else
839                 ring_buffer_unlock_commit(buffer, event);
840 }
841
842 /**
843  * __trace_puts - write a constant string into the trace buffer.
844  * @ip:    The address of the caller
845  * @str:   The constant string to write
846  * @size:  The size of the string.
847  */
848 int __trace_puts(unsigned long ip, const char *str, int size)
849 {
850         struct ring_buffer_event *event;
851         struct ring_buffer *buffer;
852         struct print_entry *entry;
853         unsigned long irq_flags;
854         int alloc;
855         int pc;
856
857         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
858                 return 0;
859
860         pc = preempt_count();
861
862         if (unlikely(tracing_selftest_running || tracing_disabled))
863                 return 0;
864
865         alloc = sizeof(*entry) + size + 2; /* possible \n added */
866
867         local_save_flags(irq_flags);
868         buffer = global_trace.trace_buffer.buffer;
869         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
870                                             irq_flags, pc);
871         if (!event)
872                 return 0;
873
874         entry = ring_buffer_event_data(event);
875         entry->ip = ip;
876
877         memcpy(&entry->buf, str, size);
878
879         /* Add a newline if necessary */
880         if (entry->buf[size - 1] != '\n') {
881                 entry->buf[size] = '\n';
882                 entry->buf[size + 1] = '\0';
883         } else
884                 entry->buf[size] = '\0';
885
886         __buffer_unlock_commit(buffer, event);
887         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
888
889         return size;
890 }
891 EXPORT_SYMBOL_GPL(__trace_puts);
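
/*
 * Illustrative note: kernel code normally reaches __trace_puts() (or
 * __trace_bputs() below) through the trace_puts() macro from
 * linux/kernel.h. A typical ad-hoc debugging call site (hypothetical)
 * looks like this:
 */
static __maybe_unused void example_trace_puts_caller(void)
{
        /* Drop a constant marker string into the trace buffer */
        trace_puts("example: reached the interesting code path\n");
}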
892
893 /**
894  * __trace_bputs - write the pointer to a constant string into trace buffer
895  * @ip:    The address of the caller
896  * @str:   The constant string to write to the buffer to
897  */
898 int __trace_bputs(unsigned long ip, const char *str)
899 {
900         struct ring_buffer_event *event;
901         struct ring_buffer *buffer;
902         struct bputs_entry *entry;
903         unsigned long irq_flags;
904         int size = sizeof(struct bputs_entry);
905         int pc;
906
907         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
908                 return 0;
909
910         pc = preempt_count();
911
912         if (unlikely(tracing_selftest_running || tracing_disabled))
913                 return 0;
914
915         local_save_flags(irq_flags);
916         buffer = global_trace.trace_buffer.buffer;
917         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
918                                             irq_flags, pc);
919         if (!event)
920                 return 0;
921
922         entry = ring_buffer_event_data(event);
923         entry->ip                       = ip;
924         entry->str                      = str;
925
926         __buffer_unlock_commit(buffer, event);
927         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
928
929         return 1;
930 }
931 EXPORT_SYMBOL_GPL(__trace_bputs);
932
933 #ifdef CONFIG_TRACER_SNAPSHOT
934 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
935 {
936         struct tracer *tracer = tr->current_trace;
937         unsigned long flags;
938
939         if (in_nmi()) {
940                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
941                 internal_trace_puts("*** snapshot is being ignored        ***\n");
942                 return;
943         }
944
945         if (!tr->allocated_snapshot) {
946                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
947                 internal_trace_puts("*** stopping trace here!   ***\n");
948                 tracing_off();
949                 return;
950         }
951
952         /* Note, snapshot can not be used when the tracer uses it */
953         if (tracer->use_max_tr) {
954                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
955                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
956                 return;
957         }
958
959         local_irq_save(flags);
960         update_max_tr(tr, current, smp_processor_id(), cond_data);
961         local_irq_restore(flags);
962 }
963
964 void tracing_snapshot_instance(struct trace_array *tr)
965 {
966         tracing_snapshot_instance_cond(tr, NULL);
967 }
968
969 /**
970  * tracing_snapshot - take a snapshot of the current buffer.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  *
976  * Note, make sure to allocate the snapshot with either
977  * a tracing_snapshot_alloc(), or by doing it manually
978  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
979  *
980  * If the snapshot buffer is not allocated, it will stop tracing.
981  * Basically making a permanent snapshot.
982  */
983 void tracing_snapshot(void)
984 {
985         struct trace_array *tr = &global_trace;
986
987         tracing_snapshot_instance(tr);
988 }
989 EXPORT_SYMBOL_GPL(tracing_snapshot);
990
991 /**
992  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
993  * @tr:         The tracing instance to snapshot
994  * @cond_data:  The data to be tested conditionally, and possibly saved
995  *
996  * This is the same as tracing_snapshot() except that the snapshot is
997  * conditional - the snapshot will only happen if the
998  * cond_snapshot.update() implementation receiving the cond_data
999  * returns true, which means that the trace array's cond_snapshot
1000  * update() operation used the cond_data to determine whether the
1001  * snapshot should be taken, and if it was, presumably saved it along
1002  * with the snapshot.
1003  */
1004 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1005 {
1006         tracing_snapshot_instance_cond(tr, cond_data);
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1009
1010 /**
1011  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1012  * @tr:         The tracing instance
1013  *
1014  * When the user enables a conditional snapshot using
1015  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1016  * with the snapshot.  This accessor is used to retrieve it.
1017  *
1018  * Should not be called from cond_snapshot.update(), since it takes
1019  * the tr->max_lock lock, which the code calling
1020  * cond_snapshot.update() has already taken.
1021  *
1022  * Returns the cond_data associated with the trace array's snapshot.
1023  */
1024 void *tracing_cond_snapshot_data(struct trace_array *tr)
1025 {
1026         void *cond_data = NULL;
1027
1028         arch_spin_lock(&tr->max_lock);
1029
1030         if (tr->cond_snapshot)
1031                 cond_data = tr->cond_snapshot->cond_data;
1032
1033         arch_spin_unlock(&tr->max_lock);
1034
1035         return cond_data;
1036 }
1037 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1038
1039 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1040                                         struct trace_buffer *size_buf, int cpu_id);
1041 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1042
1043 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1044 {
1045         int ret;
1046
1047         if (!tr->allocated_snapshot) {
1048
1049                 /* allocate spare buffer */
1050                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1051                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1052                 if (ret < 0)
1053                         return ret;
1054
1055                 tr->allocated_snapshot = true;
1056         }
1057
1058         return 0;
1059 }
1060
1061 static void free_snapshot(struct trace_array *tr)
1062 {
1063         /*
1064          * We don't free the ring buffer. Instead, we resize it because
1065          * the max_tr ring buffer has some state (e.g. ring->clock) and
1066          * we want to preserve it.
1067          */
1068         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1069         set_buffer_entries(&tr->max_buffer, 1);
1070         tracing_reset_online_cpus(&tr->max_buffer);
1071         tr->allocated_snapshot = false;
1072 }
1073
1074 /**
1075  * tracing_alloc_snapshot - allocate snapshot buffer.
1076  *
1077  * This only allocates the snapshot buffer if it isn't already
1078  * allocated - it doesn't also take a snapshot.
1079  *
1080  * This is meant to be used in cases where the snapshot buffer needs
1081  * to be set up for events that can't sleep but need to be able to
1082  * trigger a snapshot.
1083  */
1084 int tracing_alloc_snapshot(void)
1085 {
1086         struct trace_array *tr = &global_trace;
1087         int ret;
1088
1089         ret = tracing_alloc_snapshot_instance(tr);
1090         WARN_ON(ret < 0);
1091
1092         return ret;
1093 }
1094 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1095
1096 /**
1097  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1098  *
1099  * This is similar to tracing_snapshot(), but it will allocate the
1100  * snapshot buffer if it isn't already allocated. Use this only
1101  * where it is safe to sleep, as the allocation may sleep.
1102  *
1103  * This causes a swap between the snapshot buffer and the current live
1104  * tracing buffer. You can use this to take snapshots of the live
1105  * trace when some condition is triggered, but continue to trace.
1106  */
1107 void tracing_snapshot_alloc(void)
1108 {
1109         int ret;
1110
1111         ret = tracing_alloc_snapshot();
1112         if (ret < 0)
1113                 return;
1114
1115         tracing_snapshot();
1116 }
1117 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
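
/*
 * Illustrative sketch (hypothetical callers): the intended split
 * between the interfaces above. Allocate the snapshot buffer from a
 * context that may sleep, then take the actual snapshot later from a
 * context that must not sleep.
 */
static __maybe_unused int example_snapshot_setup(void)
{
        /* May sleep: make sure the spare buffer exists */
        return tracing_alloc_snapshot();
}

static __maybe_unused void example_snapshot_on_condition(bool hit)
{
        /* Must not sleep: only swaps the already-allocated buffers */
        if (hit)
                tracing_snapshot();
}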
1118
1119 /**
1120  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1121  * @tr:         The tracing instance
1122  * @cond_data:  User data to associate with the snapshot
1123  * @update:     Implementation of the cond_snapshot update function
1124  *
1125  * Check whether the conditional snapshot for the given instance has
1126  * already been enabled, or if the current tracer is already using a
1127  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1128  * save the cond_data and update function inside.
1129  *
1130  * Returns 0 if successful, error otherwise.
1131  */
1132 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1133                                  cond_update_fn_t update)
1134 {
1135         struct cond_snapshot *cond_snapshot;
1136         int ret = 0;
1137
1138         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1139         if (!cond_snapshot)
1140                 return -ENOMEM;
1141
1142         cond_snapshot->cond_data = cond_data;
1143         cond_snapshot->update = update;
1144
1145         mutex_lock(&trace_types_lock);
1146
1147         ret = tracing_alloc_snapshot_instance(tr);
1148         if (ret)
1149                 goto fail_unlock;
1150
1151         if (tr->current_trace->use_max_tr) {
1152                 ret = -EBUSY;
1153                 goto fail_unlock;
1154         }
1155
1156         /*
1157          * The cond_snapshot can only change to NULL without the
1158          * trace_types_lock. We don't care if we race with it going
1159          * to NULL, but we want to make sure that it's not set to
1160          * something other than NULL when we get here, which we can
1161          * do safely with only holding the trace_types_lock and not
1162          * having to take the max_lock.
1163          */
1164         if (tr->cond_snapshot) {
1165                 ret = -EBUSY;
1166                 goto fail_unlock;
1167         }
1168
1169         arch_spin_lock(&tr->max_lock);
1170         tr->cond_snapshot = cond_snapshot;
1171         arch_spin_unlock(&tr->max_lock);
1172
1173         mutex_unlock(&trace_types_lock);
1174
1175         return ret;
1176
1177  fail_unlock:
1178         mutex_unlock(&trace_types_lock);
1179         kfree(cond_snapshot);
1180         return ret;
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1183
1184 /**
1185  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1186  * @tr:         The tracing instance
1187  *
1188  * Check whether the conditional snapshot for the given instance is
1189  * enabled; if so, free the cond_snapshot associated with it,
1190  * otherwise return -EINVAL.
1191  *
1192  * Returns 0 if successful, error otherwise.
1193  */
1194 int tracing_snapshot_cond_disable(struct trace_array *tr)
1195 {
1196         int ret = 0;
1197
1198         arch_spin_lock(&tr->max_lock);
1199
1200         if (!tr->cond_snapshot)
1201                 ret = -EINVAL;
1202         else {
1203                 kfree(tr->cond_snapshot);
1204                 tr->cond_snapshot = NULL;
1205         }
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return ret;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
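
/*
 * Illustrative sketch (hypothetical update callback and counter): the
 * life cycle of a conditional snapshot built on the interfaces above.
 * The update callback decides, from the cond_data, whether a given
 * tracing_snapshot_cond() call actually takes the snapshot.
 */
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
        unsigned long *hit_count = cond_data;

        /* Snapshot only once the hypothetical counter is non-zero */
        return *hit_count != 0;
}

static __maybe_unused int example_cond_snapshot_use(struct trace_array *tr,
                                                    unsigned long *hit_count)
{
        int ret;

        ret = tracing_snapshot_cond_enable(tr, hit_count, example_cond_update);
        if (ret)
                return ret;

        /* Later, from the traced path: */
        tracing_snapshot_cond(tr, hit_count);

        /* And when done with conditional snapshots: */
        tracing_snapshot_cond_disable(tr);
        return 0;
}
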
1212 #else
1213 void tracing_snapshot(void)
1214 {
1215         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1216 }
1217 EXPORT_SYMBOL_GPL(tracing_snapshot);
1218 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1219 {
1220         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1221 }
1222 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1223 int tracing_alloc_snapshot(void)
1224 {
1225         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1226         return -ENODEV;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1229 void tracing_snapshot_alloc(void)
1230 {
1231         /* Give warning */
1232         tracing_snapshot();
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1235 void *tracing_cond_snapshot_data(struct trace_array *tr)
1236 {
1237         return NULL;
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1240 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1241 {
1242         return -ENODEV;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1245 int tracing_snapshot_cond_disable(struct trace_array *tr)
1246 {
1247         return false;
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1250 #endif /* CONFIG_TRACER_SNAPSHOT */
1251
1252 void tracer_tracing_off(struct trace_array *tr)
1253 {
1254         if (tr->trace_buffer.buffer)
1255                 ring_buffer_record_off(tr->trace_buffer.buffer);
1256         /*
1257          * This flag is looked at when buffers haven't been allocated
1258          * yet, or by some tracers (like irqsoff), that just want to
1259          * know if the ring buffer has been disabled, but it can handle
1260          * races of where it gets disabled but we still do a record.
1261          * As the check is in the fast path of the tracers, it is more
1262          * important to be fast than accurate.
1263          */
1264         tr->buffer_disabled = 1;
1265         /* Make the flag seen by readers */
1266         smp_wmb();
1267 }
1268
1269 /**
1270  * tracing_off - turn off tracing buffers
1271  *
1272  * This function stops the tracing buffers from recording data.
1273  * It does not disable any overhead the tracers themselves may
1274  * be causing. This function simply causes all recording to
1275  * the ring buffers to fail.
1276  */
1277 void tracing_off(void)
1278 {
1279         tracer_tracing_off(&global_trace);
1280 }
1281 EXPORT_SYMBOL_GPL(tracing_off);
1282
1283 void disable_trace_on_warning(void)
1284 {
1285         if (__disable_trace_on_warning)
1286                 tracing_off();
1287 }
1288
1289 /**
1290  * tracer_tracing_is_on - show the real state of the ring buffer
1291  * @tr: the trace array whose ring buffer state is queried
1292  *
1293  * Shows the real state of the ring buffer: whether it is enabled or not.
1294  */
1295 bool tracer_tracing_is_on(struct trace_array *tr)
1296 {
1297         if (tr->trace_buffer.buffer)
1298                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1299         return !tr->buffer_disabled;
1300 }
1301
1302 /**
1303  * tracing_is_on - show state of ring buffers enabled
1304  */
1305 int tracing_is_on(void)
1306 {
1307         return tracer_tracing_is_on(&global_trace);
1308 }
1309 EXPORT_SYMBOL_GPL(tracing_is_on);
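
/*
 * Illustrative sketch (hypothetical): a common debugging pattern with
 * the helpers above. Freeze the ring buffer the moment a suspicious
 * condition is seen so the trace leading up to it is not overwritten,
 * and only do so if the buffer is still recording.
 */
static __maybe_unused void example_freeze_trace(bool bad_condition)
{
        if (bad_condition && tracing_is_on()) {
                trace_puts("example: bad condition hit, freezing trace\n");
                tracing_off();
        }
}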
1310
1311 static int __init set_buf_size(char *str)
1312 {
1313         unsigned long buf_size;
1314
1315         if (!str)
1316                 return 0;
1317         buf_size = memparse(str, &str);
1318         /* nr_entries can not be zero */
1319         if (buf_size == 0)
1320                 return 0;
1321         trace_buf_size = buf_size;
1322         return 1;
1323 }
1324 __setup("trace_buf_size=", set_buf_size);
1325
1326 static int __init set_tracing_thresh(char *str)
1327 {
1328         unsigned long threshold;
1329         int ret;
1330
1331         if (!str)
1332                 return 0;
1333         ret = kstrtoul(str, 0, &threshold);
1334         if (ret < 0)
1335                 return 0;
1336         tracing_thresh = threshold * 1000;
1337         return 1;
1338 }
1339 __setup("tracing_thresh=", set_tracing_thresh);
1340
1341 unsigned long nsecs_to_usecs(unsigned long nsecs)
1342 {
1343         return nsecs / 1000;
1344 }
1345
1346 /*
1347  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1348  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1349  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1350  * of strings in the order that the evals (enum) were defined.
1351  */
1352 #undef C
1353 #define C(a, b) b
1354
1355 /* These must match the bit positions in trace_iterator_flags */
1356 static const char *trace_options[] = {
1357         TRACE_FLAGS
1358         NULL
1359 };
1360
1361 static struct {
1362         u64 (*func)(void);
1363         const char *name;
1364         int in_ns;              /* is this clock in nanoseconds? */
1365 } trace_clocks[] = {
1366         { trace_clock_local,            "local",        1 },
1367         { trace_clock_global,           "global",       1 },
1368         { trace_clock_counter,          "counter",      0 },
1369         { trace_clock_jiffies,          "uptime",       0 },
1370         { trace_clock,                  "perf",         1 },
1371         { ktime_get_mono_fast_ns,       "mono",         1 },
1372         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1373         { ktime_get_boot_fast_ns,       "boot",         1 },
1374         ARCH_TRACE_CLOCKS
1375 };
1376
1377 bool trace_clock_in_ns(struct trace_array *tr)
1378 {
1379         if (trace_clocks[tr->clock_id].in_ns)
1380                 return true;
1381
1382         return false;
1383 }
1384
1385 /*
1386  * trace_parser_get_init - gets the buffer for trace parser
1387  */
1388 int trace_parser_get_init(struct trace_parser *parser, int size)
1389 {
1390         memset(parser, 0, sizeof(*parser));
1391
1392         parser->buffer = kmalloc(size, GFP_KERNEL);
1393         if (!parser->buffer)
1394                 return 1;
1395
1396         parser->size = size;
1397         return 0;
1398 }
1399
1400 /*
1401  * trace_parser_put - frees the buffer for trace parser
1402  */
1403 void trace_parser_put(struct trace_parser *parser)
1404 {
1405         kfree(parser->buffer);
1406         parser->buffer = NULL;
1407 }
1408
1409 /*
1410  * trace_get_user - reads the user input string separated by space
1411  * (matched by isspace(ch))
1412  *
1413  * For each string found the 'struct trace_parser' is updated,
1414  * and the function returns.
1415  *
1416  * Returns number of bytes read.
1417  *
1418  * See kernel/trace/trace.h for 'struct trace_parser' details.
1419  */
1420 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1421         size_t cnt, loff_t *ppos)
1422 {
1423         char ch;
1424         size_t read = 0;
1425         ssize_t ret;
1426
1427         if (!*ppos)
1428                 trace_parser_clear(parser);
1429
1430         ret = get_user(ch, ubuf++);
1431         if (ret)
1432                 goto out;
1433
1434         read++;
1435         cnt--;
1436
1437         /*
1438          * The parser is not finished with the last write,
1439          * continue reading the user input without skipping spaces.
1440          */
1441         if (!parser->cont) {
1442                 /* skip white space */
1443                 while (cnt && isspace(ch)) {
1444                         ret = get_user(ch, ubuf++);
1445                         if (ret)
1446                                 goto out;
1447                         read++;
1448                         cnt--;
1449                 }
1450
1451                 parser->idx = 0;
1452
1453                 /* only spaces were written */
1454                 if (isspace(ch) || !ch) {
1455                         *ppos += read;
1456                         ret = read;
1457                         goto out;
1458                 }
1459         }
1460
1461         /* read the non-space input */
1462         while (cnt && !isspace(ch) && ch) {
1463                 if (parser->idx < parser->size - 1)
1464                         parser->buffer[parser->idx++] = ch;
1465                 else {
1466                         ret = -EINVAL;
1467                         goto out;
1468                 }
1469                 ret = get_user(ch, ubuf++);
1470                 if (ret)
1471                         goto out;
1472                 read++;
1473                 cnt--;
1474         }
1475
1476         /* We either got finished input or we have to wait for another call. */
1477         if (isspace(ch) || !ch) {
1478                 parser->buffer[parser->idx] = 0;
1479                 parser->cont = false;
1480         } else if (parser->idx < parser->size - 1) {
1481                 parser->cont = true;
1482                 parser->buffer[parser->idx++] = ch;
1483                 /* Make sure the parsed string always terminates with '\0'. */
1484                 parser->buffer[parser->idx] = 0;
1485         } else {
1486                 ret = -EINVAL;
1487                 goto out;
1488         }
1489
1490         *ppos += read;
1491         ret = read;
1492
1493 out:
1494         return ret;
1495 }
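
/*
 * Illustrative sketch (hypothetical): the usual parse loop built on
 * trace_parser_get_init()/trace_get_user(), the same shape used by
 * trace_pid_write() earlier in this file. Each iteration leaves one
 * whitespace-separated, NUL-terminated word in parser.buffer.
 */
static __maybe_unused ssize_t
example_parse_words(const char __user *ubuf, size_t cnt)
{
        struct trace_parser parser;
        ssize_t read = 0;
        ssize_t ret;
        loff_t pos;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        while (cnt > 0) {
                pos = 0;

                ret = trace_get_user(&parser, ubuf, cnt, &pos);
                if (ret < 0 || !trace_parser_loaded(&parser))
                        break;

                read += ret;
                ubuf += ret;
                cnt -= ret;

                /* One word is available in parser.buffer now */
                pr_info("example word: %s\n", parser.buffer);

                trace_parser_clear(&parser);
                ret = 0;
        }
        trace_parser_put(&parser);

        return ret < 0 ? ret : read;
}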
1496
1497 /* TODO add a seq_buf_to_buffer() */
1498 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1499 {
1500         int len;
1501
1502         if (trace_seq_used(s) <= s->seq.readpos)
1503                 return -EBUSY;
1504
1505         len = trace_seq_used(s) - s->seq.readpos;
1506         if (cnt > len)
1507                 cnt = len;
1508         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1509
1510         s->seq.readpos += cnt;
1511         return cnt;
1512 }
1513
1514 unsigned long __read_mostly     tracing_thresh;
1515 static const struct file_operations tracing_max_lat_fops;
1516
1517 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1518         defined(CONFIG_FSNOTIFY)
1519
1520 static struct workqueue_struct *fsnotify_wq;
1521
1522 static void latency_fsnotify_workfn(struct work_struct *work)
1523 {
1524         struct trace_array *tr = container_of(work, struct trace_array,
1525                                               fsnotify_work);
1526         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1527                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1528 }
1529
1530 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1531 {
1532         struct trace_array *tr = container_of(iwork, struct trace_array,
1533                                               fsnotify_irqwork);
1534         queue_work(fsnotify_wq, &tr->fsnotify_work);
1535 }
1536
1537 static void trace_create_maxlat_file(struct trace_array *tr,
1538                                      struct dentry *d_tracer)
1539 {
1540         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1541         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1542         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1543                                               d_tracer, &tr->max_latency,
1544                                               &tracing_max_lat_fops);
1545 }
1546
1547 __init static int latency_fsnotify_init(void)
1548 {
1549         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1550                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1551         if (!fsnotify_wq) {
1552                 pr_err("Unable to allocate tr_max_lat_wq\n");
1553                 return -ENOMEM;
1554         }
1555         return 0;
1556 }
1557
1558 late_initcall_sync(latency_fsnotify_init);
1559
1560 void latency_fsnotify(struct trace_array *tr)
1561 {
1562         if (!fsnotify_wq)
1563                 return;
1564         /*
1565          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1566          * possible that we are called from __schedule() or do_idle(), which
1567          * could cause a deadlock.
1568          */
1569         irq_work_queue(&tr->fsnotify_irqwork);
1570 }
1571
1572 /*
1573  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1574  *  defined(CONFIG_FSNOTIFY)
1575  */
1576 #else
1577
1578 #define trace_create_maxlat_file(tr, d_tracer)                          \
1579         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1580                           &tr->max_latency, &tracing_max_lat_fops)
1581
1582 #endif
1583
1584 #ifdef CONFIG_TRACER_MAX_TRACE
1585 /*
1586  * Copy the new maximum trace into the separate maximum-trace
1587  * structure. (this way the maximum trace is permanently saved,
1588  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1589  */
1590 static void
1591 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1592 {
1593         struct trace_buffer *trace_buf = &tr->trace_buffer;
1594         struct trace_buffer *max_buf = &tr->max_buffer;
1595         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1596         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1597
1598         max_buf->cpu = cpu;
1599         max_buf->time_start = data->preempt_timestamp;
1600
1601         max_data->saved_latency = tr->max_latency;
1602         max_data->critical_start = data->critical_start;
1603         max_data->critical_end = data->critical_end;
1604
1605         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1606         max_data->pid = tsk->pid;
1607         /*
1608          * If tsk == current, then use current_uid(), as that does not use
1609          * RCU. The irq tracer can be called out of RCU scope.
1610          */
1611         if (tsk == current)
1612                 max_data->uid = current_uid();
1613         else
1614                 max_data->uid = task_uid(tsk);
1615
1616         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1617         max_data->policy = tsk->policy;
1618         max_data->rt_priority = tsk->rt_priority;
1619
1620         /* record this tasks comm */
1621         tracing_record_cmdline(tsk);
1622         latency_fsnotify(tr);
1623 }
1624
1625 /**
1626  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1627  * @tr: tracer
1628  * @tsk: the task with the latency
1629  * @cpu: The cpu that initiated the trace.
1630  * @cond_data: User data associated with a conditional snapshot
1631  *
1632  * Flip the buffers between the @tr and the max_tr and record information
1633  * about which task was the cause of this latency.
1634  */
1635 void
1636 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1637               void *cond_data)
1638 {
1639         if (tr->stop_count)
1640                 return;
1641
1642         WARN_ON_ONCE(!irqs_disabled());
1643
1644         if (!tr->allocated_snapshot) {
1645                 /* Only the nop tracer should hit this when disabling */
1646                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1647                 return;
1648         }
1649
1650         arch_spin_lock(&tr->max_lock);
1651
1652         /* Inherit the recordable setting from trace_buffer */
1653         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1654                 ring_buffer_record_on(tr->max_buffer.buffer);
1655         else
1656                 ring_buffer_record_off(tr->max_buffer.buffer);
1657
1658 #ifdef CONFIG_TRACER_SNAPSHOT
1659         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1660                 goto out_unlock;
1661 #endif
1662         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1663
1664         __update_max_tr(tr, tsk, cpu);
1665
1666  out_unlock:
1667         arch_spin_unlock(&tr->max_lock);
1668 }
1669
1670 /**
1671  * update_max_tr_single - only copy one trace over, and reset the rest
1672  * @tr: tracer
1673  * @tsk: task with the latency
1674  * @cpu: the cpu of the buffer to copy.
1675  *
1676  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1677  */
1678 void
1679 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1680 {
1681         int ret;
1682
1683         if (tr->stop_count)
1684                 return;
1685
1686         WARN_ON_ONCE(!irqs_disabled());
1687         if (!tr->allocated_snapshot) {
1688                 /* Only the nop tracer should hit this when disabling */
1689                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1690                 return;
1691         }
1692
1693         arch_spin_lock(&tr->max_lock);
1694
1695         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1696
1697         if (ret == -EBUSY) {
1698                 /*
1699                  * We failed to swap the buffer due to a commit taking
1700                  * place on this CPU. We fail to record, but we reset
1701                  * the max trace buffer (no one writes directly to it)
1702                  * and flag that it failed.
1703                  */
1704                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1705                         "Failed to swap buffers due to commit in progress\n");
1706         }
1707
1708         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1709
1710         __update_max_tr(tr, tsk, cpu);
1711         arch_spin_unlock(&tr->max_lock);
1712 }
1713 #endif /* CONFIG_TRACER_MAX_TRACE */
1714
1715 static int wait_on_pipe(struct trace_iterator *iter, int full)
1716 {
1717         /* Iterators are static, they should be filled or empty */
1718         if (trace_buffer_iter(iter, iter->cpu_file))
1719                 return 0;
1720
1721         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1722                                 full);
1723 }
1724
1725 #ifdef CONFIG_FTRACE_STARTUP_TEST
1726 static bool selftests_can_run;
1727
1728 struct trace_selftests {
1729         struct list_head                list;
1730         struct tracer                   *type;
1731 };
1732
1733 static LIST_HEAD(postponed_selftests);
1734
1735 static int save_selftest(struct tracer *type)
1736 {
1737         struct trace_selftests *selftest;
1738
1739         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1740         if (!selftest)
1741                 return -ENOMEM;
1742
1743         selftest->type = type;
1744         list_add(&selftest->list, &postponed_selftests);
1745         return 0;
1746 }
1747
1748 static int run_tracer_selftest(struct tracer *type)
1749 {
1750         struct trace_array *tr = &global_trace;
1751         struct tracer *saved_tracer = tr->current_trace;
1752         int ret;
1753
1754         if (!type->selftest || tracing_selftest_disabled)
1755                 return 0;
1756
1757         /*
1758          * If a tracer registers early in boot up (before scheduling is
1759          * initialized and such), then do not run its selftests yet.
1760          * Instead, run it a little later in the boot process.
1761          */
1762         if (!selftests_can_run)
1763                 return save_selftest(type);
1764
1765         /*
1766          * Run a selftest on this tracer.
1767          * Here we reset the trace buffer, and set the current
1768          * tracer to be this tracer. The tracer can then run some
1769          * internal tracing to verify that everything is in order.
1770          * If we fail, we do not register this tracer.
1771          */
1772         tracing_reset_online_cpus(&tr->trace_buffer);
1773
1774         tr->current_trace = type;
1775
1776 #ifdef CONFIG_TRACER_MAX_TRACE
1777         if (type->use_max_tr) {
1778                 /* If we expanded the buffers, make sure the max is expanded too */
1779                 if (ring_buffer_expanded)
1780                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1781                                            RING_BUFFER_ALL_CPUS);
1782                 tr->allocated_snapshot = true;
1783         }
1784 #endif
1785
1786         /* the test is responsible for initializing and enabling */
1787         pr_info("Testing tracer %s: ", type->name);
1788         ret = type->selftest(type, tr);
1789         /* the test is responsible for resetting too */
1790         tr->current_trace = saved_tracer;
1791         if (ret) {
1792                 printk(KERN_CONT "FAILED!\n");
1793                 /* Add the warning after printing 'FAILED' */
1794                 WARN_ON(1);
1795                 return -1;
1796         }
1797         /* Only reset on passing, to avoid touching corrupted buffers */
1798         tracing_reset_online_cpus(&tr->trace_buffer);
1799
1800 #ifdef CONFIG_TRACER_MAX_TRACE
1801         if (type->use_max_tr) {
1802                 tr->allocated_snapshot = false;
1803
1804                 /* Shrink the max buffer again */
1805                 if (ring_buffer_expanded)
1806                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1807                                            RING_BUFFER_ALL_CPUS);
1808         }
1809 #endif
1810
1811         printk(KERN_CONT "PASSED\n");
1812         return 0;
1813 }
1814
1815 static __init int init_trace_selftests(void)
1816 {
1817         struct trace_selftests *p, *n;
1818         struct tracer *t, **last;
1819         int ret;
1820
1821         selftests_can_run = true;
1822
1823         mutex_lock(&trace_types_lock);
1824
1825         if (list_empty(&postponed_selftests))
1826                 goto out;
1827
1828         pr_info("Running postponed tracer tests:\n");
1829
1830         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1831                 /* This loop can take minutes when sanitizers are enabled, so
1832                  * let's make sure we allow RCU processing.
1833                  */
1834                 cond_resched();
1835                 ret = run_tracer_selftest(p->type);
1836                 /* If the test fails, then warn and remove from available_tracers */
1837                 if (ret < 0) {
1838                         WARN(1, "tracer: %s failed selftest, disabling\n",
1839                              p->type->name);
1840                         last = &trace_types;
1841                         for (t = trace_types; t; t = t->next) {
1842                                 if (t == p->type) {
1843                                         *last = t->next;
1844                                         break;
1845                                 }
1846                                 last = &t->next;
1847                         }
1848                 }
1849                 list_del(&p->list);
1850                 kfree(p);
1851         }
1852
1853  out:
1854         mutex_unlock(&trace_types_lock);
1855
1856         return 0;
1857 }
1858 core_initcall(init_trace_selftests);
1859 #else
1860 static inline int run_tracer_selftest(struct tracer *type)
1861 {
1862         return 0;
1863 }
1864 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1865
1866 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1867
1868 static void __init apply_trace_boot_options(void);
1869
1870 /**
1871  * register_tracer - register a tracer with the ftrace system.
1872  * @type: the plugin for the tracer
1873  *
1874  * Register a new plugin tracer.
1875  */
1876 int __init register_tracer(struct tracer *type)
1877 {
1878         struct tracer *t;
1879         int ret = 0;
1880
1881         if (!type->name) {
1882                 pr_info("Tracer must have a name\n");
1883                 return -1;
1884         }
1885
1886         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1887                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1888                 return -1;
1889         }
1890
1891         mutex_lock(&trace_types_lock);
1892
1893         tracing_selftest_running = true;
1894
1895         for (t = trace_types; t; t = t->next) {
1896                 if (strcmp(type->name, t->name) == 0) {
1897                         /* already found */
1898                         pr_info("Tracer %s already registered\n",
1899                                 type->name);
1900                         ret = -1;
1901                         goto out;
1902                 }
1903         }
1904
1905         if (!type->set_flag)
1906                 type->set_flag = &dummy_set_flag;
1907         if (!type->flags) {
1908                 /* allocate a dummy tracer_flags */
1909                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1910                 if (!type->flags) {
1911                         ret = -ENOMEM;
1912                         goto out;
1913                 }
1914                 type->flags->val = 0;
1915                 type->flags->opts = dummy_tracer_opt;
1916         } else
1917                 if (!type->flags->opts)
1918                         type->flags->opts = dummy_tracer_opt;
1919
1920         /* store the tracer for __set_tracer_option */
1921         type->flags->trace = type;
1922
1923         ret = run_tracer_selftest(type);
1924         if (ret < 0)
1925                 goto out;
1926
1927         type->next = trace_types;
1928         trace_types = type;
1929         add_tracer_options(&global_trace, type);
1930
1931  out:
1932         tracing_selftest_running = false;
1933         mutex_unlock(&trace_types_lock);
1934
1935         if (ret || !default_bootup_tracer)
1936                 goto out_unlock;
1937
1938         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1939                 goto out_unlock;
1940
1941         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1942         /* Do we want this tracer to start on bootup? */
1943         tracing_set_tracer(&global_trace, type->name);
1944         default_bootup_tracer = NULL;
1945
1946         apply_trace_boot_options();
1947
1948         /* disable other selftests, since this tracer will break them. */
1949         tracing_selftest_disabled = true;
1950 #ifdef CONFIG_FTRACE_STARTUP_TEST
1951         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1952                type->name);
1953 #endif
1954
1955  out_unlock:
1956         return ret;
1957 }
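
/*
 * Illustrative sketch (not part of this file; the "example_*" names are
 * hypothetical): a minimal tracer fills in the mandatory fields and
 * registers itself from an initcall. Real tracers normally also provide a
 * selftest and option flags.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init example_tracer_setup(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_setup);
 */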
1958
1959 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1960 {
1961         struct ring_buffer *buffer = buf->buffer;
1962
1963         if (!buffer)
1964                 return;
1965
1966         ring_buffer_record_disable(buffer);
1967
1968         /* Make sure all commits have finished */
1969         synchronize_rcu();
1970         ring_buffer_reset_cpu(buffer, cpu);
1971
1972         ring_buffer_record_enable(buffer);
1973 }
1974
1975 void tracing_reset_online_cpus(struct trace_buffer *buf)
1976 {
1977         struct ring_buffer *buffer = buf->buffer;
1978         int cpu;
1979
1980         if (!buffer)
1981                 return;
1982
1983         ring_buffer_record_disable(buffer);
1984
1985         /* Make sure all commits have finished */
1986         synchronize_rcu();
1987
1988         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1989
1990         for_each_online_cpu(cpu)
1991                 ring_buffer_reset_cpu(buffer, cpu);
1992
1993         ring_buffer_record_enable(buffer);
1994 }
1995
1996 /* Must have trace_types_lock held */
1997 void tracing_reset_all_online_cpus(void)
1998 {
1999         struct trace_array *tr;
2000
2001         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2002                 if (!tr->clear_trace)
2003                         continue;
2004                 tr->clear_trace = false;
2005                 tracing_reset_online_cpus(&tr->trace_buffer);
2006 #ifdef CONFIG_TRACER_MAX_TRACE
2007                 tracing_reset_online_cpus(&tr->max_buffer);
2008 #endif
2009         }
2010 }
2011
2012 static int *tgid_map;
2013
2014 #define SAVED_CMDLINES_DEFAULT 128
2015 #define NO_CMDLINE_MAP UINT_MAX
2016 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2017 struct saved_cmdlines_buffer {
2018         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2019         unsigned *map_cmdline_to_pid;
2020         unsigned cmdline_num;
2021         int cmdline_idx;
2022         char *saved_cmdlines;
2023 };
2024 static struct saved_cmdlines_buffer *savedcmd;
2025
2026 /* temporarily disable recording */
2027 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2028
2029 static inline char *get_saved_cmdlines(int idx)
2030 {
2031         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2032 }
2033
2034 static inline void set_cmdline(int idx, const char *cmdline)
2035 {
2036         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2037 }
2038
2039 static int allocate_cmdlines_buffer(unsigned int val,
2040                                     struct saved_cmdlines_buffer *s)
2041 {
2042         s->map_cmdline_to_pid = kmalloc_array(val,
2043                                               sizeof(*s->map_cmdline_to_pid),
2044                                               GFP_KERNEL);
2045         if (!s->map_cmdline_to_pid)
2046                 return -ENOMEM;
2047
2048         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2049         if (!s->saved_cmdlines) {
2050                 kfree(s->map_cmdline_to_pid);
2051                 return -ENOMEM;
2052         }
2053
2054         s->cmdline_idx = 0;
2055         s->cmdline_num = val;
2056         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2057                sizeof(s->map_pid_to_cmdline));
2058         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2059                val * sizeof(*s->map_cmdline_to_pid));
2060
2061         return 0;
2062 }
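
/*
 * Rough sizing, for reference: with the default of SAVED_CMDLINES_DEFAULT
 * (128) entries this allocates about 128 * TASK_COMM_LEN (16) = 2 KiB of
 * comm storage plus 128 * sizeof(unsigned) for the reverse map, on top of
 * the fixed (PID_MAX_DEFAULT + 1) entry forward map embedded in the struct.
 * The two val-sized arrays can be grown via the saved_cmdlines_size file
 * handled later in this file.
 */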
2063
2064 static int trace_create_savedcmd(void)
2065 {
2066         int ret;
2067
2068         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2069         if (!savedcmd)
2070                 return -ENOMEM;
2071
2072         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2073         if (ret < 0) {
2074                 kfree(savedcmd);
2075                 savedcmd = NULL;
2076                 return -ENOMEM;
2077         }
2078
2079         return 0;
2080 }
2081
2082 int is_tracing_stopped(void)
2083 {
2084         return global_trace.stop_count;
2085 }
2086
2087 /**
2088  * tracing_start - quick start of the tracer
2089  *
2090  * If tracing is enabled but was stopped by tracing_stop,
2091  * this will start the tracer back up.
2092  */
2093 void tracing_start(void)
2094 {
2095         struct ring_buffer *buffer;
2096         unsigned long flags;
2097
2098         if (tracing_disabled)
2099                 return;
2100
2101         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2102         if (--global_trace.stop_count) {
2103                 if (global_trace.stop_count < 0) {
2104                         /* Someone screwed up their debugging */
2105                         WARN_ON_ONCE(1);
2106                         global_trace.stop_count = 0;
2107                 }
2108                 goto out;
2109         }
2110
2111         /* Prevent the buffers from switching */
2112         arch_spin_lock(&global_trace.max_lock);
2113
2114         buffer = global_trace.trace_buffer.buffer;
2115         if (buffer)
2116                 ring_buffer_record_enable(buffer);
2117
2118 #ifdef CONFIG_TRACER_MAX_TRACE
2119         buffer = global_trace.max_buffer.buffer;
2120         if (buffer)
2121                 ring_buffer_record_enable(buffer);
2122 #endif
2123
2124         arch_spin_unlock(&global_trace.max_lock);
2125
2126  out:
2127         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2128 }
2129
2130 static void tracing_start_tr(struct trace_array *tr)
2131 {
2132         struct ring_buffer *buffer;
2133         unsigned long flags;
2134
2135         if (tracing_disabled)
2136                 return;
2137
2138         /* If global, we need to also start the max tracer */
2139         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2140                 return tracing_start();
2141
2142         raw_spin_lock_irqsave(&tr->start_lock, flags);
2143
2144         if (--tr->stop_count) {
2145                 if (tr->stop_count < 0) {
2146                         /* Someone screwed up their debugging */
2147                         WARN_ON_ONCE(1);
2148                         tr->stop_count = 0;
2149                 }
2150                 goto out;
2151         }
2152
2153         buffer = tr->trace_buffer.buffer;
2154         if (buffer)
2155                 ring_buffer_record_enable(buffer);
2156
2157  out:
2158         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2159 }
2160
2161 /**
2162  * tracing_stop - quick stop of the tracer
2163  *
2164  * Lightweight way to stop tracing. Use in conjunction with
2165  * tracing_start.
2166  */
2167 void tracing_stop(void)
2168 {
2169         struct ring_buffer *buffer;
2170         unsigned long flags;
2171
2172         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2173         if (global_trace.stop_count++)
2174                 goto out;
2175
2176         /* Prevent the buffers from switching */
2177         arch_spin_lock(&global_trace.max_lock);
2178
2179         buffer = global_trace.trace_buffer.buffer;
2180         if (buffer)
2181                 ring_buffer_record_disable(buffer);
2182
2183 #ifdef CONFIG_TRACER_MAX_TRACE
2184         buffer = global_trace.max_buffer.buffer;
2185         if (buffer)
2186                 ring_buffer_record_disable(buffer);
2187 #endif
2188
2189         arch_spin_unlock(&global_trace.max_lock);
2190
2191  out:
2192         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2193 }
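
/*
 * Illustrative pairing (a sketch, not a caller in this file): code that
 * wants to freeze the ring buffer while inspecting it can bracket the
 * inspection with these calls. Because stop_count is a counter, nested
 * stop/start pairs are safe.
 *
 *	tracing_stop();
 *	... read or dump the trace buffer ...
 *	tracing_start();
 */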
2194
2195 static void tracing_stop_tr(struct trace_array *tr)
2196 {
2197         struct ring_buffer *buffer;
2198         unsigned long flags;
2199
2200         /* If global, we need to also stop the max tracer */
2201         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2202                 return tracing_stop();
2203
2204         raw_spin_lock_irqsave(&tr->start_lock, flags);
2205         if (tr->stop_count++)
2206                 goto out;
2207
2208         buffer = tr->trace_buffer.buffer;
2209         if (buffer)
2210                 ring_buffer_record_disable(buffer);
2211
2212  out:
2213         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2214 }
2215
2216 static int trace_save_cmdline(struct task_struct *tsk)
2217 {
2218         unsigned pid, idx;
2219
2220         /* treat recording of idle task as a success */
2221         if (!tsk->pid)
2222                 return 1;
2223
2224         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2225                 return 0;
2226
2227         /*
2228          * It's not the end of the world if we don't get
2229          * the lock, but we also don't want to spin
2230          * nor do we want to disable interrupts,
2231          * so if we miss here, then better luck next time.
2232          */
2233         if (!arch_spin_trylock(&trace_cmdline_lock))
2234                 return 0;
2235
2236         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2237         if (idx == NO_CMDLINE_MAP) {
2238                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2239
2240                 /*
2241                  * Check whether the cmdline buffer at idx has a pid
2242                  * mapped. We are going to overwrite that entry so we
2243                  * need to clear the map_pid_to_cmdline. Otherwise we
2244                  * would read the new comm for the old pid.
2245                  */
2246                 pid = savedcmd->map_cmdline_to_pid[idx];
2247                 if (pid != NO_CMDLINE_MAP)
2248                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2249
2250                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2251                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2252
2253                 savedcmd->cmdline_idx = idx;
2254         }
2255
2256         set_cmdline(idx, tsk->comm);
2257
2258         arch_spin_unlock(&trace_cmdline_lock);
2259
2260         return 1;
2261 }
2262
2263 static void __trace_find_cmdline(int pid, char comm[])
2264 {
2265         unsigned map;
2266
2267         if (!pid) {
2268                 strcpy(comm, "<idle>");
2269                 return;
2270         }
2271
2272         if (WARN_ON_ONCE(pid < 0)) {
2273                 strcpy(comm, "<XXX>");
2274                 return;
2275         }
2276
2277         if (pid > PID_MAX_DEFAULT) {
2278                 strcpy(comm, "<...>");
2279                 return;
2280         }
2281
2282         map = savedcmd->map_pid_to_cmdline[pid];
2283         if (map != NO_CMDLINE_MAP)
2284                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2285         else
2286                 strcpy(comm, "<...>");
2287 }
2288
2289 void trace_find_cmdline(int pid, char comm[])
2290 {
2291         preempt_disable();
2292         arch_spin_lock(&trace_cmdline_lock);
2293
2294         __trace_find_cmdline(pid, comm);
2295
2296         arch_spin_unlock(&trace_cmdline_lock);
2297         preempt_enable();
2298 }
2299
2300 int trace_find_tgid(int pid)
2301 {
2302         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2303                 return 0;
2304
2305         return tgid_map[pid];
2306 }
2307
2308 static int trace_save_tgid(struct task_struct *tsk)
2309 {
2310         /* treat recording of idle task as a success */
2311         if (!tsk->pid)
2312                 return 1;
2313
2314         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2315                 return 0;
2316
2317         tgid_map[tsk->pid] = tsk->tgid;
2318         return 1;
2319 }
2320
2321 static bool tracing_record_taskinfo_skip(int flags)
2322 {
2323         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2324                 return true;
2325         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2326                 return true;
2327         if (!__this_cpu_read(trace_taskinfo_save))
2328                 return true;
2329         return false;
2330 }
2331
2332 /**
2333  * tracing_record_taskinfo - record the task info of a task
2334  *
2335  * @task:  task to record
2336  * @flags: TRACE_RECORD_CMDLINE for recording comm
2337  *         TRACE_RECORD_TGID for recording tgid
2338  */
2339 void tracing_record_taskinfo(struct task_struct *task, int flags)
2340 {
2341         bool done;
2342
2343         if (tracing_record_taskinfo_skip(flags))
2344                 return;
2345
2346         /*
2347          * Record as much task information as possible. If some fail, continue
2348          * to try to record the others.
2349          */
2350         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2351         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2352
2353         /* If recording any information failed, retry again soon. */
2354         if (!done)
2355                 return;
2356
2357         __this_cpu_write(trace_taskinfo_save, false);
2358 }
2359
2360 /**
2361  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2362  *
2363  * @prev: previous task during sched_switch
2364  * @next: next task during sched_switch
2365  * @flags: TRACE_RECORD_CMDLINE for recording comm
2366  *         TRACE_RECORD_TGID for recording tgid
2367  */
2368 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2369                                           struct task_struct *next, int flags)
2370 {
2371         bool done;
2372
2373         if (tracing_record_taskinfo_skip(flags))
2374                 return;
2375
2376         /*
2377          * Record as much task information as possible. If some fail, continue
2378          * to try to record the others.
2379          */
2380         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2381         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2382         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2383         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2384
2385         /* If recording any information failed, retry again soon. */
2386         if (!done)
2387                 return;
2388
2389         __this_cpu_write(trace_taskinfo_save, false);
2390 }
2391
2392 /* Helpers to record a specific task information */
2393 void tracing_record_cmdline(struct task_struct *task)
2394 {
2395         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2396 }
2397
2398 void tracing_record_tgid(struct task_struct *task)
2399 {
2400         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2401 }
2402
2403 /*
2404  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2405  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2406  * simplifies those functions and keeps them in sync.
2407  */
2408 enum print_line_t trace_handle_return(struct trace_seq *s)
2409 {
2410         return trace_seq_has_overflowed(s) ?
2411                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2412 }
2413 EXPORT_SYMBOL_GPL(trace_handle_return);
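
/*
 * Typical use, as a sketch (the callback name is hypothetical): an output
 * callback with the trace_event_functions ->trace signature writes into the
 * trace_seq and lets this helper translate any overflow into the right
 * return value.
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags,
 *					       struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example event\n");
 *		return trace_handle_return(s);
 *	}
 */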
2414
2415 void
2416 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2417                              unsigned long flags, int pc)
2418 {
2419         struct task_struct *tsk = current;
2420
2421         entry->preempt_count            = pc & 0xff;
2422         entry->pid                      = (tsk) ? tsk->pid : 0;
2423         entry->type                     = type;
2424         entry->flags =
2425 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2426                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2427 #else
2428                 TRACE_FLAG_IRQS_NOSUPPORT |
2429 #endif
2430                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2431                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2432                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2433                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2434                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2435 }
2436 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2437
2438 struct ring_buffer_event *
2439 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2440                           int type,
2441                           unsigned long len,
2442                           unsigned long flags, int pc)
2443 {
2444         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2445 }
2446
2447 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2448 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2449 static int trace_buffered_event_ref;
2450
2451 /**
2452  * trace_buffered_event_enable - enable buffering events
2453  *
2454  * When events are being filtered, it is quicker to use a temporary
2455  * buffer to write the event data into if there's a likely chance
2456  * that it will not be committed. Discarding an event from the ring
2457  * buffer is not as fast as committing it, and is much slower than
2458  * copying the data and then committing.
2459  *
2460  * When an event is to be filtered, allocate per cpu buffers to
2461  * write the event data into, and if the event is filtered and discarded
2462  * it is simply dropped, otherwise, the entire data is to be committed
2463  * in one shot.
2464  */
2465 void trace_buffered_event_enable(void)
2466 {
2467         struct ring_buffer_event *event;
2468         struct page *page;
2469         int cpu;
2470
2471         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2472
2473         if (trace_buffered_event_ref++)
2474                 return;
2475
2476         for_each_tracing_cpu(cpu) {
2477                 page = alloc_pages_node(cpu_to_node(cpu),
2478                                         GFP_KERNEL | __GFP_NORETRY, 0);
2479                 if (!page)
2480                         goto failed;
2481
2482                 event = page_address(page);
2483                 memset(event, 0, sizeof(*event));
2484
2485                 per_cpu(trace_buffered_event, cpu) = event;
2486
2487                 preempt_disable();
2488                 if (cpu == smp_processor_id() &&
2489                     this_cpu_read(trace_buffered_event) !=
2490                     per_cpu(trace_buffered_event, cpu))
2491                         WARN_ON_ONCE(1);
2492                 preempt_enable();
2493         }
2494
2495         return;
2496  failed:
2497         trace_buffered_event_disable();
2498 }
2499
2500 static void enable_trace_buffered_event(void *data)
2501 {
2502         /* Probably not needed, but do it anyway */
2503         smp_rmb();
2504         this_cpu_dec(trace_buffered_event_cnt);
2505 }
2506
2507 static void disable_trace_buffered_event(void *data)
2508 {
2509         this_cpu_inc(trace_buffered_event_cnt);
2510 }
2511
2512 /**
2513  * trace_buffered_event_disable - disable buffering events
2514  *
2515  * When a filter is removed, it is faster to not use the buffered
2516  * events, and to commit directly into the ring buffer. Free up
2517  * the temp buffers when there are no more users. This requires
2518  * special synchronization with current events.
2519  */
2520 void trace_buffered_event_disable(void)
2521 {
2522         int cpu;
2523
2524         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2525
2526         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2527                 return;
2528
2529         if (--trace_buffered_event_ref)
2530                 return;
2531
2532         preempt_disable();
2533         /* For each CPU, set the buffer as used. */
2534         smp_call_function_many(tracing_buffer_mask,
2535                                disable_trace_buffered_event, NULL, 1);
2536         preempt_enable();
2537
2538         /* Wait for all current users to finish */
2539         synchronize_rcu();
2540
2541         for_each_tracing_cpu(cpu) {
2542                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2543                 per_cpu(trace_buffered_event, cpu) = NULL;
2544         }
2545         /*
2546          * Make sure trace_buffered_event is NULL before clearing
2547          * trace_buffered_event_cnt.
2548          */
2549         smp_wmb();
2550
2551         preempt_disable();
2552         /* Do the work on each cpu */
2553         smp_call_function_many(tracing_buffer_mask,
2554                                enable_trace_buffered_event, NULL, 1);
2555         preempt_enable();
2556 }
2557
2558 static struct ring_buffer *temp_buffer;
2559
2560 struct ring_buffer_event *
2561 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2562                           struct trace_event_file *trace_file,
2563                           int type, unsigned long len,
2564                           unsigned long flags, int pc)
2565 {
2566         struct ring_buffer_event *entry;
2567         int val;
2568
2569         *current_rb = trace_file->tr->trace_buffer.buffer;
2570
2571         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2572              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2573             (entry = this_cpu_read(trace_buffered_event))) {
2574                 /* Try to use the per cpu buffer first */
2575                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2576                 if (val == 1) {
2577                         trace_event_setup(entry, type, flags, pc);
2578                         entry->array[0] = len;
2579                         return entry;
2580                 }
2581                 this_cpu_dec(trace_buffered_event_cnt);
2582         }
2583
2584         entry = __trace_buffer_lock_reserve(*current_rb,
2585                                             type, len, flags, pc);
2586         /*
2587          * If tracing is off, but we have triggers enabled
2588          * we still need to look at the event data. Use the temp_buffer
2589          * to store the trace event for the trigger to use. It's recursion
2590          * safe and will not be recorded anywhere.
2591          */
2592         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2593                 *current_rb = temp_buffer;
2594                 entry = __trace_buffer_lock_reserve(*current_rb,
2595                                                     type, len, flags, pc);
2596         }
2597         return entry;
2598 }
2599 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2600
2601 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2602 static DEFINE_MUTEX(tracepoint_printk_mutex);
2603
2604 static void output_printk(struct trace_event_buffer *fbuffer)
2605 {
2606         struct trace_event_call *event_call;
2607         struct trace_event *event;
2608         unsigned long flags;
2609         struct trace_iterator *iter = tracepoint_print_iter;
2610
2611         /* We should never get here if iter is NULL */
2612         if (WARN_ON_ONCE(!iter))
2613                 return;
2614
2615         event_call = fbuffer->trace_file->event_call;
2616         if (!event_call || !event_call->event.funcs ||
2617             !event_call->event.funcs->trace)
2618                 return;
2619
2620         event = &fbuffer->trace_file->event_call->event;
2621
2622         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2623         trace_seq_init(&iter->seq);
2624         iter->ent = fbuffer->entry;
2625         event_call->event.funcs->trace(iter, 0, event);
2626         trace_seq_putc(&iter->seq, 0);
2627         printk("%s", iter->seq.buffer);
2628
2629         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2630 }
2631
2632 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2633                              void __user *buffer, size_t *lenp,
2634                              loff_t *ppos)
2635 {
2636         int save_tracepoint_printk;
2637         int ret;
2638
2639         mutex_lock(&tracepoint_printk_mutex);
2640         save_tracepoint_printk = tracepoint_printk;
2641
2642         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2643
2644         /*
2645          * This will force exiting early, as tracepoint_printk
2646          * is always zero when tracepoint_print_iter is not allocated.
2647          */
2648         if (!tracepoint_print_iter)
2649                 tracepoint_printk = 0;
2650
2651         if (save_tracepoint_printk == tracepoint_printk)
2652                 goto out;
2653
2654         if (tracepoint_printk)
2655                 static_key_enable(&tracepoint_printk_key.key);
2656         else
2657                 static_key_disable(&tracepoint_printk_key.key);
2658
2659  out:
2660         mutex_unlock(&tracepoint_printk_mutex);
2661
2662         return ret;
2663 }
2664
2665 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2666 {
2667         if (static_key_false(&tracepoint_printk_key.key))
2668                 output_printk(fbuffer);
2669
2670         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2671                                     fbuffer->event, fbuffer->entry,
2672                                     fbuffer->flags, fbuffer->pc);
2673 }
2674 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2675
2676 /*
2677  * Skip 3:
2678  *
2679  *   trace_buffer_unlock_commit_regs()
2680  *   trace_event_buffer_commit()
2681  *   trace_event_raw_event_xxx()
2682  */
2683 # define STACK_SKIP 3
2684
2685 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2686                                      struct ring_buffer *buffer,
2687                                      struct ring_buffer_event *event,
2688                                      unsigned long flags, int pc,
2689                                      struct pt_regs *regs)
2690 {
2691         __buffer_unlock_commit(buffer, event);
2692
2693         /*
2694          * If regs is not set, then skip the necessary functions.
2695          * Note, we can still get here via blktrace, wakeup tracer
2696          * and mmiotrace, but that's ok if they lose a function or
2697          * two. They are not that meaningful.
2698          */
2699         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2700         ftrace_trace_userstack(buffer, flags, pc);
2701 }
2702
2703 /*
2704  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2705  */
2706 void
2707 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2708                                    struct ring_buffer_event *event)
2709 {
2710         __buffer_unlock_commit(buffer, event);
2711 }
2712
2713 static void
2714 trace_process_export(struct trace_export *export,
2715                struct ring_buffer_event *event)
2716 {
2717         struct trace_entry *entry;
2718         unsigned int size = 0;
2719
2720         entry = ring_buffer_event_data(event);
2721         size = ring_buffer_event_length(event);
2722         export->write(export, entry, size);
2723 }
2724
2725 static DEFINE_MUTEX(ftrace_export_lock);
2726
2727 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2728
2729 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2730
2731 static inline void ftrace_exports_enable(void)
2732 {
2733         static_branch_enable(&ftrace_exports_enabled);
2734 }
2735
2736 static inline void ftrace_exports_disable(void)
2737 {
2738         static_branch_disable(&ftrace_exports_enabled);
2739 }
2740
2741 static void ftrace_exports(struct ring_buffer_event *event)
2742 {
2743         struct trace_export *export;
2744
2745         preempt_disable_notrace();
2746
2747         export = rcu_dereference_raw_check(ftrace_exports_list);
2748         while (export) {
2749                 trace_process_export(export, event);
2750                 export = rcu_dereference_raw_check(export->next);
2751         }
2752
2753         preempt_enable_notrace();
2754 }
2755
2756 static inline void
2757 add_trace_export(struct trace_export **list, struct trace_export *export)
2758 {
2759         rcu_assign_pointer(export->next, *list);
2760         /*
2761          * We are entering export into the list but another
2762          * CPU might be walking that list. We need to make sure
2763          * the export->next pointer is valid before another CPU sees
2764          * the export pointer included into the list.
2765          */
2766         rcu_assign_pointer(*list, export);
2767 }
2768
2769 static inline int
2770 rm_trace_export(struct trace_export **list, struct trace_export *export)
2771 {
2772         struct trace_export **p;
2773
2774         for (p = list; *p != NULL; p = &(*p)->next)
2775                 if (*p == export)
2776                         break;
2777
2778         if (*p != export)
2779                 return -1;
2780
2781         rcu_assign_pointer(*p, (*p)->next);
2782
2783         return 0;
2784 }
2785
2786 static inline void
2787 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2788 {
2789         if (*list == NULL)
2790                 ftrace_exports_enable();
2791
2792         add_trace_export(list, export);
2793 }
2794
2795 static inline int
2796 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2797 {
2798         int ret;
2799
2800         ret = rm_trace_export(list, export);
2801         if (*list == NULL)
2802                 ftrace_exports_disable();
2803
2804         return ret;
2805 }
2806
2807 int register_ftrace_export(struct trace_export *export)
2808 {
2809         if (WARN_ON_ONCE(!export->write))
2810                 return -1;
2811
2812         mutex_lock(&ftrace_export_lock);
2813
2814         add_ftrace_export(&ftrace_exports_list, export);
2815
2816         mutex_unlock(&ftrace_export_lock);
2817
2818         return 0;
2819 }
2820 EXPORT_SYMBOL_GPL(register_ftrace_export);
2821
2822 int unregister_ftrace_export(struct trace_export *export)
2823 {
2824         int ret;
2825
2826         mutex_lock(&ftrace_export_lock);
2827
2828         ret = rm_ftrace_export(&ftrace_exports_list, export);
2829
2830         mutex_unlock(&ftrace_export_lock);
2831
2832         return ret;
2833 }
2834 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
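
/*
 * Sketch of a user (hypothetical names, assuming the trace_export write()
 * signature from <linux/trace.h>): forward every exported function-trace
 * event to some out-of-band transport, then unregister on teardown.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		// push @size bytes starting at @entry to the transport
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */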
2835
2836 void
2837 trace_function(struct trace_array *tr,
2838                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2839                int pc)
2840 {
2841         struct trace_event_call *call = &event_function;
2842         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2843         struct ring_buffer_event *event;
2844         struct ftrace_entry *entry;
2845
2846         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2847                                             flags, pc);
2848         if (!event)
2849                 return;
2850         entry   = ring_buffer_event_data(event);
2851         entry->ip                       = ip;
2852         entry->parent_ip                = parent_ip;
2853
2854         if (!call_filter_check_discard(call, entry, buffer, event)) {
2855                 if (static_branch_unlikely(&ftrace_exports_enabled))
2856                         ftrace_exports(event);
2857                 __buffer_unlock_commit(buffer, event);
2858         }
2859 }
2860
2861 #ifdef CONFIG_STACKTRACE
2862
2863 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2864 #define FTRACE_KSTACK_NESTING   4
2865
2866 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2867
2868 struct ftrace_stack {
2869         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2870 };
2871
2872
2873 struct ftrace_stacks {
2874         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2875 };
2876
2877 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2878 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2879
2880 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2881                                  unsigned long flags,
2882                                  int skip, int pc, struct pt_regs *regs)
2883 {
2884         struct trace_event_call *call = &event_kernel_stack;
2885         struct ring_buffer_event *event;
2886         unsigned int size, nr_entries;
2887         struct ftrace_stack *fstack;
2888         struct stack_entry *entry;
2889         int stackidx;
2890
2891         /*
2892          * Add one, for this function and the call to stack_trace_save().
2893          * If regs is set, then these functions will not be in the way.
2894          */
2895 #ifndef CONFIG_UNWINDER_ORC
2896         if (!regs)
2897                 skip++;
2898 #endif
2899
2900         /*
2901          * Since events can happen in NMIs there's no safe way to
2902          * use a single per cpu ftrace_stack. We reserve one slot per
2903          * nesting level, so an interrupt or NMI that comes in simply
2904          * uses the next of the FTRACE_KSTACK_NESTING slots.
2905          */
2906         preempt_disable_notrace();
2907
2908         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2909
2910         /* This should never happen. If it does, yell once and skip */
2911         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2912                 goto out;
2913
2914         /*
2915          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2916          * interrupt will either see the value pre increment or post
2917          * increment. If the interrupt happens pre increment it will have
2918          * restored the counter when it returns.  We just need a barrier to
2919          * keep gcc from moving things around.
2920          */
2921         barrier();
2922
2923         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2924         size = ARRAY_SIZE(fstack->calls);
2925
2926         if (regs) {
2927                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2928                                                    size, skip);
2929         } else {
2930                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2931         }
2932
2933         size = nr_entries * sizeof(unsigned long);
2934         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2935                                             sizeof(*entry) + size, flags, pc);
2936         if (!event)
2937                 goto out;
2938         entry = ring_buffer_event_data(event);
2939
2940         memcpy(&entry->caller, fstack->calls, size);
2941         entry->size = nr_entries;
2942
2943         if (!call_filter_check_discard(call, entry, buffer, event))
2944                 __buffer_unlock_commit(buffer, event);
2945
2946  out:
2947         /* Again, don't let gcc optimize things here */
2948         barrier();
2949         __this_cpu_dec(ftrace_stack_reserve);
2950         preempt_enable_notrace();
2951
2952 }
2953
2954 static inline void ftrace_trace_stack(struct trace_array *tr,
2955                                       struct ring_buffer *buffer,
2956                                       unsigned long flags,
2957                                       int skip, int pc, struct pt_regs *regs)
2958 {
2959         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2960                 return;
2961
2962         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2963 }
2964
2965 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2966                    int pc)
2967 {
2968         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2969
2970         if (rcu_is_watching()) {
2971                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2972                 return;
2973         }
2974
2975         /*
2976          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2977          * but if the above rcu_is_watching() failed, then the NMI
2978          * triggered someplace critical, and rcu_irq_enter() should
2979          * not be called from NMI.
2980          */
2981         if (unlikely(in_nmi()))
2982                 return;
2983
2984         rcu_irq_enter_irqson();
2985         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2986         rcu_irq_exit_irqson();
2987 }
2988
2989 /**
2990  * trace_dump_stack - record a stack back trace in the trace buffer
2991  * @skip: Number of functions to skip (helper handlers)
2992  */
2993 void trace_dump_stack(int skip)
2994 {
2995         unsigned long flags;
2996
2997         if (tracing_disabled || tracing_selftest_running)
2998                 return;
2999
3000         local_save_flags(flags);
3001
3002 #ifndef CONFIG_UNWINDER_ORC
3003         /* Skip 1 to skip this function. */
3004         skip++;
3005 #endif
3006         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
3007                              flags, skip, preempt_count(), NULL);
3008 }
3009 EXPORT_SYMBOL_GPL(trace_dump_stack);
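
/*
 * Example (illustrative): sprinkling trace_dump_stack(0) at a suspect code
 * path records the caller's stack into the trace buffer, which is often
 * less disruptive than dump_stack() to the console. A positive @skip hides
 * that many intermediate helper frames.
 */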
3010
3011 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3012 static DEFINE_PER_CPU(int, user_stack_count);
3013
3014 static void
3015 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
3016 {
3017         struct trace_event_call *call = &event_user_stack;
3018         struct ring_buffer_event *event;
3019         struct userstack_entry *entry;
3020
3021         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3022                 return;
3023
3024         /*
3025          * NMIs can not handle page faults, even with fixups.
3026          * Saving the user stack can (and often does) fault.
3027          */
3028         if (unlikely(in_nmi()))
3029                 return;
3030
3031         /*
3032          * prevent recursion, since the user stack tracing may
3033          * trigger other kernel events.
3034          */
3035         preempt_disable();
3036         if (__this_cpu_read(user_stack_count))
3037                 goto out;
3038
3039         __this_cpu_inc(user_stack_count);
3040
3041         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3042                                             sizeof(*entry), flags, pc);
3043         if (!event)
3044                 goto out_drop_count;
3045         entry   = ring_buffer_event_data(event);
3046
3047         entry->tgid             = current->tgid;
3048         memset(&entry->caller, 0, sizeof(entry->caller));
3049
3050         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3051         if (!call_filter_check_discard(call, entry, buffer, event))
3052                 __buffer_unlock_commit(buffer, event);
3053
3054  out_drop_count:
3055         __this_cpu_dec(user_stack_count);
3056  out:
3057         preempt_enable();
3058 }
3059 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3060 static void ftrace_trace_userstack(struct ring_buffer *buffer,
3061                                    unsigned long flags, int pc)
3062 {
3063 }
3064 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3065
3066 #endif /* CONFIG_STACKTRACE */
3067
3068 /* created for use with alloc_percpu */
3069 struct trace_buffer_struct {
3070         int nesting;
3071         char buffer[4][TRACE_BUF_SIZE];
3072 };
3073
3074 static struct trace_buffer_struct *trace_percpu_buffer;
3075
3076 /*
3077  * This allows for lockless recording.  If we're nested too deeply, then
3078  * this returns NULL.
3079  */
3080 static char *get_trace_buf(void)
3081 {
3082         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3083
3084         if (!buffer || buffer->nesting >= 4)
3085                 return NULL;
3086
3087         buffer->nesting++;
3088
3089         /* Interrupts must see nesting incremented before we use the buffer */
3090         barrier();
3091         return &buffer->buffer[buffer->nesting][0];
3092 }
3093
3094 static void put_trace_buf(void)
3095 {
3096         /* Don't let the decrement of nesting leak before this */
3097         barrier();
3098         this_cpu_dec(trace_percpu_buffer->nesting);
3099 }
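
/*
 * Intended pairing, as used by trace_vbprintk() and __trace_array_vprintk()
 * below (sketch):
 *
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *
 * The four per-cpu nesting levels cover normal, softirq, irq and NMI
 * context.
 */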
3100
3101 static int alloc_percpu_trace_buffer(void)
3102 {
3103         struct trace_buffer_struct *buffers;
3104
3105         buffers = alloc_percpu(struct trace_buffer_struct);
3106         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3107                 return -ENOMEM;
3108
3109         trace_percpu_buffer = buffers;
3110         return 0;
3111 }
3112
3113 static int buffers_allocated;
3114
3115 void trace_printk_init_buffers(void)
3116 {
3117         if (buffers_allocated)
3118                 return;
3119
3120         if (alloc_percpu_trace_buffer())
3121                 return;
3122
3123         /* trace_printk() is for debug use only. Don't use it in production. */
3124
3125         pr_warn("\n");
3126         pr_warn("**********************************************************\n");
3127         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3128         pr_warn("**                                                      **\n");
3129         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3130         pr_warn("**                                                      **\n");
3131         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3132         pr_warn("** unsafe for production use.                           **\n");
3133         pr_warn("**                                                      **\n");
3134         pr_warn("** If you see this message and you are not debugging    **\n");
3135         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3136         pr_warn("**                                                      **\n");
3137         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3138         pr_warn("**********************************************************\n");
3139
3140         /* Expand the buffers to set size */
3141         tracing_update_buffers();
3142
3143         buffers_allocated = 1;
3144
3145         /*
3146          * trace_printk_init_buffers() can be called by modules.
3147          * If that happens, then we need to start cmdline recording
3148          * directly here. If the global_trace.buffer is already
3149          * allocated here, then this was called by module code.
3150          */
3151         if (global_trace.trace_buffer.buffer)
3152                 tracing_start_cmdline_record();
3153 }
3154 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3155
3156 void trace_printk_start_comm(void)
3157 {
3158         /* Start tracing comms if trace printk is set */
3159         if (!buffers_allocated)
3160                 return;
3161         tracing_start_cmdline_record();
3162 }
3163
3164 static void trace_printk_start_stop_comm(int enabled)
3165 {
3166         if (!buffers_allocated)
3167                 return;
3168
3169         if (enabled)
3170                 tracing_start_cmdline_record();
3171         else
3172                 tracing_stop_cmdline_record();
3173 }
3174
3175 /**
3176  * trace_vbprintk - write binary msg to tracing buffer
3177  * @ip:    The address of the caller
3178  * @fmt:   The string format to write to the buffer
3179  * @args:  Arguments for @fmt
3180  */
3181 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3182 {
3183         struct trace_event_call *call = &event_bprint;
3184         struct ring_buffer_event *event;
3185         struct ring_buffer *buffer;
3186         struct trace_array *tr = &global_trace;
3187         struct bprint_entry *entry;
3188         unsigned long flags;
3189         char *tbuffer;
3190         int len = 0, size, pc;
3191
3192         if (unlikely(tracing_selftest_running || tracing_disabled))
3193                 return 0;
3194
3195         /* Don't pollute graph traces with trace_vprintk internals */
3196         pause_graph_tracing();
3197
3198         pc = preempt_count();
3199         preempt_disable_notrace();
3200
3201         tbuffer = get_trace_buf();
3202         if (!tbuffer) {
3203                 len = 0;
3204                 goto out_nobuffer;
3205         }
3206
3207         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3208
3209         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3210                 goto out;
3211
3212         local_save_flags(flags);
3213         size = sizeof(*entry) + sizeof(u32) * len;
3214         buffer = tr->trace_buffer.buffer;
3215         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3216                                             flags, pc);
3217         if (!event)
3218                 goto out;
3219         entry = ring_buffer_event_data(event);
3220         entry->ip                       = ip;
3221         entry->fmt                      = fmt;
3222
3223         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3224         if (!call_filter_check_discard(call, entry, buffer, event)) {
3225                 __buffer_unlock_commit(buffer, event);
3226                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3227         }
3228
3229 out:
3230         put_trace_buf();
3231
3232 out_nobuffer:
3233         preempt_enable_notrace();
3234         unpause_graph_tracing();
3235
3236         return len;
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vbprintk);
3239
3240 __printf(3, 0)
3241 static int
3242 __trace_array_vprintk(struct ring_buffer *buffer,
3243                       unsigned long ip, const char *fmt, va_list args)
3244 {
3245         struct trace_event_call *call = &event_print;
3246         struct ring_buffer_event *event;
3247         int len = 0, size, pc;
3248         struct print_entry *entry;
3249         unsigned long flags;
3250         char *tbuffer;
3251
3252         if (tracing_disabled || tracing_selftest_running)
3253                 return 0;
3254
3255         /* Don't pollute graph traces with trace_vprintk internals */
3256         pause_graph_tracing();
3257
3258         pc = preempt_count();
3259         preempt_disable_notrace();
3260
3262         tbuffer = get_trace_buf();
3263         if (!tbuffer) {
3264                 len = 0;
3265                 goto out_nobuffer;
3266         }
3267
3268         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3269
3270         local_save_flags(flags);
3271         size = sizeof(*entry) + len + 1;
3272         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3273                                             flags, pc);
3274         if (!event)
3275                 goto out;
3276         entry = ring_buffer_event_data(event);
3277         entry->ip = ip;
3278
3279         memcpy(&entry->buf, tbuffer, len + 1);
3280         if (!call_filter_check_discard(call, entry, buffer, event)) {
3281                 __buffer_unlock_commit(buffer, event);
3282                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3283         }
3284
3285 out:
3286         put_trace_buf();
3287
3288 out_nobuffer:
3289         preempt_enable_notrace();
3290         unpause_graph_tracing();
3291
3292         return len;
3293 }
3294
3295 __printf(3, 0)
3296 int trace_array_vprintk(struct trace_array *tr,
3297                         unsigned long ip, const char *fmt, va_list args)
3298 {
3299         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3300 }
3301
3302 __printf(3, 0)
3303 int trace_array_printk(struct trace_array *tr,
3304                        unsigned long ip, const char *fmt, ...)
3305 {
3306         int ret;
3307         va_list ap;
3308
3309         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3310                 return 0;
3311
3312         if (!tr)
3313                 return -ENOENT;
3314
3315         va_start(ap, fmt);
3316         ret = trace_array_vprintk(tr, ip, fmt, ap);
3317         va_end(ap);
3318         return ret;
3319 }
3320 EXPORT_SYMBOL_GPL(trace_array_printk);
3321
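/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of writing into an instance buffer. The function name is hypothetical,
 * and it assumes the caller already holds a valid trace_array reference
 * for an instance obtained elsewhere.
 */
static void example_instance_printk(struct trace_array *tr, int value)
{
        if (!tr)
                return;

        trace_array_printk(tr, _THIS_IP_, "example value: %d\n", value);
}
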
3322 __printf(3, 4)
3323 int trace_array_printk_buf(struct ring_buffer *buffer,
3324                            unsigned long ip, const char *fmt, ...)
3325 {
3326         int ret;
3327         va_list ap;
3328
3329         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3330                 return 0;
3331
3332         va_start(ap, fmt);
3333         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3334         va_end(ap);
3335         return ret;
3336 }
3337
3338 __printf(2, 0)
3339 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3340 {
3341         return trace_array_vprintk(&global_trace, ip, fmt, args);
3342 }
3343 EXPORT_SYMBOL_GPL(trace_vprintk);
3344
3345 static void trace_iterator_increment(struct trace_iterator *iter)
3346 {
3347         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3348
3349         iter->idx++;
3350         if (buf_iter)
3351                 ring_buffer_read(buf_iter, NULL);
3352 }
3353
3354 static struct trace_entry *
3355 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3356                 unsigned long *lost_events)
3357 {
3358         struct ring_buffer_event *event;
3359         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3360
3361         if (buf_iter)
3362                 event = ring_buffer_iter_peek(buf_iter, ts);
3363         else
3364                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3365                                          lost_events);
3366
3367         if (event) {
3368                 iter->ent_size = ring_buffer_event_length(event);
3369                 return ring_buffer_event_data(event);
3370         }
3371         iter->ent_size = 0;
3372         return NULL;
3373 }
3374
3375 static struct trace_entry *
3376 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3377                   unsigned long *missing_events, u64 *ent_ts)
3378 {
3379         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3380         struct trace_entry *ent, *next = NULL;
3381         unsigned long lost_events = 0, next_lost = 0;
3382         int cpu_file = iter->cpu_file;
3383         u64 next_ts = 0, ts;
3384         int next_cpu = -1;
3385         int next_size = 0;
3386         int cpu;
3387
3388         /*
3389          * If we are in a per_cpu trace file, don't bother iterating over
3390          * all CPUs; just peek at that CPU directly.
3391          */
3392         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3393                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3394                         return NULL;
3395                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3396                 if (ent_cpu)
3397                         *ent_cpu = cpu_file;
3398
3399                 return ent;
3400         }
3401
3402         for_each_tracing_cpu(cpu) {
3403
3404                 if (ring_buffer_empty_cpu(buffer, cpu))
3405                         continue;
3406
3407                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3408
3409                 /*
3410                  * Pick the entry with the smallest timestamp:
3411                  */
3412                 if (ent && (!next || ts < next_ts)) {
3413                         next = ent;
3414                         next_cpu = cpu;
3415                         next_ts = ts;
3416                         next_lost = lost_events;
3417                         next_size = iter->ent_size;
3418                 }
3419         }
3420
3421         iter->ent_size = next_size;
3422
3423         if (ent_cpu)
3424                 *ent_cpu = next_cpu;
3425
3426         if (ent_ts)
3427                 *ent_ts = next_ts;
3428
3429         if (missing_events)
3430                 *missing_events = next_lost;
3431
3432         return next;
3433 }
3434
3435 /* Find the next real entry, without updating the iterator itself */
3436 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3437                                           int *ent_cpu, u64 *ent_ts)
3438 {
3439         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3440 }
3441
3442 /* Find the next real entry, and increment the iterator to the next entry */
3443 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3444 {
3445         iter->ent = __find_next_entry(iter, &iter->cpu,
3446                                       &iter->lost_events, &iter->ts);
3447
3448         if (iter->ent)
3449                 trace_iterator_increment(iter);
3450
3451         return iter->ent ? iter : NULL;
3452 }
3453
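/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of how the read paths below consume an iterator: entries come back one
 * at a time, merged across CPUs in timestamp order. It assumes an
 * iterator that has already been set up (as __tracing_open() does later
 * in this file); the name example_walk_iterator() is hypothetical.
 */
static void example_walk_iterator(struct seq_file *m, struct trace_iterator *iter)
{
        while (trace_find_next_entry_inc(iter)) {
                /* iter->ent, iter->cpu and iter->ts describe this entry */
                print_trace_line(iter);
                /* drain iter->seq into the seq_file after every line */
                trace_print_seq(m, &iter->seq);
        }
}
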
3454 static void trace_consume(struct trace_iterator *iter)
3455 {
3456         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3457                             &iter->lost_events);
3458 }
3459
3460 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3461 {
3462         struct trace_iterator *iter = m->private;
3463         int i = (int)*pos;
3464         void *ent;
3465
3466         WARN_ON_ONCE(iter->leftover);
3467
3468         (*pos)++;
3469
3470         /* can't go backwards */
3471         if (iter->idx > i)
3472                 return NULL;
3473
3474         if (iter->idx < 0)
3475                 ent = trace_find_next_entry_inc(iter);
3476         else
3477                 ent = iter;
3478
3479         while (ent && iter->idx < i)
3480                 ent = trace_find_next_entry_inc(iter);
3481
3482         iter->pos = *pos;
3483
3484         return ent;
3485 }
3486
3487 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3488 {
3489         struct ring_buffer_event *event;
3490         struct ring_buffer_iter *buf_iter;
3491         unsigned long entries = 0;
3492         u64 ts;
3493
3494         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3495
3496         buf_iter = trace_buffer_iter(iter, cpu);
3497         if (!buf_iter)
3498                 return;
3499
3500         ring_buffer_iter_reset(buf_iter);
3501
3502         /*
3503          * With the max latency tracers, it is possible that a reset
3504          * never took place on a CPU. This is evident when the timestamp
3505          * is before the start of the buffer.
3506          */
3507         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3508                 if (ts >= iter->trace_buffer->time_start)
3509                         break;
3510                 entries++;
3511                 ring_buffer_read(buf_iter, NULL);
3512         }
3513
3514         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3515 }
3516
3517 /*
3518  * The current tracer is copied to avoid global locking
3519  * all around.
3520  */
3521 static void *s_start(struct seq_file *m, loff_t *pos)
3522 {
3523         struct trace_iterator *iter = m->private;
3524         struct trace_array *tr = iter->tr;
3525         int cpu_file = iter->cpu_file;
3526         void *p = NULL;
3527         loff_t l = 0;
3528         int cpu;
3529
3530         /*
3531          * Copy the tracer to avoid using a global lock all around.
3532          * iter->trace is a copy of current_trace; the name pointer may
3533          * be compared instead of using strcmp(), as iter->trace->name
3534          * will point to the same string as current_trace->name.
3535          */
3536         mutex_lock(&trace_types_lock);
3537         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3538                 *iter->trace = *tr->current_trace;
3539         mutex_unlock(&trace_types_lock);
3540
3541 #ifdef CONFIG_TRACER_MAX_TRACE
3542         if (iter->snapshot && iter->trace->use_max_tr)
3543                 return ERR_PTR(-EBUSY);
3544 #endif
3545
3546         if (!iter->snapshot)
3547                 atomic_inc(&trace_record_taskinfo_disabled);
3548
3549         if (*pos != iter->pos) {
3550                 iter->ent = NULL;
3551                 iter->cpu = 0;
3552                 iter->idx = -1;
3553
3554                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3555                         for_each_tracing_cpu(cpu)
3556                                 tracing_iter_reset(iter, cpu);
3557                 } else
3558                         tracing_iter_reset(iter, cpu_file);
3559
3560                 iter->leftover = 0;
3561                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3562                         ;
3563
3564         } else {
3565                 /*
3566                  * If we overflowed the seq_file before, then we want
3567                  * to just reuse the trace_seq buffer again.
3568                  */
3569                 if (iter->leftover)
3570                         p = iter;
3571                 else {
3572                         l = *pos - 1;
3573                         p = s_next(m, p, &l);
3574                 }
3575         }
3576
3577         trace_event_read_lock();
3578         trace_access_lock(cpu_file);
3579         return p;
3580 }
3581
3582 static void s_stop(struct seq_file *m, void *p)
3583 {
3584         struct trace_iterator *iter = m->private;
3585
3586 #ifdef CONFIG_TRACER_MAX_TRACE
3587         if (iter->snapshot && iter->trace->use_max_tr)
3588                 return;
3589 #endif
3590
3591         if (!iter->snapshot)
3592                 atomic_dec(&trace_record_taskinfo_disabled);
3593
3594         trace_access_unlock(iter->cpu_file);
3595         trace_event_read_unlock();
3596 }
3597
3598 static void
3599 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3600                       unsigned long *entries, int cpu)
3601 {
3602         unsigned long count;
3603
3604         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3605         /*
3606          * If this buffer has skipped entries, then we hold all
3607          * entries for the trace and we need to ignore the
3608          * ones before the timestamp.
3609          */
3610         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3611                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3612                 /* total is the same as the entries */
3613                 *total = count;
3614         } else
3615                 *total = count +
3616                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3617         *entries = count;
3618 }
3619
3620 static void
3621 get_total_entries(struct trace_buffer *buf,
3622                   unsigned long *total, unsigned long *entries)
3623 {
3624         unsigned long t, e;
3625         int cpu;
3626
3627         *total = 0;
3628         *entries = 0;
3629
3630         for_each_tracing_cpu(cpu) {
3631                 get_total_entries_cpu(buf, &t, &e, cpu);
3632                 *total += t;
3633                 *entries += e;
3634         }
3635 }
3636
3637 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3638 {
3639         unsigned long total, entries;
3640
3641         if (!tr)
3642                 tr = &global_trace;
3643
3644         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3645
3646         return entries;
3647 }
3648
3649 unsigned long trace_total_entries(struct trace_array *tr)
3650 {
3651         unsigned long total, entries;
3652
3653         if (!tr)
3654                 tr = &global_trace;
3655
3656         get_total_entries(&tr->trace_buffer, &total, &entries);
3657
3658         return entries;
3659 }
3660
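/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * showing the entry-counting helpers above in use; passing NULL falls
 * back to the global trace array. The function name is hypothetical.
 */
static void example_report_entries(void)
{
        pr_info("trace buffer: %lu entries total\n",
                trace_total_entries(NULL));
}
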
3661 static void print_lat_help_header(struct seq_file *m)
3662 {
3663         seq_puts(m, "#                  _------=> CPU#            \n"
3664                     "#                 / _-----=> irqs-off        \n"
3665                     "#                | / _----=> need-resched    \n"
3666                     "#                || / _---=> hardirq/softirq \n"
3667                     "#                ||| / _--=> preempt-depth   \n"
3668                     "#                |||| /     delay            \n"
3669                     "#  cmd     pid   ||||| time  |   caller      \n"
3670                     "#     \\   /      |||||  \\    |   /         \n");
3671 }
3672
3673 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3674 {
3675         unsigned long total;
3676         unsigned long entries;
3677
3678         get_total_entries(buf, &total, &entries);
3679         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3680                    entries, total, num_online_cpus());
3681         seq_puts(m, "#\n");
3682 }
3683
3684 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3685                                    unsigned int flags)
3686 {
3687         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3688
3689         print_event_info(buf, m);
3690
3691         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3692         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3693 }
3694
3695 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3696                                        unsigned int flags)
3697 {
3698         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3699         const char *space = "          ";
3700         int prec = tgid ? 10 : 2;
3701
3702         print_event_info(buf, m);
3703
3704         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3705         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3706         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3707         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3708         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3709         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3710         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3711 }
3712
3713 void
3714 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3715 {
3716         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3717         struct trace_buffer *buf = iter->trace_buffer;
3718         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3719         struct tracer *type = iter->trace;
3720         unsigned long entries;
3721         unsigned long total;
3722         const char *name = type->name;
3725
3726         get_total_entries(buf, &total, &entries);
3727
3728         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3729                    name, UTS_RELEASE);
3730         seq_puts(m, "# -----------------------------------"
3731                  "---------------------------------\n");
3732         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3733                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3734                    nsecs_to_usecs(data->saved_latency),
3735                    entries,
3736                    total,
3737                    buf->cpu,
3738 #if defined(CONFIG_PREEMPT_NONE)
3739                    "server",
3740 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3741                    "desktop",
3742 #elif defined(CONFIG_PREEMPT)
3743                    "preempt",
3744 #elif defined(CONFIG_PREEMPT_RT)
3745                    "preempt_rt",
3746 #else
3747                    "unknown",
3748 #endif
3749                    /* These are reserved for later use */
3750                    0, 0, 0, 0);
3751 #ifdef CONFIG_SMP
3752         seq_printf(m, " #P:%d)\n", num_online_cpus());
3753 #else
3754         seq_puts(m, ")\n");
3755 #endif
3756         seq_puts(m, "#    -----------------\n");
3757         seq_printf(m, "#    | task: %.16s-%d "
3758                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3759                    data->comm, data->pid,
3760                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3761                    data->policy, data->rt_priority);
3762         seq_puts(m, "#    -----------------\n");
3763
3764         if (data->critical_start) {
3765                 seq_puts(m, "#  => started at: ");
3766                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3767                 trace_print_seq(m, &iter->seq);
3768                 seq_puts(m, "\n#  => ended at:   ");
3769                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3770                 trace_print_seq(m, &iter->seq);
3771                 seq_puts(m, "\n#\n");
3772         }
3773
3774         seq_puts(m, "#\n");
3775 }
3776
3777 static void test_cpu_buff_start(struct trace_iterator *iter)
3778 {
3779         struct trace_seq *s = &iter->seq;
3780         struct trace_array *tr = iter->tr;
3781
3782         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3783                 return;
3784
3785         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3786                 return;
3787
3788         if (cpumask_available(iter->started) &&
3789             cpumask_test_cpu(iter->cpu, iter->started))
3790                 return;
3791
3792         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3793                 return;
3794
3795         if (cpumask_available(iter->started))
3796                 cpumask_set_cpu(iter->cpu, iter->started);
3797
3798         /* Don't print started cpu buffer for the first entry of the trace */
3799         if (iter->idx > 1)
3800                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3801                                 iter->cpu);
3802 }
3803
3804 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3805 {
3806         struct trace_array *tr = iter->tr;
3807         struct trace_seq *s = &iter->seq;
3808         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3809         struct trace_entry *entry;
3810         struct trace_event *event;
3811
3812         entry = iter->ent;
3813
3814         test_cpu_buff_start(iter);
3815
3816         event = ftrace_find_event(entry->type);
3817
3818         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3819                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3820                         trace_print_lat_context(iter);
3821                 else
3822                         trace_print_context(iter);
3823         }
3824
3825         if (trace_seq_has_overflowed(s))
3826                 return TRACE_TYPE_PARTIAL_LINE;
3827
3828         if (event)
3829                 return event->funcs->trace(iter, sym_flags, event);
3830
3831         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3832
3833         return trace_handle_return(s);
3834 }
3835
3836 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3837 {
3838         struct trace_array *tr = iter->tr;
3839         struct trace_seq *s = &iter->seq;
3840         struct trace_entry *entry;
3841         struct trace_event *event;
3842
3843         entry = iter->ent;
3844
3845         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3846                 trace_seq_printf(s, "%d %d %llu ",
3847                                  entry->pid, iter->cpu, iter->ts);
3848
3849         if (trace_seq_has_overflowed(s))
3850                 return TRACE_TYPE_PARTIAL_LINE;
3851
3852         event = ftrace_find_event(entry->type);
3853         if (event)
3854                 return event->funcs->raw(iter, 0, event);
3855
3856         trace_seq_printf(s, "%d ?\n", entry->type);
3857
3858         return trace_handle_return(s);
3859 }
3860
3861 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3862 {
3863         struct trace_array *tr = iter->tr;
3864         struct trace_seq *s = &iter->seq;
3865         unsigned char newline = '\n';
3866         struct trace_entry *entry;
3867         struct trace_event *event;
3868
3869         entry = iter->ent;
3870
3871         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3872                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3873                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3874                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3875                 if (trace_seq_has_overflowed(s))
3876                         return TRACE_TYPE_PARTIAL_LINE;
3877         }
3878
3879         event = ftrace_find_event(entry->type);
3880         if (event) {
3881                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3882                 if (ret != TRACE_TYPE_HANDLED)
3883                         return ret;
3884         }
3885
3886         SEQ_PUT_FIELD(s, newline);
3887
3888         return trace_handle_return(s);
3889 }
3890
3891 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3892 {
3893         struct trace_array *tr = iter->tr;
3894         struct trace_seq *s = &iter->seq;
3895         struct trace_entry *entry;
3896         struct trace_event *event;
3897
3898         entry = iter->ent;
3899
3900         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3901                 SEQ_PUT_FIELD(s, entry->pid);
3902                 SEQ_PUT_FIELD(s, iter->cpu);
3903                 SEQ_PUT_FIELD(s, iter->ts);
3904                 if (trace_seq_has_overflowed(s))
3905                         return TRACE_TYPE_PARTIAL_LINE;
3906         }
3907
3908         event = ftrace_find_event(entry->type);
3909         return event ? event->funcs->binary(iter, 0, event) :
3910                 TRACE_TYPE_HANDLED;
3911 }
3912
3913 int trace_empty(struct trace_iterator *iter)
3914 {
3915         struct ring_buffer_iter *buf_iter;
3916         int cpu;
3917
3918         /* If we are looking at one CPU buffer, only check that one */
3919         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3920                 cpu = iter->cpu_file;
3921                 buf_iter = trace_buffer_iter(iter, cpu);
3922                 if (buf_iter) {
3923                         if (!ring_buffer_iter_empty(buf_iter))
3924                                 return 0;
3925                 } else {
3926                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3927                                 return 0;
3928                 }
3929                 return 1;
3930         }
3931
3932         for_each_tracing_cpu(cpu) {
3933                 buf_iter = trace_buffer_iter(iter, cpu);
3934                 if (buf_iter) {
3935                         if (!ring_buffer_iter_empty(buf_iter))
3936                                 return 0;
3937                 } else {
3938                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3939                                 return 0;
3940                 }
3941         }
3942
3943         return 1;
3944 }
3945
3946 /*  Called with trace_event_read_lock() held. */
3947 enum print_line_t print_trace_line(struct trace_iterator *iter)
3948 {
3949         struct trace_array *tr = iter->tr;
3950         unsigned long trace_flags = tr->trace_flags;
3951         enum print_line_t ret;
3952
3953         if (iter->lost_events) {
3954                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3955                                  iter->cpu, iter->lost_events);
3956                 if (trace_seq_has_overflowed(&iter->seq))
3957                         return TRACE_TYPE_PARTIAL_LINE;
3958         }
3959
3960         if (iter->trace && iter->trace->print_line) {
3961                 ret = iter->trace->print_line(iter);
3962                 if (ret != TRACE_TYPE_UNHANDLED)
3963                         return ret;
3964         }
3965
3966         if (iter->ent->type == TRACE_BPUTS &&
3967                         trace_flags & TRACE_ITER_PRINTK &&
3968                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3969                 return trace_print_bputs_msg_only(iter);
3970
3971         if (iter->ent->type == TRACE_BPRINT &&
3972                         trace_flags & TRACE_ITER_PRINTK &&
3973                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3974                 return trace_print_bprintk_msg_only(iter);
3975
3976         if (iter->ent->type == TRACE_PRINT &&
3977                         trace_flags & TRACE_ITER_PRINTK &&
3978                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3979                 return trace_print_printk_msg_only(iter);
3980
3981         if (trace_flags & TRACE_ITER_BIN)
3982                 return print_bin_fmt(iter);
3983
3984         if (trace_flags & TRACE_ITER_HEX)
3985                 return print_hex_fmt(iter);
3986
3987         if (trace_flags & TRACE_ITER_RAW)
3988                 return print_raw_fmt(iter);
3989
3990         return print_trace_fmt(iter);
3991 }
3992
3993 void trace_latency_header(struct seq_file *m)
3994 {
3995         struct trace_iterator *iter = m->private;
3996         struct trace_array *tr = iter->tr;
3997
3998         /* print nothing if the buffers are empty */
3999         if (trace_empty(iter))
4000                 return;
4001
4002         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4003                 print_trace_header(m, iter);
4004
4005         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4006                 print_lat_help_header(m);
4007 }
4008
4009 void trace_default_header(struct seq_file *m)
4010 {
4011         struct trace_iterator *iter = m->private;
4012         struct trace_array *tr = iter->tr;
4013         unsigned long trace_flags = tr->trace_flags;
4014
4015         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4016                 return;
4017
4018         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4019                 /* print nothing if the buffers are empty */
4020                 if (trace_empty(iter))
4021                         return;
4022                 print_trace_header(m, iter);
4023                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4024                         print_lat_help_header(m);
4025         } else {
4026                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4027                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4028                                 print_func_help_header_irq(iter->trace_buffer,
4029                                                            m, trace_flags);
4030                         else
4031                                 print_func_help_header(iter->trace_buffer, m,
4032                                                        trace_flags);
4033                 }
4034         }
4035 }
4036
4037 static void test_ftrace_alive(struct seq_file *m)
4038 {
4039         if (!ftrace_is_dead())
4040                 return;
4041         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4042                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4043 }
4044
4045 #ifdef CONFIG_TRACER_MAX_TRACE
4046 static void show_snapshot_main_help(struct seq_file *m)
4047 {
4048         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4049                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4050                     "#                      Takes a snapshot of the main buffer.\n"
4051                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4052                     "#                      (Doesn't have to be '2'; works with any number that\n"
4053                     "#                       is not a '0' or '1')\n");
4054 }
4055
4056 static void show_snapshot_percpu_help(struct seq_file *m)
4057 {
4058         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4059 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4060         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4061                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4062 #else
4063         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4064                     "#                     Must use main snapshot file to allocate.\n");
4065 #endif
4066         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4067                     "#                      (Doesn't have to be '2'; works with any number that\n"
4068                     "#                       is not a '0' or '1')\n");
4069 }
4070
4071 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4072 {
4073         if (iter->tr->allocated_snapshot)
4074                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4075         else
4076                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4077
4078         seq_puts(m, "# Snapshot commands:\n");
4079         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4080                 show_snapshot_main_help(m);
4081         else
4082                 show_snapshot_percpu_help(m);
4083 }
4084 #else
4085 /* Should never be called */
4086 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4087 #endif
4088
4089 static int s_show(struct seq_file *m, void *v)
4090 {
4091         struct trace_iterator *iter = v;
4092         int ret;
4093
4094         if (iter->ent == NULL) {
4095                 if (iter->tr) {
4096                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4097                         seq_puts(m, "#\n");
4098                         test_ftrace_alive(m);
4099                 }
4100                 if (iter->snapshot && trace_empty(iter))
4101                         print_snapshot_help(m, iter);
4102                 else if (iter->trace && iter->trace->print_header)
4103                         iter->trace->print_header(m);
4104                 else
4105                         trace_default_header(m);
4106
4107         } else if (iter->leftover) {
4108                 /*
4109                  * If we filled the seq_file buffer earlier, we
4110                  * want to just show it now.
4111                  */
4112                 ret = trace_print_seq(m, &iter->seq);
4113
4114                 /* ret should this time be zero, but you never know */
4115                 iter->leftover = ret;
4116
4117         } else {
4118                 print_trace_line(iter);
4119                 ret = trace_print_seq(m, &iter->seq);
4120                 /*
4121                  * If we overflow the seq_file buffer, then it will
4122                  * ask us for this data again at start up.
4123                  * Use that instead.
4124                  *  ret is 0 if seq_file write succeeded.
4125                  *        -1 otherwise.
4126                  */
4127                 iter->leftover = ret;
4128         }
4129
4130         return 0;
4131 }
4132
4133 /*
4134  * Should be used after trace_array_get(); trace_types_lock
4135  * ensures that i_cdev was already initialized.
4136  */
4137 static inline int tracing_get_cpu(struct inode *inode)
4138 {
4139         if (inode->i_cdev) /* See trace_create_cpu_file() */
4140                 return (long)inode->i_cdev - 1;
4141         return RING_BUFFER_ALL_CPUS;
4142 }
4143
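/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of the encoding that tracing_get_cpu() undoes. Per-CPU trace files
 * stash "cpu + 1" in i_cdev (see trace_create_cpu_file() later in this
 * file), so a NULL i_cdev still means "all CPUs". The helper name is
 * hypothetical.
 */
static inline void example_encode_cpu(struct inode *inode, long cpu)
{
        inode->i_cdev = (void *)(cpu + 1);
}
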
4144 static const struct seq_operations tracer_seq_ops = {
4145         .start          = s_start,
4146         .next           = s_next,
4147         .stop           = s_stop,
4148         .show           = s_show,
4149 };
4150
4151 static struct trace_iterator *
4152 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4153 {
4154         struct trace_array *tr = inode->i_private;
4155         struct trace_iterator *iter;
4156         int cpu;
4157
4158         if (tracing_disabled)
4159                 return ERR_PTR(-ENODEV);
4160
4161         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4162         if (!iter)
4163                 return ERR_PTR(-ENOMEM);
4164
4165         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4166                                     GFP_KERNEL);
4167         if (!iter->buffer_iter)
4168                 goto release;
4169
4170         /*
4171          * We make a copy of the current tracer to avoid concurrent
4172          * changes on it while we are reading.
4173          */
4174         mutex_lock(&trace_types_lock);
4175         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4176         if (!iter->trace)
4177                 goto fail;
4178
4179         *iter->trace = *tr->current_trace;
4180
4181         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4182                 goto fail;
4183
4184         iter->tr = tr;
4185
4186 #ifdef CONFIG_TRACER_MAX_TRACE
4187         /* Currently only the top directory has a snapshot */
4188         if (tr->current_trace->print_max || snapshot)
4189                 iter->trace_buffer = &tr->max_buffer;
4190         else
4191 #endif
4192                 iter->trace_buffer = &tr->trace_buffer;
4193         iter->snapshot = snapshot;
4194         iter->pos = -1;
4195         iter->cpu_file = tracing_get_cpu(inode);
4196         mutex_init(&iter->mutex);
4197
4198         /* Notify the tracer early; before we stop tracing. */
4199         if (iter->trace && iter->trace->open)
4200                 iter->trace->open(iter);
4201
4202         /* Annotate start of buffers if we had overruns */
4203         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4204                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4205
4206         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4207         if (trace_clocks[tr->clock_id].in_ns)
4208                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4209
4210         /* stop the trace while dumping if we are not opening "snapshot" */
4211         if (!iter->snapshot)
4212                 tracing_stop_tr(tr);
4213
4214         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4215                 for_each_tracing_cpu(cpu) {
4216                         iter->buffer_iter[cpu] =
4217                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4218                                                          cpu, GFP_KERNEL);
4219                 }
4220                 ring_buffer_read_prepare_sync();
4221                 for_each_tracing_cpu(cpu) {
4222                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4223                         tracing_iter_reset(iter, cpu);
4224                 }
4225         } else {
4226                 cpu = iter->cpu_file;
4227                 iter->buffer_iter[cpu] =
4228                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4229                                                  cpu, GFP_KERNEL);
4230                 ring_buffer_read_prepare_sync();
4231                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4232                 tracing_iter_reset(iter, cpu);
4233         }
4234
4235         mutex_unlock(&trace_types_lock);
4236
4237         return iter;
4238
4239  fail:
4240         mutex_unlock(&trace_types_lock);
4241         kfree(iter->trace);
4242         kfree(iter->buffer_iter);
4243 release:
4244         seq_release_private(inode, file);
4245         return ERR_PTR(-ENOMEM);
4246 }
4247
4248 int tracing_open_generic(struct inode *inode, struct file *filp)
4249 {
4250         int ret;
4251
4252         ret = tracing_check_open_get_tr(NULL);
4253         if (ret)
4254                 return ret;
4255
4256         filp->private_data = inode->i_private;
4257         return 0;
4258 }
4259
4260 bool tracing_is_disabled(void)
4261 {
4262         return (tracing_disabled) ? true : false;
4263 }
4264
4265 /*
4266  * Open and update trace_array ref count.
4267  * Must have the current trace_array passed to it.
4268  */
4269 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4270 {
4271         struct trace_array *tr = inode->i_private;
4272         int ret;
4273
4274         ret = tracing_check_open_get_tr(tr);
4275         if (ret)
4276                 return ret;
4277
4278         filp->private_data = inode->i_private;
4279
4280         return 0;
4281 }
4282
4283 static int tracing_release(struct inode *inode, struct file *file)
4284 {
4285         struct trace_array *tr = inode->i_private;
4286         struct seq_file *m = file->private_data;
4287         struct trace_iterator *iter;
4288         int cpu;
4289
4290         if (!(file->f_mode & FMODE_READ)) {
4291                 trace_array_put(tr);
4292                 return 0;
4293         }
4294
4295         /* Writes do not use seq_file */
4296         iter = m->private;
4297         mutex_lock(&trace_types_lock);
4298
4299         for_each_tracing_cpu(cpu) {
4300                 if (iter->buffer_iter[cpu])
4301                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4302         }
4303
4304         if (iter->trace && iter->trace->close)
4305                 iter->trace->close(iter);
4306
4307         if (!iter->snapshot)
4308                 /* reenable tracing if it was previously enabled */
4309                 tracing_start_tr(tr);
4310
4311         __trace_array_put(tr);
4312
4313         mutex_unlock(&trace_types_lock);
4314
4315         mutex_destroy(&iter->mutex);
4316         free_cpumask_var(iter->started);
4317         kfree(iter->trace);
4318         kfree(iter->buffer_iter);
4319         seq_release_private(inode, file);
4320
4321         return 0;
4322 }
4323
4324 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4325 {
4326         struct trace_array *tr = inode->i_private;
4327
4328         trace_array_put(tr);
4329         return 0;
4330 }
4331
4332 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4333 {
4334         struct trace_array *tr = inode->i_private;
4335
4336         trace_array_put(tr);
4337
4338         return single_release(inode, file);
4339 }
4340
4341 static int tracing_open(struct inode *inode, struct file *file)
4342 {
4343         struct trace_array *tr = inode->i_private;
4344         struct trace_iterator *iter;
4345         int ret;
4346
4347         ret = tracing_check_open_get_tr(tr);
4348         if (ret)
4349                 return ret;
4350
4351         /* If this file was open for write, then erase contents */
4352         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4353                 int cpu = tracing_get_cpu(inode);
4354                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4355
4356 #ifdef CONFIG_TRACER_MAX_TRACE
4357                 if (tr->current_trace->print_max)
4358                         trace_buf = &tr->max_buffer;
4359 #endif
4360
4361                 if (cpu == RING_BUFFER_ALL_CPUS)
4362                         tracing_reset_online_cpus(trace_buf);
4363                 else
4364                         tracing_reset_cpu(trace_buf, cpu);
4365         }
4366
4367         if (file->f_mode & FMODE_READ) {
4368                 iter = __tracing_open(inode, file, false);
4369                 if (IS_ERR(iter))
4370                         ret = PTR_ERR(iter);
4371                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4372                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4373         }
4374
4375         if (ret < 0)
4376                 trace_array_put(tr);
4377
4378         return ret;
4379 }
4380
4381 /*
4382  * Some tracers are not suitable for instance buffers.
4383  * A tracer is always available for the global array (toplevel)
4384  * or if it explicitly states that it is.
4385  */
4386 static bool
4387 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4388 {
4389         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4390 }
4391
4392 /* Find the next tracer that this trace array may use */
4393 static struct tracer *
4394 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4395 {
4396         while (t && !trace_ok_for_array(t, tr))
4397                 t = t->next;
4398
4399         return t;
4400 }
4401
4402 static void *
4403 t_next(struct seq_file *m, void *v, loff_t *pos)
4404 {
4405         struct trace_array *tr = m->private;
4406         struct tracer *t = v;
4407
4408         (*pos)++;
4409
4410         if (t)
4411                 t = get_tracer_for_array(tr, t->next);
4412
4413         return t;
4414 }
4415
4416 static void *t_start(struct seq_file *m, loff_t *pos)
4417 {
4418         struct trace_array *tr = m->private;
4419         struct tracer *t;
4420         loff_t l = 0;
4421
4422         mutex_lock(&trace_types_lock);
4423
4424         t = get_tracer_for_array(tr, trace_types);
4425         for (; t && l < *pos; t = t_next(m, t, &l))
4426                 ;
4427
4428         return t;
4429 }
4430
4431 static void t_stop(struct seq_file *m, void *p)
4432 {
4433         mutex_unlock(&trace_types_lock);
4434 }
4435
4436 static int t_show(struct seq_file *m, void *v)
4437 {
4438         struct tracer *t = v;
4439
4440         if (!t)
4441                 return 0;
4442
4443         seq_puts(m, t->name);
4444         if (t->next)
4445                 seq_putc(m, ' ');
4446         else
4447                 seq_putc(m, '\n');
4448
4449         return 0;
4450 }
4451
4452 static const struct seq_operations show_traces_seq_ops = {
4453         .start          = t_start,
4454         .next           = t_next,
4455         .stop           = t_stop,
4456         .show           = t_show,
4457 };
4458
4459 static int show_traces_open(struct inode *inode, struct file *file)
4460 {
4461         struct trace_array *tr = inode->i_private;
4462         struct seq_file *m;
4463         int ret;
4464
4465         ret = tracing_check_open_get_tr(tr);
4466         if (ret)
4467                 return ret;
4468
4469         ret = seq_open(file, &show_traces_seq_ops);
4470         if (ret) {
4471                 trace_array_put(tr);
4472                 return ret;
4473         }
4474
4475         m = file->private_data;
4476         m->private = tr;
4477
4478         return 0;
4479 }
4480
4481 static int show_traces_release(struct inode *inode, struct file *file)
4482 {
4483         struct trace_array *tr = inode->i_private;
4484
4485         trace_array_put(tr);
4486         return seq_release(inode, file);
4487 }
4488
4489 static ssize_t
4490 tracing_write_stub(struct file *filp, const char __user *ubuf,
4491                    size_t count, loff_t *ppos)
4492 {
4493         return count;
4494 }
4495
4496 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4497 {
4498         int ret;
4499
4500         if (file->f_mode & FMODE_READ)
4501                 ret = seq_lseek(file, offset, whence);
4502         else
4503                 file->f_pos = ret = 0;
4504
4505         return ret;
4506 }
4507
4508 static const struct file_operations tracing_fops = {
4509         .open           = tracing_open,
4510         .read           = seq_read,
4511         .write          = tracing_write_stub,
4512         .llseek         = tracing_lseek,
4513         .release        = tracing_release,
4514 };
4515
4516 static const struct file_operations show_traces_fops = {
4517         .open           = show_traces_open,
4518         .read           = seq_read,
4519         .llseek         = seq_lseek,
4520         .release        = show_traces_release,
4521 };
4522
4523 static ssize_t
4524 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4525                      size_t count, loff_t *ppos)
4526 {
4527         struct trace_array *tr = file_inode(filp)->i_private;
4528         char *mask_str;
4529         int len;
4530
4531         len = snprintf(NULL, 0, "%*pb\n",
4532                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4533         mask_str = kmalloc(len, GFP_KERNEL);
4534         if (!mask_str)
4535                 return -ENOMEM;
4536
4537         len = snprintf(mask_str, len, "%*pb\n",
4538                        cpumask_pr_args(tr->tracing_cpumask));
4539         if (len >= count) {
4540                 count = -EINVAL;
4541                 goto out_err;
4542         }
4543         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4544
4545 out_err:
4546         kfree(mask_str);
4547
4548         return count;
4549 }
4550
4551 static ssize_t
4552 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4553                       size_t count, loff_t *ppos)
4554 {
4555         struct trace_array *tr = file_inode(filp)->i_private;
4556         cpumask_var_t tracing_cpumask_new;
4557         int err, cpu;
4558
4559         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4560                 return -ENOMEM;
4561
4562         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4563         if (err)
4564                 goto err_unlock;
4565
4566         local_irq_disable();
4567         arch_spin_lock(&tr->max_lock);
4568         for_each_tracing_cpu(cpu) {
4569                 /*
4570                  * Increase/decrease the disabled counter if we are
4571                  * about to flip a bit in the cpumask:
4572                  */
4573                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4574                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4575                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4576                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4577                 }
4578                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4579                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4580                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4581                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4582                 }
4583         }
4584         arch_spin_unlock(&tr->max_lock);
4585         local_irq_enable();
4586
4587         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4588         free_cpumask_var(tracing_cpumask_new);
4589
4590         return count;
4591
4592 err_unlock:
4593         free_cpumask_var(tracing_cpumask_new);
4594
4595         return err;
4596 }
4597
4598 static const struct file_operations tracing_cpumask_fops = {
4599         .open           = tracing_open_generic_tr,
4600         .read           = tracing_cpumask_read,
4601         .write          = tracing_cpumask_write,
4602         .release        = tracing_release_generic_tr,
4603         .llseek         = generic_file_llseek,
4604 };
4605
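/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of the "%*pb" cpumask format used above: the mask is printed as hex
 * (e.g. "f" for CPUs 0-3), which is also the format the write side
 * accepts via cpumask_parse_user(). The function name is hypothetical.
 */
static void example_show_cpumask(struct trace_array *tr)
{
        pr_info("tracing_cpumask: %*pb\n",
                cpumask_pr_args(tr->tracing_cpumask));
}
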
4606 static int tracing_trace_options_show(struct seq_file *m, void *v)
4607 {
4608         struct tracer_opt *trace_opts;
4609         struct trace_array *tr = m->private;
4610         u32 tracer_flags;
4611         int i;
4612
4613         mutex_lock(&trace_types_lock);
4614         tracer_flags = tr->current_trace->flags->val;
4615         trace_opts = tr->current_trace->flags->opts;
4616
4617         for (i = 0; trace_options[i]; i++) {
4618                 if (tr->trace_flags & (1 << i))
4619                         seq_printf(m, "%s\n", trace_options[i]);
4620                 else
4621                         seq_printf(m, "no%s\n", trace_options[i]);
4622         }
4623
4624         for (i = 0; trace_opts[i].name; i++) {
4625                 if (tracer_flags & trace_opts[i].bit)
4626                         seq_printf(m, "%s\n", trace_opts[i].name);
4627                 else
4628                         seq_printf(m, "no%s\n", trace_opts[i].name);
4629         }
4630         mutex_unlock(&trace_types_lock);
4631
4632         return 0;
4633 }
4634
4635 static int __set_tracer_option(struct trace_array *tr,
4636                                struct tracer_flags *tracer_flags,
4637                                struct tracer_opt *opts, int neg)
4638 {
4639         struct tracer *trace = tracer_flags->trace;
4640         int ret;
4641
4642         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4643         if (ret)
4644                 return ret;
4645
4646         if (neg)
4647                 tracer_flags->val &= ~opts->bit;
4648         else
4649                 tracer_flags->val |= opts->bit;
4650         return 0;
4651 }
4652
4653 /* Try to assign a tracer specific option */
4654 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4655 {
4656         struct tracer *trace = tr->current_trace;
4657         struct tracer_flags *tracer_flags = trace->flags;
4658         struct tracer_opt *opts = NULL;
4659         int i;
4660
4661         for (i = 0; tracer_flags->opts[i].name; i++) {
4662                 opts = &tracer_flags->opts[i];
4663
4664                 if (strcmp(cmp, opts->name) == 0)
4665                         return __set_tracer_option(tr, trace->flags, opts, neg);
4666         }
4667
4668         return -EINVAL;
4669 }
4670
4671 /* Some tracers require overwrite to stay enabled */
4672 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4673 {
4674         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4675                 return -1;
4676
4677         return 0;
4678 }
4679
4680 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4681 {
4682         /* do nothing if flag is already set */
4683         if (!!(tr->trace_flags & mask) == !!enabled)
4684                 return 0;
4685
4686         /* Give the tracer a chance to approve the change */
4687         if (tr->current_trace->flag_changed)
4688                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4689                         return -EINVAL;
4690
4691         if (enabled)
4692                 tr->trace_flags |= mask;
4693         else
4694                 tr->trace_flags &= ~mask;
4695
4696         if (mask == TRACE_ITER_RECORD_CMD)
4697                 trace_event_enable_cmd_record(enabled);
4698
4699         if (mask == TRACE_ITER_RECORD_TGID) {
4700                 if (!tgid_map)
4701                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4702                                            sizeof(*tgid_map),
4703                                            GFP_KERNEL);
4704                 if (!tgid_map) {
4705                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4706                         return -ENOMEM;
4707                 }
4708
4709                 trace_event_enable_tgid_record(enabled);
4710         }
4711
4712         if (mask == TRACE_ITER_EVENT_FORK)
4713                 trace_event_follow_fork(tr, enabled);
4714
4715         if (mask == TRACE_ITER_FUNC_FORK)
4716                 ftrace_pid_follow_fork(tr, enabled);
4717
4718         if (mask == TRACE_ITER_OVERWRITE) {
4719                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4720 #ifdef CONFIG_TRACER_MAX_TRACE
4721                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4722 #endif
4723         }
4724
4725         if (mask == TRACE_ITER_PRINTK) {
4726                 trace_printk_start_stop_comm(enabled);
4727                 trace_printk_control(enabled);
4728         }
4729
4730         return 0;
4731 }
4732
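/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of toggling a core trace flag directly with set_tracer_flag(); callers
 * in this file do so with trace_types_lock held. The function name is
 * hypothetical.
 */
static int example_enable_cmd_record(struct trace_array *tr)
{
        int ret;

        mutex_lock(&trace_types_lock);
        ret = set_tracer_flag(tr, TRACE_ITER_RECORD_CMD, 1);
        mutex_unlock(&trace_types_lock);

        return ret;
}
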
4733 static int trace_set_options(struct trace_array *tr, char *option)
4734 {
4735         char *cmp;
4736         int neg = 0;
4737         int ret;
4738         size_t orig_len = strlen(option);
4739         int len;
4740
4741         cmp = strstrip(option);
4742
4743         len = str_has_prefix(cmp, "no");
4744         if (len)
4745                 neg = 1;
4746
4747         cmp += len;
4748
4749         mutex_lock(&trace_types_lock);
4750
4751         ret = match_string(trace_options, -1, cmp);
4752         /* If no option could be set, test the specific tracer options */
4753         if (ret < 0)
4754                 ret = set_tracer_option(tr, cmp, neg);
4755         else
4756                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4757
4758         mutex_unlock(&trace_types_lock);
4759
4760         /*
4761          * If the first trailing whitespace is replaced with '\0' by strstrip,
4762          * turn it back into a space.
4763          */
4764         if (orig_len > strlen(option))
4765                 option[strlen(option)] = ' ';
4766
4767         return ret;
4768 }
4769
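/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of driving the option parser above. A "no" prefix clears a flag,
 * anything else sets it, and unknown names fall through to the
 * tracer-specific options. The function name is hypothetical; the
 * buffers are writable arrays because trace_set_options() strips its
 * argument in place.
 */
static void example_toggle_overwrite(struct trace_array *tr)
{
        char set_opt[]   = "overwrite";
        char clear_opt[] = "nooverwrite";

        trace_set_options(tr, set_opt);         /* sets TRACE_ITER_OVERWRITE */
        trace_set_options(tr, clear_opt);       /* clears it again */
}
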
4770 static void __init apply_trace_boot_options(void)
4771 {
4772         char *buf = trace_boot_options_buf;
4773         char *option;
4774
4775         while (true) {
4776                 option = strsep(&buf, ",");
4777
4778                 if (!option)
4779                         break;
4780
4781                 if (*option)
4782                         trace_set_options(&global_trace, option);
4783
4784                 /* Put back the comma to allow this to be called again */
4785                 if (buf)
4786                         *(buf - 1) = ',';
4787         }
4788 }
4789
4790 static ssize_t
4791 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4792                         size_t cnt, loff_t *ppos)
4793 {
4794         struct seq_file *m = filp->private_data;
4795         struct trace_array *tr = m->private;
4796         char buf[64];
4797         int ret;
4798
4799         if (cnt >= sizeof(buf))
4800                 return -EINVAL;
4801
4802         if (copy_from_user(buf, ubuf, cnt))
4803                 return -EFAULT;
4804
4805         buf[cnt] = 0;
4806
4807         ret = trace_set_options(tr, buf);
4808         if (ret < 0)
4809                 return ret;
4810
4811         *ppos += cnt;
4812
4813         return cnt;
4814 }
4815
4816 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4817 {
4818         struct trace_array *tr = inode->i_private;
4819         int ret;
4820
4821         ret = tracing_check_open_get_tr(tr);
4822         if (ret)
4823                 return ret;
4824
4825         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4826         if (ret < 0)
4827                 trace_array_put(tr);
4828
4829         return ret;
4830 }
4831
4832 static const struct file_operations tracing_iter_fops = {
4833         .open           = tracing_trace_options_open,
4834         .read           = seq_read,
4835         .llseek         = seq_lseek,
4836         .release        = tracing_single_release_tr,
4837         .write          = tracing_trace_options_write,
4838 };
4839
4840 static const char readme_msg[] =
4841         "tracing mini-HOWTO:\n\n"
4842         "# echo 0 > tracing_on : quick way to disable tracing\n"
4843         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4844         " Important files:\n"
4845         "  trace\t\t\t- The static contents of the buffer\n"
4846         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4847         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4848         "  current_tracer\t- function and latency tracers\n"
4849         "  available_tracers\t- list of configured tracers for current_tracer\n"
4850         "  error_log\t- error log for failed commands (that support it)\n"
4851         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4852         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4853         "  trace_clock\t\t- change the clock used to order events\n"
4854         "       local:   Per cpu clock but may not be synced across CPUs\n"
4855         "      global:   Synced across CPUs but slows tracing down.\n"
4856         "     counter:   Not a clock, but just an increment\n"
4857         "      uptime:   Jiffy counter from time of boot\n"
4858         "        perf:   Same clock that perf events use\n"
4859 #ifdef CONFIG_X86_64
4860         "     x86-tsc:   TSC cycle counter\n"
4861 #endif
4862         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4863         "       delta:   Delta difference against a buffer-wide timestamp\n"
4864         "    absolute:   Absolute (standalone) timestamp\n"
4865         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4866         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4867         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4868         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4869         "\t\t\t  Remove sub-buffer with rmdir\n"
4870         "  trace_options\t\t- Set format or modify how tracing happens\n"
4871         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4872         "\t\t\t  option name\n"
4873         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4874 #ifdef CONFIG_DYNAMIC_FTRACE
4875         "\n  available_filter_functions - list of functions that can be filtered on\n"
4876         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4877         "\t\t\t  functions\n"
4878         "\t     accepts: func_full_name or glob-matching-pattern\n"
4879         "\t     modules: Can select a group via module\n"
4880         "\t      Format: :mod:<module-name>\n"
4881         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4882         "\t    triggers: a command to perform when function is hit\n"
4883         "\t      Format: <function>:<trigger>[:count]\n"
4884         "\t     trigger: traceon, traceoff\n"
4885         "\t\t      enable_event:<system>:<event>\n"
4886         "\t\t      disable_event:<system>:<event>\n"
4887 #ifdef CONFIG_STACKTRACE
4888         "\t\t      stacktrace\n"
4889 #endif
4890 #ifdef CONFIG_TRACER_SNAPSHOT
4891         "\t\t      snapshot\n"
4892 #endif
4893         "\t\t      dump\n"
4894         "\t\t      cpudump\n"
4895         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4896         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4897         "\t     The first one will disable tracing every time do_fault is hit\n"
4898         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4899         "\t       The first time do_trap is hit and it disables tracing, the\n"
4900         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4901         "\t       the counter will not decrement. It only decrements when the\n"
4902         "\t       trigger did work.\n"
4903         "\t     To remove a trigger without a count:\n"
4904         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4905         "\t     To remove a trigger with a count:\n"
4906         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4907         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4908         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4909         "\t    modules: Can select a group via module command :mod:\n"
4910         "\t    Does not accept triggers\n"
4911 #endif /* CONFIG_DYNAMIC_FTRACE */
4912 #ifdef CONFIG_FUNCTION_TRACER
4913         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4914         "\t\t    (function)\n"
4915 #endif
4916 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4917         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4918         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4919         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4920 #endif
4921 #ifdef CONFIG_TRACER_SNAPSHOT
4922         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4923         "\t\t\t  snapshot buffer. Read the contents for more\n"
4924         "\t\t\t  information\n"
4925 #endif
4926 #ifdef CONFIG_STACK_TRACER
4927         "  stack_trace\t\t- Shows the max stack trace when active\n"
4928         "  stack_max_size\t- Shows current max stack size that was traced\n"
4929         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4930         "\t\t\t  new trace)\n"
4931 #ifdef CONFIG_DYNAMIC_FTRACE
4932         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4933         "\t\t\t  traces\n"
4934 #endif
4935 #endif /* CONFIG_STACK_TRACER */
4936 #ifdef CONFIG_DYNAMIC_EVENTS
4937         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4938         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4939 #endif
4940 #ifdef CONFIG_KPROBE_EVENTS
4941         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4942         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4943 #endif
4944 #ifdef CONFIG_UPROBE_EVENTS
4945         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4946         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4947 #endif
4948 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4949         "\t  accepts: event-definitions (one definition per line)\n"
4950         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4951         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4952 #ifdef CONFIG_HIST_TRIGGERS
4953         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4954 #endif
4955         "\t           -:[<group>/]<event>\n"
4956 #ifdef CONFIG_KPROBE_EVENTS
4957         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4958   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4959 #endif
4960 #ifdef CONFIG_UPROBE_EVENTS
4961   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4962 #endif
4963         "\t     args: <name>=fetcharg[:type]\n"
4964         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4965 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4966         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4967 #else
4968         "\t           $stack<index>, $stack, $retval, $comm,\n"
4969 #endif
4970         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4971         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4972         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4973         "\t           <type>\\[<array-size>\\]\n"
4974 #ifdef CONFIG_HIST_TRIGGERS
4975         "\t    field: <stype> <name>;\n"
4976         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4977         "\t           [unsigned] char/int/long\n"
4978 #endif
4979 #endif
4980         "  events/\t\t- Directory containing all trace event subsystems:\n"
4981         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4982         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4983         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4984         "\t\t\t  events\n"
4985         "      filter\t\t- If set, only events passing filter are traced\n"
4986         "  events/<system>/<event>/\t- Directory containing control files for\n"
4987         "\t\t\t  <event>:\n"
4988         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4989         "      filter\t\t- If set, only events passing filter are traced\n"
4990         "      trigger\t\t- If set, a command to perform when event is hit\n"
4991         "\t    Format: <trigger>[:count][if <filter>]\n"
4992         "\t   trigger: traceon, traceoff\n"
4993         "\t            enable_event:<system>:<event>\n"
4994         "\t            disable_event:<system>:<event>\n"
4995 #ifdef CONFIG_HIST_TRIGGERS
4996         "\t            enable_hist:<system>:<event>\n"
4997         "\t            disable_hist:<system>:<event>\n"
4998 #endif
4999 #ifdef CONFIG_STACKTRACE
5000         "\t\t    stacktrace\n"
5001 #endif
5002 #ifdef CONFIG_TRACER_SNAPSHOT
5003         "\t\t    snapshot\n"
5004 #endif
5005 #ifdef CONFIG_HIST_TRIGGERS
5006         "\t\t    hist (see below)\n"
5007 #endif
5008         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5009         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5010         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5011         "\t                  events/block/block_unplug/trigger\n"
5012         "\t   The first disables tracing every time block_unplug is hit.\n"
5013         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5014         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5015         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5016         "\t   Like function triggers, the counter is only decremented if it\n"
5017         "\t    enabled or disabled tracing.\n"
5018         "\t   To remove a trigger without a count:\n"
5019         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5020         "\t   To remove a trigger with a count:\n"
5021         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5022         "\t   Filters can be ignored when removing a trigger.\n"
5023 #ifdef CONFIG_HIST_TRIGGERS
5024         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5025         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5026         "\t            [:values=<field1[,field2,...]>]\n"
5027         "\t            [:sort=<field1[,field2,...]>]\n"
5028         "\t            [:size=#entries]\n"
5029         "\t            [:pause][:continue][:clear]\n"
5030         "\t            [:name=histname1]\n"
5031         "\t            [:<handler>.<action>]\n"
5032         "\t            [if <filter>]\n\n"
5033         "\t    When a matching event is hit, an entry is added to a hash\n"
5034         "\t    table using the key(s) and value(s) named, and the value of a\n"
5035         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5036         "\t    correspond to fields in the event's format description.  Keys\n"
5037         "\t    can be any field, or the special string 'stacktrace'.\n"
5038         "\t    Compound keys consisting of up to two fields can be specified\n"
5039         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5040         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5041         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5042         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5043         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5044         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5045         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5046         "\t    its histogram data will be shared with other triggers of the\n"
5047         "\t    same name, and trigger hits will update this common data.\n\n"
5048         "\t    Reading the 'hist' file for the event will dump the hash\n"
5049         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5050         "\t    triggers attached to an event, there will be a table for each\n"
5051         "\t    trigger in the output.  The table displayed for a named\n"
5052         "\t    trigger will be the same as any other instance having the\n"
5053         "\t    same name.  The default format used to display a given field\n"
5054         "\t    can be modified by appending any of the following modifiers\n"
5055         "\t    to the field name, as applicable:\n\n"
5056         "\t            .hex        display a number as a hex value\n"
5057         "\t            .sym        display an address as a symbol\n"
5058         "\t            .sym-offset display an address as a symbol and offset\n"
5059         "\t            .execname   display a common_pid as a program name\n"
5060         "\t            .syscall    display a syscall id as a syscall name\n"
5061         "\t            .log2       display log2 value rather than raw number\n"
5062         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5063         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5064         "\t    trigger or to start a hist trigger but not log any events\n"
5065         "\t    until told to do so.  'continue' can be used to start or\n"
5066         "\t    restart a paused hist trigger.\n\n"
5067         "\t    The 'clear' parameter will clear the contents of a running\n"
5068         "\t    hist trigger and leave its current paused/active state\n"
5069         "\t    unchanged.\n\n"
5070         "\t    The enable_hist and disable_hist triggers can be used to\n"
5071         "\t    have one event conditionally start and stop another event's\n"
5072         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5073         "\t    the enable_event and disable_event triggers.\n\n"
5074         "\t    Hist trigger handlers and actions are executed whenever a\n"
5075         "\t    histogram entry is added or updated.  They take the form:\n\n"
5076         "\t        <handler>.<action>\n\n"
5077         "\t    The available handlers are:\n\n"
5078         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5079         "\t        onmax(var)               - invoke if var exceeds current max\n"
5080         "\t        onchange(var)            - invoke action if var changes\n\n"
5081         "\t    The available actions are:\n\n"
5082         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5083         "\t        save(field,...)                      - save current event fields\n"
5084 #ifdef CONFIG_TRACER_SNAPSHOT
5085         "\t        snapshot()                           - snapshot the trace buffer\n"
5086 #endif
5087 #endif
5088 ;
5089
5090 static ssize_t
5091 tracing_readme_read(struct file *filp, char __user *ubuf,
5092                        size_t cnt, loff_t *ppos)
5093 {
5094         return simple_read_from_buffer(ubuf, cnt, ppos,
5095                                         readme_msg, strlen(readme_msg));
5096 }
5097
5098 static const struct file_operations tracing_readme_fops = {
5099         .open           = tracing_open_generic,
5100         .read           = tracing_readme_read,
5101         .llseek         = generic_file_llseek,
5102 };
5103
5104 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5105 {
5106         int *ptr = v;
5107
5108         if (*pos || m->count)
5109                 ptr++;
5110
5111         (*pos)++;
5112
5113         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5114                 if (trace_find_tgid(*ptr))
5115                         return ptr;
5116         }
5117
5118         return NULL;
5119 }
5120
5121 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5122 {
5123         void *v;
5124         loff_t l = 0;
5125
5126         if (!tgid_map)
5127                 return NULL;
5128
5129         v = &tgid_map[0];
5130         while (l <= *pos) {
5131                 v = saved_tgids_next(m, v, &l);
5132                 if (!v)
5133                         return NULL;
5134         }
5135
5136         return v;
5137 }
5138
5139 static void saved_tgids_stop(struct seq_file *m, void *v)
5140 {
5141 }
5142
5143 static int saved_tgids_show(struct seq_file *m, void *v)
5144 {
5145         int pid = (int *)v - tgid_map;
5146
5147         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5148         return 0;
5149 }
5150
5151 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5152         .start          = saved_tgids_start,
5153         .stop           = saved_tgids_stop,
5154         .next           = saved_tgids_next,
5155         .show           = saved_tgids_show,
5156 };
5157
5158 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5159 {
5160         int ret;
5161
5162         ret = tracing_check_open_get_tr(NULL);
5163         if (ret)
5164                 return ret;
5165
5166         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5167 }
5168
5169
5170 static const struct file_operations tracing_saved_tgids_fops = {
5171         .open           = tracing_saved_tgids_open,
5172         .read           = seq_read,
5173         .llseek         = seq_lseek,
5174         .release        = seq_release,
5175 };
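
/*
 * Illustrative usage (sample values, assuming tracefs is mounted at
 * /sys/kernel/tracing): saved_tgids only has content while the
 * "record-tgid" option is set, and each line is "<pid> <tgid>" as
 * printed by saved_tgids_show() above:
 *
 *	# echo 1 > /sys/kernel/tracing/options/record-tgid
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1340 1340
 *	1342 1340
 */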
5176
5177 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5178 {
5179         unsigned int *ptr = v;
5180
5181         if (*pos || m->count)
5182                 ptr++;
5183
5184         (*pos)++;
5185
5186         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5187              ptr++) {
5188                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5189                         continue;
5190
5191                 return ptr;
5192         }
5193
5194         return NULL;
5195 }
5196
5197 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5198 {
5199         void *v;
5200         loff_t l = 0;
5201
5202         preempt_disable();
5203         arch_spin_lock(&trace_cmdline_lock);
5204
5205         v = &savedcmd->map_cmdline_to_pid[0];
5206         while (l <= *pos) {
5207                 v = saved_cmdlines_next(m, v, &l);
5208                 if (!v)
5209                         return NULL;
5210         }
5211
5212         return v;
5213 }
5214
5215 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5216 {
5217         arch_spin_unlock(&trace_cmdline_lock);
5218         preempt_enable();
5219 }
5220
5221 static int saved_cmdlines_show(struct seq_file *m, void *v)
5222 {
5223         char buf[TASK_COMM_LEN];
5224         unsigned int *pid = v;
5225
5226         __trace_find_cmdline(*pid, buf);
5227         seq_printf(m, "%d %s\n", *pid, buf);
5228         return 0;
5229 }
5230
5231 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5232         .start          = saved_cmdlines_start,
5233         .next           = saved_cmdlines_next,
5234         .stop           = saved_cmdlines_stop,
5235         .show           = saved_cmdlines_show,
5236 };
5237
5238 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5239 {
5240         int ret;
5241
5242         ret = tracing_check_open_get_tr(NULL);
5243         if (ret)
5244                 return ret;
5245
5246         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5247 }
5248
5249 static const struct file_operations tracing_saved_cmdlines_fops = {
5250         .open           = tracing_saved_cmdlines_open,
5251         .read           = seq_read,
5252         .llseek         = seq_lseek,
5253         .release        = seq_release,
5254 };
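
/*
 * Illustrative output (sample values only): each line of the
 * saved_cmdlines file is a "<pid> <comm>" pair as produced by
 * saved_cmdlines_show() above:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	1 systemd
 *	137 kworker/0:2
 */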
5255
5256 static ssize_t
5257 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5258                                  size_t cnt, loff_t *ppos)
5259 {
5260         char buf[64];
5261         int r;
5262
5263         arch_spin_lock(&trace_cmdline_lock);
5264         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5265         arch_spin_unlock(&trace_cmdline_lock);
5266
5267         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5268 }
5269
5270 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5271 {
5272         kfree(s->saved_cmdlines);
5273         kfree(s->map_cmdline_to_pid);
5274         kfree(s);
5275 }
5276
5277 static int tracing_resize_saved_cmdlines(unsigned int val)
5278 {
5279         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5280
5281         s = kmalloc(sizeof(*s), GFP_KERNEL);
5282         if (!s)
5283                 return -ENOMEM;
5284
5285         if (allocate_cmdlines_buffer(val, s) < 0) {
5286                 kfree(s);
5287                 return -ENOMEM;
5288         }
5289
5290         arch_spin_lock(&trace_cmdline_lock);
5291         savedcmd_temp = savedcmd;
5292         savedcmd = s;
5293         arch_spin_unlock(&trace_cmdline_lock);
5294         free_saved_cmdlines_buffer(savedcmd_temp);
5295
5296         return 0;
5297 }
5298
5299 static ssize_t
5300 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5301                                   size_t cnt, loff_t *ppos)
5302 {
5303         unsigned long val;
5304         int ret;
5305
5306         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5307         if (ret)
5308                 return ret;
5309
5310         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5311         if (!val || val > PID_MAX_DEFAULT)
5312                 return -EINVAL;
5313
5314         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5315         if (ret < 0)
5316                 return ret;
5317
5318         *ppos += cnt;
5319
5320         return cnt;
5321 }
5322
5323 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5324         .open           = tracing_open_generic,
5325         .read           = tracing_saved_cmdlines_size_read,
5326         .write          = tracing_saved_cmdlines_size_write,
5327 };
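
/*
 * Illustrative usage (values are examples): this file reports and
 * resizes the cmdline cache.  The write path above parses the decimal
 * value, rejects 0 and anything above PID_MAX_DEFAULT, and reallocates
 * the map via tracing_resize_saved_cmdlines():
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	128
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */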
5328
5329 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5330 static union trace_eval_map_item *
5331 update_eval_map(union trace_eval_map_item *ptr)
5332 {
5333         if (!ptr->map.eval_string) {
5334                 if (ptr->tail.next) {
5335                         ptr = ptr->tail.next;
5336                         /* Set ptr to the next real item (skip head) */
5337                         ptr++;
5338                 } else
5339                         return NULL;
5340         }
5341         return ptr;
5342 }
5343
5344 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5345 {
5346         union trace_eval_map_item *ptr = v;
5347
5348         /*
5349          * Paranoid! If ptr points to end, we don't want to increment past it.
5350          * This really should never happen.
5351          */
5352         ptr = update_eval_map(ptr);
5353         if (WARN_ON_ONCE(!ptr))
5354                 return NULL;
5355
5356         ptr++;
5357
5358         (*pos)++;
5359
5360         ptr = update_eval_map(ptr);
5361
5362         return ptr;
5363 }
5364
5365 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5366 {
5367         union trace_eval_map_item *v;
5368         loff_t l = 0;
5369
5370         mutex_lock(&trace_eval_mutex);
5371
5372         v = trace_eval_maps;
5373         if (v)
5374                 v++;
5375
5376         while (v && l < *pos) {
5377                 v = eval_map_next(m, v, &l);
5378         }
5379
5380         return v;
5381 }
5382
5383 static void eval_map_stop(struct seq_file *m, void *v)
5384 {
5385         mutex_unlock(&trace_eval_mutex);
5386 }
5387
5388 static int eval_map_show(struct seq_file *m, void *v)
5389 {
5390         union trace_eval_map_item *ptr = v;
5391
5392         seq_printf(m, "%s %ld (%s)\n",
5393                    ptr->map.eval_string, ptr->map.eval_value,
5394                    ptr->map.system);
5395
5396         return 0;
5397 }
5398
5399 static const struct seq_operations tracing_eval_map_seq_ops = {
5400         .start          = eval_map_start,
5401         .next           = eval_map_next,
5402         .stop           = eval_map_stop,
5403         .show           = eval_map_show,
5404 };
5405
5406 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5407 {
5408         int ret;
5409
5410         ret = tracing_check_open_get_tr(NULL);
5411         if (ret)
5412                 return ret;
5413
5414         return seq_open(filp, &tracing_eval_map_seq_ops);
5415 }
5416
5417 static const struct file_operations tracing_eval_map_fops = {
5418         .open           = tracing_eval_map_open,
5419         .read           = seq_read,
5420         .llseek         = seq_lseek,
5421         .release        = seq_release,
5422 };
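
/*
 * Illustrative output (sample values only): eval_map_show() above
 * prints one "<string> <value> (<system>)" line per enum/sizeof symbol
 * known to the tracing system, for example:
 *
 *	# cat /sys/kernel/tracing/eval_map
 *	HI_SOFTIRQ 0 (irq)
 *	TIMER_SOFTIRQ 1 (irq)
 */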
5423
5424 static inline union trace_eval_map_item *
5425 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5426 {
5427         /* Return tail of array given the head */
5428         return ptr + ptr->head.length + 1;
5429 }
5430
5431 static void
5432 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5433                            int len)
5434 {
5435         struct trace_eval_map **stop;
5436         struct trace_eval_map **map;
5437         union trace_eval_map_item *map_array;
5438         union trace_eval_map_item *ptr;
5439
5440         stop = start + len;
5441
5442         /*
5443          * The trace_eval_maps contains the map plus a head and tail item,
5444          * where the head holds the module and length of array, and the
5445          * tail holds a pointer to the next list.
5446          */
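        /*
         * Resulting layout, illustratively (len + 2 entries):
         *
         *	map_array[0]        head (mod, length = len)
         *	map_array[1..len]   map  (one trace_eval_map copy each)
         *	map_array[len + 1]  tail (zeroed below; next-list pointer)
         */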
5447         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5448         if (!map_array) {
5449                 pr_warn("Unable to allocate trace eval mapping\n");
5450                 return;
5451         }
5452
5453         mutex_lock(&trace_eval_mutex);
5454
5455         if (!trace_eval_maps)
5456                 trace_eval_maps = map_array;
5457         else {
5458                 ptr = trace_eval_maps;
5459                 for (;;) {
5460                         ptr = trace_eval_jmp_to_tail(ptr);
5461                         if (!ptr->tail.next)
5462                                 break;
5463                         ptr = ptr->tail.next;
5464
5465                 }
5466                 ptr->tail.next = map_array;
5467         }
5468         map_array->head.mod = mod;
5469         map_array->head.length = len;
5470         map_array++;
5471
5472         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5473                 map_array->map = **map;
5474                 map_array++;
5475         }
5476         memset(map_array, 0, sizeof(*map_array));
5477
5478         mutex_unlock(&trace_eval_mutex);
5479 }
5480
5481 static void trace_create_eval_file(struct dentry *d_tracer)
5482 {
5483         trace_create_file("eval_map", 0444, d_tracer,
5484                           NULL, &tracing_eval_map_fops);
5485 }
5486
5487 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5488 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5489 static inline void trace_insert_eval_map_file(struct module *mod,
5490                               struct trace_eval_map **start, int len) { }
5491 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5492
5493 static void trace_insert_eval_map(struct module *mod,
5494                                   struct trace_eval_map **start, int len)
5495 {
5496         struct trace_eval_map **map;
5497
5498         if (len <= 0)
5499                 return;
5500
5501         map = start;
5502
5503         trace_event_eval_update(map, len);
5504
5505         trace_insert_eval_map_file(mod, start, len);
5506 }
5507
5508 static ssize_t
5509 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5510                        size_t cnt, loff_t *ppos)
5511 {
5512         struct trace_array *tr = filp->private_data;
5513         char buf[MAX_TRACER_SIZE+2];
5514         int r;
5515
5516         mutex_lock(&trace_types_lock);
5517         r = sprintf(buf, "%s\n", tr->current_trace->name);
5518         mutex_unlock(&trace_types_lock);
5519
5520         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5521 }
5522
5523 int tracer_init(struct tracer *t, struct trace_array *tr)
5524 {
5525         tracing_reset_online_cpus(&tr->trace_buffer);
5526         return t->init(tr);
5527 }
5528
5529 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5530 {
5531         int cpu;
5532
5533         for_each_tracing_cpu(cpu)
5534                 per_cpu_ptr(buf->data, cpu)->entries = val;
5535 }
5536
5537 #ifdef CONFIG_TRACER_MAX_TRACE
5538 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5539 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5540                                         struct trace_buffer *size_buf, int cpu_id)
5541 {
5542         int cpu, ret = 0;
5543
5544         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5545                 for_each_tracing_cpu(cpu) {
5546                         ret = ring_buffer_resize(trace_buf->buffer,
5547                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5548                         if (ret < 0)
5549                                 break;
5550                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5551                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5552                 }
5553         } else {
5554                 ret = ring_buffer_resize(trace_buf->buffer,
5555                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5556                 if (ret == 0)
5557                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5558                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5559         }
5560
5561         return ret;
5562 }
5563 #endif /* CONFIG_TRACER_MAX_TRACE */
5564
5565 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5566                                         unsigned long size, int cpu)
5567 {
5568         int ret;
5569
5570         /*
5571          * If kernel or user changes the size of the ring buffer
5572          * we use the size that was given, and we can forget about
5573          * expanding it later.
5574          */
5575         ring_buffer_expanded = true;
5576
5577         /* May be called before buffers are initialized */
5578         if (!tr->trace_buffer.buffer)
5579                 return 0;
5580
5581         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5582         if (ret < 0)
5583                 return ret;
5584
5585 #ifdef CONFIG_TRACER_MAX_TRACE
5586         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5587             !tr->current_trace->use_max_tr)
5588                 goto out;
5589
5590         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5591         if (ret < 0) {
5592                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5593                                                      &tr->trace_buffer, cpu);
5594                 if (r < 0) {
5595                         /*
5596                          * AARGH! We are left with a differently
5597                          * sized max buffer!!!!
5598                          * The max buffer is our "snapshot" buffer.
5599                          * When a tracer needs a snapshot (one of the
5600                          * latency tracers), it swaps the max buffer
5601                          * with the saved snapshot. We succeeded in
5602                          * updating the size of the main buffer, but failed
5603                          * to update the size of the max buffer. And when we
5604                          * tried to reset the main buffer to the original
5605                          * size, we failed there too. This is very unlikely
5606                          * to happen, but if it does, warn and kill all
5607                          * tracing.
5608                          */
5609                         WARN_ON(1);
5610                         tracing_disabled = 1;
5611                 }
5612                 return ret;
5613         }
5614
5615         if (cpu == RING_BUFFER_ALL_CPUS)
5616                 set_buffer_entries(&tr->max_buffer, size);
5617         else
5618                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5619
5620  out:
5621 #endif /* CONFIG_TRACER_MAX_TRACE */
5622
5623         if (cpu == RING_BUFFER_ALL_CPUS)
5624                 set_buffer_entries(&tr->trace_buffer, size);
5625         else
5626                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5627
5628         return ret;
5629 }
5630
5631 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5632                                           unsigned long size, int cpu_id)
5633 {
5634         int ret = size;
5635
5636         mutex_lock(&trace_types_lock);
5637
5638         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5639                 /* make sure this cpu is enabled in the mask */
5640                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5641                         ret = -EINVAL;
5642                         goto out;
5643                 }
5644         }
5645
5646         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5647         if (ret < 0)
5648                 ret = -ENOMEM;
5649
5650 out:
5651         mutex_unlock(&trace_types_lock);
5652
5653         return ret;
5654 }
5655
5656
5657 /**
5658  * tracing_update_buffers - used by tracing facility to expand ring buffers
5659  *
5660  * To save memory when tracing is never used on a system that has it
5661  * configured in, the ring buffers are set to a minimum size.  Once a
5662  * user starts to use the tracing facility, they need to grow to their
5663  * default size.
5664  *
5665  * This function is to be called when a tracer is about to be used.
5666  */
5667 int tracing_update_buffers(void)
5668 {
5669         int ret = 0;
5670
5671         mutex_lock(&trace_types_lock);
5672         if (!ring_buffer_expanded)
5673                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5674                                                 RING_BUFFER_ALL_CPUS);
5675         mutex_unlock(&trace_types_lock);
5676
5677         return ret;
5678 }
5679
5680 struct trace_option_dentry;
5681
5682 static void
5683 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5684
5685 /*
5686  * Used to clear out the tracer before deletion of an instance.
5687  * Must have trace_types_lock held.
5688  */
5689 static void tracing_set_nop(struct trace_array *tr)
5690 {
5691         if (tr->current_trace == &nop_trace)
5692                 return;
5693         
5694         tr->current_trace->enabled--;
5695
5696         if (tr->current_trace->reset)
5697                 tr->current_trace->reset(tr);
5698
5699         tr->current_trace = &nop_trace;
5700 }
5701
5702 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5703 {
5704         /* Only enable if the directory has been created already. */
5705         if (!tr->dir)
5706                 return;
5707
5708         create_trace_option_files(tr, t);
5709 }
5710
5711 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5712 {
5713         struct tracer *t;
5714 #ifdef CONFIG_TRACER_MAX_TRACE
5715         bool had_max_tr;
5716 #endif
5717         int ret = 0;
5718
5719         mutex_lock(&trace_types_lock);
5720
5721         if (!ring_buffer_expanded) {
5722                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5723                                                 RING_BUFFER_ALL_CPUS);
5724                 if (ret < 0)
5725                         goto out;
5726                 ret = 0;
5727         }
5728
5729         for (t = trace_types; t; t = t->next) {
5730                 if (strcmp(t->name, buf) == 0)
5731                         break;
5732         }
5733         if (!t) {
5734                 ret = -EINVAL;
5735                 goto out;
5736         }
5737         if (t == tr->current_trace)
5738                 goto out;
5739
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741         if (t->use_max_tr) {
5742                 arch_spin_lock(&tr->max_lock);
5743                 if (tr->cond_snapshot)
5744                         ret = -EBUSY;
5745                 arch_spin_unlock(&tr->max_lock);
5746                 if (ret)
5747                         goto out;
5748         }
5749 #endif
5750         /* Some tracers won't work on kernel command line */
5751         if (system_state < SYSTEM_RUNNING && t->noboot) {
5752                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5753                         t->name);
5754                 goto out;
5755         }
5756
5757         /* Some tracers are only allowed for the top level buffer */
5758         if (!trace_ok_for_array(t, tr)) {
5759                 ret = -EINVAL;
5760                 goto out;
5761         }
5762
5763         /* If trace pipe files are being read, we can't change the tracer */
5764         if (tr->current_trace->ref) {
5765                 ret = -EBUSY;
5766                 goto out;
5767         }
5768
5769         trace_branch_disable();
5770
5771         tr->current_trace->enabled--;
5772
5773         if (tr->current_trace->reset)
5774                 tr->current_trace->reset(tr);
5775
5776         /* Current trace needs to be nop_trace before synchronize_rcu */
5777         tr->current_trace = &nop_trace;
5778
5779 #ifdef CONFIG_TRACER_MAX_TRACE
5780         had_max_tr = tr->allocated_snapshot;
5781
5782         if (had_max_tr && !t->use_max_tr) {
5783                 /*
5784                  * We need to make sure that update_max_tr() sees that
5785                  * current_trace changed to nop_trace to keep it from
5786                  * swapping the buffers after we resize it.
5787                  * update_max_tr() is called with interrupts disabled,
5788                  * so synchronize_rcu() is sufficient.
5789                  */
5790                 synchronize_rcu();
5791                 free_snapshot(tr);
5792         }
5793 #endif
5794
5795 #ifdef CONFIG_TRACER_MAX_TRACE
5796         if (t->use_max_tr && !had_max_tr) {
5797                 ret = tracing_alloc_snapshot_instance(tr);
5798                 if (ret < 0)
5799                         goto out;
5800         }
5801 #endif
5802
5803         if (t->init) {
5804                 ret = tracer_init(t, tr);
5805                 if (ret)
5806                         goto out;
5807         }
5808
5809         tr->current_trace = t;
5810         tr->current_trace->enabled++;
5811         trace_branch_enable(tr);
5812  out:
5813         mutex_unlock(&trace_types_lock);
5814
5815         return ret;
5816 }
5817
5818 static ssize_t
5819 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5820                         size_t cnt, loff_t *ppos)
5821 {
5822         struct trace_array *tr = filp->private_data;
5823         char buf[MAX_TRACER_SIZE+1];
5824         int i;
5825         size_t ret;
5826         int err;
5827
5828         ret = cnt;
5829
5830         if (cnt > MAX_TRACER_SIZE)
5831                 cnt = MAX_TRACER_SIZE;
5832
5833         if (copy_from_user(buf, ubuf, cnt))
5834                 return -EFAULT;
5835
5836         buf[cnt] = 0;
5837
5838         /* strip trailing whitespace. */
5839         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5840                 buf[i] = 0;
5841
5842         err = tracing_set_tracer(tr, buf);
5843         if (err)
5844                 return err;
5845
5846         *ppos += ret;
5847
5848         return ret;
5849 }
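
/*
 * Illustrative usage of the two handlers above (tracer names are
 * examples; what is actually listed depends on the kernel config):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 *
 * The write strips trailing whitespace (so echo's newline is fine) and
 * hands the name to tracing_set_tracer(); an unknown name fails with
 * -EINVAL.
 */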
5850
5851 static ssize_t
5852 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5853                    size_t cnt, loff_t *ppos)
5854 {
5855         char buf[64];
5856         int r;
5857
5858         r = snprintf(buf, sizeof(buf), "%ld\n",
5859                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5860         if (r > sizeof(buf))
5861                 r = sizeof(buf);
5862         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5863 }
5864
5865 static ssize_t
5866 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5867                     size_t cnt, loff_t *ppos)
5868 {
5869         unsigned long val;
5870         int ret;
5871
5872         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5873         if (ret)
5874                 return ret;
5875
5876         *ptr = val * 1000;
5877
5878         return cnt;
5879 }
5880
5881 static ssize_t
5882 tracing_thresh_read(struct file *filp, char __user *ubuf,
5883                     size_t cnt, loff_t *ppos)
5884 {
5885         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5886 }
5887
5888 static ssize_t
5889 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5890                      size_t cnt, loff_t *ppos)
5891 {
5892         struct trace_array *tr = filp->private_data;
5893         int ret;
5894
5895         mutex_lock(&trace_types_lock);
5896         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5897         if (ret < 0)
5898                 goto out;
5899
5900         if (tr->current_trace->update_thresh) {
5901                 ret = tr->current_trace->update_thresh(tr);
5902                 if (ret < 0)
5903                         goto out;
5904         }
5905
5906         ret = cnt;
5907 out:
5908         mutex_unlock(&trace_types_lock);
5909
5910         return ret;
5911 }
5912
5913 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5914
5915 static ssize_t
5916 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5917                      size_t cnt, loff_t *ppos)
5918 {
5919         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5920 }
5921
5922 static ssize_t
5923 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5924                       size_t cnt, loff_t *ppos)
5925 {
5926         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5927 }
5928
5929 #endif
5930
5931 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5932 {
5933         struct trace_array *tr = inode->i_private;
5934         struct trace_iterator *iter;
5935         int ret;
5936
5937         ret = tracing_check_open_get_tr(tr);
5938         if (ret)
5939                 return ret;
5940
5941         mutex_lock(&trace_types_lock);
5942
5943         /* create a buffer to store the information to pass to userspace */
5944         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5945         if (!iter) {
5946                 ret = -ENOMEM;
5947                 __trace_array_put(tr);
5948                 goto out;
5949         }
5950
5951         trace_seq_init(&iter->seq);
5952         iter->trace = tr->current_trace;
5953
5954         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5955                 ret = -ENOMEM;
5956                 goto fail;
5957         }
5958
5959         /* trace pipe does not show start of buffer */
5960         cpumask_setall(iter->started);
5961
5962         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5963                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5964
5965         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5966         if (trace_clocks[tr->clock_id].in_ns)
5967                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5968
5969         iter->tr = tr;
5970         iter->trace_buffer = &tr->trace_buffer;
5971         iter->cpu_file = tracing_get_cpu(inode);
5972         mutex_init(&iter->mutex);
5973         filp->private_data = iter;
5974
5975         if (iter->trace->pipe_open)
5976                 iter->trace->pipe_open(iter);
5977
5978         nonseekable_open(inode, filp);
5979
5980         tr->current_trace->ref++;
5981 out:
5982         mutex_unlock(&trace_types_lock);
5983         return ret;
5984
5985 fail:
5986         kfree(iter);
5987         __trace_array_put(tr);
5988         mutex_unlock(&trace_types_lock);
5989         return ret;
5990 }
5991
5992 static int tracing_release_pipe(struct inode *inode, struct file *file)
5993 {
5994         struct trace_iterator *iter = file->private_data;
5995         struct trace_array *tr = inode->i_private;
5996
5997         mutex_lock(&trace_types_lock);
5998
5999         tr->current_trace->ref--;
6000
6001         if (iter->trace->pipe_close)
6002                 iter->trace->pipe_close(iter);
6003
6004         mutex_unlock(&trace_types_lock);
6005
6006         free_cpumask_var(iter->started);
6007         mutex_destroy(&iter->mutex);
6008         kfree(iter);
6009
6010         trace_array_put(tr);
6011
6012         return 0;
6013 }
6014
6015 static __poll_t
6016 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6017 {
6018         struct trace_array *tr = iter->tr;
6019
6020         /* Iterators are static, they should be filled or empty */
6021         if (trace_buffer_iter(iter, iter->cpu_file))
6022                 return EPOLLIN | EPOLLRDNORM;
6023
6024         if (tr->trace_flags & TRACE_ITER_BLOCK)
6025                 /*
6026                  * Always select as readable when in blocking mode
6027                  */
6028                 return EPOLLIN | EPOLLRDNORM;
6029         else
6030                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6031                                              filp, poll_table);
6032 }
6033
6034 static __poll_t
6035 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6036 {
6037         struct trace_iterator *iter = filp->private_data;
6038
6039         return trace_poll(iter, filp, poll_table);
6040 }
6041
6042 /* Must be called with iter->mutex held. */
6043 static int tracing_wait_pipe(struct file *filp)
6044 {
6045         struct trace_iterator *iter = filp->private_data;
6046         int ret;
6047
6048         while (trace_empty(iter)) {
6049
6050                 if ((filp->f_flags & O_NONBLOCK)) {
6051                         return -EAGAIN;
6052                 }
6053
6054                 /*
6055                  * We block until we have read something and tracing is
6056                  * disabled. We still block if tracing is disabled but we
6057                  * have never read anything; this allows a user to cat this
6058                  * file and then enable tracing. But after we have read
6059                  * something, we give an EOF when tracing is disabled again.
6060                  *
6061                  * iter->pos will be 0 if we haven't read anything.
6062                  */
6063                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6064                         break;
6065
6066                 mutex_unlock(&iter->mutex);
6067
6068                 ret = wait_on_pipe(iter, 0);
6069
6070                 mutex_lock(&iter->mutex);
6071
6072                 if (ret)
6073                         return ret;
6074         }
6075
6076         return 1;
6077 }
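
/*
 * Illustration of the blocking behaviour described above (commands and
 * the tracefs path are examples):
 *
 *	# cat /sys/kernel/tracing/trace_pipe &    <- blocks; nothing read yet
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *	  ... events stream to the reader ...
 *	# echo 0 > /sys/kernel/tracing/tracing_on
 *	  the reader now returns EOF, since it has read something
 *	  (iter->pos != 0) and tracing is disabled.
 */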
6078
6079 /*
6080  * Consumer reader.
6081  */
6082 static ssize_t
6083 tracing_read_pipe(struct file *filp, char __user *ubuf,
6084                   size_t cnt, loff_t *ppos)
6085 {
6086         struct trace_iterator *iter = filp->private_data;
6087         ssize_t sret;
6088
6089         /*
6090          * Avoid more than one consumer on a single file descriptor
6091          * This is just a matter of traces coherency, the ring buffer itself
6092          * is protected.
6093          */
6094         mutex_lock(&iter->mutex);
6095
6096         /* return any leftover data */
6097         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6098         if (sret != -EBUSY)
6099                 goto out;
6100
6101         trace_seq_init(&iter->seq);
6102
6103         if (iter->trace->read) {
6104                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6105                 if (sret)
6106                         goto out;
6107         }
6108
6109 waitagain:
6110         sret = tracing_wait_pipe(filp);
6111         if (sret <= 0)
6112                 goto out;
6113
6114         /* stop when tracing is finished */
6115         if (trace_empty(iter)) {
6116                 sret = 0;
6117                 goto out;
6118         }
6119
6120         if (cnt >= PAGE_SIZE)
6121                 cnt = PAGE_SIZE - 1;
6122
6123         /* reset all but tr, trace, and overruns */
6124         memset(&iter->seq, 0,
6125                sizeof(struct trace_iterator) -
6126                offsetof(struct trace_iterator, seq));
6127         cpumask_clear(iter->started);
6128         trace_seq_init(&iter->seq);
6129         iter->pos = -1;
6130
6131         trace_event_read_lock();
6132         trace_access_lock(iter->cpu_file);
6133         while (trace_find_next_entry_inc(iter) != NULL) {
6134                 enum print_line_t ret;
6135                 int save_len = iter->seq.seq.len;
6136
6137                 ret = print_trace_line(iter);
6138                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6139                         /* don't print partial lines */
6140                         iter->seq.seq.len = save_len;
6141                         break;
6142                 }
6143                 if (ret != TRACE_TYPE_NO_CONSUME)
6144                         trace_consume(iter);
6145
6146                 if (trace_seq_used(&iter->seq) >= cnt)
6147                         break;
6148
6149                 /*
6150                  * Setting the full flag means we reached the trace_seq buffer
6151                  * size and should have left via the partial-line condition above.
6152                  * One of the trace_seq_* functions is not used properly.
6153                  */
6154                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6155                           iter->ent->type);
6156         }
6157         trace_access_unlock(iter->cpu_file);
6158         trace_event_read_unlock();
6159
6160         /* Now copy what we have to the user */
6161         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6162         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6163                 trace_seq_init(&iter->seq);
6164
6165         /*
6166          * If there was nothing to send to user, in spite of consuming trace
6167          * entries, go back to wait for more entries.
6168          */
6169         if (sret == -EBUSY)
6170                 goto waitagain;
6171
6172 out:
6173         mutex_unlock(&iter->mutex);
6174
6175         return sret;
6176 }
6177
6178 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6179                                      unsigned int idx)
6180 {
6181         __free_page(spd->pages[idx]);
6182 }
6183
6184 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6185         .confirm                = generic_pipe_buf_confirm,
6186         .release                = generic_pipe_buf_release,
6187         .steal                  = generic_pipe_buf_steal,
6188         .get                    = generic_pipe_buf_get,
6189 };
6190
6191 static size_t
6192 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6193 {
6194         size_t count;
6195         int save_len;
6196         int ret;
6197
6198         /* Seq buffer is page-sized, exactly what we need. */
6199         for (;;) {
6200                 save_len = iter->seq.seq.len;
6201                 ret = print_trace_line(iter);
6202
6203                 if (trace_seq_has_overflowed(&iter->seq)) {
6204                         iter->seq.seq.len = save_len;
6205                         break;
6206                 }
6207
6208                 /*
6209                  * This should not be hit, because it should only
6210                  * be set if the iter->seq overflowed. But check it
6211                  * anyway to be safe.
6212                  */
6213                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6214                         iter->seq.seq.len = save_len;
6215                         break;
6216                 }
6217
6218                 count = trace_seq_used(&iter->seq) - save_len;
6219                 if (rem < count) {
6220                         rem = 0;
6221                         iter->seq.seq.len = save_len;
6222                         break;
6223                 }
6224
6225                 if (ret != TRACE_TYPE_NO_CONSUME)
6226                         trace_consume(iter);
6227                 rem -= count;
6228                 if (!trace_find_next_entry_inc(iter))   {
6229                         rem = 0;
6230                         iter->ent = NULL;
6231                         break;
6232                 }
6233         }
6234
6235         return rem;
6236 }
6237
6238 static ssize_t tracing_splice_read_pipe(struct file *filp,
6239                                         loff_t *ppos,
6240                                         struct pipe_inode_info *pipe,
6241                                         size_t len,
6242                                         unsigned int flags)
6243 {
6244         struct page *pages_def[PIPE_DEF_BUFFERS];
6245         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6246         struct trace_iterator *iter = filp->private_data;
6247         struct splice_pipe_desc spd = {
6248                 .pages          = pages_def,
6249                 .partial        = partial_def,
6250                 .nr_pages       = 0, /* This gets updated below. */
6251                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6252                 .ops            = &tracing_pipe_buf_ops,
6253                 .spd_release    = tracing_spd_release_pipe,
6254         };
6255         ssize_t ret;
6256         size_t rem;
6257         unsigned int i;
6258
6259         if (splice_grow_spd(pipe, &spd))
6260                 return -ENOMEM;
6261
6262         mutex_lock(&iter->mutex);
6263
6264         if (iter->trace->splice_read) {
6265                 ret = iter->trace->splice_read(iter, filp,
6266                                                ppos, pipe, len, flags);
6267                 if (ret)
6268                         goto out_err;
6269         }
6270
6271         ret = tracing_wait_pipe(filp);
6272         if (ret <= 0)
6273                 goto out_err;
6274
6275         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6276                 ret = -EFAULT;
6277                 goto out_err;
6278         }
6279
6280         trace_event_read_lock();
6281         trace_access_lock(iter->cpu_file);
6282
6283         /* Fill as many pages as possible. */
6284         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6285                 spd.pages[i] = alloc_page(GFP_KERNEL);
6286                 if (!spd.pages[i])
6287                         break;
6288
6289                 rem = tracing_fill_pipe_page(rem, iter);
6290
6291                 /* Copy the data into the page, so we can start over. */
6292                 ret = trace_seq_to_buffer(&iter->seq,
6293                                           page_address(spd.pages[i]),
6294                                           trace_seq_used(&iter->seq));
6295                 if (ret < 0) {
6296                         __free_page(spd.pages[i]);
6297                         break;
6298                 }
6299                 spd.partial[i].offset = 0;
6300                 spd.partial[i].len = trace_seq_used(&iter->seq);
6301
6302                 trace_seq_init(&iter->seq);
6303         }
6304
6305         trace_access_unlock(iter->cpu_file);
6306         trace_event_read_unlock();
6307         mutex_unlock(&iter->mutex);
6308
6309         spd.nr_pages = i;
6310
6311         if (i)
6312                 ret = splice_to_pipe(pipe, &spd);
6313         else
6314                 ret = 0;
6315 out:
6316         splice_shrink_spd(&spd);
6317         return ret;
6318
6319 out_err:
6320         mutex_unlock(&iter->mutex);
6321         goto out;
6322 }
6323
6324 static ssize_t
6325 tracing_entries_read(struct file *filp, char __user *ubuf,
6326                      size_t cnt, loff_t *ppos)
6327 {
6328         struct inode *inode = file_inode(filp);
6329         struct trace_array *tr = inode->i_private;
6330         int cpu = tracing_get_cpu(inode);
6331         char buf[64];
6332         int r = 0;
6333         ssize_t ret;
6334
6335         mutex_lock(&trace_types_lock);
6336
6337         if (cpu == RING_BUFFER_ALL_CPUS) {
6338                 int cpu, buf_size_same;
6339                 unsigned long size;
6340
6341                 size = 0;
6342                 buf_size_same = 1;
6343                 /* check if all cpu sizes are the same */
6344                 for_each_tracing_cpu(cpu) {
6345                         /* fill in the size from first enabled cpu */
6346                         if (size == 0)
6347                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6348                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6349                                 buf_size_same = 0;
6350                                 break;
6351                         }
6352                 }
6353
6354                 if (buf_size_same) {
6355                         if (!ring_buffer_expanded)
6356                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6357                                             size >> 10,
6358                                             trace_buf_size >> 10);
6359                         else
6360                                 r = sprintf(buf, "%lu\n", size >> 10);
6361                 } else
6362                         r = sprintf(buf, "X\n");
6363         } else
6364                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6365
6366         mutex_unlock(&trace_types_lock);
6367
6368         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6369         return ret;
6370 }
6371
6372 static ssize_t
6373 tracing_entries_write(struct file *filp, const char __user *ubuf,
6374                       size_t cnt, loff_t *ppos)
6375 {
6376         struct inode *inode = file_inode(filp);
6377         struct trace_array *tr = inode->i_private;
6378         unsigned long val;
6379         int ret;
6380
6381         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6382         if (ret)
6383                 return ret;
6384
6385         /* must have at least 1 entry */
6386         if (!val)
6387                 return -EINVAL;
6388
6389         /* value is in KB */
6390         val <<= 10;
6391         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6392         if (ret < 0)
6393                 return ret;
6394
6395         *ppos += cnt;
6396
6397         return cnt;
6398 }
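/*
 * Illustrative user-space usage of the buffer_size_kb file handled above
 * (the path assumes the usual tracefs mount point; the value is in KB, as
 * the shift above shows):
 *
 *	echo 8192 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes each per-CPU ring buffer to 8 MB, or just one CPU's buffer when
 * written through per_cpu/cpuN/buffer_size_kb.
 */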
6399
6400 static ssize_t
6401 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6402                                 size_t cnt, loff_t *ppos)
6403 {
6404         struct trace_array *tr = filp->private_data;
6405         char buf[64];
6406         int r, cpu;
6407         unsigned long size = 0, expanded_size = 0;
6408
6409         mutex_lock(&trace_types_lock);
6410         for_each_tracing_cpu(cpu) {
6411                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6412                 if (!ring_buffer_expanded)
6413                         expanded_size += trace_buf_size >> 10;
6414         }
6415         if (ring_buffer_expanded)
6416                 r = sprintf(buf, "%lu\n", size);
6417         else
6418                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6419         mutex_unlock(&trace_types_lock);
6420
6421         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6422 }
6423
6424 static ssize_t
6425 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6426                           size_t cnt, loff_t *ppos)
6427 {
6428         /*
6429          * There is no need to read what the user has written; this function
6430          * exists only so that an "echo" to this file does not return an error.
6431          */
6432
6433         *ppos += cnt;
6434
6435         return cnt;
6436 }
6437
6438 static int
6439 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6440 {
6441         struct trace_array *tr = inode->i_private;
6442
6443         /* Disable tracing? */
6444         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6445                 tracer_tracing_off(tr);
6446         /* resize the ring buffer to 0 */
6447         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6448
6449         trace_array_put(tr);
6450
6451         return 0;
6452 }
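/*
 * Illustrative user-space usage of the free_buffer file (path assumes the
 * usual tracefs mount point):
 *
 *	echo > /sys/kernel/tracing/free_buffer
 *
 * The write itself is a no-op; on release the ring buffer is resized to
 * zero, and tracing is stopped first if the corresponding trace option
 * is set.
 */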
6453
6454 static ssize_t
6455 tracing_mark_write(struct file *filp, const char __user *ubuf,
6456                                         size_t cnt, loff_t *fpos)
6457 {
6458         struct trace_array *tr = filp->private_data;
6459         struct ring_buffer_event *event;
6460         enum event_trigger_type tt = ETT_NONE;
6461         struct ring_buffer *buffer;
6462         struct print_entry *entry;
6463         unsigned long irq_flags;
6464         ssize_t written;
6465         int size;
6466         int len;
6467
6468 /* Used in tracing_mark_raw_write() as well */
6469 #define FAULTED_STR "<faulted>"
6470 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6471
6472         if (tracing_disabled)
6473                 return -EINVAL;
6474
6475         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6476                 return -EINVAL;
6477
6478         if (cnt > TRACE_BUF_SIZE)
6479                 cnt = TRACE_BUF_SIZE;
6480
6481         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6482
6483         local_save_flags(irq_flags);
6484         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6485
6486         /* If less than "<faulted>", then make sure we can still add that */
6487         if (cnt < FAULTED_SIZE)
6488                 size += FAULTED_SIZE - cnt;
6489
6490         buffer = tr->trace_buffer.buffer;
6491         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6492                                             irq_flags, preempt_count());
6493         if (unlikely(!event))
6494                 /* Ring buffer disabled, return as if not open for write */
6495                 return -EBADF;
6496
6497         entry = ring_buffer_event_data(event);
6498         entry->ip = _THIS_IP_;
6499
6500         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6501         if (len) {
6502                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6503                 cnt = FAULTED_SIZE;
6504                 written = -EFAULT;
6505         } else
6506                 written = cnt;
6507         len = cnt;
6508
6509         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6510                 /* do not add \n before testing triggers, but add \0 */
6511                 entry->buf[cnt] = '\0';
6512                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6513         }
6514
6515         if (entry->buf[cnt - 1] != '\n') {
6516                 entry->buf[cnt] = '\n';
6517                 entry->buf[cnt + 1] = '\0';
6518         } else
6519                 entry->buf[cnt] = '\0';
6520
6521         __buffer_unlock_commit(buffer, event);
6522
6523         if (tt)
6524                 event_triggers_post_call(tr->trace_marker_file, tt);
6525
6526         if (written > 0)
6527                 *fpos += written;
6528
6529         return written;
6530 }
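/*
 * Illustrative user-space usage of the trace_marker file written to above:
 *
 *	echo "hit checkpoint A" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event; a newline is appended if
 * the writer did not supply one, and "<faulted>" is recorded when the user
 * buffer could not be copied.
 */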
6531
6532 /* Limit it for now to 3K (including tag) */
6533 #define RAW_DATA_MAX_SIZE (1024*3)
6534
6535 static ssize_t
6536 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6537                                         size_t cnt, loff_t *fpos)
6538 {
6539         struct trace_array *tr = filp->private_data;
6540         struct ring_buffer_event *event;
6541         struct ring_buffer *buffer;
6542         struct raw_data_entry *entry;
6543         unsigned long irq_flags;
6544         ssize_t written;
6545         int size;
6546         int len;
6547
6548 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6549
6550         if (tracing_disabled)
6551                 return -EINVAL;
6552
6553         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6554                 return -EINVAL;
6555
6556         /* The marker must at least have a tag id */
6557         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6558                 return -EINVAL;
6559
6560         if (cnt > TRACE_BUF_SIZE)
6561                 cnt = TRACE_BUF_SIZE;
6562
6563         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6564
6565         local_save_flags(irq_flags);
6566         size = sizeof(*entry) + cnt;
6567         if (cnt < FAULT_SIZE_ID)
6568                 size += FAULT_SIZE_ID - cnt;
6569
6570         buffer = tr->trace_buffer.buffer;
6571         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6572                                             irq_flags, preempt_count());
6573         if (!event)
6574                 /* Ring buffer disabled, return as if not open for write */
6575                 return -EBADF;
6576
6577         entry = ring_buffer_event_data(event);
6578
6579         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6580         if (len) {
6581                 entry->id = -1;
6582                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6583                 written = -EFAULT;
6584         } else
6585                 written = cnt;
6586
6587         __buffer_unlock_commit(buffer, event);
6588
6589         if (written > 0)
6590                 *fpos += written;
6591
6592         return written;
6593 }
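/*
 * A minimal sketch of writing to trace_marker_raw from user space. The
 * layout simply mirrors what the function above copies into
 * struct raw_data_entry: an unsigned int tag id followed by the payload
 * (names below are illustrative only):
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 */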
6594
6595 static int tracing_clock_show(struct seq_file *m, void *v)
6596 {
6597         struct trace_array *tr = m->private;
6598         int i;
6599
6600         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6601                 seq_printf(m,
6602                         "%s%s%s%s", i ? " " : "",
6603                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6604                         i == tr->clock_id ? "]" : "");
6605         seq_putc(m, '\n');
6606
6607         return 0;
6608 }
6609
6610 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6611 {
6612         int i;
6613
6614         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6615                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6616                         break;
6617         }
6618         if (i == ARRAY_SIZE(trace_clocks))
6619                 return -EINVAL;
6620
6621         mutex_lock(&trace_types_lock);
6622
6623         tr->clock_id = i;
6624
6625         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6626
6627         /*
6628          * New clock may not be consistent with the previous clock.
6629          * Reset the buffer so that it doesn't have incomparable timestamps.
6630          */
6631         tracing_reset_online_cpus(&tr->trace_buffer);
6632
6633 #ifdef CONFIG_TRACER_MAX_TRACE
6634         if (tr->max_buffer.buffer)
6635                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6636         tracing_reset_online_cpus(&tr->max_buffer);
6637 #endif
6638
6639         mutex_unlock(&trace_types_lock);
6640
6641         return 0;
6642 }
6643
6644 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6645                                    size_t cnt, loff_t *fpos)
6646 {
6647         struct seq_file *m = filp->private_data;
6648         struct trace_array *tr = m->private;
6649         char buf[64];
6650         const char *clockstr;
6651         int ret;
6652
6653         if (cnt >= sizeof(buf))
6654                 return -EINVAL;
6655
6656         if (copy_from_user(buf, ubuf, cnt))
6657                 return -EFAULT;
6658
6659         buf[cnt] = 0;
6660
6661         clockstr = strstrip(buf);
6662
6663         ret = tracing_set_clock(tr, clockstr);
6664         if (ret)
6665                 return ret;
6666
6667         *fpos += cnt;
6668
6669         return cnt;
6670 }
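/*
 * Illustrative user-space usage of the trace_clock file:
 *
 *	cat /sys/kernel/tracing/trace_clock      # current clock shown in [ ]
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * As tracing_set_clock() notes, switching clocks resets the per-CPU
 * buffers so that timestamps stay comparable.
 */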
6671
6672 static int tracing_clock_open(struct inode *inode, struct file *file)
6673 {
6674         struct trace_array *tr = inode->i_private;
6675         int ret;
6676
6677         ret = tracing_check_open_get_tr(tr);
6678         if (ret)
6679                 return ret;
6680
6681         ret = single_open(file, tracing_clock_show, inode->i_private);
6682         if (ret < 0)
6683                 trace_array_put(tr);
6684
6685         return ret;
6686 }
6687
6688 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6689 {
6690         struct trace_array *tr = m->private;
6691
6692         mutex_lock(&trace_types_lock);
6693
6694         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6695                 seq_puts(m, "delta [absolute]\n");
6696         else
6697                 seq_puts(m, "[delta] absolute\n");
6698
6699         mutex_unlock(&trace_types_lock);
6700
6701         return 0;
6702 }
6703
6704 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6705 {
6706         struct trace_array *tr = inode->i_private;
6707         int ret;
6708
6709         ret = tracing_check_open_get_tr(tr);
6710         if (ret)
6711                 return ret;
6712
6713         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6714         if (ret < 0)
6715                 trace_array_put(tr);
6716
6717         return ret;
6718 }
6719
6720 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6721 {
6722         int ret = 0;
6723
6724         mutex_lock(&trace_types_lock);
6725
6726         if (abs && tr->time_stamp_abs_ref++)
6727                 goto out;
6728
6729         if (!abs) {
6730                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6731                         ret = -EINVAL;
6732                         goto out;
6733                 }
6734
6735                 if (--tr->time_stamp_abs_ref)
6736                         goto out;
6737         }
6738
6739         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6740
6741 #ifdef CONFIG_TRACER_MAX_TRACE
6742         if (tr->max_buffer.buffer)
6743                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6744 #endif
6745  out:
6746         mutex_unlock(&trace_types_lock);
6747
6748         return ret;
6749 }
6750
6751 struct ftrace_buffer_info {
6752         struct trace_iterator   iter;
6753         void                    *spare;
6754         unsigned int            spare_cpu;
6755         unsigned int            read;
6756 };
6757
6758 #ifdef CONFIG_TRACER_SNAPSHOT
6759 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6760 {
6761         struct trace_array *tr = inode->i_private;
6762         struct trace_iterator *iter;
6763         struct seq_file *m;
6764         int ret;
6765
6766         ret = tracing_check_open_get_tr(tr);
6767         if (ret)
6768                 return ret;
6769
6770         if (file->f_mode & FMODE_READ) {
6771                 iter = __tracing_open(inode, file, true);
6772                 if (IS_ERR(iter))
6773                         ret = PTR_ERR(iter);
6774         } else {
6775                 /* Writes still need the seq_file to hold the private data */
6776                 ret = -ENOMEM;
6777                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6778                 if (!m)
6779                         goto out;
6780                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6781                 if (!iter) {
6782                         kfree(m);
6783                         goto out;
6784                 }
6785                 ret = 0;
6786
6787                 iter->tr = tr;
6788                 iter->trace_buffer = &tr->max_buffer;
6789                 iter->cpu_file = tracing_get_cpu(inode);
6790                 m->private = iter;
6791                 file->private_data = m;
6792         }
6793 out:
6794         if (ret < 0)
6795                 trace_array_put(tr);
6796
6797         return ret;
6798 }
6799
6800 static ssize_t
6801 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6802                        loff_t *ppos)
6803 {
6804         struct seq_file *m = filp->private_data;
6805         struct trace_iterator *iter = m->private;
6806         struct trace_array *tr = iter->tr;
6807         unsigned long val;
6808         int ret;
6809
6810         ret = tracing_update_buffers();
6811         if (ret < 0)
6812                 return ret;
6813
6814         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6815         if (ret)
6816                 return ret;
6817
6818         mutex_lock(&trace_types_lock);
6819
6820         if (tr->current_trace->use_max_tr) {
6821                 ret = -EBUSY;
6822                 goto out;
6823         }
6824
6825         arch_spin_lock(&tr->max_lock);
6826         if (tr->cond_snapshot)
6827                 ret = -EBUSY;
6828         arch_spin_unlock(&tr->max_lock);
6829         if (ret)
6830                 goto out;
6831
6832         switch (val) {
6833         case 0:
6834                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6835                         ret = -EINVAL;
6836                         break;
6837                 }
6838                 if (tr->allocated_snapshot)
6839                         free_snapshot(tr);
6840                 break;
6841         case 1:
6842 /* Only allow per-cpu swap if the ring buffer supports it */
6843 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6844                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6845                         ret = -EINVAL;
6846                         break;
6847                 }
6848 #endif
6849                 if (tr->allocated_snapshot)
6850                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6851                                         &tr->trace_buffer, iter->cpu_file);
6852                 else
6853                         ret = tracing_alloc_snapshot_instance(tr);
6854                 if (ret < 0)
6855                         break;
6856                 local_irq_disable();
6857                 /* Now, we're going to swap */
6858                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6859                         update_max_tr(tr, current, smp_processor_id(), NULL);
6860                 else
6861                         update_max_tr_single(tr, current, iter->cpu_file);
6862                 local_irq_enable();
6863                 break;
6864         default:
6865                 if (tr->allocated_snapshot) {
6866                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6867                                 tracing_reset_online_cpus(&tr->max_buffer);
6868                         else
6869                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6870                 }
6871                 break;
6872         }
6873
6874         if (ret >= 0) {
6875                 *ppos += cnt;
6876                 ret = cnt;
6877         }
6878 out:
6879         mutex_unlock(&trace_types_lock);
6880         return ret;
6881 }
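/*
 * User-space view of the switch statement above (paths assume the usual
 * tracefs mount point):
 *
 *	echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, then swap
 *	echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *	echo 2 > /sys/kernel/tracing/snapshot   # clear the snapshot, keep it allocated
 */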
6882
6883 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6884 {
6885         struct seq_file *m = file->private_data;
6886         int ret;
6887
6888         ret = tracing_release(inode, file);
6889
6890         if (file->f_mode & FMODE_READ)
6891                 return ret;
6892
6893         /* If write only, the seq_file is just a stub */
6894         if (m)
6895                 kfree(m->private);
6896         kfree(m);
6897
6898         return 0;
6899 }
6900
6901 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6902 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6903                                     size_t count, loff_t *ppos);
6904 static int tracing_buffers_release(struct inode *inode, struct file *file);
6905 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6906                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6907
6908 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6909 {
6910         struct ftrace_buffer_info *info;
6911         int ret;
6912
6913         /* The following checks for tracefs lockdown */
6914         ret = tracing_buffers_open(inode, filp);
6915         if (ret < 0)
6916                 return ret;
6917
6918         info = filp->private_data;
6919
6920         if (info->iter.trace->use_max_tr) {
6921                 tracing_buffers_release(inode, filp);
6922                 return -EBUSY;
6923         }
6924
6925         info->iter.snapshot = true;
6926         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6927
6928         return ret;
6929 }
6930
6931 #endif /* CONFIG_TRACER_SNAPSHOT */
6932
6933
6934 static const struct file_operations tracing_thresh_fops = {
6935         .open           = tracing_open_generic,
6936         .read           = tracing_thresh_read,
6937         .write          = tracing_thresh_write,
6938         .llseek         = generic_file_llseek,
6939 };
6940
6941 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6942 static const struct file_operations tracing_max_lat_fops = {
6943         .open           = tracing_open_generic,
6944         .read           = tracing_max_lat_read,
6945         .write          = tracing_max_lat_write,
6946         .llseek         = generic_file_llseek,
6947 };
6948 #endif
6949
6950 static const struct file_operations set_tracer_fops = {
6951         .open           = tracing_open_generic,
6952         .read           = tracing_set_trace_read,
6953         .write          = tracing_set_trace_write,
6954         .llseek         = generic_file_llseek,
6955 };
6956
6957 static const struct file_operations tracing_pipe_fops = {
6958         .open           = tracing_open_pipe,
6959         .poll           = tracing_poll_pipe,
6960         .read           = tracing_read_pipe,
6961         .splice_read    = tracing_splice_read_pipe,
6962         .release        = tracing_release_pipe,
6963         .llseek         = no_llseek,
6964 };
6965
6966 static const struct file_operations tracing_entries_fops = {
6967         .open           = tracing_open_generic_tr,
6968         .read           = tracing_entries_read,
6969         .write          = tracing_entries_write,
6970         .llseek         = generic_file_llseek,
6971         .release        = tracing_release_generic_tr,
6972 };
6973
6974 static const struct file_operations tracing_total_entries_fops = {
6975         .open           = tracing_open_generic_tr,
6976         .read           = tracing_total_entries_read,
6977         .llseek         = generic_file_llseek,
6978         .release        = tracing_release_generic_tr,
6979 };
6980
6981 static const struct file_operations tracing_free_buffer_fops = {
6982         .open           = tracing_open_generic_tr,
6983         .write          = tracing_free_buffer_write,
6984         .release        = tracing_free_buffer_release,
6985 };
6986
6987 static const struct file_operations tracing_mark_fops = {
6988         .open           = tracing_open_generic_tr,
6989         .write          = tracing_mark_write,
6990         .llseek         = generic_file_llseek,
6991         .release        = tracing_release_generic_tr,
6992 };
6993
6994 static const struct file_operations tracing_mark_raw_fops = {
6995         .open           = tracing_open_generic_tr,
6996         .write          = tracing_mark_raw_write,
6997         .llseek         = generic_file_llseek,
6998         .release        = tracing_release_generic_tr,
6999 };
7000
7001 static const struct file_operations trace_clock_fops = {
7002         .open           = tracing_clock_open,
7003         .read           = seq_read,
7004         .llseek         = seq_lseek,
7005         .release        = tracing_single_release_tr,
7006         .write          = tracing_clock_write,
7007 };
7008
7009 static const struct file_operations trace_time_stamp_mode_fops = {
7010         .open           = tracing_time_stamp_mode_open,
7011         .read           = seq_read,
7012         .llseek         = seq_lseek,
7013         .release        = tracing_single_release_tr,
7014 };
7015
7016 #ifdef CONFIG_TRACER_SNAPSHOT
7017 static const struct file_operations snapshot_fops = {
7018         .open           = tracing_snapshot_open,
7019         .read           = seq_read,
7020         .write          = tracing_snapshot_write,
7021         .llseek         = tracing_lseek,
7022         .release        = tracing_snapshot_release,
7023 };
7024
7025 static const struct file_operations snapshot_raw_fops = {
7026         .open           = snapshot_raw_open,
7027         .read           = tracing_buffers_read,
7028         .release        = tracing_buffers_release,
7029         .splice_read    = tracing_buffers_splice_read,
7030         .llseek         = no_llseek,
7031 };
7032
7033 #endif /* CONFIG_TRACER_SNAPSHOT */
7034
7035 #define TRACING_LOG_ERRS_MAX    8
7036 #define TRACING_LOG_LOC_MAX     128
7037
7038 #define CMD_PREFIX "  Command: "
7039
7040 struct err_info {
7041         const char      **errs; /* ptr to loc-specific array of err strings */
7042         u8              type;   /* index into errs -> specific err string */
7043         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7044         u64             ts;
7045 };
7046
7047 struct tracing_log_err {
7048         struct list_head        list;
7049         struct err_info         info;
7050         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7051         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7052 };
7053
7054 static DEFINE_MUTEX(tracing_err_log_lock);
7055
7056 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7057 {
7058         struct tracing_log_err *err;
7059
7060         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7061                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7062                 if (!err)
7063                         err = ERR_PTR(-ENOMEM);
7064                 tr->n_err_log_entries++;
7065
7066                 return err;
7067         }
7068
7069         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7070         list_del(&err->list);
7071
7072         return err;
7073 }
7074
7075 /**
7076  * err_pos - find the position of a string within a command for error caret placement
7077  * @cmd: The tracing command that caused the error
7078  * @str: The string to position the caret at within @cmd
7079  *
7080  * Finds the position of the first occurrence of @str within @cmd.  The
7081  * return value can be passed to tracing_log_err() for caret placement
7082  * within @cmd.
7083  *
7084  * Returns the index within @cmd of the first occurrence of @str or 0
7085  * if @str was not found.
7086  */
7087 unsigned int err_pos(char *cmd, const char *str)
7088 {
7089         char *found;
7090
7091         if (WARN_ON(!strlen(cmd)))
7092                 return 0;
7093
7094         found = strstr(cmd, str);
7095         if (found)
7096                 return found - cmd;
7097
7098         return 0;
7099 }
7100
7101 /**
7102  * tracing_log_err - write an error to the tracing error log
7103  * @tr: The associated trace array for the error (NULL for top level array)
7104  * @loc: A string describing where the error occurred
7105  * @cmd: The tracing command that caused the error
7106  * @errs: The array of loc-specific static error strings
7107  * @type: The index into errs[], which produces the specific static err string
7108  * @pos: The position at which the caret should be placed within @cmd
7109  *
7110  * Writes an error into tracing/error_log of the form:
7111  *
7112  * <loc>: error: <text>
7113  *   Command: <cmd>
7114  *              ^
7115  *
7116  * tracing/error_log is a small log file containing the last
7117  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7118  * unless there has been a tracing error, and the error log can be
7119  * cleared and have its memory freed by writing the empty string in
7120  * truncation mode to it, i.e. echo > tracing/error_log.
7121  *
7122  * NOTE: the @errs array along with the @type param is used to
7123  * produce a static error string - this string is not copied and saved
7124  * when the error is logged - only a pointer to it is saved.  See
7125  * existing callers for examples of how static strings are typically
7126  * defined for use with tracing_log_err().
7127  */
7128 void tracing_log_err(struct trace_array *tr,
7129                      const char *loc, const char *cmd,
7130                      const char **errs, u8 type, u8 pos)
7131 {
7132         struct tracing_log_err *err;
7133
7134         if (!tr)
7135                 tr = &global_trace;
7136
7137         mutex_lock(&tracing_err_log_lock);
7138         err = get_tracing_log_err(tr);
7139         if (PTR_ERR(err) == -ENOMEM) {
7140                 mutex_unlock(&tracing_err_log_lock);
7141                 return;
7142         }
7143
7144         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7145         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7146
7147         err->info.errs = errs;
7148         err->info.type = type;
7149         err->info.pos = pos;
7150         err->info.ts = local_clock();
7151
7152         list_add_tail(&err->list, &tr->err_log);
7153         mutex_unlock(&tracing_err_log_lock);
7154 }
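/*
 * A minimal sketch of how a tracing command might report a parse error
 * through the helpers above (all names here are hypothetical; see the
 * real callers of tracing_log_err() for concrete usage):
 *
 *	static const char *my_cmd_errs[] = { "Field not found", "Bad operator" };
 *
 *	tracing_log_err(tr, "my_cmd", cmd_str, my_cmd_errs,
 *			0, err_pos(cmd_str, bad_token));
 */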
7155
7156 static void clear_tracing_err_log(struct trace_array *tr)
7157 {
7158         struct tracing_log_err *err, *next;
7159
7160         mutex_lock(&tracing_err_log_lock);
7161         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7162                 list_del(&err->list);
7163                 kfree(err);
7164         }
7165
7166         tr->n_err_log_entries = 0;
7167         mutex_unlock(&tracing_err_log_lock);
7168 }
7169
7170 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7171 {
7172         struct trace_array *tr = m->private;
7173
7174         mutex_lock(&tracing_err_log_lock);
7175
7176         return seq_list_start(&tr->err_log, *pos);
7177 }
7178
7179 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7180 {
7181         struct trace_array *tr = m->private;
7182
7183         return seq_list_next(v, &tr->err_log, pos);
7184 }
7185
7186 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7187 {
7188         mutex_unlock(&tracing_err_log_lock);
7189 }
7190
7191 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7192 {
7193         u8 i;
7194
7195         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7196                 seq_putc(m, ' ');
7197         for (i = 0; i < pos; i++)
7198                 seq_putc(m, ' ');
7199         seq_puts(m, "^\n");
7200 }
7201
7202 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7203 {
7204         struct tracing_log_err *err = v;
7205
7206         if (err) {
7207                 const char *err_text = err->info.errs[err->info.type];
7208                 u64 sec = err->info.ts;
7209                 u32 nsec;
7210
7211                 nsec = do_div(sec, NSEC_PER_SEC);
7212                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7213                            err->loc, err_text);
7214                 seq_printf(m, "%s", err->cmd);
7215                 tracing_err_log_show_pos(m, err->info.pos);
7216         }
7217
7218         return 0;
7219 }
7220
7221 static const struct seq_operations tracing_err_log_seq_ops = {
7222         .start  = tracing_err_log_seq_start,
7223         .next   = tracing_err_log_seq_next,
7224         .stop   = tracing_err_log_seq_stop,
7225         .show   = tracing_err_log_seq_show
7226 };
7227
7228 static int tracing_err_log_open(struct inode *inode, struct file *file)
7229 {
7230         struct trace_array *tr = inode->i_private;
7231         int ret = 0;
7232
7233         ret = tracing_check_open_get_tr(tr);
7234         if (ret)
7235                 return ret;
7236
7237         /* If this file was opened for write, then erase contents */
7238         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7239                 clear_tracing_err_log(tr);
7240
7241         if (file->f_mode & FMODE_READ) {
7242                 ret = seq_open(file, &tracing_err_log_seq_ops);
7243                 if (!ret) {
7244                         struct seq_file *m = file->private_data;
7245                         m->private = tr;
7246                 } else {
7247                         trace_array_put(tr);
7248                 }
7249         }
7250         return ret;
7251 }
7252
7253 static ssize_t tracing_err_log_write(struct file *file,
7254                                      const char __user *buffer,
7255                                      size_t count, loff_t *ppos)
7256 {
7257         return count;
7258 }
7259
7260 static int tracing_err_log_release(struct inode *inode, struct file *file)
7261 {
7262         struct trace_array *tr = inode->i_private;
7263
7264         trace_array_put(tr);
7265
7266         if (file->f_mode & FMODE_READ)
7267                 seq_release(inode, file);
7268
7269         return 0;
7270 }
7271
7272 static const struct file_operations tracing_err_log_fops = {
7273         .open           = tracing_err_log_open,
7274         .write          = tracing_err_log_write,
7275         .read           = seq_read,
7276         .llseek         = seq_lseek,
7277         .release        = tracing_err_log_release,
7278 };
7279
7280 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7281 {
7282         struct trace_array *tr = inode->i_private;
7283         struct ftrace_buffer_info *info;
7284         int ret;
7285
7286         ret = tracing_check_open_get_tr(tr);
7287         if (ret)
7288                 return ret;
7289
7290         info = kzalloc(sizeof(*info), GFP_KERNEL);
7291         if (!info) {
7292                 trace_array_put(tr);
7293                 return -ENOMEM;
7294         }
7295
7296         mutex_lock(&trace_types_lock);
7297
7298         info->iter.tr           = tr;
7299         info->iter.cpu_file     = tracing_get_cpu(inode);
7300         info->iter.trace        = tr->current_trace;
7301         info->iter.trace_buffer = &tr->trace_buffer;
7302         info->spare             = NULL;
7303         /* Force reading ring buffer for first read */
7304         info->read              = (unsigned int)-1;
7305
7306         filp->private_data = info;
7307
7308         tr->current_trace->ref++;
7309
7310         mutex_unlock(&trace_types_lock);
7311
7312         ret = nonseekable_open(inode, filp);
7313         if (ret < 0)
7314                 trace_array_put(tr);
7315
7316         return ret;
7317 }
7318
7319 static __poll_t
7320 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7321 {
7322         struct ftrace_buffer_info *info = filp->private_data;
7323         struct trace_iterator *iter = &info->iter;
7324
7325         return trace_poll(iter, filp, poll_table);
7326 }
7327
7328 static ssize_t
7329 tracing_buffers_read(struct file *filp, char __user *ubuf,
7330                      size_t count, loff_t *ppos)
7331 {
7332         struct ftrace_buffer_info *info = filp->private_data;
7333         struct trace_iterator *iter = &info->iter;
7334         ssize_t ret = 0;
7335         ssize_t size;
7336
7337         if (!count)
7338                 return 0;
7339
7340 #ifdef CONFIG_TRACER_MAX_TRACE
7341         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7342                 return -EBUSY;
7343 #endif
7344
7345         if (!info->spare) {
7346                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7347                                                           iter->cpu_file);
7348                 if (IS_ERR(info->spare)) {
7349                         ret = PTR_ERR(info->spare);
7350                         info->spare = NULL;
7351                 } else {
7352                         info->spare_cpu = iter->cpu_file;
7353                 }
7354         }
7355         if (!info->spare)
7356                 return ret;
7357
7358         /* Do we have previous read data to read? */
7359         if (info->read < PAGE_SIZE)
7360                 goto read;
7361
7362  again:
7363         trace_access_lock(iter->cpu_file);
7364         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7365                                     &info->spare,
7366                                     count,
7367                                     iter->cpu_file, 0);
7368         trace_access_unlock(iter->cpu_file);
7369
7370         if (ret < 0) {
7371                 if (trace_empty(iter)) {
7372                         if ((filp->f_flags & O_NONBLOCK))
7373                                 return -EAGAIN;
7374
7375                         ret = wait_on_pipe(iter, 0);
7376                         if (ret)
7377                                 return ret;
7378
7379                         goto again;
7380                 }
7381                 return 0;
7382         }
7383
7384         info->read = 0;
7385  read:
7386         size = PAGE_SIZE - info->read;
7387         if (size > count)
7388                 size = count;
7389
7390         ret = copy_to_user(ubuf, info->spare + info->read, size);
7391         if (ret == size)
7392                 return -EFAULT;
7393
7394         size -= ret;
7395
7396         *ppos += size;
7397         info->read += size;
7398
7399         return size;
7400 }
7401
7402 static int tracing_buffers_release(struct inode *inode, struct file *file)
7403 {
7404         struct ftrace_buffer_info *info = file->private_data;
7405         struct trace_iterator *iter = &info->iter;
7406
7407         mutex_lock(&trace_types_lock);
7408
7409         iter->tr->current_trace->ref--;
7410
7411         __trace_array_put(iter->tr);
7412
7413         if (info->spare)
7414                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7415                                            info->spare_cpu, info->spare);
7416         kfree(info);
7417
7418         mutex_unlock(&trace_types_lock);
7419
7420         return 0;
7421 }
7422
7423 struct buffer_ref {
7424         struct ring_buffer      *buffer;
7425         void                    *page;
7426         int                     cpu;
7427         refcount_t              refcount;
7428 };
7429
7430 static void buffer_ref_release(struct buffer_ref *ref)
7431 {
7432         if (!refcount_dec_and_test(&ref->refcount))
7433                 return;
7434         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7435         kfree(ref);
7436 }
7437
7438 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7439                                     struct pipe_buffer *buf)
7440 {
7441         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7442
7443         buffer_ref_release(ref);
7444         buf->private = 0;
7445 }
7446
7447 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7448                                 struct pipe_buffer *buf)
7449 {
7450         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7451
7452         if (refcount_read(&ref->refcount) > INT_MAX/2)
7453                 return false;
7454
7455         refcount_inc(&ref->refcount);
7456         return true;
7457 }
7458
7459 /* Pipe buffer operations for a buffer. */
7460 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7461         .confirm                = generic_pipe_buf_confirm,
7462         .release                = buffer_pipe_buf_release,
7463         .steal                  = generic_pipe_buf_nosteal,
7464         .get                    = buffer_pipe_buf_get,
7465 };
7466
7467 /*
7468  * Callback from splice_to_pipe(); used to release any pages left in the
7469  * spd in case we errored out while filling the pipe.
7470  */
7471 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7472 {
7473         struct buffer_ref *ref =
7474                 (struct buffer_ref *)spd->partial[i].private;
7475
7476         buffer_ref_release(ref);
7477         spd->partial[i].private = 0;
7478 }
7479
7480 static ssize_t
7481 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7482                             struct pipe_inode_info *pipe, size_t len,
7483                             unsigned int flags)
7484 {
7485         struct ftrace_buffer_info *info = file->private_data;
7486         struct trace_iterator *iter = &info->iter;
7487         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7488         struct page *pages_def[PIPE_DEF_BUFFERS];
7489         struct splice_pipe_desc spd = {
7490                 .pages          = pages_def,
7491                 .partial        = partial_def,
7492                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7493                 .ops            = &buffer_pipe_buf_ops,
7494                 .spd_release    = buffer_spd_release,
7495         };
7496         struct buffer_ref *ref;
7497         int entries, i;
7498         ssize_t ret = 0;
7499
7500 #ifdef CONFIG_TRACER_MAX_TRACE
7501         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7502                 return -EBUSY;
7503 #endif
7504
7505         if (*ppos & (PAGE_SIZE - 1))
7506                 return -EINVAL;
7507
7508         if (len & (PAGE_SIZE - 1)) {
7509                 if (len < PAGE_SIZE)
7510                         return -EINVAL;
7511                 len &= PAGE_MASK;
7512         }
7513
7514         if (splice_grow_spd(pipe, &spd))
7515                 return -ENOMEM;
7516
7517  again:
7518         trace_access_lock(iter->cpu_file);
7519         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7520
7521         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7522                 struct page *page;
7523                 int r;
7524
7525                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7526                 if (!ref) {
7527                         ret = -ENOMEM;
7528                         break;
7529                 }
7530
7531                 refcount_set(&ref->refcount, 1);
7532                 ref->buffer = iter->trace_buffer->buffer;
7533                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7534                 if (IS_ERR(ref->page)) {
7535                         ret = PTR_ERR(ref->page);
7536                         ref->page = NULL;
7537                         kfree(ref);
7538                         break;
7539                 }
7540                 ref->cpu = iter->cpu_file;
7541
7542                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7543                                           len, iter->cpu_file, 1);
7544                 if (r < 0) {
7545                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7546                                                    ref->page);
7547                         kfree(ref);
7548                         break;
7549                 }
7550
7551                 page = virt_to_page(ref->page);
7552
7553                 spd.pages[i] = page;
7554                 spd.partial[i].len = PAGE_SIZE;
7555                 spd.partial[i].offset = 0;
7556                 spd.partial[i].private = (unsigned long)ref;
7557                 spd.nr_pages++;
7558                 *ppos += PAGE_SIZE;
7559
7560                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7561         }
7562
7563         trace_access_unlock(iter->cpu_file);
7564         spd.nr_pages = i;
7565
7566         /* did we read anything? */
7567         if (!spd.nr_pages) {
7568                 if (ret)
7569                         goto out;
7570
7571                 ret = -EAGAIN;
7572                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7573                         goto out;
7574
7575                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7576                 if (ret)
7577                         goto out;
7578
7579                 goto again;
7580         }
7581
7582         ret = splice_to_pipe(pipe, &spd);
7583 out:
7584         splice_shrink_spd(&spd);
7585
7586         return ret;
7587 }
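/*
 * The splice path above backs the per_cpu/cpuN/trace_pipe_raw files; a
 * reader typically moves whole ring-buffer pages without copying, e.g.
 * (illustrative only, error handling omitted):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *
 *	splice(fd, NULL, pipefd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */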
7588
7589 static const struct file_operations tracing_buffers_fops = {
7590         .open           = tracing_buffers_open,
7591         .read           = tracing_buffers_read,
7592         .poll           = tracing_buffers_poll,
7593         .release        = tracing_buffers_release,
7594         .splice_read    = tracing_buffers_splice_read,
7595         .llseek         = no_llseek,
7596 };
7597
7598 static ssize_t
7599 tracing_stats_read(struct file *filp, char __user *ubuf,
7600                    size_t count, loff_t *ppos)
7601 {
7602         struct inode *inode = file_inode(filp);
7603         struct trace_array *tr = inode->i_private;
7604         struct trace_buffer *trace_buf = &tr->trace_buffer;
7605         int cpu = tracing_get_cpu(inode);
7606         struct trace_seq *s;
7607         unsigned long cnt;
7608         unsigned long long t;
7609         unsigned long usec_rem;
7610
7611         s = kmalloc(sizeof(*s), GFP_KERNEL);
7612         if (!s)
7613                 return -ENOMEM;
7614
7615         trace_seq_init(s);
7616
7617         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7618         trace_seq_printf(s, "entries: %ld\n", cnt);
7619
7620         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7621         trace_seq_printf(s, "overrun: %ld\n", cnt);
7622
7623         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7624         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7625
7626         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7627         trace_seq_printf(s, "bytes: %ld\n", cnt);
7628
7629         if (trace_clocks[tr->clock_id].in_ns) {
7630                 /* local or global for trace_clock */
7631                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7632                 usec_rem = do_div(t, USEC_PER_SEC);
7633                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7634                                                                 t, usec_rem);
7635
7636                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7637                 usec_rem = do_div(t, USEC_PER_SEC);
7638                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7639         } else {
7640                 /* counter or tsc mode for trace_clock */
7641                 trace_seq_printf(s, "oldest event ts: %llu\n",
7642                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7643
7644                 trace_seq_printf(s, "now ts: %llu\n",
7645                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7646         }
7647
7648         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7649         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7650
7651         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7652         trace_seq_printf(s, "read events: %ld\n", cnt);
7653
7654         count = simple_read_from_buffer(ubuf, count, ppos,
7655                                         s->buffer, trace_seq_used(s));
7656
7657         kfree(s);
7658
7659         return count;
7660 }
7661
7662 static const struct file_operations tracing_stats_fops = {
7663         .open           = tracing_open_generic_tr,
7664         .read           = tracing_stats_read,
7665         .llseek         = generic_file_llseek,
7666         .release        = tracing_release_generic_tr,
7667 };
7668
7669 #ifdef CONFIG_DYNAMIC_FTRACE
7670
7671 static ssize_t
7672 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7673                   size_t cnt, loff_t *ppos)
7674 {
7675         ssize_t ret;
7676         char *buf;
7677         int r;
7678
7679         /* 256 should be plenty to hold the amount needed */
7680         buf = kmalloc(256, GFP_KERNEL);
7681         if (!buf)
7682                 return -ENOMEM;
7683
7684         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7685                       ftrace_update_tot_cnt,
7686                       ftrace_number_of_pages,
7687                       ftrace_number_of_groups);
7688
7689         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7690         kfree(buf);
7691         return ret;
7692 }
7693
7694 static const struct file_operations tracing_dyn_info_fops = {
7695         .open           = tracing_open_generic,
7696         .read           = tracing_read_dyn_info,
7697         .llseek         = generic_file_llseek,
7698 };
7699 #endif /* CONFIG_DYNAMIC_FTRACE */
7700
7701 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7702 static void
7703 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7704                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7705                 void *data)
7706 {
7707         tracing_snapshot_instance(tr);
7708 }
7709
7710 static void
7711 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7712                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7713                       void *data)
7714 {
7715         struct ftrace_func_mapper *mapper = data;
7716         long *count = NULL;
7717
7718         if (mapper)
7719                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7720
7721         if (count) {
7722
7723                 if (*count <= 0)
7724                         return;
7725
7726                 (*count)--;
7727         }
7728
7729         tracing_snapshot_instance(tr);
7730 }
7731
7732 static int
7733 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7734                       struct ftrace_probe_ops *ops, void *data)
7735 {
7736         struct ftrace_func_mapper *mapper = data;
7737         long *count = NULL;
7738
7739         seq_printf(m, "%ps:", (void *)ip);
7740
7741         seq_puts(m, "snapshot");
7742
7743         if (mapper)
7744                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7745
7746         if (count)
7747                 seq_printf(m, ":count=%ld\n", *count);
7748         else
7749                 seq_puts(m, ":unlimited\n");
7750
7751         return 0;
7752 }
7753
7754 static int
7755 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7756                      unsigned long ip, void *init_data, void **data)
7757 {
7758         struct ftrace_func_mapper *mapper = *data;
7759
7760         if (!mapper) {
7761                 mapper = allocate_ftrace_func_mapper();
7762                 if (!mapper)
7763                         return -ENOMEM;
7764                 *data = mapper;
7765         }
7766
7767         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7768 }
7769
7770 static void
7771 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7772                      unsigned long ip, void *data)
7773 {
7774         struct ftrace_func_mapper *mapper = data;
7775
7776         if (!ip) {
7777                 if (!mapper)
7778                         return;
7779                 free_ftrace_func_mapper(mapper, NULL);
7780                 return;
7781         }
7782
7783         ftrace_func_mapper_remove_ip(mapper, ip);
7784 }
7785
7786 static struct ftrace_probe_ops snapshot_probe_ops = {
7787         .func                   = ftrace_snapshot,
7788         .print                  = ftrace_snapshot_print,
7789 };
7790
7791 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7792         .func                   = ftrace_count_snapshot,
7793         .print                  = ftrace_snapshot_print,
7794         .init                   = ftrace_snapshot_init,
7795         .free                   = ftrace_snapshot_free,
7796 };
7797
7798 static int
7799 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7800                                char *glob, char *cmd, char *param, int enable)
7801 {
7802         struct ftrace_probe_ops *ops;
7803         void *count = (void *)-1;
7804         char *number;
7805         int ret;
7806
7807         if (!tr)
7808                 return -ENODEV;
7809
7810         /* hash funcs only work with set_ftrace_filter */
7811         if (!enable)
7812                 return -EINVAL;
7813
7814         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7815
7816         if (glob[0] == '!')
7817                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7818
7819         if (!param)
7820                 goto out_reg;
7821
7822         number = strsep(&param, ":");
7823
7824         if (!strlen(number))
7825                 goto out_reg;
7826
7827         /*
7828          * We use the callback data field (which is a pointer)
7829          * as our counter.
7830          */
7831         ret = kstrtoul(number, 0, (unsigned long *)&count);
7832         if (ret)
7833                 return ret;
7834
7835  out_reg:
7836         ret = tracing_alloc_snapshot_instance(tr);
7837         if (ret < 0)
7838                 goto out;
7839
7840         ret = register_ftrace_function_probe(glob, tr, ops, count);
7841
7842  out:
7843         return ret < 0 ? ret : 0;
7844 }
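/*
 * Illustrative use of the "snapshot" function command parsed above, via
 * set_ftrace_filter:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter     # snapshot on every hit
 *	echo 'schedule:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter    # remove the probe
 */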
7845
7846 static struct ftrace_func_command ftrace_snapshot_cmd = {
7847         .name                   = "snapshot",
7848         .func                   = ftrace_trace_snapshot_callback,
7849 };
7850
7851 static __init int register_snapshot_cmd(void)
7852 {
7853         return register_ftrace_command(&ftrace_snapshot_cmd);
7854 }
7855 #else
7856 static inline __init int register_snapshot_cmd(void) { return 0; }
7857 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7858
7859 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7860 {
7861         if (WARN_ON(!tr->dir))
7862                 return ERR_PTR(-ENODEV);
7863
7864         /* Top directory uses NULL as the parent */
7865         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7866                 return NULL;
7867
7868         /* All sub buffers have a descriptor */
7869         return tr->dir;
7870 }
7871
7872 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7873 {
7874         struct dentry *d_tracer;
7875
7876         if (tr->percpu_dir)
7877                 return tr->percpu_dir;
7878
7879         d_tracer = tracing_get_dentry(tr);
7880         if (IS_ERR(d_tracer))
7881                 return NULL;
7882
7883         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7884
7885         WARN_ONCE(!tr->percpu_dir,
7886                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7887
7888         return tr->percpu_dir;
7889 }
7890
7891 static struct dentry *
7892 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7893                       void *data, long cpu, const struct file_operations *fops)
7894 {
7895         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7896
7897         if (ret) /* See tracing_get_cpu() */
7898                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7899         return ret;
7900 }
7901
7902 static void
7903 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7904 {
7905         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7906         struct dentry *d_cpu;
7907         char cpu_dir[30]; /* 30 characters should be more than enough */
7908
7909         if (!d_percpu)
7910                 return;
7911
7912         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7913         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7914         if (!d_cpu) {
7915                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7916                 return;
7917         }
7918
7919         /* per cpu trace_pipe */
7920         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7921                                 tr, cpu, &tracing_pipe_fops);
7922
7923         /* per cpu trace */
7924         trace_create_cpu_file("trace", 0644, d_cpu,
7925                                 tr, cpu, &tracing_fops);
7926
7927         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7928                                 tr, cpu, &tracing_buffers_fops);
7929
7930         trace_create_cpu_file("stats", 0444, d_cpu,
7931                                 tr, cpu, &tracing_stats_fops);
7932
7933         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7934                                 tr, cpu, &tracing_entries_fops);
7935
7936 #ifdef CONFIG_TRACER_SNAPSHOT
7937         trace_create_cpu_file("snapshot", 0644, d_cpu,
7938                                 tr, cpu, &snapshot_fops);
7939
7940         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7941                                 tr, cpu, &snapshot_raw_fops);
7942 #endif
7943 }
7944
7945 #ifdef CONFIG_FTRACE_SELFTEST
7946 /* Let selftest have access to static functions in this file */
7947 #include "trace_selftest.c"
7948 #endif
7949
7950 static ssize_t
7951 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7952                         loff_t *ppos)
7953 {
7954         struct trace_option_dentry *topt = filp->private_data;
7955         char *buf;
7956
7957         if (topt->flags->val & topt->opt->bit)
7958                 buf = "1\n";
7959         else
7960                 buf = "0\n";
7961
7962         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7963 }
7964
7965 static ssize_t
7966 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7967                          loff_t *ppos)
7968 {
7969         struct trace_option_dentry *topt = filp->private_data;
7970         unsigned long val;
7971         int ret;
7972
7973         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7974         if (ret)
7975                 return ret;
7976
7977         if (val != 0 && val != 1)
7978                 return -EINVAL;
7979
7980         if (!!(topt->flags->val & topt->opt->bit) != val) {
7981                 mutex_lock(&trace_types_lock);
7982                 ret = __set_tracer_option(topt->tr, topt->flags,
7983                                           topt->opt, !val);
7984                 mutex_unlock(&trace_types_lock);
7985                 if (ret)
7986                         return ret;
7987         }
7988
7989         *ppos += cnt;
7990
7991         return cnt;
7992 }
7993
7994
7995 static const struct file_operations trace_options_fops = {
7996         .open = tracing_open_generic,
7997         .read = trace_options_read,
7998         .write = trace_options_write,
7999         .llseek = generic_file_llseek,
8000 };
8001
8002 /*
8003  * In order to pass in both the trace_array descriptor as well as the index
8004  * to the flag that the trace option file represents, the trace_array
8005  * has a character array of trace_flags_index[], which holds the index
8006  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8007  * The address of this character array is passed to the flag option file
8008  * read/write callbacks.
8009  *
8010  * In order to extract both the index and the trace_array descriptor,
8011  * get_tr_index() uses the following algorithm.
8012  *
8013  *   idx = *ptr;
8014  *
8015  * The pointer passed in is the address of one entry in that index
8016  * array, and the value stored there is the entry's own index
8017  * (remember index[1] == 1), so dereferencing it yields the index.
8018  *
8019  * Subtracting that index from the pointer gets to the start of the array:
8020  *
8021  *   ptr - idx == &index[0]
8022  *
8023  * Then a simple container_of() from that pointer gets us to the
8024  * trace_array descriptor.
8025  */
8026 static void get_tr_index(void *data, struct trace_array **ptr,
8027                          unsigned int *pindex)
8028 {
8029         *pindex = *(unsigned char *)data;
8030
8031         *ptr = container_of(data - *pindex, struct trace_array,
8032                             trace_flags_index);
8033 }
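/*
 * Worked example of the trick described above (addresses made up
 * purely for illustration): if trace_flags_index[] starts at 0x1000,
 * the option file for flag 3 gets data == 0x1003 and *data == 3, so:
 *
 *      *pindex = *(unsigned char *)data;    evaluates to 3
 *      data - *pindex                       == 0x1000 == &trace_flags_index[0]
 *      container_of(data - *pindex, struct trace_array, trace_flags_index)
 *                                           == the owning trace_array
 */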
8034
8035 static ssize_t
8036 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8037                         loff_t *ppos)
8038 {
8039         void *tr_index = filp->private_data;
8040         struct trace_array *tr;
8041         unsigned int index;
8042         char *buf;
8043
8044         get_tr_index(tr_index, &tr, &index);
8045
8046         if (tr->trace_flags & (1 << index))
8047                 buf = "1\n";
8048         else
8049                 buf = "0\n";
8050
8051         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8052 }
8053
8054 static ssize_t
8055 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8056                          loff_t *ppos)
8057 {
8058         void *tr_index = filp->private_data;
8059         struct trace_array *tr;
8060         unsigned int index;
8061         unsigned long val;
8062         int ret;
8063
8064         get_tr_index(tr_index, &tr, &index);
8065
8066         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8067         if (ret)
8068                 return ret;
8069
8070         if (val != 0 && val != 1)
8071                 return -EINVAL;
8072
8073         mutex_lock(&trace_types_lock);
8074         ret = set_tracer_flag(tr, 1 << index, val);
8075         mutex_unlock(&trace_types_lock);
8076
8077         if (ret < 0)
8078                 return ret;
8079
8080         *ppos += cnt;
8081
8082         return cnt;
8083 }
8084
8085 static const struct file_operations trace_options_core_fops = {
8086         .open = tracing_open_generic,
8087         .read = trace_options_core_read,
8088         .write = trace_options_core_write,
8089         .llseek = generic_file_llseek,
8090 };
8091
8092 struct dentry *trace_create_file(const char *name,
8093                                  umode_t mode,
8094                                  struct dentry *parent,
8095                                  void *data,
8096                                  const struct file_operations *fops)
8097 {
8098         struct dentry *ret;
8099
8100         ret = tracefs_create_file(name, mode, parent, data, fops);
8101         if (!ret)
8102                 pr_warn("Could not create tracefs '%s' entry\n", name);
8103
8104         return ret;
8105 }
8106
8107
8108 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8109 {
8110         struct dentry *d_tracer;
8111
8112         if (tr->options)
8113                 return tr->options;
8114
8115         d_tracer = tracing_get_dentry(tr);
8116         if (IS_ERR(d_tracer))
8117                 return NULL;
8118
8119         tr->options = tracefs_create_dir("options", d_tracer);
8120         if (!tr->options) {
8121                 pr_warn("Could not create tracefs directory 'options'\n");
8122                 return NULL;
8123         }
8124
8125         return tr->options;
8126 }
8127
8128 static void
8129 create_trace_option_file(struct trace_array *tr,
8130                          struct trace_option_dentry *topt,
8131                          struct tracer_flags *flags,
8132                          struct tracer_opt *opt)
8133 {
8134         struct dentry *t_options;
8135
8136         t_options = trace_options_init_dentry(tr);
8137         if (!t_options)
8138                 return;
8139
8140         topt->flags = flags;
8141         topt->opt = opt;
8142         topt->tr = tr;
8143
8144         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8145                                     &trace_options_fops);
8146
8147 }
8148
8149 static void
8150 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8151 {
8152         struct trace_option_dentry *topts;
8153         struct trace_options *tr_topts;
8154         struct tracer_flags *flags;
8155         struct tracer_opt *opts;
8156         int cnt;
8157         int i;
8158
8159         if (!tracer)
8160                 return;
8161
8162         flags = tracer->flags;
8163
8164         if (!flags || !flags->opts)
8165                 return;
8166
8167         /*
8168          * If this is an instance, only create flags for tracers
8169          * the instance may have.
8170          */
8171         if (!trace_ok_for_array(tracer, tr))
8172                 return;
8173
8174         for (i = 0; i < tr->nr_topts; i++) {
8175                 /* Make sure there are no duplicate flags. */
8176                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8177                         return;
8178         }
8179
8180         opts = flags->opts;
8181
8182         for (cnt = 0; opts[cnt].name; cnt++)
8183                 ;
8184
8185         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8186         if (!topts)
8187                 return;
8188
8189         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8190                             GFP_KERNEL);
8191         if (!tr_topts) {
8192                 kfree(topts);
8193                 return;
8194         }
8195
8196         tr->topts = tr_topts;
8197         tr->topts[tr->nr_topts].tracer = tracer;
8198         tr->topts[tr->nr_topts].topts = topts;
8199         tr->nr_topts++;
8200
8201         for (cnt = 0; opts[cnt].name; cnt++) {
8202                 create_trace_option_file(tr, &topts[cnt], flags,
8203                                          &opts[cnt]);
8204                 WARN_ONCE(topts[cnt].entry == NULL,
8205                           "Failed to create trace option: %s",
8206                           opts[cnt].name);
8207         }
8208 }
8209
8210 static struct dentry *
8211 create_trace_option_core_file(struct trace_array *tr,
8212                               const char *option, long index)
8213 {
8214         struct dentry *t_options;
8215
8216         t_options = trace_options_init_dentry(tr);
8217         if (!t_options)
8218                 return NULL;
8219
8220         return trace_create_file(option, 0644, t_options,
8221                                  (void *)&tr->trace_flags_index[index],
8222                                  &trace_options_core_fops);
8223 }
8224
8225 static void create_trace_options_dir(struct trace_array *tr)
8226 {
8227         struct dentry *t_options;
8228         bool top_level = tr == &global_trace;
8229         int i;
8230
8231         t_options = trace_options_init_dentry(tr);
8232         if (!t_options)
8233                 return;
8234
8235         for (i = 0; trace_options[i]; i++) {
8236                 if (top_level ||
8237                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8238                         create_trace_option_core_file(tr, trace_options[i], i);
8239         }
8240 }
8241
8242 static ssize_t
8243 rb_simple_read(struct file *filp, char __user *ubuf,
8244                size_t cnt, loff_t *ppos)
8245 {
8246         struct trace_array *tr = filp->private_data;
8247         char buf[64];
8248         int r;
8249
8250         r = tracer_tracing_is_on(tr);
8251         r = sprintf(buf, "%d\n", r);
8252
8253         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8254 }
8255
8256 static ssize_t
8257 rb_simple_write(struct file *filp, const char __user *ubuf,
8258                 size_t cnt, loff_t *ppos)
8259 {
8260         struct trace_array *tr = filp->private_data;
8261         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8262         unsigned long val;
8263         int ret;
8264
8265         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8266         if (ret)
8267                 return ret;
8268
8269         if (buffer) {
8270                 mutex_lock(&trace_types_lock);
8271                 if (!!val == tracer_tracing_is_on(tr)) {
8272                         val = 0; /* do nothing */
8273                 } else if (val) {
8274                         tracer_tracing_on(tr);
8275                         if (tr->current_trace->start)
8276                                 tr->current_trace->start(tr);
8277                 } else {
8278                         tracer_tracing_off(tr);
8279                         if (tr->current_trace->stop)
8280                                 tr->current_trace->stop(tr);
8281                 }
8282                 mutex_unlock(&trace_types_lock);
8283         }
8284
8285         (*ppos)++;
8286
8287         return cnt;
8288 }
8289
8290 static const struct file_operations rb_simple_fops = {
8291         .open           = tracing_open_generic_tr,
8292         .read           = rb_simple_read,
8293         .write          = rb_simple_write,
8294         .release        = tracing_release_generic_tr,
8295         .llseek         = default_llseek,
8296 };
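/*
 * rb_simple_fops backs the per-instance "tracing_on" file (created in
 * init_tracer_tracefs() below). Typical use from user space, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *      echo 0 > /sys/kernel/tracing/tracing_on    (pause recording)
 *      echo 1 > /sys/kernel/tracing/tracing_on    (resume recording)
 *      cat /sys/kernel/tracing/tracing_on         (prints "0" or "1")
 */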
8297
8298 static ssize_t
8299 buffer_percent_read(struct file *filp, char __user *ubuf,
8300                     size_t cnt, loff_t *ppos)
8301 {
8302         struct trace_array *tr = filp->private_data;
8303         char buf[64];
8304         int r;
8305
8306         r = tr->buffer_percent;
8307         r = sprintf(buf, "%d\n", r);
8308
8309         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8310 }
8311
8312 static ssize_t
8313 buffer_percent_write(struct file *filp, const char __user *ubuf,
8314                      size_t cnt, loff_t *ppos)
8315 {
8316         struct trace_array *tr = filp->private_data;
8317         unsigned long val;
8318         int ret;
8319
8320         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8321         if (ret)
8322                 return ret;
8323
8324         if (val > 100)
8325                 return -EINVAL;
8326
8327         if (!val)
8328                 val = 1;
8329
8330         tr->buffer_percent = val;
8331
8332         (*ppos)++;
8333
8334         return cnt;
8335 }
8336
8337 static const struct file_operations buffer_percent_fops = {
8338         .open           = tracing_open_generic_tr,
8339         .read           = buffer_percent_read,
8340         .write          = buffer_percent_write,
8341         .release        = tracing_release_generic_tr,
8342         .llseek         = default_llseek,
8343 };
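/*
 * buffer_percent_fops backs the per-instance "buffer_percent" file,
 * which acts (roughly) as the watermark for how full the ring buffer
 * should be before blocked readers are woken. Example use, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *      echo 100 > /sys/kernel/tracing/buffer_percent  (wake only when full)
 *      echo 1   > /sys/kernel/tracing/buffer_percent  (wake on almost any data)
 *
 * Note that the write handler above clamps 0 to 1.
 */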
8344
8345 static struct dentry *trace_instance_dir;
8346
8347 static void
8348 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8349
8350 static int
8351 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8352 {
8353         enum ring_buffer_flags rb_flags;
8354
8355         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8356
8357         buf->tr = tr;
8358
8359         buf->buffer = ring_buffer_alloc(size, rb_flags);
8360         if (!buf->buffer)
8361                 return -ENOMEM;
8362
8363         buf->data = alloc_percpu(struct trace_array_cpu);
8364         if (!buf->data) {
8365                 ring_buffer_free(buf->buffer);
8366                 buf->buffer = NULL;
8367                 return -ENOMEM;
8368         }
8369
8370         /* Allocate the first page for all buffers */
8371         set_buffer_entries(&tr->trace_buffer,
8372                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8373
8374         return 0;
8375 }
8376
8377 static int allocate_trace_buffers(struct trace_array *tr, int size)
8378 {
8379         int ret;
8380
8381         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8382         if (ret)
8383                 return ret;
8384
8385 #ifdef CONFIG_TRACER_MAX_TRACE
8386         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8387                                     allocate_snapshot ? size : 1);
8388         if (WARN_ON(ret)) {
8389                 ring_buffer_free(tr->trace_buffer.buffer);
8390                 tr->trace_buffer.buffer = NULL;
8391                 free_percpu(tr->trace_buffer.data);
8392                 tr->trace_buffer.data = NULL;
8393                 return -ENOMEM;
8394         }
8395         tr->allocated_snapshot = allocate_snapshot;
8396
8397         /*
8398          * Only the top level trace array gets its snapshot allocated
8399          * from the kernel command line.
8400          */
8401         allocate_snapshot = false;
8402 #endif
8403         return 0;
8404 }
8405
8406 static void free_trace_buffer(struct trace_buffer *buf)
8407 {
8408         if (buf->buffer) {
8409                 ring_buffer_free(buf->buffer);
8410                 buf->buffer = NULL;
8411                 free_percpu(buf->data);
8412                 buf->data = NULL;
8413         }
8414 }
8415
8416 static void free_trace_buffers(struct trace_array *tr)
8417 {
8418         if (!tr)
8419                 return;
8420
8421         free_trace_buffer(&tr->trace_buffer);
8422
8423 #ifdef CONFIG_TRACER_MAX_TRACE
8424         free_trace_buffer(&tr->max_buffer);
8425 #endif
8426 }
8427
8428 static void init_trace_flags_index(struct trace_array *tr)
8429 {
8430         int i;
8431
8432         /* Used by the trace options files */
8433         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8434                 tr->trace_flags_index[i] = i;
8435 }
8436
8437 static void __update_tracer_options(struct trace_array *tr)
8438 {
8439         struct tracer *t;
8440
8441         for (t = trace_types; t; t = t->next)
8442                 add_tracer_options(tr, t);
8443 }
8444
8445 static void update_tracer_options(struct trace_array *tr)
8446 {
8447         mutex_lock(&trace_types_lock);
8448         __update_tracer_options(tr);
8449         mutex_unlock(&trace_types_lock);
8450 }
8451
8452 static struct trace_array *trace_array_create(const char *name)
8453 {
8454         struct trace_array *tr;
8455         int ret;
8456
8457         ret = -ENOMEM;
8458         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8459         if (!tr)
8460                 return ERR_PTR(ret);
8461
8462         tr->name = kstrdup(name, GFP_KERNEL);
8463         if (!tr->name)
8464                 goto out_free_tr;
8465
8466         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8467                 goto out_free_tr;
8468
8469         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8470
8471         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8472
8473         raw_spin_lock_init(&tr->start_lock);
8474
8475         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8476
8477         tr->current_trace = &nop_trace;
8478
8479         INIT_LIST_HEAD(&tr->systems);
8480         INIT_LIST_HEAD(&tr->events);
8481         INIT_LIST_HEAD(&tr->hist_vars);
8482         INIT_LIST_HEAD(&tr->err_log);
8483
8484         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8485                 goto out_free_tr;
8486
8487         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8488         if (!tr->dir)
8489                 goto out_free_tr;
8490
8491         ret = event_trace_add_tracer(tr->dir, tr);
8492         if (ret) {
8493                 tracefs_remove_recursive(tr->dir);
8494                 goto out_free_tr;
8495         }
8496
8497         ftrace_init_trace_array(tr);
8498
8499         init_tracer_tracefs(tr, tr->dir);
8500         init_trace_flags_index(tr);
8501         __update_tracer_options(tr);
8502
8503         list_add(&tr->list, &ftrace_trace_arrays);
8504
8505         tr->ref++;
8506
8507
8508         return tr;
8509
8510  out_free_tr:
8511         free_trace_buffers(tr);
8512         free_cpumask_var(tr->tracing_cpumask);
8513         kfree(tr->name);
8514         kfree(tr);
8515
8516         return ERR_PTR(ret);
8517 }
8518
8519 static int instance_mkdir(const char *name)
8520 {
8521         struct trace_array *tr;
8522         int ret;
8523
8524         mutex_lock(&event_mutex);
8525         mutex_lock(&trace_types_lock);
8526
8527         ret = -EEXIST;
8528         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8529                 if (tr->name && strcmp(tr->name, name) == 0)
8530                         goto out_unlock;
8531         }
8532
8533         tr = trace_array_create(name);
8534
8535         ret = PTR_ERR_OR_ZERO(tr);
8536
8537 out_unlock:
8538         mutex_unlock(&trace_types_lock);
8539         mutex_unlock(&event_mutex);
8540         return ret;
8541 }
8542
8543 /**
8544  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8545  * @name: The name of the trace array to be looked up/created.
8546  *
8547  * Returns a pointer to the trace array with the given name, or
8548  * NULL if it cannot be created.
8549  *
8550  * NOTE: This function increments the reference counter associated with the
8551  * trace array returned. This makes sure it cannot be freed while in use.
8552  * Use trace_array_put() once the trace array is no longer needed.
8553  *
8554  */
8555 struct trace_array *trace_array_get_by_name(const char *name)
8556 {
8557         struct trace_array *tr;
8558
8559         mutex_lock(&event_mutex);
8560         mutex_lock(&trace_types_lock);
8561
8562         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8563                 if (tr->name && strcmp(tr->name, name) == 0)
8564                         goto out_unlock;
8565         }
8566
8567         tr = trace_array_create(name);
8568
8569         if (IS_ERR(tr))
8570                 tr = NULL;
8571 out_unlock:
8572         if (tr)
8573                 tr->ref++;
8574
8575         mutex_unlock(&trace_types_lock);
8576         mutex_unlock(&event_mutex);
8577         return tr;
8578 }
8579 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
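/*
 * A minimal sketch of how in-kernel code might use this API; the
 * instance name is purely illustrative and error handling is trimmed:
 *
 *      struct trace_array *tr;
 *
 *      tr = trace_array_get_by_name("my_instance");
 *      if (!tr)
 *              return -ENOMEM;
 *      ...
 *      trace_array_put(tr);
 *      trace_array_destroy(tr);
 *
 * trace_array_destroy() (below) refuses with -EBUSY while additional
 * references are still held.
 */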
8580
8581 static int __remove_instance(struct trace_array *tr)
8582 {
8583         int i;
8584
8585         /* Reference counter for a newly created trace array = 1. */
8586         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8587                 return -EBUSY;
8588
8589         list_del(&tr->list);
8590
8591         /* Disable all the flags that were enabled coming in */
8592         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8593                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8594                         set_tracer_flag(tr, 1 << i, 0);
8595         }
8596
8597         tracing_set_nop(tr);
8598         clear_ftrace_function_probes(tr);
8599         event_trace_del_tracer(tr);
8600         ftrace_clear_pids(tr);
8601         ftrace_destroy_function_files(tr);
8602         tracefs_remove_recursive(tr->dir);
8603         free_trace_buffers(tr);
8604
8605         for (i = 0; i < tr->nr_topts; i++) {
8606                 kfree(tr->topts[i].topts);
8607         }
8608         kfree(tr->topts);
8609
8610         free_cpumask_var(tr->tracing_cpumask);
8611         kfree(tr->name);
8612         kfree(tr);
8613         tr = NULL;
8614
8615         return 0;
8616 }
8617
8618 int trace_array_destroy(struct trace_array *this_tr)
8619 {
8620         struct trace_array *tr;
8621         int ret;
8622
8623         if (!this_tr)
8624                 return -EINVAL;
8625
8626         mutex_lock(&event_mutex);
8627         mutex_lock(&trace_types_lock);
8628
8629         ret = -ENODEV;
8630
8631         /* Make sure the trace array exists before destroying it. */
8632         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8633                 if (tr == this_tr) {
8634                         ret = __remove_instance(tr);
8635                         break;
8636                 }
8637         }
8638
8639         mutex_unlock(&trace_types_lock);
8640         mutex_unlock(&event_mutex);
8641
8642         return ret;
8643 }
8644 EXPORT_SYMBOL_GPL(trace_array_destroy);
8645
8646 static int instance_rmdir(const char *name)
8647 {
8648         struct trace_array *tr;
8649         int ret;
8650
8651         mutex_lock(&event_mutex);
8652         mutex_lock(&trace_types_lock);
8653
8654         ret = -ENODEV;
8655         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8656                 if (tr->name && strcmp(tr->name, name) == 0) {
8657                         ret = __remove_instance(tr);
8658                         break;
8659                 }
8660         }
8661
8662         mutex_unlock(&trace_types_lock);
8663         mutex_unlock(&event_mutex);
8664
8665         return ret;
8666 }
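/*
 * instance_mkdir() and instance_rmdir() are invoked when user space
 * creates or removes a directory under "instances", e.g. (assuming
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *      mkdir /sys/kernel/tracing/instances/foo
 *      echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *      rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still has references.
 */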
8667
8668 static __init void create_trace_instances(struct dentry *d_tracer)
8669 {
8670         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8671                                                          instance_mkdir,
8672                                                          instance_rmdir);
8673         if (WARN_ON(!trace_instance_dir))
8674                 return;
8675 }
8676
8677 static void
8678 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8679 {
8680         struct trace_event_file *file;
8681         int cpu;
8682
8683         trace_create_file("available_tracers", 0444, d_tracer,
8684                         tr, &show_traces_fops);
8685
8686         trace_create_file("current_tracer", 0644, d_tracer,
8687                         tr, &set_tracer_fops);
8688
8689         trace_create_file("tracing_cpumask", 0644, d_tracer,
8690                           tr, &tracing_cpumask_fops);
8691
8692         trace_create_file("trace_options", 0644, d_tracer,
8693                           tr, &tracing_iter_fops);
8694
8695         trace_create_file("trace", 0644, d_tracer,
8696                           tr, &tracing_fops);
8697
8698         trace_create_file("trace_pipe", 0444, d_tracer,
8699                           tr, &tracing_pipe_fops);
8700
8701         trace_create_file("buffer_size_kb", 0644, d_tracer,
8702                           tr, &tracing_entries_fops);
8703
8704         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8705                           tr, &tracing_total_entries_fops);
8706
8707         trace_create_file("free_buffer", 0200, d_tracer,
8708                           tr, &tracing_free_buffer_fops);
8709
8710         trace_create_file("trace_marker", 0220, d_tracer,
8711                           tr, &tracing_mark_fops);
8712
8713         file = __find_event_file(tr, "ftrace", "print");
8714         if (file && file->dir)
8715                 trace_create_file("trigger", 0644, file->dir, file,
8716                                   &event_trigger_fops);
8717         tr->trace_marker_file = file;
8718
8719         trace_create_file("trace_marker_raw", 0220, d_tracer,
8720                           tr, &tracing_mark_raw_fops);
8721
8722         trace_create_file("trace_clock", 0644, d_tracer, tr,
8723                           &trace_clock_fops);
8724
8725         trace_create_file("tracing_on", 0644, d_tracer,
8726                           tr, &rb_simple_fops);
8727
8728         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8729                           &trace_time_stamp_mode_fops);
8730
8731         tr->buffer_percent = 50;
8732
8733         trace_create_file("buffer_percent", 0444, d_tracer,
8734                         tr, &buffer_percent_fops);
8735
8736         create_trace_options_dir(tr);
8737
8738 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8739         trace_create_maxlat_file(tr, d_tracer);
8740 #endif
8741
8742         if (ftrace_create_function_files(tr, d_tracer))
8743                 WARN(1, "Could not allocate function filter files");
8744
8745 #ifdef CONFIG_TRACER_SNAPSHOT
8746         trace_create_file("snapshot", 0644, d_tracer,
8747                           tr, &snapshot_fops);
8748 #endif
8749
8750         trace_create_file("error_log", 0644, d_tracer,
8751                           tr, &tracing_err_log_fops);
8752
8753         for_each_tracing_cpu(cpu)
8754                 tracing_init_tracefs_percpu(tr, cpu);
8755
8756         ftrace_init_tracefs(tr, d_tracer);
8757 }
8758
8759 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8760 {
8761         struct vfsmount *mnt;
8762         struct file_system_type *type;
8763
8764         /*
8765          * To maintain backward compatibility for tools that mount
8766          * debugfs to get to the tracing facility, tracefs is automatically
8767          * mounted to the debugfs/tracing directory.
8768          */
8769         type = get_fs_type("tracefs");
8770         if (!type)
8771                 return NULL;
8772         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8773         put_filesystem(type);
8774         if (IS_ERR(mnt))
8775                 return NULL;
8776         mntget(mnt);
8777
8778         return mnt;
8779 }
8780
8781 /**
8782  * tracing_init_dentry - initialize top level trace array
8783  *
8784  * This is called when creating files or directories in the tracing
8785  * directory. It is called via fs_initcall() by any of the boot up code
8786  * and expects to return the dentry of the top level tracing directory.
8787  */
8788 struct dentry *tracing_init_dentry(void)
8789 {
8790         struct trace_array *tr = &global_trace;
8791
8792         /* The top level trace array uses NULL as the parent */
8793         if (tr->dir)
8794                 return NULL;
8795
8796         if (WARN_ON(!tracefs_initialized()) ||
8797                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8798                  WARN_ON(!debugfs_initialized())))
8799                 return ERR_PTR(-ENODEV);
8800
8801         /*
8802          * As there may still be users that expect the tracing
8803          * files to exist in debugfs/tracing, we must automount
8804          * the tracefs file system there, so older tools still
8805          * work with the newer kernel.
8806          */
8807         tr->dir = debugfs_create_automount("tracing", NULL,
8808                                            trace_automount, NULL);
8809
8810         return NULL;
8811 }
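/*
 * The automount set up above is what keeps the legacy path working.
 * For example, with debugfs mounted in the usual place:
 *
 *      mount -t debugfs nodev /sys/kernel/debug
 *      ls /sys/kernel/debug/tracing
 *
 * The first access of debugfs/tracing triggers trace_automount() and
 * transparently mounts tracefs there.
 */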
8812
8813 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8814 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8815
8816 static void __init trace_eval_init(void)
8817 {
8818         int len;
8819
8820         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8821         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8822 }
8823
8824 #ifdef CONFIG_MODULES
8825 static void trace_module_add_evals(struct module *mod)
8826 {
8827         if (!mod->num_trace_evals)
8828                 return;
8829
8830         /*
8831          * Modules with bad taint do not have events created, do
8832          * not bother with enums either.
8833          */
8834         if (trace_module_has_bad_taint(mod))
8835                 return;
8836
8837         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8838 }
8839
8840 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8841 static void trace_module_remove_evals(struct module *mod)
8842 {
8843         union trace_eval_map_item *map;
8844         union trace_eval_map_item **last = &trace_eval_maps;
8845
8846         if (!mod->num_trace_evals)
8847                 return;
8848
8849         mutex_lock(&trace_eval_mutex);
8850
8851         map = trace_eval_maps;
8852
8853         while (map) {
8854                 if (map->head.mod == mod)
8855                         break;
8856                 map = trace_eval_jmp_to_tail(map);
8857                 last = &map->tail.next;
8858                 map = map->tail.next;
8859         }
8860         if (!map)
8861                 goto out;
8862
8863         *last = trace_eval_jmp_to_tail(map)->tail.next;
8864         kfree(map);
8865  out:
8866         mutex_unlock(&trace_eval_mutex);
8867 }
8868 #else
8869 static inline void trace_module_remove_evals(struct module *mod) { }
8870 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8871
8872 static int trace_module_notify(struct notifier_block *self,
8873                                unsigned long val, void *data)
8874 {
8875         struct module *mod = data;
8876
8877         switch (val) {
8878         case MODULE_STATE_COMING:
8879                 trace_module_add_evals(mod);
8880                 break;
8881         case MODULE_STATE_GOING:
8882                 trace_module_remove_evals(mod);
8883                 break;
8884         }
8885
8886         return 0;
8887 }
8888
8889 static struct notifier_block trace_module_nb = {
8890         .notifier_call = trace_module_notify,
8891         .priority = 0,
8892 };
8893 #endif /* CONFIG_MODULES */
8894
8895 static __init int tracer_init_tracefs(void)
8896 {
8897         struct dentry *d_tracer;
8898
8899         trace_access_lock_init();
8900
8901         d_tracer = tracing_init_dentry();
8902         if (IS_ERR(d_tracer))
8903                 return 0;
8904
8905         event_trace_init();
8906
8907         init_tracer_tracefs(&global_trace, d_tracer);
8908         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8909
8910         trace_create_file("tracing_thresh", 0644, d_tracer,
8911                         &global_trace, &tracing_thresh_fops);
8912
8913         trace_create_file("README", 0444, d_tracer,
8914                         NULL, &tracing_readme_fops);
8915
8916         trace_create_file("saved_cmdlines", 0444, d_tracer,
8917                         NULL, &tracing_saved_cmdlines_fops);
8918
8919         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8920                           NULL, &tracing_saved_cmdlines_size_fops);
8921
8922         trace_create_file("saved_tgids", 0444, d_tracer,
8923                         NULL, &tracing_saved_tgids_fops);
8924
8925         trace_eval_init();
8926
8927         trace_create_eval_file(d_tracer);
8928
8929 #ifdef CONFIG_MODULES
8930         register_module_notifier(&trace_module_nb);
8931 #endif
8932
8933 #ifdef CONFIG_DYNAMIC_FTRACE
8934         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8935                         NULL, &tracing_dyn_info_fops);
8936 #endif
8937
8938         create_trace_instances(d_tracer);
8939
8940         update_tracer_options(&global_trace);
8941
8942         return 0;
8943 }
8944
8945 static int trace_panic_handler(struct notifier_block *this,
8946                                unsigned long event, void *unused)
8947 {
8948         if (ftrace_dump_on_oops)
8949                 ftrace_dump(ftrace_dump_on_oops);
8950         return NOTIFY_OK;
8951 }
8952
8953 static struct notifier_block trace_panic_notifier = {
8954         .notifier_call  = trace_panic_handler,
8955         .next           = NULL,
8956         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8957 };
8958
8959 static int trace_die_handler(struct notifier_block *self,
8960                              unsigned long val,
8961                              void *data)
8962 {
8963         switch (val) {
8964         case DIE_OOPS:
8965                 if (ftrace_dump_on_oops)
8966                         ftrace_dump(ftrace_dump_on_oops);
8967                 break;
8968         default:
8969                 break;
8970         }
8971         return NOTIFY_OK;
8972 }
8973
8974 static struct notifier_block trace_die_notifier = {
8975         .notifier_call = trace_die_handler,
8976         .priority = 200
8977 };
8978
8979 /*
8980  * printk is set to a max of 1024 bytes; we really don't need it that big.
8981  * Nothing should be printing 1000 characters anyway.
8982  */
8983 #define TRACE_MAX_PRINT         1000
8984
8985 /*
8986  * Define here KERN_TRACE so that we have one place to modify
8987  * it if we decide to change what log level the ftrace dump
8988  * should be at.
8989  */
8990 #define KERN_TRACE              KERN_EMERG
8991
8992 void
8993 trace_printk_seq(struct trace_seq *s)
8994 {
8995         /* Probably should print a warning here. */
8996         if (s->seq.len >= TRACE_MAX_PRINT)
8997                 s->seq.len = TRACE_MAX_PRINT;
8998
8999         /*
9000          * More paranoid code. Although the buffer size is set to
9001          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9002          * an extra layer of protection.
9003          */
9004         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9005                 s->seq.len = s->seq.size - 1;
9006
9007         /* should be zero ended, but we are paranoid. */
9008         /* Should be NUL terminated already, but we are paranoid. */
9009
9010         printk(KERN_TRACE "%s", s->buffer);
9011
9012         trace_seq_init(s);
9013 }
9014
9015 void trace_init_global_iter(struct trace_iterator *iter)
9016 {
9017         iter->tr = &global_trace;
9018         iter->trace = iter->tr->current_trace;
9019         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9020         iter->trace_buffer = &global_trace.trace_buffer;
9021
9022         if (iter->trace && iter->trace->open)
9023                 iter->trace->open(iter);
9024
9025         /* Annotate start of buffers if we had overruns */
9026         if (ring_buffer_overruns(iter->trace_buffer->buffer))
9027                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9028
9029         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9030         if (trace_clocks[iter->tr->clock_id].in_ns)
9031                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9032 }
9033
9034 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9035 {
9036         /* use static because iter can be a bit big for the stack */
9037         static struct trace_iterator iter;
9038         static atomic_t dump_running;
9039         struct trace_array *tr = &global_trace;
9040         unsigned int old_userobj;
9041         unsigned long flags;
9042         int cnt = 0, cpu;
9043
9044         /* Only allow one dump user at a time. */
9045         if (atomic_inc_return(&dump_running) != 1) {
9046                 atomic_dec(&dump_running);
9047                 return;
9048         }
9049
9050         /*
9051          * Always turn off tracing when we dump.
9052          * We don't need to show trace output of what happens
9053          * between multiple crashes.
9054          *
9055          * If the user does a sysrq-z, then they can re-enable
9056          * tracing with echo 1 > tracing_on.
9057          */
9058         tracing_off();
9059
9060         local_irq_save(flags);
9061         printk_nmi_direct_enter();
9062
9063         /* Simulate the iterator */
9064         trace_init_global_iter(&iter);
9065
9066         for_each_tracing_cpu(cpu) {
9067                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9068         }
9069
9070         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9071
9072         /* don't look at user memory in panic mode */
9073         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9074
9075         switch (oops_dump_mode) {
9076         case DUMP_ALL:
9077                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9078                 break;
9079         case DUMP_ORIG:
9080                 iter.cpu_file = raw_smp_processor_id();
9081                 break;
9082         case DUMP_NONE:
9083                 goto out_enable;
9084         default:
9085                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9086                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9087         }
9088
9089         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9090
9091         /* Did function tracer already get disabled? */
9092         if (ftrace_is_dead()) {
9093                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9094                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9095         }
9096
9097          * We need to stop all tracing on all CPUs to read
9098          * the next buffer. This is a bit expensive, but is
9099          * not done often. We print all that we can read,
9100          * not done often. We fill all what we can read,
9101          * and then release the locks again.
9102          */
9103
9104         while (!trace_empty(&iter)) {
9105
9106                 if (!cnt)
9107                         printk(KERN_TRACE "---------------------------------\n");
9108
9109                 cnt++;
9110
9111                 trace_iterator_reset(&iter);
9112                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9113
9114                 if (trace_find_next_entry_inc(&iter) != NULL) {
9115                         int ret;
9116
9117                         ret = print_trace_line(&iter);
9118                         if (ret != TRACE_TYPE_NO_CONSUME)
9119                                 trace_consume(&iter);
9120                 }
9121                 touch_nmi_watchdog();
9122
9123                 trace_printk_seq(&iter.seq);
9124         }
9125
9126         if (!cnt)
9127                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9128         else
9129                 printk(KERN_TRACE "---------------------------------\n");
9130
9131  out_enable:
9132         tr->trace_flags |= old_userobj;
9133
9134         for_each_tracing_cpu(cpu) {
9135                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9136         }
9137         atomic_dec(&dump_running);
9138         printk_nmi_direct_exit();
9139         local_irq_restore(flags);
9140 }
9141 EXPORT_SYMBOL_GPL(ftrace_dump);
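/*
 * ftrace_dump() is normally reached through the ftrace_dump_on_oops
 * plumbing (kernel command line or sysctl) when an oops happens, or
 * directly via sysrq-z as noted in the comment above. A rough sketch,
 * with the exact trigger paths living outside this file:
 *
 *      echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *      echo z > /proc/sysrq-trigger
 */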
9142
9143 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9144 {
9145         char **argv;
9146         int argc, ret;
9147
9148         argc = 0;
9149         ret = 0;
9150         argv = argv_split(GFP_KERNEL, buf, &argc);
9151         if (!argv)
9152                 return -ENOMEM;
9153
9154         if (argc)
9155                 ret = createfn(argc, argv);
9156
9157         argv_free(argv);
9158
9159         return ret;
9160 }
9161
9162 #define WRITE_BUFSIZE  4096
9163
9164 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9165                                 size_t count, loff_t *ppos,
9166                                 int (*createfn)(int, char **))
9167 {
9168         char *kbuf, *buf, *tmp;
9169         int ret = 0;
9170         size_t done = 0;
9171         size_t size;
9172
9173         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9174         if (!kbuf)
9175                 return -ENOMEM;
9176
9177         while (done < count) {
9178                 size = count - done;
9179
9180                 if (size >= WRITE_BUFSIZE)
9181                         size = WRITE_BUFSIZE - 1;
9182
9183                 if (copy_from_user(kbuf, buffer + done, size)) {
9184                         ret = -EFAULT;
9185                         goto out;
9186                 }
9187                 kbuf[size] = '\0';
9188                 buf = kbuf;
9189                 do {
9190                         tmp = strchr(buf, '\n');
9191                         if (tmp) {
9192                                 *tmp = '\0';
9193                                 size = tmp - buf + 1;
9194                         } else {
9195                                 size = strlen(buf);
9196                                 if (done + size < count) {
9197                                         if (buf != kbuf)
9198                                                 break;
9199                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9200                                         pr_warn("Line length is too long: Should be less than %d\n",
9201                                                 WRITE_BUFSIZE - 2);
9202                                         ret = -EINVAL;
9203                                         goto out;
9204                                 }
9205                         }
9206                         done += size;
9207
9208                         /* Remove comments */
9209                         tmp = strchr(buf, '#');
9210
9211                         if (tmp)
9212                                 *tmp = '\0';
9213
9214                         ret = trace_run_command(buf, createfn);
9215                         if (ret)
9216                                 goto out;
9217                         buf += size;
9218
9219                 } while (done < count);
9220         }
9221         ret = done;
9222
9223 out:
9224         kfree(kbuf);
9225
9226         return ret;
9227 }
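/*
 * trace_parse_run_command() is the write helper behind command-style
 * tracefs files such as kprobe_events: input is split on newlines,
 * '#' starts a comment, and each line is passed to createfn as an
 * argv[]. A hypothetical write showing the format (the probe
 * definition itself is only an example):
 *
 *      echo 'p:myprobe do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *      echo '-:myprobe'             >> /sys/kernel/tracing/kprobe_events
 */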
9228
9229 __init static int tracer_alloc_buffers(void)
9230 {
9231         int ring_buf_size;
9232         int ret = -ENOMEM;
9233
9234         /*
9235          * Make sure we don't accidentally add more trace options
9236          * than we have bits for.
9237          */
9238         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9239
9240         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9241                 goto out;
9242
9243         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9244                 goto out_free_buffer_mask;
9245
9246         /* Only allocate trace_printk buffers if a trace_printk exists */
9247         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9248                 /* Must be called before global_trace.buffer is allocated */
9249                 trace_printk_init_buffers();
9250
9251         /* To save memory, keep the ring buffer size to its minimum */
9252         if (ring_buffer_expanded)
9253                 ring_buf_size = trace_buf_size;
9254         else
9255                 ring_buf_size = 1;
9256
9257         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9258         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9259
9260         raw_spin_lock_init(&global_trace.start_lock);
9261
9262         /*
9263          * The prepare callback allocates some memory for the ring buffer. We
9264          * don't free the buffer if the CPU goes down. If we were to free
9265          * the buffer, then the user would lose any trace that was in the
9266          * buffer. The memory will be removed once the "instance" is removed.
9267          */
9268         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9269                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9270                                       NULL);
9271         if (ret < 0)
9272                 goto out_free_cpumask;
9273         /* Used for event triggers */
9274         ret = -ENOMEM;
9275         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9276         if (!temp_buffer)
9277                 goto out_rm_hp_state;
9278
9279         if (trace_create_savedcmd() < 0)
9280                 goto out_free_temp_buffer;
9281
9282         /* TODO: make the number of buffers hot pluggable with CPUs */
9283         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9284                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9285                 WARN_ON(1);
9286                 goto out_free_savedcmd;
9287         }
9288
9289         if (global_trace.buffer_disabled)
9290                 tracing_off();
9291
9292         if (trace_boot_clock) {
9293                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9294                 if (ret < 0)
9295                         pr_warn("Trace clock %s not defined, going back to default\n",
9296                                 trace_boot_clock);
9297         }
9298
9299         /*
9300          * register_tracer() might reference current_trace, so it
9301          * needs to be set before we register anything. This is
9302          * just a bootstrap of current_trace anyway.
9303          */
9304         global_trace.current_trace = &nop_trace;
9305
9306         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9307
9308         ftrace_init_global_array_ops(&global_trace);
9309
9310         init_trace_flags_index(&global_trace);
9311
9312         register_tracer(&nop_trace);
9313
9314         /* Function tracing may start here (via kernel command line) */
9315         init_function_trace();
9316
9317         /* All seems OK, enable tracing */
9318         tracing_disabled = 0;
9319
9320         atomic_notifier_chain_register(&panic_notifier_list,
9321                                        &trace_panic_notifier);
9322
9323         register_die_notifier(&trace_die_notifier);
9324
9325         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9326
9327         INIT_LIST_HEAD(&global_trace.systems);
9328         INIT_LIST_HEAD(&global_trace.events);
9329         INIT_LIST_HEAD(&global_trace.hist_vars);
9330         INIT_LIST_HEAD(&global_trace.err_log);
9331         list_add(&global_trace.list, &ftrace_trace_arrays);
9332
9333         apply_trace_boot_options();
9334
9335         register_snapshot_cmd();
9336
9337         return 0;
9338
9339 out_free_savedcmd:
9340         free_saved_cmdlines_buffer(savedcmd);
9341 out_free_temp_buffer:
9342         ring_buffer_free(temp_buffer);
9343 out_rm_hp_state:
9344         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9345 out_free_cpumask:
9346         free_cpumask_var(global_trace.tracing_cpumask);
9347 out_free_buffer_mask:
9348         free_cpumask_var(tracing_buffer_mask);
9349 out:
9350         return ret;
9351 }
9352
9353 void __init early_trace_init(void)
9354 {
9355         if (tracepoint_printk) {
9356                 tracepoint_print_iter =
9357                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9358                 if (WARN_ON(!tracepoint_print_iter))
9359                         tracepoint_printk = 0;
9360                 else
9361                         static_key_enable(&tracepoint_printk_key.key);
9362         }
9363         tracer_alloc_buffers();
9364 }
9365
9366 void __init trace_init(void)
9367 {
9368         trace_event_init();
9369 }
9370
9371 __init static int clear_boot_tracer(void)
9372 {
9373         /*
9374          * The default bootup tracer name is stored in an init section.
9375          * This function is called at late_initcall time. If we did not
9376          * find the boot tracer, then clear it out, to prevent
9377          * later registration from accessing the buffer that is
9378          * about to be freed.
9379          */
9380         if (!default_bootup_tracer)
9381                 return 0;
9382
9383         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9384                default_bootup_tracer);
9385         default_bootup_tracer = NULL;
9386
9387         return 0;
9388 }
9389
9390 fs_initcall(tracer_init_tracefs);
9391 late_initcall_sync(clear_boot_tracer);
9392
9393 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9394 __init static int tracing_set_default_clock(void)
9395 {
9396         /* sched_clock_stable() is determined in late_initcall */
9397         if (!trace_boot_clock && !sched_clock_stable()) {
9398                 printk(KERN_WARNING
9399                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9400                        "If you want to keep using the local clock, then add:\n"
9401                        "  \"trace_clock=local\"\n"
9402                        "on the kernel command line\n");
9403                 tracing_set_clock(&global_trace, "global");
9404         }
9405
9406         return 0;
9407 }
9408 late_initcall_sync(tracing_set_default_clock);
9409 #endif