kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct ring_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
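
/*
 * For example, booting with "ftrace_dump_on_oops" on the command line dumps
 * the buffers of all CPUs when an oops happens, while
 * "ftrace_dump_on_oops=orig_cpu" dumps only the buffer of the CPU that
 * triggered the oops.
 */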
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
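
/*
 * For example, ns2usecs() rounds to the nearest microsecond:
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */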
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct ring_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
390 {
391         /*
392          * Return false, because if filtered_pids does not exist,
393          * all pids are good to trace.
394          */
395         if (!filtered_pids)
396                 return false;
397
398         return !trace_find_filtered_pid(filtered_pids, task->pid);
399 }
400
401 /**
402  * trace_filter_add_remove_task - Add or remove a task from a pid_list
403  * @pid_list: The list to modify
404  * @self: The current task for fork or NULL for exit
405  * @task: The task to add or remove
406  *
407  * If adding a task, if @self is defined, the task is only added if @self
408  * is also included in @pid_list. This happens on fork and tasks should
409  * only be added when the parent is listed. If @self is NULL, then the
410  * @task pid will be removed from the list, which would happen on exit
411  * of a task.
412  */
413 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
414                                   struct task_struct *self,
415                                   struct task_struct *task)
416 {
417         if (!pid_list)
418                 return;
419
420         /* For forks, we only add if the forking task is listed */
421         if (self) {
422                 if (!trace_find_filtered_pid(pid_list, self->pid))
423                         return;
424         }
425
426         /* Sorry, but we don't support pid_max changing after setting */
427         if (task->pid >= pid_list->pid_max)
428                 return;
429
430         /* "self" is set for forks, and NULL for exits */
431         if (self)
432                 set_bit(task->pid, pid_list->pids);
433         else
434                 clear_bit(task->pid, pid_list->pids);
435 }
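
/*
 * Illustrative sketch (not taken from this file; the handler names and the
 * way the pid list is passed in are assumptions): the helper above is meant
 * to be called from sched_process_fork/exit tracepoint handlers so that the
 * pid filter follows forked children and drops exiting tasks:
 *
 *	static void example_sched_fork(void *data, struct task_struct *self,
 *				       struct task_struct *task)
 *	{
 *		struct trace_pid_list *pid_list = data;
 *
 *		trace_filter_add_remove_task(pid_list, self, task);
 *	}
 *
 *	static void example_sched_exit(void *data, struct task_struct *task)
 *	{
 *		struct trace_pid_list *pid_list = data;
 *
 *		trace_filter_add_remove_task(pid_list, NULL, task);
 *	}
 */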
436
437 /**
438  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
439  * @pid_list: The pid list to show
440  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
441  * @pos: The position of the file
442  *
443  * This is used by the seq_file "next" operation to iterate the pids
444  * listed in a trace_pid_list structure.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
450 {
451         unsigned long pid = (unsigned long)v;
452
453         (*pos)++;
454
455         /* pid already is +1 of the actual previous bit */
456         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
457
458         /* Return pid + 1 to allow zero to be represented */
459         if (pid < pid_list->pid_max)
460                 return (void *)(pid + 1);
461
462         return NULL;
463 }
464
465 /**
466  * trace_pid_start - Used for seq_file to start reading pid lists
467  * @pid_list: The pid list to show
468  * @pos: The position of the file
469  *
470  * This is used by seq_file "start" operation to start the iteration
471  * of listing pids.
472  *
473  * Returns the pid+1 as we want to display pid of zero, but NULL would
474  * stop the iteration.
475  */
476 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
477 {
478         unsigned long pid;
479         loff_t l = 0;
480
481         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
482         if (pid >= pid_list->pid_max)
483                 return NULL;
484
485         /* Return pid + 1 so that zero can be the exit value */
486         for (pid++; pid && l < *pos;
487              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
488                 ;
489         return (void *)pid;
490 }
491
492 /**
493  * trace_pid_show - show the current pid in seq_file processing
494  * @m: The seq_file structure to write into
495  * @v: A void pointer of the pid (+1) value to display
496  *
497  * Can be directly used by seq_file operations to display the current
498  * pid value.
499  */
500 int trace_pid_show(struct seq_file *m, void *v)
501 {
502         unsigned long pid = (unsigned long)v - 1;
503
504         seq_printf(m, "%lu\n", pid);
505         return 0;
506 }
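
/*
 * Illustrative sketch (example names are assumptions, not part of this
 * file): trace_pid_start(), trace_pid_next() and trace_pid_show() are meant
 * to be wrapped by seq_file callbacks. A caller that keeps its pid list in
 * the seq_file private data could wire them up roughly like this:
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pids_seq_ops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */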
507
508 /* 128 should be much more than enough */
509 #define PID_BUF_SIZE            127
510
511 int trace_pid_write(struct trace_pid_list *filtered_pids,
512                     struct trace_pid_list **new_pid_list,
513                     const char __user *ubuf, size_t cnt)
514 {
515         struct trace_pid_list *pid_list;
516         struct trace_parser parser;
517         unsigned long val;
518         int nr_pids = 0;
519         ssize_t read = 0;
520         ssize_t ret = 0;
521         loff_t pos;
522         pid_t pid;
523
524         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
525                 return -ENOMEM;
526
527         /*
528          * Always create a new array. The write is an all-or-nothing
529          * operation: a new array is built whenever the user adds pids,
530          * and if the operation fails the current list is left
531          * unmodified.
532          */
533         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
534         if (!pid_list) {
535                 trace_parser_put(&parser);
536                 return -ENOMEM;
537         }
538
539         pid_list->pid_max = READ_ONCE(pid_max);
540
541         /* Only truncating will shrink pid_max */
542         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
543                 pid_list->pid_max = filtered_pids->pid_max;
544
545         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
546         if (!pid_list->pids) {
547                 trace_parser_put(&parser);
548                 kfree(pid_list);
549                 return -ENOMEM;
550         }
551
552         if (filtered_pids) {
553                 /* copy the current bits to the new max */
554                 for_each_set_bit(pid, filtered_pids->pids,
555                                  filtered_pids->pid_max) {
556                         set_bit(pid, pid_list->pids);
557                         nr_pids++;
558                 }
559         }
560
561         while (cnt > 0) {
562
563                 pos = 0;
564
565                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
566                 if (ret < 0 || !trace_parser_loaded(&parser))
567                         break;
568
569                 read += ret;
570                 ubuf += ret;
571                 cnt -= ret;
572
573                 ret = -EINVAL;
574                 if (kstrtoul(parser.buffer, 0, &val))
575                         break;
576                 if (val >= pid_list->pid_max)
577                         break;
578
579                 pid = (pid_t)val;
580
581                 set_bit(pid, pid_list->pids);
582                 nr_pids++;
583
584                 trace_parser_clear(&parser);
585                 ret = 0;
586         }
587         trace_parser_put(&parser);
588
589         if (ret < 0) {
590                 trace_free_pid_list(pid_list);
591                 return ret;
592         }
593
594         if (!nr_pids) {
595                 /* Cleared the list of pids */
596                 trace_free_pid_list(pid_list);
597                 read = ret;
598                 pid_list = NULL;
599         }
600
601         *new_pid_list = pid_list;
602
603         return read;
604 }
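
/*
 * Illustrative example (the file name is an assumption about a typical
 * caller): trace_pid_write() parses whitespace-separated pids, so writing
 * "1 23 456" through a file backed by it (such as a per-instance
 * set_event_pid file) sets bits 1, 23 and 456 in the new pid list, on top
 * of any pids copied over from @filtered_pids.
 */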
605
606 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
607 {
608         u64 ts;
609
610         /* Early boot up does not have a buffer yet */
611         if (!buf->buffer)
612                 return trace_clock_local();
613
614         ts = ring_buffer_time_stamp(buf->buffer, cpu);
615         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
616
617         return ts;
618 }
619
620 u64 ftrace_now(int cpu)
621 {
622         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
623 }
624
625 /**
626  * tracing_is_enabled - Show if global_trace has been disabled
627  *
628  * Shows if the global trace has been enabled or not. It uses the
629  * Shows if the global trace has been enabled or not. It uses the
630  * mirror flag "buffer_disabled", which is meant for fast paths such as
631  * the irqsoff tracer, but may be inaccurate due to races. If you
632  * slower, but accurate.
633  */
634 int tracing_is_enabled(void)
635 {
636         /*
637          * For quick access (irqsoff uses this in fast path), just
638          * return the mirror variable of the state of the ring buffer.
639          * It's a little racy, but we don't really care.
640          */
641         smp_rmb();
642         return !global_trace.buffer_disabled;
643 }
644
645 /*
646  * trace_buf_size is the size in bytes that is allocated
647  * for a buffer. Note, the number of bytes is always rounded
648  * to page size.
649  *
650  * This number is purposely set to a low number of 16384.
651  * If the dump on oops happens, it is much appreciated not to have
652  * to wait for all that output. In any case, this can be configured
653  * at both boot time and run time.
654  */
655 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
656
657 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
658
659 /* trace_types holds a link list of available tracers. */
660 static struct tracer            *trace_types __read_mostly;
661
662 /*
663  * trace_types_lock is used to protect the trace_types list.
664  */
665 DEFINE_MUTEX(trace_types_lock);
666
667 /*
668  * serialize the access of the ring buffer
669  *
670  * The ring buffer serializes readers, but that is only low-level protection.
671  * The validity of the events (returned by ring_buffer_peek() etc.)
672  * is not protected by the ring buffer.
673  *
674  * The content of events may become garbage if we allow another process to
675  * consume these events concurrently:
676  *   A) the page of the consumed events may become a normal page
677  *      (not a reader page) in the ring buffer, and this page will be rewritten
678  *      by the events producer.
679  *   B) the page of the consumed events may become a page for splice_read,
680  *      and this page will be returned to the system.
681  *
682  * These primitives allow multiple processes to access different per-cpu
683  * ring buffers concurrently.
684  *
685  * These primitives don't distinguish read-only and read-consume access.
686  * Multiple read-only accesses are also serialized.
687  */
688
689 #ifdef CONFIG_SMP
690 static DECLARE_RWSEM(all_cpu_access_lock);
691 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         if (cpu == RING_BUFFER_ALL_CPUS) {
696                 /* gain it for accessing the whole ring buffer. */
697                 down_write(&all_cpu_access_lock);
698         } else {
699                 /* gain it for accessing a cpu ring buffer. */
700
701                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
702                 down_read(&all_cpu_access_lock);
703
704                 /* Secondly block other access to this @cpu ring buffer. */
705                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
706         }
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         if (cpu == RING_BUFFER_ALL_CPUS) {
712                 up_write(&all_cpu_access_lock);
713         } else {
714                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
715                 up_read(&all_cpu_access_lock);
716         }
717 }
718
719 static inline void trace_access_lock_init(void)
720 {
721         int cpu;
722
723         for_each_possible_cpu(cpu)
724                 mutex_init(&per_cpu(cpu_access_lock, cpu));
725 }
726
727 #else
728
729 static DEFINE_MUTEX(access_lock);
730
731 static inline void trace_access_lock(int cpu)
732 {
733         (void)cpu;
734         mutex_lock(&access_lock);
735 }
736
737 static inline void trace_access_unlock(int cpu)
738 {
739         (void)cpu;
740         mutex_unlock(&access_lock);
741 }
742
743 static inline void trace_access_lock_init(void)
744 {
745 }
746
747 #endif
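
/*
 * Illustrative usage of the primitives above (a sketch, not taken from this
 * file): a reader of a single per-cpu buffer brackets its accesses with the
 * per-cpu lock, while a reader that touches every cpu passes
 * RING_BUFFER_ALL_CPUS to take the write side of all_cpu_access_lock:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	trace_access_unlock(cpu);
 */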
748
749 #ifdef CONFIG_STACKTRACE
750 static void __ftrace_trace_stack(struct ring_buffer *buffer,
751                                  unsigned long flags,
752                                  int skip, int pc, struct pt_regs *regs);
753 static inline void ftrace_trace_stack(struct trace_array *tr,
754                                       struct ring_buffer *buffer,
755                                       unsigned long flags,
756                                       int skip, int pc, struct pt_regs *regs);
757
758 #else
759 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
760                                         unsigned long flags,
761                                         int skip, int pc, struct pt_regs *regs)
762 {
763 }
764 static inline void ftrace_trace_stack(struct trace_array *tr,
765                                       struct ring_buffer *buffer,
766                                       unsigned long flags,
767                                       int skip, int pc, struct pt_regs *regs)
768 {
769 }
770
771 #endif
772
773 static __always_inline void
774 trace_event_setup(struct ring_buffer_event *event,
775                   int type, unsigned long flags, int pc)
776 {
777         struct trace_entry *ent = ring_buffer_event_data(event);
778
779         tracing_generic_entry_update(ent, type, flags, pc);
780 }
781
782 static __always_inline struct ring_buffer_event *
783 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
784                           int type,
785                           unsigned long len,
786                           unsigned long flags, int pc)
787 {
788         struct ring_buffer_event *event;
789
790         event = ring_buffer_lock_reserve(buffer, len);
791         if (event != NULL)
792                 trace_event_setup(event, type, flags, pc);
793
794         return event;
795 }
796
797 void tracer_tracing_on(struct trace_array *tr)
798 {
799         if (tr->trace_buffer.buffer)
800                 ring_buffer_record_on(tr->trace_buffer.buffer);
801         /*
802          * This flag is looked at when buffers haven't been allocated
803          * yet, or by some tracers (like irqsoff), that just want to
804          * know if the ring buffer has been disabled, but it can handle
805          * races of where it gets disabled but we still do a record.
806          * As the check is in the fast path of the tracers, it is more
807          * important to be fast than accurate.
808          */
809         tr->buffer_disabled = 0;
810         /* Make the flag seen by readers */
811         smp_wmb();
812 }
813
814 /**
815  * tracing_on - enable tracing buffers
816  *
817  * This function enables tracing buffers that may have been
818  * disabled with tracing_off.
819  */
820 void tracing_on(void)
821 {
822         tracer_tracing_on(&global_trace);
823 }
824 EXPORT_SYMBOL_GPL(tracing_on);
825
826
827 static __always_inline void
828 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
829 {
830         __this_cpu_write(trace_taskinfo_save, true);
831
832         /* If this is the temp buffer, we need to commit fully */
833         if (this_cpu_read(trace_buffered_event) == event) {
834                 /* Length is in event->array[0] */
835                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
836                 /* Release the temp buffer */
837                 this_cpu_dec(trace_buffered_event_cnt);
838         } else
839                 ring_buffer_unlock_commit(buffer, event);
840 }
841
842 /**
843  * __trace_puts - write a constant string into the trace buffer.
844  * @ip:    The address of the caller
845  * @str:   The constant string to write
846  * @size:  The size of the string.
847  */
848 int __trace_puts(unsigned long ip, const char *str, int size)
849 {
850         struct ring_buffer_event *event;
851         struct ring_buffer *buffer;
852         struct print_entry *entry;
853         unsigned long irq_flags;
854         int alloc;
855         int pc;
856
857         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
858                 return 0;
859
860         pc = preempt_count();
861
862         if (unlikely(tracing_selftest_running || tracing_disabled))
863                 return 0;
864
865         alloc = sizeof(*entry) + size + 2; /* possible \n added */
866
867         local_save_flags(irq_flags);
868         buffer = global_trace.trace_buffer.buffer;
869         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
870                                             irq_flags, pc);
871         if (!event)
872                 return 0;
873
874         entry = ring_buffer_event_data(event);
875         entry->ip = ip;
876
877         memcpy(&entry->buf, str, size);
878
879         /* Add a newline if necessary */
880         if (entry->buf[size - 1] != '\n') {
881                 entry->buf[size] = '\n';
882                 entry->buf[size + 1] = '\0';
883         } else
884                 entry->buf[size] = '\0';
885
886         __buffer_unlock_commit(buffer, event);
887         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
888
889         return size;
890 }
891 EXPORT_SYMBOL_GPL(__trace_puts);
892
893 /**
894  * __trace_bputs - write the pointer to a constant string into trace buffer
895  * @ip:    The address of the caller
896  * @str:   The constant string to write to the buffer to
897  */
898 int __trace_bputs(unsigned long ip, const char *str)
899 {
900         struct ring_buffer_event *event;
901         struct ring_buffer *buffer;
902         struct bputs_entry *entry;
903         unsigned long irq_flags;
904         int size = sizeof(struct bputs_entry);
905         int pc;
906
907         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
908                 return 0;
909
910         pc = preempt_count();
911
912         if (unlikely(tracing_selftest_running || tracing_disabled))
913                 return 0;
914
915         local_save_flags(irq_flags);
916         buffer = global_trace.trace_buffer.buffer;
917         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
918                                             irq_flags, pc);
919         if (!event)
920                 return 0;
921
922         entry = ring_buffer_event_data(event);
923         entry->ip                       = ip;
924         entry->str                      = str;
925
926         __buffer_unlock_commit(buffer, event);
927         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
928
929         return 1;
930 }
931 EXPORT_SYMBOL_GPL(__trace_bputs);
932
933 #ifdef CONFIG_TRACER_SNAPSHOT
934 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
935 {
936         struct tracer *tracer = tr->current_trace;
937         unsigned long flags;
938
939         if (in_nmi()) {
940                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
941                 internal_trace_puts("*** snapshot is being ignored        ***\n");
942                 return;
943         }
944
945         if (!tr->allocated_snapshot) {
946                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
947                 internal_trace_puts("*** stopping trace here!   ***\n");
948                 tracing_off();
949                 return;
950         }
951
952         /* Note, the snapshot cannot be used when the tracer uses it */
953         if (tracer->use_max_tr) {
954                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
955                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
956                 return;
957         }
958
959         local_irq_save(flags);
960         update_max_tr(tr, current, smp_processor_id(), cond_data);
961         local_irq_restore(flags);
962 }
963
964 void tracing_snapshot_instance(struct trace_array *tr)
965 {
966         tracing_snapshot_instance_cond(tr, NULL);
967 }
968
969 /**
970  * tracing_snapshot - take a snapshot of the current buffer.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  *
976  * Note, make sure to allocate the snapshot with either
977  * a tracing_snapshot_alloc(), or by doing it manually
978  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
979  *
980  * If the snapshot buffer is not allocated, it will stop tracing.
981  * Basically making a permanent snapshot.
982  */
983 void tracing_snapshot(void)
984 {
985         struct trace_array *tr = &global_trace;
986
987         tracing_snapshot_instance(tr);
988 }
989 EXPORT_SYMBOL_GPL(tracing_snapshot);
990
991 /**
992  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
993  * @tr:         The tracing instance to snapshot
994  * @cond_data:  The data to be tested conditionally, and possibly saved
995  *
996  * This is the same as tracing_snapshot() except that the snapshot is
997  * conditional - the snapshot will only happen if the
998  * cond_snapshot.update() implementation receiving the cond_data
999  * returns true, which means that the trace array's cond_snapshot
1000  * update() operation used the cond_data to determine whether the
1001  * snapshot should be taken, and if it was, presumably saved it along
1002  * with the snapshot.
1003  */
1004 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1005 {
1006         tracing_snapshot_instance_cond(tr, cond_data);
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1009
1010 /**
1011  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1012  * @tr:         The tracing instance
1013  *
1014  * When the user enables a conditional snapshot using
1015  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1016  * with the snapshot.  This accessor is used to retrieve it.
1017  *
1018  * Should not be called from cond_snapshot.update(), since it takes
1019  * the tr->max_lock lock, which the code calling
1020  * cond_snapshot.update() has already done.
1021  *
1022  * Returns the cond_data associated with the trace array's snapshot.
1023  */
1024 void *tracing_cond_snapshot_data(struct trace_array *tr)
1025 {
1026         void *cond_data = NULL;
1027
1028         arch_spin_lock(&tr->max_lock);
1029
1030         if (tr->cond_snapshot)
1031                 cond_data = tr->cond_snapshot->cond_data;
1032
1033         arch_spin_unlock(&tr->max_lock);
1034
1035         return cond_data;
1036 }
1037 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1038
1039 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1040                                         struct trace_buffer *size_buf, int cpu_id);
1041 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1042
1043 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1044 {
1045         int ret;
1046
1047         if (!tr->allocated_snapshot) {
1048
1049                 /* allocate spare buffer */
1050                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1051                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1052                 if (ret < 0)
1053                         return ret;
1054
1055                 tr->allocated_snapshot = true;
1056         }
1057
1058         return 0;
1059 }
1060
1061 static void free_snapshot(struct trace_array *tr)
1062 {
1063         /*
1064          * We don't free the ring buffer; instead, we resize it because
1065          * the max_tr ring buffer has some state (e.g. ring->clock) and
1066          * we want to preserve it.
1067          */
1068         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1069         set_buffer_entries(&tr->max_buffer, 1);
1070         tracing_reset_online_cpus(&tr->max_buffer);
1071         tr->allocated_snapshot = false;
1072 }
1073
1074 /**
1075  * tracing_alloc_snapshot - allocate snapshot buffer.
1076  *
1077  * This only allocates the snapshot buffer if it isn't already
1078  * allocated - it doesn't also take a snapshot.
1079  *
1080  * This is meant to be used in cases where the snapshot buffer needs
1081  * to be set up for events that can't sleep but need to be able to
1082  * trigger a snapshot.
1083  */
1084 int tracing_alloc_snapshot(void)
1085 {
1086         struct trace_array *tr = &global_trace;
1087         int ret;
1088
1089         ret = tracing_alloc_snapshot_instance(tr);
1090         WARN_ON(ret < 0);
1091
1092         return ret;
1093 }
1094 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1095
1096 /**
1097  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1098  *
1099  * This is similar to tracing_snapshot(), but it will allocate the
1100  * snapshot buffer if it isn't already allocated. Use this only
1101  * where it is safe to sleep, as the allocation may sleep.
1102  *
1103  * This causes a swap between the snapshot buffer and the current live
1104  * tracing buffer. You can use this to take snapshots of the live
1105  * trace when some condition is triggered, but continue to trace.
1106  */
1107 void tracing_snapshot_alloc(void)
1108 {
1109         int ret;
1110
1111         ret = tracing_alloc_snapshot();
1112         if (ret < 0)
1113                 return;
1114
1115         tracing_snapshot();
1116 }
1117 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
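
/*
 * Illustrative calling pattern (a sketch, not part of this file): a caller
 * that can sleep allocates the spare buffer once, then takes snapshots from
 * (possibly atomic) context when its condition of interest hits:
 *
 *	// in process context, e.g. module init
 *	if (tracing_alloc_snapshot() < 0)
 *		return -ENOMEM;
 *
 *	// later, when the interesting condition is detected
 *	tracing_snapshot();
 */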
1118
1119 /**
1120  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1121  * @tr:         The tracing instance
1122  * @cond_data:  User data to associate with the snapshot
1123  * @update:     Implementation of the cond_snapshot update function
1124  *
1125  * Check whether the conditional snapshot for the given instance has
1126  * already been enabled, or if the current tracer is already using a
1127  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1128  * save the cond_data and update function inside.
1129  *
1130  * Returns 0 if successful, error otherwise.
1131  */
1132 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1133                                  cond_update_fn_t update)
1134 {
1135         struct cond_snapshot *cond_snapshot;
1136         int ret = 0;
1137
1138         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1139         if (!cond_snapshot)
1140                 return -ENOMEM;
1141
1142         cond_snapshot->cond_data = cond_data;
1143         cond_snapshot->update = update;
1144
1145         mutex_lock(&trace_types_lock);
1146
1147         ret = tracing_alloc_snapshot_instance(tr);
1148         if (ret)
1149                 goto fail_unlock;
1150
1151         if (tr->current_trace->use_max_tr) {
1152                 ret = -EBUSY;
1153                 goto fail_unlock;
1154         }
1155
1156         /*
1157          * The cond_snapshot can only change to NULL without the
1158          * trace_types_lock. We don't care if we race with it going
1159          * to NULL, but we want to make sure that it's not set to
1160          * something other than NULL when we get here, which we can
1161          * do safely with only holding the trace_types_lock and not
1162          * having to take the max_lock.
1163          */
1164         if (tr->cond_snapshot) {
1165                 ret = -EBUSY;
1166                 goto fail_unlock;
1167         }
1168
1169         arch_spin_lock(&tr->max_lock);
1170         tr->cond_snapshot = cond_snapshot;
1171         arch_spin_unlock(&tr->max_lock);
1172
1173         mutex_unlock(&trace_types_lock);
1174
1175         return ret;
1176
1177  fail_unlock:
1178         mutex_unlock(&trace_types_lock);
1179         kfree(cond_snapshot);
1180         return ret;
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
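
/*
 * Illustrative sketch (example_update and the values used are assumptions):
 * a user of the conditional snapshot API supplies an update callback that
 * decides, from the cond_data handed to tracing_snapshot_cond(), whether
 * the buffer swap should actually happen:
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long val = (unsigned long)cond_data;
 *
 *		// snapshot only when the reported value crosses a threshold
 *		return val > 100;
 *	}
 *
 *	err = tracing_snapshot_cond_enable(tr, NULL, example_update);
 *	tracing_snapshot_cond(tr, (void *)(unsigned long)measured_value);
 *	tracing_snapshot_cond_disable(tr);
 */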
1183
1184 /**
1185  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1186  * @tr:         The tracing instance
1187  *
1188  * Check whether the conditional snapshot for the given instance is
1189  * enabled; if so, free the cond_snapshot associated with it,
1190  * otherwise return -EINVAL.
1191  *
1192  * Returns 0 if successful, error otherwise.
1193  */
1194 int tracing_snapshot_cond_disable(struct trace_array *tr)
1195 {
1196         int ret = 0;
1197
1198         arch_spin_lock(&tr->max_lock);
1199
1200         if (!tr->cond_snapshot)
1201                 ret = -EINVAL;
1202         else {
1203                 kfree(tr->cond_snapshot);
1204                 tr->cond_snapshot = NULL;
1205         }
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return ret;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1212 #else
1213 void tracing_snapshot(void)
1214 {
1215         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1216 }
1217 EXPORT_SYMBOL_GPL(tracing_snapshot);
1218 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1219 {
1220         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1221 }
1222 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1223 int tracing_alloc_snapshot(void)
1224 {
1225         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1226         return -ENODEV;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1229 void tracing_snapshot_alloc(void)
1230 {
1231         /* Give warning */
1232         tracing_snapshot();
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1235 void *tracing_cond_snapshot_data(struct trace_array *tr)
1236 {
1237         return NULL;
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1240 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1241 {
1242         return -ENODEV;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1245 int tracing_snapshot_cond_disable(struct trace_array *tr)
1246 {
1247         return false;
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1250 #endif /* CONFIG_TRACER_SNAPSHOT */
1251
1252 void tracer_tracing_off(struct trace_array *tr)
1253 {
1254         if (tr->trace_buffer.buffer)
1255                 ring_buffer_record_off(tr->trace_buffer.buffer);
1256         /*
1257          * This flag is looked at when buffers haven't been allocated
1258          * yet, or by some tracers (like irqsoff), that just want to
1259          * know if the ring buffer has been disabled, but it can handle
1260          * races of where it gets disabled but we still do a record.
1261          * As the check is in the fast path of the tracers, it is more
1262          * important to be fast than accurate.
1263          */
1264         tr->buffer_disabled = 1;
1265         /* Make the flag seen by readers */
1266         smp_wmb();
1267 }
1268
1269 /**
1270  * tracing_off - turn off tracing buffers
1271  *
1272  * This function stops the tracing buffers from recording data.
1273  * It does not disable any overhead the tracers themselves may
1274  * be causing. This function simply causes all recording to
1275  * the ring buffers to fail.
1276  */
1277 void tracing_off(void)
1278 {
1279         tracer_tracing_off(&global_trace);
1280 }
1281 EXPORT_SYMBOL_GPL(tracing_off);
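
/*
 * Illustrative sketch (do_something_interesting() is a stand-in for any
 * workload): tracing_on() and tracing_off() are typically used from other
 * kernel code to capture a window of trace data around an interesting event
 * and then freeze the ring buffers so the data is not overwritten before it
 * can be read from the tracefs "trace" file:
 *
 *	tracing_on();
 *	do_something_interesting();
 *	tracing_off();
 */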
1282
1283 void disable_trace_on_warning(void)
1284 {
1285         if (__disable_trace_on_warning)
1286                 tracing_off();
1287 }
1288
1289 /**
1290  * tracer_tracing_is_on - show real state of ring buffer enabled
1291  * @tr : the trace array to know if ring buffer is enabled
1292  *
1293  * Shows real state of the ring buffer if it is enabled or not.
1294  */
1295 bool tracer_tracing_is_on(struct trace_array *tr)
1296 {
1297         if (tr->trace_buffer.buffer)
1298                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1299         return !tr->buffer_disabled;
1300 }
1301
1302 /**
1303  * tracing_is_on - show state of ring buffers enabled
1304  */
1305 int tracing_is_on(void)
1306 {
1307         return tracer_tracing_is_on(&global_trace);
1308 }
1309 EXPORT_SYMBOL_GPL(tracing_is_on);
1310
1311 static int __init set_buf_size(char *str)
1312 {
1313         unsigned long buf_size;
1314
1315         if (!str)
1316                 return 0;
1317         buf_size = memparse(str, &str);
1318         /* nr_entries can not be zero */
1319         if (buf_size == 0)
1320                 return 0;
1321         trace_buf_size = buf_size;
1322         return 1;
1323 }
1324 __setup("trace_buf_size=", set_buf_size);
1325
1326 static int __init set_tracing_thresh(char *str)
1327 {
1328         unsigned long threshold;
1329         int ret;
1330
1331         if (!str)
1332                 return 0;
1333         ret = kstrtoul(str, 0, &threshold);
1334         if (ret < 0)
1335                 return 0;
1336         tracing_thresh = threshold * 1000;
1337         return 1;
1338 }
1339 __setup("tracing_thresh=", set_tracing_thresh);
1340
1341 unsigned long nsecs_to_usecs(unsigned long nsecs)
1342 {
1343         return nsecs / 1000;
1344 }
1345
1346 /*
1347  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1348  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1349  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1350  * of strings in the order that the evals (enum) were defined.
1351  */
1352 #undef C
1353 #define C(a, b) b
1354
1355 /* These must match the bit positions in trace_iterator_flags */
1356 static const char *trace_options[] = {
1357         TRACE_FLAGS
1358         NULL
1359 };
1360
1361 static struct {
1362         u64 (*func)(void);
1363         const char *name;
1364         int in_ns;              /* is this clock in nanoseconds? */
1365 } trace_clocks[] = {
1366         { trace_clock_local,            "local",        1 },
1367         { trace_clock_global,           "global",       1 },
1368         { trace_clock_counter,          "counter",      0 },
1369         { trace_clock_jiffies,          "uptime",       0 },
1370         { trace_clock,                  "perf",         1 },
1371         { ktime_get_mono_fast_ns,       "mono",         1 },
1372         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1373         { ktime_get_boot_fast_ns,       "boot",         1 },
1374         ARCH_TRACE_CLOCKS
1375 };
1376
1377 bool trace_clock_in_ns(struct trace_array *tr)
1378 {
1379         if (trace_clocks[tr->clock_id].in_ns)
1380                 return true;
1381
1382         return false;
1383 }
1384
1385 /*
1386  * trace_parser_get_init - gets the buffer for trace parser
1387  */
1388 int trace_parser_get_init(struct trace_parser *parser, int size)
1389 {
1390         memset(parser, 0, sizeof(*parser));
1391
1392         parser->buffer = kmalloc(size, GFP_KERNEL);
1393         if (!parser->buffer)
1394                 return 1;
1395
1396         parser->size = size;
1397         return 0;
1398 }
1399
1400 /*
1401  * trace_parser_put - frees the buffer for trace parser
1402  */
1403 void trace_parser_put(struct trace_parser *parser)
1404 {
1405         kfree(parser->buffer);
1406         parser->buffer = NULL;
1407 }
1408
1409 /*
1410  * trace_get_user - reads the user input string separated by  space
1411  * (matched by isspace(ch))
1412  *
1413  * For each string found the 'struct trace_parser' is updated,
1414  * and the function returns.
1415  *
1416  * Returns number of bytes read.
1417  *
1418  * See kernel/trace/trace.h for 'struct trace_parser' details.
1419  */
1420 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1421         size_t cnt, loff_t *ppos)
1422 {
1423         char ch;
1424         size_t read = 0;
1425         ssize_t ret;
1426
1427         if (!*ppos)
1428                 trace_parser_clear(parser);
1429
1430         ret = get_user(ch, ubuf++);
1431         if (ret)
1432                 goto out;
1433
1434         read++;
1435         cnt--;
1436
1437         /*
1438          * The parser is not finished with the last write,
1439          * continue reading the user input without skipping spaces.
1440          */
1441         if (!parser->cont) {
1442                 /* skip white space */
1443                 while (cnt && isspace(ch)) {
1444                         ret = get_user(ch, ubuf++);
1445                         if (ret)
1446                                 goto out;
1447                         read++;
1448                         cnt--;
1449                 }
1450
1451                 parser->idx = 0;
1452
1453                 /* only spaces were written */
1454                 if (isspace(ch) || !ch) {
1455                         *ppos += read;
1456                         ret = read;
1457                         goto out;
1458                 }
1459         }
1460
1461         /* read the non-space input */
1462         while (cnt && !isspace(ch) && ch) {
1463                 if (parser->idx < parser->size - 1)
1464                         parser->buffer[parser->idx++] = ch;
1465                 else {
1466                         ret = -EINVAL;
1467                         goto out;
1468                 }
1469                 ret = get_user(ch, ubuf++);
1470                 if (ret)
1471                         goto out;
1472                 read++;
1473                 cnt--;
1474         }
1475
1476         /* We either got finished input or we have to wait for another call. */
1477         if (isspace(ch) || !ch) {
1478                 parser->buffer[parser->idx] = 0;
1479                 parser->cont = false;
1480         } else if (parser->idx < parser->size - 1) {
1481                 parser->cont = true;
1482                 parser->buffer[parser->idx++] = ch;
1483                 /* Make sure the parsed string always terminates with '\0'. */
1484                 parser->buffer[parser->idx] = 0;
1485         } else {
1486                 ret = -EINVAL;
1487                 goto out;
1488         }
1489
1490         *ppos += read;
1491         ret = read;
1492
1493 out:
1494         return ret;
1495 }
1496
1497 /* TODO add a seq_buf_to_buffer() */
1498 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1499 {
1500         int len;
1501
1502         if (trace_seq_used(s) <= s->seq.readpos)
1503                 return -EBUSY;
1504
1505         len = trace_seq_used(s) - s->seq.readpos;
1506         if (cnt > len)
1507                 cnt = len;
1508         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1509
1510         s->seq.readpos += cnt;
1511         return cnt;
1512 }
1513
1514 unsigned long __read_mostly     tracing_thresh;
1515 static const struct file_operations tracing_max_lat_fops;
1516
1517 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1518         defined(CONFIG_FSNOTIFY)
1519
1520 static struct workqueue_struct *fsnotify_wq;
1521
1522 static void latency_fsnotify_workfn(struct work_struct *work)
1523 {
1524         struct trace_array *tr = container_of(work, struct trace_array,
1525                                               fsnotify_work);
1526         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1527                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1528 }
1529
1530 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1531 {
1532         struct trace_array *tr = container_of(iwork, struct trace_array,
1533                                               fsnotify_irqwork);
1534         queue_work(fsnotify_wq, &tr->fsnotify_work);
1535 }
1536
1537 static void trace_create_maxlat_file(struct trace_array *tr,
1538                                      struct dentry *d_tracer)
1539 {
1540         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1541         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1542         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1543                                               d_tracer, &tr->max_latency,
1544                                               &tracing_max_lat_fops);
1545 }
1546
1547 __init static int latency_fsnotify_init(void)
1548 {
1549         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1550                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1551         if (!fsnotify_wq) {
1552                 pr_err("Unable to allocate tr_max_lat_wq\n");
1553                 return -ENOMEM;
1554         }
1555         return 0;
1556 }
1557
1558 late_initcall_sync(latency_fsnotify_init);
1559
1560 void latency_fsnotify(struct trace_array *tr)
1561 {
1562         if (!fsnotify_wq)
1563                 return;
1564         /*
1565          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1566          * possible that we are called from __schedule() or do_idle(), which
1567          * could cause a deadlock.
1568          */
1569         irq_work_queue(&tr->fsnotify_irqwork);
1570 }
1571
1572 /*
1573  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1574  *  defined(CONFIG_FSNOTIFY)
1575  */
1576 #else
1577
1578 #define trace_create_maxlat_file(tr, d_tracer)                          \
1579         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1580                           &tr->max_latency, &tracing_max_lat_fops)
1581
1582 #endif
1583
1584 #ifdef CONFIG_TRACER_MAX_TRACE
1585 /*
1586  * Copy the new maximum trace into the separate maximum-trace
1587  * structure. (this way the maximum trace is permanently saved,
1588  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1589  */
1590 static void
1591 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1592 {
1593         struct trace_buffer *trace_buf = &tr->trace_buffer;
1594         struct trace_buffer *max_buf = &tr->max_buffer;
1595         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1596         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1597
1598         max_buf->cpu = cpu;
1599         max_buf->time_start = data->preempt_timestamp;
1600
1601         max_data->saved_latency = tr->max_latency;
1602         max_data->critical_start = data->critical_start;
1603         max_data->critical_end = data->critical_end;
1604
1605         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1606         max_data->pid = tsk->pid;
1607         /*
1608          * If tsk == current, then use current_uid(), as that does not use
1609          * RCU. The irq tracer can be called out of RCU scope.
1610          */
1611         if (tsk == current)
1612                 max_data->uid = current_uid();
1613         else
1614                 max_data->uid = task_uid(tsk);
1615
1616         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1617         max_data->policy = tsk->policy;
1618         max_data->rt_priority = tsk->rt_priority;
1619
1620         /* record this tasks comm */
1621         tracing_record_cmdline(tsk);
1622         latency_fsnotify(tr);
1623 }
1624
1625 /**
1626  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1627  * @tr: tracer
1628  * @tsk: the task with the latency
1629  * @cpu: The cpu that initiated the trace.
1630  * @cond_data: User data associated with a conditional snapshot
1631  *
1632  * Flip the buffers between the @tr and the max_tr and record information
1633  * about which task was the cause of this latency.
1634  */
1635 void
1636 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1637               void *cond_data)
1638 {
1639         if (tr->stop_count)
1640                 return;
1641
1642         WARN_ON_ONCE(!irqs_disabled());
1643
1644         if (!tr->allocated_snapshot) {
1645                 /* Only the nop tracer should hit this when disabling */
1646                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1647                 return;
1648         }
1649
1650         arch_spin_lock(&tr->max_lock);
1651
1652         /* Inherit the recordable setting from trace_buffer */
1653         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1654                 ring_buffer_record_on(tr->max_buffer.buffer);
1655         else
1656                 ring_buffer_record_off(tr->max_buffer.buffer);
1657
1658 #ifdef CONFIG_TRACER_SNAPSHOT
1659         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1660                 goto out_unlock;
1661 #endif
1662         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1663
1664         __update_max_tr(tr, tsk, cpu);
1665
1666  out_unlock:
1667         arch_spin_unlock(&tr->max_lock);
1668 }
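
/*
 * Sketch of the expected calling pattern from a latency tracer such as
 * irqsoff or wakeup ("delta" is hypothetical here):
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * Callers are expected to have interrupts disabled, as the WARN_ON_ONCE()
 * above checks.
 */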
1669
1670 /**
1671  * update_max_tr_single - only copy one trace over, and reset the rest
1672  * @tr: tracer
1673  * @tsk: task with the latency
1674  * @cpu: the cpu of the buffer to copy.
1675  *
1676  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1677  */
1678 void
1679 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1680 {
1681         int ret;
1682
1683         if (tr->stop_count)
1684                 return;
1685
1686         WARN_ON_ONCE(!irqs_disabled());
1687         if (!tr->allocated_snapshot) {
1688                 /* Only the nop tracer should hit this when disabling */
1689                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1690                 return;
1691         }
1692
1693         arch_spin_lock(&tr->max_lock);
1694
1695         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1696
1697         if (ret == -EBUSY) {
1698                 /*
1699                  * We failed to swap the buffer due to a commit taking
1700                  * place on this CPU. We fail to record, but we reset
1701                  * the max trace buffer (no one writes directly to it)
1702                  * and flag that it failed.
1703                  */
1704                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1705                         "Failed to swap buffers due to commit in progress\n");
1706         }
1707
1708         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1709
1710         __update_max_tr(tr, tsk, cpu);
1711         arch_spin_unlock(&tr->max_lock);
1712 }
1713 #endif /* CONFIG_TRACER_MAX_TRACE */
1714
1715 static int wait_on_pipe(struct trace_iterator *iter, int full)
1716 {
1717         /* Iterators are static, they should be filled or empty */
1718         if (trace_buffer_iter(iter, iter->cpu_file))
1719                 return 0;
1720
1721         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1722                                 full);
1723 }
1724
1725 #ifdef CONFIG_FTRACE_STARTUP_TEST
1726 static bool selftests_can_run;
1727
1728 struct trace_selftests {
1729         struct list_head                list;
1730         struct tracer                   *type;
1731 };
1732
1733 static LIST_HEAD(postponed_selftests);
1734
1735 static int save_selftest(struct tracer *type)
1736 {
1737         struct trace_selftests *selftest;
1738
1739         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1740         if (!selftest)
1741                 return -ENOMEM;
1742
1743         selftest->type = type;
1744         list_add(&selftest->list, &postponed_selftests);
1745         return 0;
1746 }
1747
1748 static int run_tracer_selftest(struct tracer *type)
1749 {
1750         struct trace_array *tr = &global_trace;
1751         struct tracer *saved_tracer = tr->current_trace;
1752         int ret;
1753
1754         if (!type->selftest || tracing_selftest_disabled)
1755                 return 0;
1756
1757         /*
1758          * If a tracer registers early in boot up (before scheduling is
1759          * initialized and such), then do not run its selftests yet.
1760          * Instead, run it a little later in the boot process.
1761          */
1762         if (!selftests_can_run)
1763                 return save_selftest(type);
1764
1765         /*
1766          * Run a selftest on this tracer.
1767          * Here we reset the trace buffer, and set the current
1768          * tracer to be this tracer. The tracer can then run some
1769          * internal tracing to verify that everything is in order.
1770          * If we fail, we do not register this tracer.
1771          */
1772         tracing_reset_online_cpus(&tr->trace_buffer);
1773
1774         tr->current_trace = type;
1775
1776 #ifdef CONFIG_TRACER_MAX_TRACE
1777         if (type->use_max_tr) {
1778                 /* If we expanded the buffers, make sure the max is expanded too */
1779                 if (ring_buffer_expanded)
1780                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1781                                            RING_BUFFER_ALL_CPUS);
1782                 tr->allocated_snapshot = true;
1783         }
1784 #endif
1785
1786         /* the test is responsible for initializing and enabling */
1787         pr_info("Testing tracer %s: ", type->name);
1788         ret = type->selftest(type, tr);
1789         /* the test is responsible for resetting too */
1790         tr->current_trace = saved_tracer;
1791         if (ret) {
1792                 printk(KERN_CONT "FAILED!\n");
1793                 /* Add the warning after printing 'FAILED' */
1794                 WARN_ON(1);
1795                 return -1;
1796         }
1797         /* Only reset on passing, to avoid touching corrupted buffers */
1798         tracing_reset_online_cpus(&tr->trace_buffer);
1799
1800 #ifdef CONFIG_TRACER_MAX_TRACE
1801         if (type->use_max_tr) {
1802                 tr->allocated_snapshot = false;
1803
1804                 /* Shrink the max buffer again */
1805                 if (ring_buffer_expanded)
1806                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1807                                            RING_BUFFER_ALL_CPUS);
1808         }
1809 #endif
1810
1811         printk(KERN_CONT "PASSED\n");
1812         return 0;
1813 }
1814
1815 static __init int init_trace_selftests(void)
1816 {
1817         struct trace_selftests *p, *n;
1818         struct tracer *t, **last;
1819         int ret;
1820
1821         selftests_can_run = true;
1822
1823         mutex_lock(&trace_types_lock);
1824
1825         if (list_empty(&postponed_selftests))
1826                 goto out;
1827
1828         pr_info("Running postponed tracer tests:\n");
1829
1830         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1831                 /* This loop can take minutes when sanitizers are enabled, so
1832                  * let's make sure we allow RCU processing.
1833                  */
1834                 cond_resched();
1835                 ret = run_tracer_selftest(p->type);
1836                 /* If the test fails, then warn and remove from available_tracers */
1837                 if (ret < 0) {
1838                         WARN(1, "tracer: %s failed selftest, disabling\n",
1839                              p->type->name);
1840                         last = &trace_types;
1841                         for (t = trace_types; t; t = t->next) {
1842                                 if (t == p->type) {
1843                                         *last = t->next;
1844                                         break;
1845                                 }
1846                                 last = &t->next;
1847                         }
1848                 }
1849                 list_del(&p->list);
1850                 kfree(p);
1851         }
1852
1853  out:
1854         mutex_unlock(&trace_types_lock);
1855
1856         return 0;
1857 }
1858 core_initcall(init_trace_selftests);
1859 #else
1860 static inline int run_tracer_selftest(struct tracer *type)
1861 {
1862         return 0;
1863 }
1864 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1865
1866 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1867
1868 static void __init apply_trace_boot_options(void);
1869
1870 /**
1871  * register_tracer - register a tracer with the ftrace system.
1872  * @type: the plugin for the tracer
1873  *
1874  * Register a new plugin tracer.
1875  */
1876 int __init register_tracer(struct tracer *type)
1877 {
1878         struct tracer *t;
1879         int ret = 0;
1880
1881         if (!type->name) {
1882                 pr_info("Tracer must have a name\n");
1883                 return -1;
1884         }
1885
1886         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1887                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1888                 return -1;
1889         }
1890
1891         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1892                 pr_warn("Can not register tracer %s due to lockdown\n",
1893                            type->name);
1894                 return -EPERM;
1895         }
1896
1897         mutex_lock(&trace_types_lock);
1898
1899         tracing_selftest_running = true;
1900
1901         for (t = trace_types; t; t = t->next) {
1902                 if (strcmp(type->name, t->name) == 0) {
1903                         /* already found */
1904                         pr_info("Tracer %s already registered\n",
1905                                 type->name);
1906                         ret = -1;
1907                         goto out;
1908                 }
1909         }
1910
1911         if (!type->set_flag)
1912                 type->set_flag = &dummy_set_flag;
1913         if (!type->flags) {
1914                 /* allocate a dummy tracer_flags */
1915                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1916                 if (!type->flags) {
1917                         ret = -ENOMEM;
1918                         goto out;
1919                 }
1920                 type->flags->val = 0;
1921                 type->flags->opts = dummy_tracer_opt;
1922         } else
1923                 if (!type->flags->opts)
1924                         type->flags->opts = dummy_tracer_opt;
1925
1926         /* store the tracer for __set_tracer_option */
1927         type->flags->trace = type;
1928
1929         ret = run_tracer_selftest(type);
1930         if (ret < 0)
1931                 goto out;
1932
1933         type->next = trace_types;
1934         trace_types = type;
1935         add_tracer_options(&global_trace, type);
1936
1937  out:
1938         tracing_selftest_running = false;
1939         mutex_unlock(&trace_types_lock);
1940
1941         if (ret || !default_bootup_tracer)
1942                 goto out_unlock;
1943
1944         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1945                 goto out_unlock;
1946
1947         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1948         /* Do we want this tracer to start on bootup? */
1949         tracing_set_tracer(&global_trace, type->name);
1950         default_bootup_tracer = NULL;
1951
1952         apply_trace_boot_options();
1953
1954         /* disable other selftests, since this tracer will break them. */
1955         tracing_selftest_disabled = true;
1956 #ifdef CONFIG_FTRACE_STARTUP_TEST
1957         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1958                type->name);
1959 #endif
1960
1961  out_unlock:
1962         return ret;
1963 }
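
/*
 * A minimal registration sketch (the "noplike_*" names are hypothetical;
 * real tracers live in the trace_*.c files and fill in far more hooks):
 *
 *	static struct tracer noplike_tracer __read_mostly = {
 *		.name	= "noplike",
 *		.init	= noplike_init,
 *		.reset	= noplike_reset,
 *	};
 *
 *	static __init int init_noplike_tracer(void)
 *	{
 *		return register_tracer(&noplike_tracer);
 *	}
 *	core_initcall(init_noplike_tracer);
 *
 * Since register_tracer() is __init, registration has to happen during boot;
 * selftests for tracers registered before the scheduler is up are deferred
 * and run later by init_trace_selftests() above.
 */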
1964
1965 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1966 {
1967         struct ring_buffer *buffer = buf->buffer;
1968
1969         if (!buffer)
1970                 return;
1971
1972         ring_buffer_record_disable(buffer);
1973
1974         /* Make sure all commits have finished */
1975         synchronize_rcu();
1976         ring_buffer_reset_cpu(buffer, cpu);
1977
1978         ring_buffer_record_enable(buffer);
1979 }
1980
1981 void tracing_reset_online_cpus(struct trace_buffer *buf)
1982 {
1983         struct ring_buffer *buffer = buf->buffer;
1984         int cpu;
1985
1986         if (!buffer)
1987                 return;
1988
1989         ring_buffer_record_disable(buffer);
1990
1991         /* Make sure all commits have finished */
1992         synchronize_rcu();
1993
1994         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1995
1996         for_each_online_cpu(cpu)
1997                 ring_buffer_reset_cpu(buffer, cpu);
1998
1999         ring_buffer_record_enable(buffer);
2000 }
2001
2002 /* Must have trace_types_lock held */
2003 void tracing_reset_all_online_cpus(void)
2004 {
2005         struct trace_array *tr;
2006
2007         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2008                 if (!tr->clear_trace)
2009                         continue;
2010                 tr->clear_trace = false;
2011                 tracing_reset_online_cpus(&tr->trace_buffer);
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013                 tracing_reset_online_cpus(&tr->max_buffer);
2014 #endif
2015         }
2016 }
2017
2018 static int *tgid_map;
2019
2020 #define SAVED_CMDLINES_DEFAULT 128
2021 #define NO_CMDLINE_MAP UINT_MAX
2022 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2023 struct saved_cmdlines_buffer {
2024         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2025         unsigned *map_cmdline_to_pid;
2026         unsigned cmdline_num;
2027         int cmdline_idx;
2028         char *saved_cmdlines;
2029 };
2030 static struct saved_cmdlines_buffer *savedcmd;
2031
2032 /* temporarily disable recording */
2033 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2034
2035 static inline char *get_saved_cmdlines(int idx)
2036 {
2037         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2038 }
2039
2040 static inline void set_cmdline(int idx, const char *cmdline)
2041 {
2042         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2043 }
2044
2045 static int allocate_cmdlines_buffer(unsigned int val,
2046                                     struct saved_cmdlines_buffer *s)
2047 {
2048         s->map_cmdline_to_pid = kmalloc_array(val,
2049                                               sizeof(*s->map_cmdline_to_pid),
2050                                               GFP_KERNEL);
2051         if (!s->map_cmdline_to_pid)
2052                 return -ENOMEM;
2053
2054         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2055         if (!s->saved_cmdlines) {
2056                 kfree(s->map_cmdline_to_pid);
2057                 return -ENOMEM;
2058         }
2059
2060         s->cmdline_idx = 0;
2061         s->cmdline_num = val;
2062         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2063                sizeof(s->map_pid_to_cmdline));
2064         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2065                val * sizeof(*s->map_cmdline_to_pid));
2066
2067         return 0;
2068 }
2069
2070 static int trace_create_savedcmd(void)
2071 {
2072         int ret;
2073
2074         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2075         if (!savedcmd)
2076                 return -ENOMEM;
2077
2078         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2079         if (ret < 0) {
2080                 kfree(savedcmd);
2081                 savedcmd = NULL;
2082                 return -ENOMEM;
2083         }
2084
2085         return 0;
2086 }
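
/*
 * Rough footprint of the default allocation above, assuming TASK_COMM_LEN
 * is 16 and unsigned is 4 bytes: 128 * 16 = 2 KiB of saved comm text plus
 * 128 * 4 bytes for map_cmdline_to_pid, on top of the fixed
 * (PID_MAX_DEFAULT + 1) * 4 bytes (~128 KiB) map_pid_to_cmdline array
 * embedded in struct saved_cmdlines_buffer itself.
 */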
2087
2088 int is_tracing_stopped(void)
2089 {
2090         return global_trace.stop_count;
2091 }
2092
2093 /**
2094  * tracing_start - quick start of the tracer
2095  *
2096  * If tracing is enabled but was stopped by tracing_stop,
2097  * this will start the tracer back up.
2098  */
2099 void tracing_start(void)
2100 {
2101         struct ring_buffer *buffer;
2102         unsigned long flags;
2103
2104         if (tracing_disabled)
2105                 return;
2106
2107         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2108         if (--global_trace.stop_count) {
2109                 if (global_trace.stop_count < 0) {
2110                         /* Someone screwed up their debugging */
2111                         WARN_ON_ONCE(1);
2112                         global_trace.stop_count = 0;
2113                 }
2114                 goto out;
2115         }
2116
2117         /* Prevent the buffers from switching */
2118         arch_spin_lock(&global_trace.max_lock);
2119
2120         buffer = global_trace.trace_buffer.buffer;
2121         if (buffer)
2122                 ring_buffer_record_enable(buffer);
2123
2124 #ifdef CONFIG_TRACER_MAX_TRACE
2125         buffer = global_trace.max_buffer.buffer;
2126         if (buffer)
2127                 ring_buffer_record_enable(buffer);
2128 #endif
2129
2130         arch_spin_unlock(&global_trace.max_lock);
2131
2132  out:
2133         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2134 }
2135
2136 static void tracing_start_tr(struct trace_array *tr)
2137 {
2138         struct ring_buffer *buffer;
2139         unsigned long flags;
2140
2141         if (tracing_disabled)
2142                 return;
2143
2144         /* If global, we need to also start the max tracer */
2145         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2146                 return tracing_start();
2147
2148         raw_spin_lock_irqsave(&tr->start_lock, flags);
2149
2150         if (--tr->stop_count) {
2151                 if (tr->stop_count < 0) {
2152                         /* Someone screwed up their debugging */
2153                         WARN_ON_ONCE(1);
2154                         tr->stop_count = 0;
2155                 }
2156                 goto out;
2157         }
2158
2159         buffer = tr->trace_buffer.buffer;
2160         if (buffer)
2161                 ring_buffer_record_enable(buffer);
2162
2163  out:
2164         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2165 }
2166
2167 /**
2168  * tracing_stop - quick stop of the tracer
2169  *
2170  * Lightweight way to stop tracing. Use in conjunction with
2171  * tracing_start.
2172  */
2173 void tracing_stop(void)
2174 {
2175         struct ring_buffer *buffer;
2176         unsigned long flags;
2177
2178         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2179         if (global_trace.stop_count++)
2180                 goto out;
2181
2182         /* Prevent the buffers from switching */
2183         arch_spin_lock(&global_trace.max_lock);
2184
2185         buffer = global_trace.trace_buffer.buffer;
2186         if (buffer)
2187                 ring_buffer_record_disable(buffer);
2188
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190         buffer = global_trace.max_buffer.buffer;
2191         if (buffer)
2192                 ring_buffer_record_disable(buffer);
2193 #endif
2194
2195         arch_spin_unlock(&global_trace.max_lock);
2196
2197  out:
2198         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2199 }
2200
2201 static void tracing_stop_tr(struct trace_array *tr)
2202 {
2203         struct ring_buffer *buffer;
2204         unsigned long flags;
2205
2206         /* If global, we need to also stop the max tracer */
2207         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2208                 return tracing_stop();
2209
2210         raw_spin_lock_irqsave(&tr->start_lock, flags);
2211         if (tr->stop_count++)
2212                 goto out;
2213
2214         buffer = tr->trace_buffer.buffer;
2215         if (buffer)
2216                 ring_buffer_record_disable(buffer);
2217
2218  out:
2219         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2220 }
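
/*
 * Typical pairing (sketch): freeze the buffers around a window of interest,
 * inspect them (e.g. read the trace files, or call ftrace_dump() from the
 * kernel), then resume. The calls nest via stop_count, so every
 * tracing_stop() needs a matching tracing_start():
 *
 *	tracing_stop();
 *	...
 *	tracing_start();
 */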
2221
2222 static int trace_save_cmdline(struct task_struct *tsk)
2223 {
2224         unsigned pid, idx;
2225
2226         /* treat recording of idle task as a success */
2227         if (!tsk->pid)
2228                 return 1;
2229
2230         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2231                 return 0;
2232
2233         /*
2234          * It's not the end of the world if we don't get
2235          * the lock, but we also don't want to spin
2236          * nor do we want to disable interrupts,
2237          * so if we miss here, then better luck next time.
2238          */
2239         if (!arch_spin_trylock(&trace_cmdline_lock))
2240                 return 0;
2241
2242         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2243         if (idx == NO_CMDLINE_MAP) {
2244                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2245
2246                 /*
2247                  * Check whether the cmdline buffer at idx has a pid
2248                  * mapped. We are going to overwrite that entry so we
2249                  * need to clear the map_pid_to_cmdline. Otherwise we
2250                  * would read the new comm for the old pid.
2251                  */
2252                 pid = savedcmd->map_cmdline_to_pid[idx];
2253                 if (pid != NO_CMDLINE_MAP)
2254                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2255
2256                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2257                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2258
2259                 savedcmd->cmdline_idx = idx;
2260         }
2261
2262         set_cmdline(idx, tsk->comm);
2263
2264         arch_spin_unlock(&trace_cmdline_lock);
2265
2266         return 1;
2267 }
2268
2269 static void __trace_find_cmdline(int pid, char comm[])
2270 {
2271         unsigned map;
2272
2273         if (!pid) {
2274                 strcpy(comm, "<idle>");
2275                 return;
2276         }
2277
2278         if (WARN_ON_ONCE(pid < 0)) {
2279                 strcpy(comm, "<XXX>");
2280                 return;
2281         }
2282
2283         if (pid > PID_MAX_DEFAULT) {
2284                 strcpy(comm, "<...>");
2285                 return;
2286         }
2287
2288         map = savedcmd->map_pid_to_cmdline[pid];
2289         if (map != NO_CMDLINE_MAP)
2290                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2291         else
2292                 strcpy(comm, "<...>");
2293 }
2294
2295 void trace_find_cmdline(int pid, char comm[])
2296 {
2297         preempt_disable();
2298         arch_spin_lock(&trace_cmdline_lock);
2299
2300         __trace_find_cmdline(pid, comm);
2301
2302         arch_spin_unlock(&trace_cmdline_lock);
2303         preempt_enable();
2304 }
2305
2306 int trace_find_tgid(int pid)
2307 {
2308         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2309                 return 0;
2310
2311         return tgid_map[pid];
2312 }
2313
2314 static int trace_save_tgid(struct task_struct *tsk)
2315 {
2316         /* treat recording of idle task as a success */
2317         if (!tsk->pid)
2318                 return 1;
2319
2320         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2321                 return 0;
2322
2323         tgid_map[tsk->pid] = tsk->tgid;
2324         return 1;
2325 }
2326
2327 static bool tracing_record_taskinfo_skip(int flags)
2328 {
2329         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2330                 return true;
2331         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2332                 return true;
2333         if (!__this_cpu_read(trace_taskinfo_save))
2334                 return true;
2335         return false;
2336 }
2337
2338 /**
2339  * tracing_record_taskinfo - record the task info of a task
2340  *
2341  * @task:  task to record
2342  * @flags: TRACE_RECORD_CMDLINE for recording comm
2343  *         TRACE_RECORD_TGID for recording tgid
2344  */
2345 void tracing_record_taskinfo(struct task_struct *task, int flags)
2346 {
2347         bool done;
2348
2349         if (tracing_record_taskinfo_skip(flags))
2350                 return;
2351
2352         /*
2353          * Record as much task information as possible. If some fail, continue
2354          * to try to record the others.
2355          */
2356         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2357         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2358
2359         /* If recording any information failed, retry again soon. */
2360         if (!done)
2361                 return;
2362
2363         __this_cpu_write(trace_taskinfo_save, false);
2364 }
2365
2366 /**
2367  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2368  *
2369  * @prev: previous task during sched_switch
2370  * @next: next task during sched_switch
2371  * @flags: TRACE_RECORD_CMDLINE for recording comm
2372  *         TRACE_RECORD_TGID for recording tgid
2373  */
2374 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2375                                           struct task_struct *next, int flags)
2376 {
2377         bool done;
2378
2379         if (tracing_record_taskinfo_skip(flags))
2380                 return;
2381
2382         /*
2383          * Record as much task information as possible. If some fail, continue
2384          * to try to record the others.
2385          */
2386         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2387         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2388         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2389         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2390
2391         /* If recording any information failed, retry again soon. */
2392         if (!done)
2393                 return;
2394
2395         __this_cpu_write(trace_taskinfo_save, false);
2396 }
2397
2398 /* Helpers to record a specific task information */
2399 void tracing_record_cmdline(struct task_struct *task)
2400 {
2401         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2402 }
2403
2404 void tracing_record_tgid(struct task_struct *task)
2405 {
2406         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2407 }
2408
2409 /*
2410  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2411  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2412  * simplifies those functions and keeps them in sync.
2413  */
2414 enum print_line_t trace_handle_return(struct trace_seq *s)
2415 {
2416         return trace_seq_has_overflowed(s) ?
2417                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2418 }
2419 EXPORT_SYMBOL_GPL(trace_handle_return);
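
/*
 * Canonical use in an event output callback (sketch; "my_event_trace" is
 * hypothetical):
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event data\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 *
 * Truncation in the trace_seq is then reported as TRACE_TYPE_PARTIAL_LINE
 * without every callback open coding the overflow check.
 */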
2420
2421 void
2422 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2423                              unsigned long flags, int pc)
2424 {
2425         struct task_struct *tsk = current;
2426
2427         entry->preempt_count            = pc & 0xff;
2428         entry->pid                      = (tsk) ? tsk->pid : 0;
2429         entry->type                     = type;
2430         entry->flags =
2431 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2432                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2433 #else
2434                 TRACE_FLAG_IRQS_NOSUPPORT |
2435 #endif
2436                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2437                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2438                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2439                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2440                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2441 }
2442 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2443
2444 struct ring_buffer_event *
2445 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2446                           int type,
2447                           unsigned long len,
2448                           unsigned long flags, int pc)
2449 {
2450         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2451 }
2452
2453 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2454 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2455 static int trace_buffered_event_ref;
2456
2457 /**
2458  * trace_buffered_event_enable - enable buffering events
2459  *
2460  * When events are being filtered, it is quicker to use a temporary
2461  * buffer to write the event data into if there's a likely chance
2462  * that it will not be committed. The discard of the ring buffer
2463  * is not as fast as committing, and is much slower than copying
2464  * a commit.
2465  *
2466  * When an event is to be filtered, allocate per cpu buffers to
2467  * write the event data into, and if the event is filtered and discarded
2468  * it is simply dropped, otherwise, the entire data is to be committed
2469  * in one shot.
2470  */
2471 void trace_buffered_event_enable(void)
2472 {
2473         struct ring_buffer_event *event;
2474         struct page *page;
2475         int cpu;
2476
2477         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2478
2479         if (trace_buffered_event_ref++)
2480                 return;
2481
2482         for_each_tracing_cpu(cpu) {
2483                 page = alloc_pages_node(cpu_to_node(cpu),
2484                                         GFP_KERNEL | __GFP_NORETRY, 0);
2485                 if (!page)
2486                         goto failed;
2487
2488                 event = page_address(page);
2489                 memset(event, 0, sizeof(*event));
2490
2491                 per_cpu(trace_buffered_event, cpu) = event;
2492
2493                 preempt_disable();
2494                 if (cpu == smp_processor_id() &&
2495                     this_cpu_read(trace_buffered_event) !=
2496                     per_cpu(trace_buffered_event, cpu))
2497                         WARN_ON_ONCE(1);
2498                 preempt_enable();
2499         }
2500
2501         return;
2502  failed:
2503         trace_buffered_event_disable();
2504 }
2505
2506 static void enable_trace_buffered_event(void *data)
2507 {
2508         /* Probably not needed, but do it anyway */
2509         smp_rmb();
2510         this_cpu_dec(trace_buffered_event_cnt);
2511 }
2512
2513 static void disable_trace_buffered_event(void *data)
2514 {
2515         this_cpu_inc(trace_buffered_event_cnt);
2516 }
2517
2518 /**
2519  * trace_buffered_event_disable - disable buffering events
2520  *
2521  * When a filter is removed, it is faster to not use the buffered
2522  * events, and to commit directly into the ring buffer. Free up
2523  * the temp buffers when there are no more users. This requires
2524  * special synchronization with current events.
2525  */
2526 void trace_buffered_event_disable(void)
2527 {
2528         int cpu;
2529
2530         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2531
2532         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2533                 return;
2534
2535         if (--trace_buffered_event_ref)
2536                 return;
2537
2538         preempt_disable();
2539         /* For each CPU, set the buffer as used. */
2540         smp_call_function_many(tracing_buffer_mask,
2541                                disable_trace_buffered_event, NULL, 1);
2542         preempt_enable();
2543
2544         /* Wait for all current users to finish */
2545         synchronize_rcu();
2546
2547         for_each_tracing_cpu(cpu) {
2548                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2549                 per_cpu(trace_buffered_event, cpu) = NULL;
2550         }
2551         /*
2552          * Make sure trace_buffered_event is NULL before clearing
2553          * trace_buffered_event_cnt.
2554          */
2555         smp_wmb();
2556
2557         preempt_disable();
2558         /* Do the work on each cpu */
2559         smp_call_function_many(tracing_buffer_mask,
2560                                enable_trace_buffered_event, NULL, 1);
2561         preempt_enable();
2562 }
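
/*
 * Sketch of the expected pairing: a user takes a reference when its filter
 * is attached and drops it when the filter is removed, always under
 * event_mutex (as the WARN_ON_ONCE()s above enforce):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 *
 * The enable/disable calls are reference counted via trace_buffered_event_ref,
 * so concurrent users share the same per-CPU pages.
 */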
2563
2564 static struct ring_buffer *temp_buffer;
2565
2566 struct ring_buffer_event *
2567 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2568                           struct trace_event_file *trace_file,
2569                           int type, unsigned long len,
2570                           unsigned long flags, int pc)
2571 {
2572         struct ring_buffer_event *entry;
2573         int val;
2574
2575         *current_rb = trace_file->tr->trace_buffer.buffer;
2576
2577         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2578              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2579             (entry = this_cpu_read(trace_buffered_event))) {
2580                 /* Try to use the per cpu buffer first */
2581                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2582                 if (val == 1) {
2583                         trace_event_setup(entry, type, flags, pc);
2584                         entry->array[0] = len;
2585                         return entry;
2586                 }
2587                 this_cpu_dec(trace_buffered_event_cnt);
2588         }
2589
2590         entry = __trace_buffer_lock_reserve(*current_rb,
2591                                             type, len, flags, pc);
2592         /*
2593          * If tracing is off, but we have triggers enabled
2594          * we still need to look at the event data. Use the temp_buffer
2595          * to store the trace event for the trigger to use. It's recursion
2596          * safe and will not be recorded anywhere.
2597          */
2598         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2599                 *current_rb = temp_buffer;
2600                 entry = __trace_buffer_lock_reserve(*current_rb,
2601                                                     type, len, flags, pc);
2602         }
2603         return entry;
2604 }
2605 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2606
2607 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2608 static DEFINE_MUTEX(tracepoint_printk_mutex);
2609
2610 static void output_printk(struct trace_event_buffer *fbuffer)
2611 {
2612         struct trace_event_call *event_call;
2613         struct trace_event *event;
2614         unsigned long flags;
2615         struct trace_iterator *iter = tracepoint_print_iter;
2616
2617         /* We should never get here if iter is NULL */
2618         if (WARN_ON_ONCE(!iter))
2619                 return;
2620
2621         event_call = fbuffer->trace_file->event_call;
2622         if (!event_call || !event_call->event.funcs ||
2623             !event_call->event.funcs->trace)
2624                 return;
2625
2626         event = &fbuffer->trace_file->event_call->event;
2627
2628         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2629         trace_seq_init(&iter->seq);
2630         iter->ent = fbuffer->entry;
2631         event_call->event.funcs->trace(iter, 0, event);
2632         trace_seq_putc(&iter->seq, 0);
2633         printk("%s", iter->seq.buffer);
2634
2635         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2636 }
2637
2638 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2639                              void __user *buffer, size_t *lenp,
2640                              loff_t *ppos)
2641 {
2642         int save_tracepoint_printk;
2643         int ret;
2644
2645         mutex_lock(&tracepoint_printk_mutex);
2646         save_tracepoint_printk = tracepoint_printk;
2647
2648         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2649
2650         /*
2651          * This will force exiting early, as tracepoint_printk
2652          * is always zero when tracepoint_print_iter is not allocated.
2653          */
2654         if (!tracepoint_print_iter)
2655                 tracepoint_printk = 0;
2656
2657         if (save_tracepoint_printk == tracepoint_printk)
2658                 goto out;
2659
2660         if (tracepoint_printk)
2661                 static_key_enable(&tracepoint_printk_key.key);
2662         else
2663                 static_key_disable(&tracepoint_printk_key.key);
2664
2665  out:
2666         mutex_unlock(&tracepoint_printk_mutex);
2667
2668         return ret;
2669 }
2670
2671 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2672 {
2673         if (static_key_false(&tracepoint_printk_key.key))
2674                 output_printk(fbuffer);
2675
2676         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2677                                     fbuffer->event, fbuffer->entry,
2678                                     fbuffer->flags, fbuffer->pc);
2679 }
2680 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2681
2682 /*
2683  * Skip 3:
2684  *
2685  *   trace_buffer_unlock_commit_regs()
2686  *   trace_event_buffer_commit()
2687  *   trace_event_raw_event_xxx()
2688  */
2689 # define STACK_SKIP 3
2690
2691 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2692                                      struct ring_buffer *buffer,
2693                                      struct ring_buffer_event *event,
2694                                      unsigned long flags, int pc,
2695                                      struct pt_regs *regs)
2696 {
2697         __buffer_unlock_commit(buffer, event);
2698
2699         /*
2700          * If regs is not set, then skip the necessary functions.
2701          * Note, we can still get here via blktrace, wakeup tracer
2702          * and mmiotrace, but that's ok if they lose a function or
2703          * two. They are not that meaningful.
2704          */
2705         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2706         ftrace_trace_userstack(buffer, flags, pc);
2707 }
2708
2709 /*
2710  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2711  */
2712 void
2713 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2714                                    struct ring_buffer_event *event)
2715 {
2716         __buffer_unlock_commit(buffer, event);
2717 }
2718
2719 static void
2720 trace_process_export(struct trace_export *export,
2721                struct ring_buffer_event *event)
2722 {
2723         struct trace_entry *entry;
2724         unsigned int size = 0;
2725
2726         entry = ring_buffer_event_data(event);
2727         size = ring_buffer_event_length(event);
2728         export->write(export, entry, size);
2729 }
2730
2731 static DEFINE_MUTEX(ftrace_export_lock);
2732
2733 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2734
2735 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2736
2737 static inline void ftrace_exports_enable(void)
2738 {
2739         static_branch_enable(&ftrace_exports_enabled);
2740 }
2741
2742 static inline void ftrace_exports_disable(void)
2743 {
2744         static_branch_disable(&ftrace_exports_enabled);
2745 }
2746
2747 static void ftrace_exports(struct ring_buffer_event *event)
2748 {
2749         struct trace_export *export;
2750
2751         preempt_disable_notrace();
2752
2753         export = rcu_dereference_raw_check(ftrace_exports_list);
2754         while (export) {
2755                 trace_process_export(export, event);
2756                 export = rcu_dereference_raw_check(export->next);
2757         }
2758
2759         preempt_enable_notrace();
2760 }
2761
2762 static inline void
2763 add_trace_export(struct trace_export **list, struct trace_export *export)
2764 {
2765         rcu_assign_pointer(export->next, *list);
2766         /*
2767          * We are entering export into the list but another
2768          * CPU might be walking that list. We need to make sure
2769          * the export->next pointer is valid before another CPU sees
2770          * the export pointer included into the list.
2771          */
2772         rcu_assign_pointer(*list, export);
2773 }
2774
2775 static inline int
2776 rm_trace_export(struct trace_export **list, struct trace_export *export)
2777 {
2778         struct trace_export **p;
2779
2780         for (p = list; *p != NULL; p = &(*p)->next)
2781                 if (*p == export)
2782                         break;
2783
2784         if (*p != export)
2785                 return -1;
2786
2787         rcu_assign_pointer(*p, (*p)->next);
2788
2789         return 0;
2790 }
2791
2792 static inline void
2793 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2794 {
2795         if (*list == NULL)
2796                 ftrace_exports_enable();
2797
2798         add_trace_export(list, export);
2799 }
2800
2801 static inline int
2802 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804         int ret;
2805
2806         ret = rm_trace_export(list, export);
2807         if (*list == NULL)
2808                 ftrace_exports_disable();
2809
2810         return ret;
2811 }
2812
2813 int register_ftrace_export(struct trace_export *export)
2814 {
2815         if (WARN_ON_ONCE(!export->write))
2816                 return -1;
2817
2818         mutex_lock(&ftrace_export_lock);
2819
2820         add_ftrace_export(&ftrace_exports_list, export);
2821
2822         mutex_unlock(&ftrace_export_lock);
2823
2824         return 0;
2825 }
2826 EXPORT_SYMBOL_GPL(register_ftrace_export);
2827
2828 int unregister_ftrace_export(struct trace_export *export)
2829 {
2830         int ret;
2831
2832         mutex_lock(&ftrace_export_lock);
2833
2834         ret = rm_ftrace_export(&ftrace_exports_list, export);
2835
2836         mutex_unlock(&ftrace_export_lock);
2837
2838         return ret;
2839 }
2840 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
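
/*
 * Sketch of a trace_export consumer (the "my_*" names are hypothetical; the
 * callback signature matches the trace_process_export() call above, which
 * hands over the raw trace entry and its length):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		...forward the entry bytes to some transport...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */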
2841
2842 void
2843 trace_function(struct trace_array *tr,
2844                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2845                int pc)
2846 {
2847         struct trace_event_call *call = &event_function;
2848         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2849         struct ring_buffer_event *event;
2850         struct ftrace_entry *entry;
2851
2852         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2853                                             flags, pc);
2854         if (!event)
2855                 return;
2856         entry   = ring_buffer_event_data(event);
2857         entry->ip                       = ip;
2858         entry->parent_ip                = parent_ip;
2859
2860         if (!call_filter_check_discard(call, entry, buffer, event)) {
2861                 if (static_branch_unlikely(&ftrace_exports_enabled))
2862                         ftrace_exports(event);
2863                 __buffer_unlock_commit(buffer, event);
2864         }
2865 }
2866
2867 #ifdef CONFIG_STACKTRACE
2868
2869 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2870 #define FTRACE_KSTACK_NESTING   4
2871
2872 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2873
2874 struct ftrace_stack {
2875         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2876 };
2877
2878
2879 struct ftrace_stacks {
2880         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2881 };
2882
2883 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2884 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
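
/*
 * Sizing note, assuming 4 KiB pages and 8-byte longs: FTRACE_KSTACK_ENTRIES
 * works out to 1024, so each nesting level holds an 8 KiB stack snapshot and
 * the per-CPU ftrace_stacks footprint is PAGE_SIZE * sizeof(unsigned long),
 * i.e. 32 KiB per CPU.
 */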
2885
2886 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2887                                  unsigned long flags,
2888                                  int skip, int pc, struct pt_regs *regs)
2889 {
2890         struct trace_event_call *call = &event_kernel_stack;
2891         struct ring_buffer_event *event;
2892         unsigned int size, nr_entries;
2893         struct ftrace_stack *fstack;
2894         struct stack_entry *entry;
2895         int stackidx;
2896
2897         /*
2898          * Add one, for this function and the call to stack_trace_save().
2899          * If regs is set, then these functions will not be in the way.
2900          */
2901 #ifndef CONFIG_UNWINDER_ORC
2902         if (!regs)
2903                 skip++;
2904 #endif
2905
2906         /*
2907          * Since events can happen in NMIs there's no safe way to
2908          * share a single per-CPU stack buffer. Instead, reserve one of
2909          * the ftrace_stacks nesting slots; an interrupt or NMI arriving
2910          * while this one is held will simply use the next slot.
2911          */
2912         preempt_disable_notrace();
2913
2914         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2915
2916         /* This should never happen. If it does, yell once and skip */
2917         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2918                 goto out;
2919
2920         /*
2921          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2922          * interrupt will either see the value pre increment or post
2923          * increment. If the interrupt happens pre increment it will have
2924          * restored the counter when it returns.  We just need a barrier to
2925          * keep gcc from moving things around.
2926          */
2927         barrier();
2928
2929         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2930         size = ARRAY_SIZE(fstack->calls);
2931
2932         if (regs) {
2933                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2934                                                    size, skip);
2935         } else {
2936                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2937         }
2938
2939         size = nr_entries * sizeof(unsigned long);
2940         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2941                                             sizeof(*entry) + size, flags, pc);
2942         if (!event)
2943                 goto out;
2944         entry = ring_buffer_event_data(event);
2945
2946         memcpy(&entry->caller, fstack->calls, size);
2947         entry->size = nr_entries;
2948
2949         if (!call_filter_check_discard(call, entry, buffer, event))
2950                 __buffer_unlock_commit(buffer, event);
2951
2952  out:
2953         /* Again, don't let gcc optimize things here */
2954         barrier();
2955         __this_cpu_dec(ftrace_stack_reserve);
2956         preempt_enable_notrace();
2957
2958 }
2959
2960 static inline void ftrace_trace_stack(struct trace_array *tr,
2961                                       struct ring_buffer *buffer,
2962                                       unsigned long flags,
2963                                       int skip, int pc, struct pt_regs *regs)
2964 {
2965         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2966                 return;
2967
2968         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2969 }
2970
2971 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2972                    int pc)
2973 {
2974         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2975
2976         if (rcu_is_watching()) {
2977                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2978                 return;
2979         }
2980
2981         /*
2982          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2983          * but if the above rcu_is_watching() failed, then the NMI
2984          * triggered someplace critical, and rcu_irq_enter() should
2985          * not be called from NMI.
2986          */
2987         if (unlikely(in_nmi()))
2988                 return;
2989
2990         rcu_irq_enter_irqson();
2991         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2992         rcu_irq_exit_irqson();
2993 }
2994
2995 /**
2996  * trace_dump_stack - record a stack back trace in the trace buffer
2997  * @skip: Number of functions to skip (helper handlers)
2998  */
2999 void trace_dump_stack(int skip)
3000 {
3001         unsigned long flags;
3002
3003         if (tracing_disabled || tracing_selftest_running)
3004                 return;
3005
3006         local_save_flags(flags);
3007
3008 #ifndef CONFIG_UNWINDER_ORC
3009         /* Skip 1 to skip this function. */
3010         skip++;
3011 #endif
3012         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
3013                              flags, skip, preempt_count(), NULL);
3014 }
3015 EXPORT_SYMBOL_GPL(trace_dump_stack);
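
/*
 * Example: dropping trace_dump_stack(0) into a suspect code path records the
 * current kernel stack as a TRACE_STACK event in the global trace buffer,
 * where it is printed along with the surrounding trace.
 */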
3016
3017 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3018 static DEFINE_PER_CPU(int, user_stack_count);
3019
3020 static void
3021 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
3022 {
3023         struct trace_event_call *call = &event_user_stack;
3024         struct ring_buffer_event *event;
3025         struct userstack_entry *entry;
3026
3027         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3028                 return;
3029
3030         /*
3031          * NMIs cannot handle page faults, even with fixups.
3032          * Saving the user stack can (and often does) fault.
3033          */
3034         if (unlikely(in_nmi()))
3035                 return;
3036
3037         /*
3038          * prevent recursion, since the user stack tracing may
3039          * trigger other kernel events.
3040          */
3041         preempt_disable();
3042         if (__this_cpu_read(user_stack_count))
3043                 goto out;
3044
3045         __this_cpu_inc(user_stack_count);
3046
3047         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3048                                             sizeof(*entry), flags, pc);
3049         if (!event)
3050                 goto out_drop_count;
3051         entry   = ring_buffer_event_data(event);
3052
3053         entry->tgid             = current->tgid;
3054         memset(&entry->caller, 0, sizeof(entry->caller));
3055
3056         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3057         if (!call_filter_check_discard(call, entry, buffer, event))
3058                 __buffer_unlock_commit(buffer, event);
3059
3060  out_drop_count:
3061         __this_cpu_dec(user_stack_count);
3062  out:
3063         preempt_enable();
3064 }
3065 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3066 static void ftrace_trace_userstack(struct ring_buffer *buffer,
3067                                    unsigned long flags, int pc)
3068 {
3069 }
3070 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3071
3072 #endif /* CONFIG_STACKTRACE */
3073
3074 /* created for use with alloc_percpu */
3075 struct trace_buffer_struct {
3076         int nesting;
3077         char buffer[4][TRACE_BUF_SIZE];
3078 };
3079
3080 static struct trace_buffer_struct *trace_percpu_buffer;
3081
3082 /*
3083  * This allows for lockless recording.  If we're nested too deeply, then
3084  * this returns NULL.
3085  */
3086 static char *get_trace_buf(void)
3087 {
3088         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3089
3090         if (!buffer || buffer->nesting >= 4)
3091                 return NULL;
3092
3093         buffer->nesting++;
3094
3095         /* Interrupts must see nesting incremented before we use the buffer */
3096         barrier();
3097         return &buffer->buffer[buffer->nesting][0];
3098 }
3099
3100 static void put_trace_buf(void)
3101 {
3102         /* Don't let the decrement of nesting leak before this */
3103         barrier();
3104         this_cpu_dec(trace_percpu_buffer->nesting);
3105 }
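
/*
 * The nesting counter allows up to four users per CPU, enough for nesting
 * across task, softirq, hardirq and NMI context, so a nested trace_printk()
 * gets its own scratch line instead of corrupting the one belonging to the
 * context it interrupted.
 */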
3106
3107 static int alloc_percpu_trace_buffer(void)
3108 {
3109         struct trace_buffer_struct *buffers;
3110
3111         buffers = alloc_percpu(struct trace_buffer_struct);
3112         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3113                 return -ENOMEM;
3114
3115         trace_percpu_buffer = buffers;
3116         return 0;
3117 }
3118
3119 static int buffers_allocated;
3120
3121 void trace_printk_init_buffers(void)
3122 {
3123         if (buffers_allocated)
3124                 return;
3125
3126         if (alloc_percpu_trace_buffer())
3127                 return;
3128
3129         /* trace_printk() is for debug use only. Don't use it in production. */
3130
3131         pr_warn("\n");
3132         pr_warn("**********************************************************\n");
3133         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3134         pr_warn("**                                                      **\n");
3135         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3136         pr_warn("**                                                      **\n");
3137         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3138         pr_warn("** unsafe for production use.                           **\n");
3139         pr_warn("**                                                      **\n");
3140         pr_warn("** If you see this message and you are not debugging    **\n");
3141         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3142         pr_warn("**                                                      **\n");
3143         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3144         pr_warn("**********************************************************\n");
3145
3146         /* Expand the buffers to set size */
3147         tracing_update_buffers();
3148
3149         buffers_allocated = 1;
3150
3151         /*
3152          * trace_printk_init_buffers() can be called by modules.
3153          * If that happens, then we need to start cmdline recording
3154          * directly here. If the global_trace.buffer is already
3155          * allocated here, then this was called by module code.
3156          */
3157         if (global_trace.trace_buffer.buffer)
3158                 tracing_start_cmdline_record();
3159 }
3160 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
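
/*
 * For reference, the debug-only usage that triggers this allocation is
 * simply (arguments here are only an example):
 *
 *	trace_printk("hit %s: flags=%lx\n", __func__, flags);
 *
 * With a constant format string the arguments are stored in binary form and
 * the format pointer is recorded, via trace_vbprintk() below.
 */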
3161
3162 void trace_printk_start_comm(void)
3163 {
3164         /* Start tracing comms if trace printk is set */
3165         if (!buffers_allocated)
3166                 return;
3167         tracing_start_cmdline_record();
3168 }
3169
3170 static void trace_printk_start_stop_comm(int enabled)
3171 {
3172         if (!buffers_allocated)
3173                 return;
3174
3175         if (enabled)
3176                 tracing_start_cmdline_record();
3177         else
3178                 tracing_stop_cmdline_record();
3179 }
3180
3181 /**
3182  * trace_vbprintk - write binary msg to tracing buffer
3183  * @ip:    The address of the caller
3184  * @fmt:   The string format to write to the buffer
3185  * @args:  Arguments for @fmt
3186  */
3187 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3188 {
3189         struct trace_event_call *call = &event_bprint;
3190         struct ring_buffer_event *event;
3191         struct ring_buffer *buffer;
3192         struct trace_array *tr = &global_trace;
3193         struct bprint_entry *entry;
3194         unsigned long flags;
3195         char *tbuffer;
3196         int len = 0, size, pc;
3197
3198         if (unlikely(tracing_selftest_running || tracing_disabled))
3199                 return 0;
3200
3201         /* Don't pollute graph traces with trace_vprintk internals */
3202         pause_graph_tracing();
3203
3204         pc = preempt_count();
3205         preempt_disable_notrace();
3206
3207         tbuffer = get_trace_buf();
3208         if (!tbuffer) {
3209                 len = 0;
3210                 goto out_nobuffer;
3211         }
3212
3213         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3214
3215         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3216                 goto out;
3217
3218         local_save_flags(flags);
3219         size = sizeof(*entry) + sizeof(u32) * len;
3220         buffer = tr->trace_buffer.buffer;
3221         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3222                                             flags, pc);
3223         if (!event)
3224                 goto out;
3225         entry = ring_buffer_event_data(event);
3226         entry->ip                       = ip;
3227         entry->fmt                      = fmt;
3228
3229         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3230         if (!call_filter_check_discard(call, entry, buffer, event)) {
3231                 __buffer_unlock_commit(buffer, event);
3232                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3233         }
3234
3235 out:
3236         put_trace_buf();
3237
3238 out_nobuffer:
3239         preempt_enable_notrace();
3240         unpause_graph_tracing();
3241
3242         return len;
3243 }
3244 EXPORT_SYMBOL_GPL(trace_vbprintk);
3245
3246 __printf(3, 0)
3247 static int
3248 __trace_array_vprintk(struct ring_buffer *buffer,
3249                       unsigned long ip, const char *fmt, va_list args)
3250 {
3251         struct trace_event_call *call = &event_print;
3252         struct ring_buffer_event *event;
3253         int len = 0, size, pc;
3254         struct print_entry *entry;
3255         unsigned long flags;
3256         char *tbuffer;
3257
3258         if (tracing_disabled || tracing_selftest_running)
3259                 return 0;
3260
3261         /* Don't pollute graph traces with trace_vprintk internals */
3262         pause_graph_tracing();
3263
3264         pc = preempt_count();
3265         preempt_disable_notrace();
3266
3267
3268         tbuffer = get_trace_buf();
3269         if (!tbuffer) {
3270                 len = 0;
3271                 goto out_nobuffer;
3272         }
3273
3274         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3275
3276         local_save_flags(flags);
3277         size = sizeof(*entry) + len + 1;
3278         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3279                                             flags, pc);
3280         if (!event)
3281                 goto out;
3282         entry = ring_buffer_event_data(event);
3283         entry->ip = ip;
3284
3285         memcpy(&entry->buf, tbuffer, len + 1);
3286         if (!call_filter_check_discard(call, entry, buffer, event)) {
3287                 __buffer_unlock_commit(buffer, event);
3288                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3289         }
3290
3291 out:
3292         put_trace_buf();
3293
3294 out_nobuffer:
3295         preempt_enable_notrace();
3296         unpause_graph_tracing();
3297
3298         return len;
3299 }
3300
3301 __printf(3, 0)
3302 int trace_array_vprintk(struct trace_array *tr,
3303                         unsigned long ip, const char *fmt, va_list args)
3304 {
3305         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3306 }
3307
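/*
 * Editor's note (not part of the original file): trace_array_printk() lets
 * kernel code write printf-style messages into a specific trace_array
 * instance. A purely illustrative sketch, assuming the caller already has a
 * valid instance pointer (trace_array_get_by_name() is one way to obtain
 * one, if present in this tree):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr)
 *		trace_array_printk(tr, _THIS_IP_, "value=%d\n", 42);
 *
 * It returns the number of bytes written, 0 when the printk trace option is
 * disabled, or -ENOENT when @tr is NULL.
 */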
3308 __printf(3, 0)
3309 int trace_array_printk(struct trace_array *tr,
3310                        unsigned long ip, const char *fmt, ...)
3311 {
3312         int ret;
3313         va_list ap;
3314
3315         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3316                 return 0;
3317
3318         if (!tr)
3319                 return -ENOENT;
3320
3321         va_start(ap, fmt);
3322         ret = trace_array_vprintk(tr, ip, fmt, ap);
3323         va_end(ap);
3324         return ret;
3325 }
3326 EXPORT_SYMBOL_GPL(trace_array_printk);
3327
3328 __printf(3, 4)
3329 int trace_array_printk_buf(struct ring_buffer *buffer,
3330                            unsigned long ip, const char *fmt, ...)
3331 {
3332         int ret;
3333         va_list ap;
3334
3335         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3336                 return 0;
3337
3338         va_start(ap, fmt);
3339         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3340         va_end(ap);
3341         return ret;
3342 }
3343
3344 __printf(2, 0)
3345 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3346 {
3347         return trace_array_vprintk(&global_trace, ip, fmt, args);
3348 }
3349 EXPORT_SYMBOL_GPL(trace_vprintk);
3350
3351 static void trace_iterator_increment(struct trace_iterator *iter)
3352 {
3353         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3354
3355         iter->idx++;
3356         if (buf_iter)
3357                 ring_buffer_read(buf_iter, NULL);
3358 }
3359
3360 static struct trace_entry *
3361 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3362                 unsigned long *lost_events)
3363 {
3364         struct ring_buffer_event *event;
3365         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3366
3367         if (buf_iter)
3368                 event = ring_buffer_iter_peek(buf_iter, ts);
3369         else
3370                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3371                                          lost_events);
3372
3373         if (event) {
3374                 iter->ent_size = ring_buffer_event_length(event);
3375                 return ring_buffer_event_data(event);
3376         }
3377         iter->ent_size = 0;
3378         return NULL;
3379 }
3380
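/*
 * Editor's note (not part of the original file): __find_next_entry() merges
 * the per-CPU ring buffers by peeking at the next event on each tracing CPU
 * and returning the one with the smallest timestamp, together with its CPU,
 * timestamp and lost-event count. For a per-CPU trace file it peeks at that
 * single CPU only.
 */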
3381 static struct trace_entry *
3382 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3383                   unsigned long *missing_events, u64 *ent_ts)
3384 {
3385         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3386         struct trace_entry *ent, *next = NULL;
3387         unsigned long lost_events = 0, next_lost = 0;
3388         int cpu_file = iter->cpu_file;
3389         u64 next_ts = 0, ts;
3390         int next_cpu = -1;
3391         int next_size = 0;
3392         int cpu;
3393
3394         /*
3395          * If we are in a per_cpu trace file, don't bother iterating over
3396          * all CPUs; peek at that CPU directly.
3397          */
3398         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3399                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3400                         return NULL;
3401                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3402                 if (ent_cpu)
3403                         *ent_cpu = cpu_file;
3404
3405                 return ent;
3406         }
3407
3408         for_each_tracing_cpu(cpu) {
3409
3410                 if (ring_buffer_empty_cpu(buffer, cpu))
3411                         continue;
3412
3413                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3414
3415                 /*
3416                  * Pick the entry with the smallest timestamp:
3417                  */
3418                 if (ent && (!next || ts < next_ts)) {
3419                         next = ent;
3420                         next_cpu = cpu;
3421                         next_ts = ts;
3422                         next_lost = lost_events;
3423                         next_size = iter->ent_size;
3424                 }
3425         }
3426
3427         iter->ent_size = next_size;
3428
3429         if (ent_cpu)
3430                 *ent_cpu = next_cpu;
3431
3432         if (ent_ts)
3433                 *ent_ts = next_ts;
3434
3435         if (missing_events)
3436                 *missing_events = next_lost;
3437
3438         return next;
3439 }
3440
3441 /* Find the next real entry, without updating the iterator itself */
3442 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3443                                           int *ent_cpu, u64 *ent_ts)
3444 {
3445         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3446 }
3447
3448 /* Find the next real entry, and increment the iterator to the next entry */
3449 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3450 {
3451         iter->ent = __find_next_entry(iter, &iter->cpu,
3452                                       &iter->lost_events, &iter->ts);
3453
3454         if (iter->ent)
3455                 trace_iterator_increment(iter);
3456
3457         return iter->ent ? iter : NULL;
3458 }
3459
3460 static void trace_consume(struct trace_iterator *iter)
3461 {
3462         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3463                             &iter->lost_events);
3464 }
3465
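/*
 * Editor's note (not part of the original file): s_next() is the seq_file
 * ->next callback for the "trace" file. It advances the iterator until its
 * index reaches the requested position, returning NULL when the buffers are
 * exhausted or when the position would have to move backwards.
 */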
3466 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3467 {
3468         struct trace_iterator *iter = m->private;
3469         int i = (int)*pos;
3470         void *ent;
3471
3472         WARN_ON_ONCE(iter->leftover);
3473
3474         (*pos)++;
3475
3476         /* can't go backwards */
3477         if (iter->idx > i)
3478                 return NULL;
3479
3480         if (iter->idx < 0)
3481                 ent = trace_find_next_entry_inc(iter);
3482         else
3483                 ent = iter;
3484
3485         while (ent && iter->idx < i)
3486                 ent = trace_find_next_entry_inc(iter);
3487
3488         iter->pos = *pos;
3489
3490         return ent;
3491 }
3492
3493 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3494 {
3495         struct ring_buffer_event *event;
3496         struct ring_buffer_iter *buf_iter;
3497         unsigned long entries = 0;
3498         u64 ts;
3499
3500         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3501
3502         buf_iter = trace_buffer_iter(iter, cpu);
3503         if (!buf_iter)
3504                 return;
3505
3506         ring_buffer_iter_reset(buf_iter);
3507
3508         /*
3509          * With the max latency tracers, it is possible that a reset
3510          * never took place on a CPU. This shows up as a timestamp
3511          * that is before the start of the buffer.
3512          */
3513         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3514                 if (ts >= iter->trace_buffer->time_start)
3515                         break;
3516                 entries++;
3517                 ring_buffer_read(buf_iter, NULL);
3518         }
3519
3520         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3521 }
3522
3523 /*
3524  * The current tracer is copied to avoid taking a global
3525  * lock all around.
3526  */
3527 static void *s_start(struct seq_file *m, loff_t *pos)
3528 {
3529         struct trace_iterator *iter = m->private;
3530         struct trace_array *tr = iter->tr;
3531         int cpu_file = iter->cpu_file;
3532         void *p = NULL;
3533         loff_t l = 0;
3534         int cpu;
3535
3536         /*
3537          * copy the tracer to avoid using a global lock all around.
3538          * iter->trace is a copy of current_trace; the pointer to the
3539          * name may be used instead of a strcmp(), as iter->trace->name
3540          * will point to the same string as current_trace->name.
3541          */
3542         mutex_lock(&trace_types_lock);
3543         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3544                 *iter->trace = *tr->current_trace;
3545         mutex_unlock(&trace_types_lock);
3546
3547 #ifdef CONFIG_TRACER_MAX_TRACE
3548         if (iter->snapshot && iter->trace->use_max_tr)
3549                 return ERR_PTR(-EBUSY);
3550 #endif
3551
3552         if (!iter->snapshot)
3553                 atomic_inc(&trace_record_taskinfo_disabled);
3554
3555         if (*pos != iter->pos) {
3556                 iter->ent = NULL;
3557                 iter->cpu = 0;
3558                 iter->idx = -1;
3559
3560                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3561                         for_each_tracing_cpu(cpu)
3562                                 tracing_iter_reset(iter, cpu);
3563                 } else
3564                         tracing_iter_reset(iter, cpu_file);
3565
3566                 iter->leftover = 0;
3567                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3568                         ;
3569
3570         } else {
3571                 /*
3572                  * If we overflowed the seq_file before, then we want
3573                  * to just reuse the trace_seq buffer again.
3574                  */
3575                 if (iter->leftover)
3576                         p = iter;
3577                 else {
3578                         l = *pos - 1;
3579                         p = s_next(m, p, &l);
3580                 }
3581         }
3582
3583         trace_event_read_lock();
3584         trace_access_lock(cpu_file);
3585         return p;
3586 }
3587
3588 static void s_stop(struct seq_file *m, void *p)
3589 {
3590         struct trace_iterator *iter = m->private;
3591
3592 #ifdef CONFIG_TRACER_MAX_TRACE
3593         if (iter->snapshot && iter->trace->use_max_tr)
3594                 return;
3595 #endif
3596
3597         if (!iter->snapshot)
3598                 atomic_dec(&trace_record_taskinfo_disabled);
3599
3600         trace_access_unlock(iter->cpu_file);
3601         trace_event_read_unlock();
3602 }
3603
3604 static void
3605 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3606                       unsigned long *entries, int cpu)
3607 {
3608         unsigned long count;
3609
3610         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3611         /*
3612          * If this buffer has skipped entries, then we hold all
3613          * entries for the trace and we need to ignore the
3614          * ones before the time stamp.
3615          */
3616         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3617                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3618                 /* total is the same as the entries */
3619                 *total = count;
3620         } else
3621                 *total = count +
3622                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3623         *entries = count;
3624 }
3625
3626 static void
3627 get_total_entries(struct trace_buffer *buf,
3628                   unsigned long *total, unsigned long *entries)
3629 {
3630         unsigned long t, e;
3631         int cpu;
3632
3633         *total = 0;
3634         *entries = 0;
3635
3636         for_each_tracing_cpu(cpu) {
3637                 get_total_entries_cpu(buf, &t, &e, cpu);
3638                 *total += t;
3639                 *entries += e;
3640         }
3641 }
3642
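/*
 * Editor's note (not part of the original file): the two helpers below
 * report how many entries are currently held in a trace array (falling back
 * to global_trace when @tr is NULL), either for one CPU or summed over all
 * tracing CPUs. Overwritten (overrun) events are not counted.
 */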
3643 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3644 {
3645         unsigned long total, entries;
3646
3647         if (!tr)
3648                 tr = &global_trace;
3649
3650         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3651
3652         return entries;
3653 }
3654
3655 unsigned long trace_total_entries(struct trace_array *tr)
3656 {
3657         unsigned long total, entries;
3658
3659         if (!tr)
3660                 tr = &global_trace;
3661
3662         get_total_entries(&tr->trace_buffer, &total, &entries);
3663
3664         return entries;
3665 }
3666
3667 static void print_lat_help_header(struct seq_file *m)
3668 {
3669         seq_puts(m, "#                  _------=> CPU#            \n"
3670                     "#                 / _-----=> irqs-off        \n"
3671                     "#                | / _----=> need-resched    \n"
3672                     "#                || / _---=> hardirq/softirq \n"
3673                     "#                ||| / _--=> preempt-depth   \n"
3674                     "#                |||| /     delay            \n"
3675                     "#  cmd     pid   ||||| time  |   caller      \n"
3676                     "#     \\   /      |||||  \\    |   /         \n");
3677 }
3678
3679 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3680 {
3681         unsigned long total;
3682         unsigned long entries;
3683
3684         get_total_entries(buf, &total, &entries);
3685         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3686                    entries, total, num_online_cpus());
3687         seq_puts(m, "#\n");
3688 }
3689
3690 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3691                                    unsigned int flags)
3692 {
3693         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3694
3695         print_event_info(buf, m);
3696
3697         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3698         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3699 }
3700
3701 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3702                                        unsigned int flags)
3703 {
3704         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3705         const char *space = "          ";
3706         int prec = tgid ? 10 : 2;
3707
3708         print_event_info(buf, m);
3709
3710         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3711         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3712         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3713         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3714         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3715         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3716         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3717 }
3718
3719 void
3720 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3721 {
3722         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3723         struct trace_buffer *buf = iter->trace_buffer;
3724         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3725         struct tracer *type = iter->trace;
3726         unsigned long entries;
3727         unsigned long total;
3728         const char *name = "preemption";
3729
3730         name = type->name;
3731
3732         get_total_entries(buf, &total, &entries);
3733
3734         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3735                    name, UTS_RELEASE);
3736         seq_puts(m, "# -----------------------------------"
3737                  "---------------------------------\n");
3738         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3739                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3740                    nsecs_to_usecs(data->saved_latency),
3741                    entries,
3742                    total,
3743                    buf->cpu,
3744 #if defined(CONFIG_PREEMPT_NONE)
3745                    "server",
3746 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3747                    "desktop",
3748 #elif defined(CONFIG_PREEMPT)
3749                    "preempt",
3750 #elif defined(CONFIG_PREEMPT_RT)
3751                    "preempt_rt",
3752 #else
3753                    "unknown",
3754 #endif
3755                    /* These are reserved for later use */
3756                    0, 0, 0, 0);
3757 #ifdef CONFIG_SMP
3758         seq_printf(m, " #P:%d)\n", num_online_cpus());
3759 #else
3760         seq_puts(m, ")\n");
3761 #endif
3762         seq_puts(m, "#    -----------------\n");
3763         seq_printf(m, "#    | task: %.16s-%d "
3764                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3765                    data->comm, data->pid,
3766                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3767                    data->policy, data->rt_priority);
3768         seq_puts(m, "#    -----------------\n");
3769
3770         if (data->critical_start) {
3771                 seq_puts(m, "#  => started at: ");
3772                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3773                 trace_print_seq(m, &iter->seq);
3774                 seq_puts(m, "\n#  => ended at:   ");
3775                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3776                 trace_print_seq(m, &iter->seq);
3777                 seq_puts(m, "\n#\n");
3778         }
3779
3780         seq_puts(m, "#\n");
3781 }
3782
3783 static void test_cpu_buff_start(struct trace_iterator *iter)
3784 {
3785         struct trace_seq *s = &iter->seq;
3786         struct trace_array *tr = iter->tr;
3787
3788         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3789                 return;
3790
3791         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3792                 return;
3793
3794         if (cpumask_available(iter->started) &&
3795             cpumask_test_cpu(iter->cpu, iter->started))
3796                 return;
3797
3798         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3799                 return;
3800
3801         if (cpumask_available(iter->started))
3802                 cpumask_set_cpu(iter->cpu, iter->started);
3803
3804         /* Don't print started cpu buffer for the first entry of the trace */
3805         if (iter->idx > 1)
3806                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3807                                 iter->cpu);
3808 }
3809
3810 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3811 {
3812         struct trace_array *tr = iter->tr;
3813         struct trace_seq *s = &iter->seq;
3814         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3815         struct trace_entry *entry;
3816         struct trace_event *event;
3817
3818         entry = iter->ent;
3819
3820         test_cpu_buff_start(iter);
3821
3822         event = ftrace_find_event(entry->type);
3823
3824         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3825                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3826                         trace_print_lat_context(iter);
3827                 else
3828                         trace_print_context(iter);
3829         }
3830
3831         if (trace_seq_has_overflowed(s))
3832                 return TRACE_TYPE_PARTIAL_LINE;
3833
3834         if (event)
3835                 return event->funcs->trace(iter, sym_flags, event);
3836
3837         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3838
3839         return trace_handle_return(s);
3840 }
3841
3842 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3843 {
3844         struct trace_array *tr = iter->tr;
3845         struct trace_seq *s = &iter->seq;
3846         struct trace_entry *entry;
3847         struct trace_event *event;
3848
3849         entry = iter->ent;
3850
3851         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3852                 trace_seq_printf(s, "%d %d %llu ",
3853                                  entry->pid, iter->cpu, iter->ts);
3854
3855         if (trace_seq_has_overflowed(s))
3856                 return TRACE_TYPE_PARTIAL_LINE;
3857
3858         event = ftrace_find_event(entry->type);
3859         if (event)
3860                 return event->funcs->raw(iter, 0, event);
3861
3862         trace_seq_printf(s, "%d ?\n", entry->type);
3863
3864         return trace_handle_return(s);
3865 }
3866
3867 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3868 {
3869         struct trace_array *tr = iter->tr;
3870         struct trace_seq *s = &iter->seq;
3871         unsigned char newline = '\n';
3872         struct trace_entry *entry;
3873         struct trace_event *event;
3874
3875         entry = iter->ent;
3876
3877         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3878                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3879                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3880                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3881                 if (trace_seq_has_overflowed(s))
3882                         return TRACE_TYPE_PARTIAL_LINE;
3883         }
3884
3885         event = ftrace_find_event(entry->type);
3886         if (event) {
3887                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3888                 if (ret != TRACE_TYPE_HANDLED)
3889                         return ret;
3890         }
3891
3892         SEQ_PUT_FIELD(s, newline);
3893
3894         return trace_handle_return(s);
3895 }
3896
3897 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3898 {
3899         struct trace_array *tr = iter->tr;
3900         struct trace_seq *s = &iter->seq;
3901         struct trace_entry *entry;
3902         struct trace_event *event;
3903
3904         entry = iter->ent;
3905
3906         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3907                 SEQ_PUT_FIELD(s, entry->pid);
3908                 SEQ_PUT_FIELD(s, iter->cpu);
3909                 SEQ_PUT_FIELD(s, iter->ts);
3910                 if (trace_seq_has_overflowed(s))
3911                         return TRACE_TYPE_PARTIAL_LINE;
3912         }
3913
3914         event = ftrace_find_event(entry->type);
3915         return event ? event->funcs->binary(iter, 0, event) :
3916                 TRACE_TYPE_HANDLED;
3917 }
3918
3919 int trace_empty(struct trace_iterator *iter)
3920 {
3921         struct ring_buffer_iter *buf_iter;
3922         int cpu;
3923
3924         /* If we are looking at one CPU buffer, only check that one */
3925         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3926                 cpu = iter->cpu_file;
3927                 buf_iter = trace_buffer_iter(iter, cpu);
3928                 if (buf_iter) {
3929                         if (!ring_buffer_iter_empty(buf_iter))
3930                                 return 0;
3931                 } else {
3932                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3933                                 return 0;
3934                 }
3935                 return 1;
3936         }
3937
3938         for_each_tracing_cpu(cpu) {
3939                 buf_iter = trace_buffer_iter(iter, cpu);
3940                 if (buf_iter) {
3941                         if (!ring_buffer_iter_empty(buf_iter))
3942                                 return 0;
3943                 } else {
3944                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3945                                 return 0;
3946                 }
3947         }
3948
3949         return 1;
3950 }
3951
3952 /*  Called with trace_event_read_lock() held. */
3953 enum print_line_t print_trace_line(struct trace_iterator *iter)
3954 {
3955         struct trace_array *tr = iter->tr;
3956         unsigned long trace_flags = tr->trace_flags;
3957         enum print_line_t ret;
3958
3959         if (iter->lost_events) {
3960                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3961                                  iter->cpu, iter->lost_events);
3962                 if (trace_seq_has_overflowed(&iter->seq))
3963                         return TRACE_TYPE_PARTIAL_LINE;
3964         }
3965
3966         if (iter->trace && iter->trace->print_line) {
3967                 ret = iter->trace->print_line(iter);
3968                 if (ret != TRACE_TYPE_UNHANDLED)
3969                         return ret;
3970         }
3971
3972         if (iter->ent->type == TRACE_BPUTS &&
3973                         trace_flags & TRACE_ITER_PRINTK &&
3974                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3975                 return trace_print_bputs_msg_only(iter);
3976
3977         if (iter->ent->type == TRACE_BPRINT &&
3978                         trace_flags & TRACE_ITER_PRINTK &&
3979                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3980                 return trace_print_bprintk_msg_only(iter);
3981
3982         if (iter->ent->type == TRACE_PRINT &&
3983                         trace_flags & TRACE_ITER_PRINTK &&
3984                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3985                 return trace_print_printk_msg_only(iter);
3986
3987         if (trace_flags & TRACE_ITER_BIN)
3988                 return print_bin_fmt(iter);
3989
3990         if (trace_flags & TRACE_ITER_HEX)
3991                 return print_hex_fmt(iter);
3992
3993         if (trace_flags & TRACE_ITER_RAW)
3994                 return print_raw_fmt(iter);
3995
3996         return print_trace_fmt(iter);
3997 }
3998
3999 void trace_latency_header(struct seq_file *m)
4000 {
4001         struct trace_iterator *iter = m->private;
4002         struct trace_array *tr = iter->tr;
4003
4004         /* print nothing if the buffers are empty */
4005         if (trace_empty(iter))
4006                 return;
4007
4008         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4009                 print_trace_header(m, iter);
4010
4011         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4012                 print_lat_help_header(m);
4013 }
4014
4015 void trace_default_header(struct seq_file *m)
4016 {
4017         struct trace_iterator *iter = m->private;
4018         struct trace_array *tr = iter->tr;
4019         unsigned long trace_flags = tr->trace_flags;
4020
4021         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4022                 return;
4023
4024         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4025                 /* print nothing if the buffers are empty */
4026                 if (trace_empty(iter))
4027                         return;
4028                 print_trace_header(m, iter);
4029                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4030                         print_lat_help_header(m);
4031         } else {
4032                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4033                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4034                                 print_func_help_header_irq(iter->trace_buffer,
4035                                                            m, trace_flags);
4036                         else
4037                                 print_func_help_header(iter->trace_buffer, m,
4038                                                        trace_flags);
4039                 }
4040         }
4041 }
4042
4043 static void test_ftrace_alive(struct seq_file *m)
4044 {
4045         if (!ftrace_is_dead())
4046                 return;
4047         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4048                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4049 }
4050
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052 static void show_snapshot_main_help(struct seq_file *m)
4053 {
4054         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4055                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4056                     "#                      Takes a snapshot of the main buffer.\n"
4057                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4058                     "#                      (Doesn't have to be '2'; works with any number that\n"
4059                     "#                       is not a '0' or '1')\n");
4060 }
4061
4062 static void show_snapshot_percpu_help(struct seq_file *m)
4063 {
4064         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4065 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4066         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4067                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4068 #else
4069         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4070                     "#                     Must use main snapshot file to allocate.\n");
4071 #endif
4072         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4073                     "#                      (Doesn't have to be '2'; works with any number that\n"
4074                     "#                       is not a '0' or '1')\n");
4075 }
4076
4077 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4078 {
4079         if (iter->tr->allocated_snapshot)
4080                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4081         else
4082                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4083
4084         seq_puts(m, "# Snapshot commands:\n");
4085         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4086                 show_snapshot_main_help(m);
4087         else
4088                 show_snapshot_percpu_help(m);
4089 }
4090 #else
4091 /* Should never be called */
4092 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4093 #endif
4094
4095 static int s_show(struct seq_file *m, void *v)
4096 {
4097         struct trace_iterator *iter = v;
4098         int ret;
4099
4100         if (iter->ent == NULL) {
4101                 if (iter->tr) {
4102                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4103                         seq_puts(m, "#\n");
4104                         test_ftrace_alive(m);
4105                 }
4106                 if (iter->snapshot && trace_empty(iter))
4107                         print_snapshot_help(m, iter);
4108                 else if (iter->trace && iter->trace->print_header)
4109                         iter->trace->print_header(m);
4110                 else
4111                         trace_default_header(m);
4112
4113         } else if (iter->leftover) {
4114                 /*
4115                  * If we filled the seq_file buffer earlier, we
4116                  * want to just show it now.
4117                  */
4118                 ret = trace_print_seq(m, &iter->seq);
4119
4120                 /* ret should this time be zero, but you never know */
4121                 iter->leftover = ret;
4122
4123         } else {
4124                 print_trace_line(iter);
4125                 ret = trace_print_seq(m, &iter->seq);
4126                 /*
4127                  * If we overflow the seq_file buffer, then it will
4128                  * ask us for this data again at start up.
4129                  * Use that instead.
4130                  *  ret is 0 if seq_file write succeeded.
4131                  *        -1 otherwise.
4132                  */
4133                 iter->leftover = ret;
4134         }
4135
4136         return 0;
4137 }
4138
4139 /*
4140  * Should be used after trace_array_get(); trace_types_lock
4141  * ensures that i_cdev was already initialized.
4142  */
4143 static inline int tracing_get_cpu(struct inode *inode)
4144 {
4145         if (inode->i_cdev) /* See trace_create_cpu_file() */
4146                 return (long)inode->i_cdev - 1;
4147         return RING_BUFFER_ALL_CPUS;
4148 }
4149
4150 static const struct seq_operations tracer_seq_ops = {
4151         .start          = s_start,
4152         .next           = s_next,
4153         .stop           = s_stop,
4154         .show           = s_show,
4155 };
4156
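/*
 * Editor's note (not part of the original file): __tracing_open() builds the
 * trace_iterator backing the seq_file of the "trace" file. It copies the
 * current tracer, allocates a ring_buffer_iter per CPU (or a single one for
 * a per-CPU file) and, unless the "snapshot" file is being opened, stops
 * tracing for the duration of the read; tracing_release() restarts it.
 */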
4157 static struct trace_iterator *
4158 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4159 {
4160         struct trace_array *tr = inode->i_private;
4161         struct trace_iterator *iter;
4162         int cpu;
4163
4164         if (tracing_disabled)
4165                 return ERR_PTR(-ENODEV);
4166
4167         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4168         if (!iter)
4169                 return ERR_PTR(-ENOMEM);
4170
4171         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4172                                     GFP_KERNEL);
4173         if (!iter->buffer_iter)
4174                 goto release;
4175
4176         /*
4177          * We make a copy of the current tracer to avoid concurrent
4178          * changes on it while we are reading.
4179          */
4180         mutex_lock(&trace_types_lock);
4181         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4182         if (!iter->trace)
4183                 goto fail;
4184
4185         *iter->trace = *tr->current_trace;
4186
4187         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4188                 goto fail;
4189
4190         iter->tr = tr;
4191
4192 #ifdef CONFIG_TRACER_MAX_TRACE
4193         /* Currently only the top directory has a snapshot */
4194         if (tr->current_trace->print_max || snapshot)
4195                 iter->trace_buffer = &tr->max_buffer;
4196         else
4197 #endif
4198                 iter->trace_buffer = &tr->trace_buffer;
4199         iter->snapshot = snapshot;
4200         iter->pos = -1;
4201         iter->cpu_file = tracing_get_cpu(inode);
4202         mutex_init(&iter->mutex);
4203
4204         /* Notify the tracer early; before we stop tracing. */
4205         if (iter->trace && iter->trace->open)
4206                 iter->trace->open(iter);
4207
4208         /* Annotate start of buffers if we had overruns */
4209         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4210                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4211
4212         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4213         if (trace_clocks[tr->clock_id].in_ns)
4214                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4215
4216         /* stop the trace while dumping if we are not opening "snapshot" */
4217         if (!iter->snapshot)
4218                 tracing_stop_tr(tr);
4219
4220         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4221                 for_each_tracing_cpu(cpu) {
4222                         iter->buffer_iter[cpu] =
4223                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4224                                                          cpu, GFP_KERNEL);
4225                 }
4226                 ring_buffer_read_prepare_sync();
4227                 for_each_tracing_cpu(cpu) {
4228                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4229                         tracing_iter_reset(iter, cpu);
4230                 }
4231         } else {
4232                 cpu = iter->cpu_file;
4233                 iter->buffer_iter[cpu] =
4234                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4235                                                  cpu, GFP_KERNEL);
4236                 ring_buffer_read_prepare_sync();
4237                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4238                 tracing_iter_reset(iter, cpu);
4239         }
4240
4241         mutex_unlock(&trace_types_lock);
4242
4243         return iter;
4244
4245  fail:
4246         mutex_unlock(&trace_types_lock);
4247         kfree(iter->trace);
4248         kfree(iter->buffer_iter);
4249 release:
4250         seq_release_private(inode, file);
4251         return ERR_PTR(-ENOMEM);
4252 }
4253
4254 int tracing_open_generic(struct inode *inode, struct file *filp)
4255 {
4256         int ret;
4257
4258         ret = tracing_check_open_get_tr(NULL);
4259         if (ret)
4260                 return ret;
4261
4262         filp->private_data = inode->i_private;
4263         return 0;
4264 }
4265
4266 bool tracing_is_disabled(void)
4267 {
4268         return (tracing_disabled) ? true: false;
4269 }
4270
4271 /*
4272  * Open and update trace_array ref count.
4273  * Must have the current trace_array passed to it.
4274  */
4275 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4276 {
4277         struct trace_array *tr = inode->i_private;
4278         int ret;
4279
4280         ret = tracing_check_open_get_tr(tr);
4281         if (ret)
4282                 return ret;
4283
4284         filp->private_data = inode->i_private;
4285
4286         return 0;
4287 }
4288
4289 static int tracing_release(struct inode *inode, struct file *file)
4290 {
4291         struct trace_array *tr = inode->i_private;
4292         struct seq_file *m = file->private_data;
4293         struct trace_iterator *iter;
4294         int cpu;
4295
4296         if (!(file->f_mode & FMODE_READ)) {
4297                 trace_array_put(tr);
4298                 return 0;
4299         }
4300
4301         /* Writes do not use seq_file */
4302         iter = m->private;
4303         mutex_lock(&trace_types_lock);
4304
4305         for_each_tracing_cpu(cpu) {
4306                 if (iter->buffer_iter[cpu])
4307                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4308         }
4309
4310         if (iter->trace && iter->trace->close)
4311                 iter->trace->close(iter);
4312
4313         if (!iter->snapshot)
4314                 /* reenable tracing if it was previously enabled */
4315                 tracing_start_tr(tr);
4316
4317         __trace_array_put(tr);
4318
4319         mutex_unlock(&trace_types_lock);
4320
4321         mutex_destroy(&iter->mutex);
4322         free_cpumask_var(iter->started);
4323         kfree(iter->trace);
4324         kfree(iter->buffer_iter);
4325         seq_release_private(inode, file);
4326
4327         return 0;
4328 }
4329
4330 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4331 {
4332         struct trace_array *tr = inode->i_private;
4333
4334         trace_array_put(tr);
4335         return 0;
4336 }
4337
4338 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4339 {
4340         struct trace_array *tr = inode->i_private;
4341
4342         trace_array_put(tr);
4343
4344         return single_release(inode, file);
4345 }
4346
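/*
 * Editor's note (not part of the original file): tracing_open() implements
 * open() for the "trace" file. Opening it for writing with O_TRUNC clears
 * the selected CPU buffer (or all of them), while opening it for reading
 * creates the iterator through __tracing_open().
 */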
4347 static int tracing_open(struct inode *inode, struct file *file)
4348 {
4349         struct trace_array *tr = inode->i_private;
4350         struct trace_iterator *iter;
4351         int ret;
4352
4353         ret = tracing_check_open_get_tr(tr);
4354         if (ret)
4355                 return ret;
4356
4357         /* If this file was open for write, then erase contents */
4358         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4359                 int cpu = tracing_get_cpu(inode);
4360                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4361
4362 #ifdef CONFIG_TRACER_MAX_TRACE
4363                 if (tr->current_trace->print_max)
4364                         trace_buf = &tr->max_buffer;
4365 #endif
4366
4367                 if (cpu == RING_BUFFER_ALL_CPUS)
4368                         tracing_reset_online_cpus(trace_buf);
4369                 else
4370                         tracing_reset_cpu(trace_buf, cpu);
4371         }
4372
4373         if (file->f_mode & FMODE_READ) {
4374                 iter = __tracing_open(inode, file, false);
4375                 if (IS_ERR(iter))
4376                         ret = PTR_ERR(iter);
4377                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4378                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4379         }
4380
4381         if (ret < 0)
4382                 trace_array_put(tr);
4383
4384         return ret;
4385 }
4386
4387 /*
4388  * Some tracers are not suitable for instance buffers.
4389  * A tracer is always available for the global array (toplevel)
4390  * or if it explicitly states that it is.
4391  */
4392 static bool
4393 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4394 {
4395         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4396 }
4397
4398 /* Find the next tracer that this trace array may use */
4399 static struct tracer *
4400 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4401 {
4402         while (t && !trace_ok_for_array(t, tr))
4403                 t = t->next;
4404
4405         return t;
4406 }
4407
4408 static void *
4409 t_next(struct seq_file *m, void *v, loff_t *pos)
4410 {
4411         struct trace_array *tr = m->private;
4412         struct tracer *t = v;
4413
4414         (*pos)++;
4415
4416         if (t)
4417                 t = get_tracer_for_array(tr, t->next);
4418
4419         return t;
4420 }
4421
4422 static void *t_start(struct seq_file *m, loff_t *pos)
4423 {
4424         struct trace_array *tr = m->private;
4425         struct tracer *t;
4426         loff_t l = 0;
4427
4428         mutex_lock(&trace_types_lock);
4429
4430         t = get_tracer_for_array(tr, trace_types);
4431         for (; t && l < *pos; t = t_next(m, t, &l))
4432                         ;
4433
4434         return t;
4435 }
4436
4437 static void t_stop(struct seq_file *m, void *p)
4438 {
4439         mutex_unlock(&trace_types_lock);
4440 }
4441
4442 static int t_show(struct seq_file *m, void *v)
4443 {
4444         struct tracer *t = v;
4445
4446         if (!t)
4447                 return 0;
4448
4449         seq_puts(m, t->name);
4450         if (t->next)
4451                 seq_putc(m, ' ');
4452         else
4453                 seq_putc(m, '\n');
4454
4455         return 0;
4456 }
4457
4458 static const struct seq_operations show_traces_seq_ops = {
4459         .start          = t_start,
4460         .next           = t_next,
4461         .stop           = t_stop,
4462         .show           = t_show,
4463 };
4464
4465 static int show_traces_open(struct inode *inode, struct file *file)
4466 {
4467         struct trace_array *tr = inode->i_private;
4468         struct seq_file *m;
4469         int ret;
4470
4471         ret = tracing_check_open_get_tr(tr);
4472         if (ret)
4473                 return ret;
4474
4475         ret = seq_open(file, &show_traces_seq_ops);
4476         if (ret) {
4477                 trace_array_put(tr);
4478                 return ret;
4479         }
4480
4481         m = file->private_data;
4482         m->private = tr;
4483
4484         return 0;
4485 }
4486
4487 static int show_traces_release(struct inode *inode, struct file *file)
4488 {
4489         struct trace_array *tr = inode->i_private;
4490
4491         trace_array_put(tr);
4492         return seq_release(inode, file);
4493 }
4494
4495 static ssize_t
4496 tracing_write_stub(struct file *filp, const char __user *ubuf,
4497                    size_t count, loff_t *ppos)
4498 {
4499         return count;
4500 }
4501
4502 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4503 {
4504         int ret;
4505
4506         if (file->f_mode & FMODE_READ)
4507                 ret = seq_lseek(file, offset, whence);
4508         else
4509                 file->f_pos = ret = 0;
4510
4511         return ret;
4512 }
4513
4514 static const struct file_operations tracing_fops = {
4515         .open           = tracing_open,
4516         .read           = seq_read,
4517         .write          = tracing_write_stub,
4518         .llseek         = tracing_lseek,
4519         .release        = tracing_release,
4520 };
4521
4522 static const struct file_operations show_traces_fops = {
4523         .open           = show_traces_open,
4524         .read           = seq_read,
4525         .llseek         = seq_lseek,
4526         .release        = show_traces_release,
4527 };
4528
4529 static ssize_t
4530 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4531                      size_t count, loff_t *ppos)
4532 {
4533         struct trace_array *tr = file_inode(filp)->i_private;
4534         char *mask_str;
4535         int len;
4536
4537         len = snprintf(NULL, 0, "%*pb\n",
4538                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4539         mask_str = kmalloc(len, GFP_KERNEL);
4540         if (!mask_str)
4541                 return -ENOMEM;
4542
4543         len = snprintf(mask_str, len, "%*pb\n",
4544                        cpumask_pr_args(tr->tracing_cpumask));
4545         if (len >= count) {
4546                 count = -EINVAL;
4547                 goto out_err;
4548         }
4549         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4550
4551 out_err:
4552         kfree(mask_str);
4553
4554         return count;
4555 }
4556
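/*
 * Editor's note (not part of the original file): writing a hex CPU mask to
 * tracing_cpumask limits which CPUs are traced. Illustrative shell usage:
 *
 *	# trace only CPUs 0 and 1
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * CPUs leaving the mask get their per-CPU buffers disabled and CPUs joining
 * it get them re-enabled, all under tr->max_lock with interrupts off.
 */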
4557 static ssize_t
4558 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4559                       size_t count, loff_t *ppos)
4560 {
4561         struct trace_array *tr = file_inode(filp)->i_private;
4562         cpumask_var_t tracing_cpumask_new;
4563         int err, cpu;
4564
4565         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4566                 return -ENOMEM;
4567
4568         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4569         if (err)
4570                 goto err_unlock;
4571
4572         local_irq_disable();
4573         arch_spin_lock(&tr->max_lock);
4574         for_each_tracing_cpu(cpu) {
4575                 /*
4576                  * Increase/decrease the disabled counter if we are
4577                  * about to flip a bit in the cpumask:
4578                  */
4579                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4580                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4581                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4582                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4583                 }
4584                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4585                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4586                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4587                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4588                 }
4589         }
4590         arch_spin_unlock(&tr->max_lock);
4591         local_irq_enable();
4592
4593         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4594         free_cpumask_var(tracing_cpumask_new);
4595
4596         return count;
4597
4598 err_unlock:
4599         free_cpumask_var(tracing_cpumask_new);
4600
4601         return err;
4602 }
4603
4604 static const struct file_operations tracing_cpumask_fops = {
4605         .open           = tracing_open_generic_tr,
4606         .read           = tracing_cpumask_read,
4607         .write          = tracing_cpumask_write,
4608         .release        = tracing_release_generic_tr,
4609         .llseek         = generic_file_llseek,
4610 };
4611
4612 static int tracing_trace_options_show(struct seq_file *m, void *v)
4613 {
4614         struct tracer_opt *trace_opts;
4615         struct trace_array *tr = m->private;
4616         u32 tracer_flags;
4617         int i;
4618
4619         mutex_lock(&trace_types_lock);
4620         tracer_flags = tr->current_trace->flags->val;
4621         trace_opts = tr->current_trace->flags->opts;
4622
4623         for (i = 0; trace_options[i]; i++) {
4624                 if (tr->trace_flags & (1 << i))
4625                         seq_printf(m, "%s\n", trace_options[i]);
4626                 else
4627                         seq_printf(m, "no%s\n", trace_options[i]);
4628         }
4629
4630         for (i = 0; trace_opts[i].name; i++) {
4631                 if (tracer_flags & trace_opts[i].bit)
4632                         seq_printf(m, "%s\n", trace_opts[i].name);
4633                 else
4634                         seq_printf(m, "no%s\n", trace_opts[i].name);
4635         }
4636         mutex_unlock(&trace_types_lock);
4637
4638         return 0;
4639 }
4640
4641 static int __set_tracer_option(struct trace_array *tr,
4642                                struct tracer_flags *tracer_flags,
4643                                struct tracer_opt *opts, int neg)
4644 {
4645         struct tracer *trace = tracer_flags->trace;
4646         int ret;
4647
4648         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4649         if (ret)
4650                 return ret;
4651
4652         if (neg)
4653                 tracer_flags->val &= ~opts->bit;
4654         else
4655                 tracer_flags->val |= opts->bit;
4656         return 0;
4657 }
4658
4659 /* Try to assign a tracer specific option */
4660 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4661 {
4662         struct tracer *trace = tr->current_trace;
4663         struct tracer_flags *tracer_flags = trace->flags;
4664         struct tracer_opt *opts = NULL;
4665         int i;
4666
4667         for (i = 0; tracer_flags->opts[i].name; i++) {
4668                 opts = &tracer_flags->opts[i];
4669
4670                 if (strcmp(cmp, opts->name) == 0)
4671                         return __set_tracer_option(tr, trace->flags, opts, neg);
4672         }
4673
4674         return -EINVAL;
4675 }
4676
4677 /* Some tracers require overwrite to stay enabled */
4678 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4679 {
4680         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4681                 return -1;
4682
4683         return 0;
4684 }
4685
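/*
 * Editor's note (not part of the original file): set_tracer_flag() toggles a
 * single TRACE_ITER_* flag on a trace array, after giving the current tracer
 * a chance to veto the change, and then applies the flag's side effects
 * (cmdline/tgid recording, fork following, ring-buffer overwrite mode,
 * trace_printk enablement).
 */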
4686 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4687 {
4688         if ((mask == TRACE_ITER_RECORD_TGID) ||
4689             (mask == TRACE_ITER_RECORD_CMD))
4690                 lockdep_assert_held(&event_mutex);
4691
4692         /* do nothing if flag is already set */
4693         if (!!(tr->trace_flags & mask) == !!enabled)
4694                 return 0;
4695
4696         /* Give the tracer a chance to approve the change */
4697         if (tr->current_trace->flag_changed)
4698                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4699                         return -EINVAL;
4700
4701         if (enabled)
4702                 tr->trace_flags |= mask;
4703         else
4704                 tr->trace_flags &= ~mask;
4705
4706         if (mask == TRACE_ITER_RECORD_CMD)
4707                 trace_event_enable_cmd_record(enabled);
4708
4709         if (mask == TRACE_ITER_RECORD_TGID) {
4710                 if (!tgid_map)
4711                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4712                                            sizeof(*tgid_map),
4713                                            GFP_KERNEL);
4714                 if (!tgid_map) {
4715                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4716                         return -ENOMEM;
4717                 }
4718
4719                 trace_event_enable_tgid_record(enabled);
4720         }
4721
4722         if (mask == TRACE_ITER_EVENT_FORK)
4723                 trace_event_follow_fork(tr, enabled);
4724
4725         if (mask == TRACE_ITER_FUNC_FORK)
4726                 ftrace_pid_follow_fork(tr, enabled);
4727
4728         if (mask == TRACE_ITER_OVERWRITE) {
4729                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4730 #ifdef CONFIG_TRACER_MAX_TRACE
4731                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4732 #endif
4733         }
4734
4735         if (mask == TRACE_ITER_PRINTK) {
4736                 trace_printk_start_stop_comm(enabled);
4737                 trace_printk_control(enabled);
4738         }
4739
4740         return 0;
4741 }
4742
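/*
 * Editor's note (not part of the original file): trace_set_options() parses
 * one option token, with an optional "no" prefix to clear it, and applies it
 * either as a core trace flag or as a tracer-specific option. Illustrative
 * shell usage:
 *
 *	echo noprint-parent > /sys/kernel/tracing/trace_options
 */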
4743 static int trace_set_options(struct trace_array *tr, char *option)
4744 {
4745         char *cmp;
4746         int neg = 0;
4747         int ret;
4748         size_t orig_len = strlen(option);
4749         int len;
4750
4751         cmp = strstrip(option);
4752
4753         len = str_has_prefix(cmp, "no");
4754         if (len)
4755                 neg = 1;
4756
4757         cmp += len;
4758
4759         mutex_lock(&event_mutex);
4760         mutex_lock(&trace_types_lock);
4761
4762         ret = match_string(trace_options, -1, cmp);
4763         /* If no option could be set, test the specific tracer options */
4764         if (ret < 0)
4765                 ret = set_tracer_option(tr, cmp, neg);
4766         else
4767                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4768
4769         mutex_unlock(&trace_types_lock);
4770         mutex_unlock(&event_mutex);
4771
4772         /*
4773          * If the first trailing whitespace is replaced with '\0' by strstrip,
4774          * turn it back into a space.
4775          */
4776         if (orig_len > strlen(option))
4777                 option[strlen(option)] = ' ';
4778
4779         return ret;
4780 }
4781
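/*
 * Editor's note (not part of the original file): apply_trace_boot_options()
 * walks the comma-separated list saved from the "trace_options=" boot
 * parameter and feeds each entry to trace_set_options(), putting the commas
 * back so the buffer can be parsed again later.
 */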
4782 static void __init apply_trace_boot_options(void)
4783 {
4784         char *buf = trace_boot_options_buf;
4785         char *option;
4786
4787         while (true) {
4788                 option = strsep(&buf, ",");
4789
4790                 if (!option)
4791                         break;
4792
4793                 if (*option)
4794                         trace_set_options(&global_trace, option);
4795
4796                 /* Put back the comma to allow this to be called again */
4797                 if (buf)
4798                         *(buf - 1) = ',';
4799         }
4800 }
4801
4802 static ssize_t
4803 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4804                         size_t cnt, loff_t *ppos)
4805 {
4806         struct seq_file *m = filp->private_data;
4807         struct trace_array *tr = m->private;
4808         char buf[64];
4809         int ret;
4810
4811         if (cnt >= sizeof(buf))
4812                 return -EINVAL;
4813
4814         if (copy_from_user(buf, ubuf, cnt))
4815                 return -EFAULT;
4816
4817         buf[cnt] = 0;
4818
4819         ret = trace_set_options(tr, buf);
4820         if (ret < 0)
4821                 return ret;
4822
4823         *ppos += cnt;
4824
4825         return cnt;
4826 }
4827
4828 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4829 {
4830         struct trace_array *tr = inode->i_private;
4831         int ret;
4832
4833         ret = tracing_check_open_get_tr(tr);
4834         if (ret)
4835                 return ret;
4836
4837         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4838         if (ret < 0)
4839                 trace_array_put(tr);
4840
4841         return ret;
4842 }
4843
4844 static const struct file_operations tracing_iter_fops = {
4845         .open           = tracing_trace_options_open,
4846         .read           = seq_read,
4847         .llseek         = seq_lseek,
4848         .release        = tracing_single_release_tr,
4849         .write          = tracing_trace_options_write,
4850 };
4851
4852 static const char readme_msg[] =
4853         "tracing mini-HOWTO:\n\n"
4854         "# echo 0 > tracing_on : quick way to disable tracing\n"
4855         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4856         " Important files:\n"
4857         "  trace\t\t\t- The static contents of the buffer\n"
4858         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4859         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4860         "  current_tracer\t- function and latency tracers\n"
4861         "  available_tracers\t- list of configured tracers for current_tracer\n"
4862         "  error_log\t- error log for failed commands (that support it)\n"
4863         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4864         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4865         "  trace_clock\t\t- change the clock used to order events\n"
4866         "       local:   Per cpu clock but may not be synced across CPUs\n"
4867         "      global:   Synced across CPUs but slows tracing down.\n"
4868         "     counter:   Not a clock, but just an increment\n"
4869         "      uptime:   Jiffy counter from time of boot\n"
4870         "        perf:   Same clock that perf events use\n"
4871 #ifdef CONFIG_X86_64
4872         "     x86-tsc:   TSC cycle counter\n"
4873 #endif
4874         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4875         "       delta:   Delta difference against a buffer-wide timestamp\n"
4876         "    absolute:   Absolute (standalone) timestamp\n"
4877         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4878         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4879         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4880         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4881         "\t\t\t  Remove sub-buffer with rmdir\n"
4882         "  trace_options\t\t- Set format or modify how tracing happens\n"
4883         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4884         "\t\t\t  option name\n"
4885         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
4886 #ifdef CONFIG_DYNAMIC_FTRACE
4887         "\n  available_filter_functions - list of functions that can be filtered on\n"
4888         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4889         "\t\t\t  functions\n"
4890         "\t     accepts: func_full_name or glob-matching-pattern\n"
4891         "\t     modules: Can select a group via module\n"
4892         "\t      Format: :mod:<module-name>\n"
4893         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4894         "\t    triggers: a command to perform when function is hit\n"
4895         "\t      Format: <function>:<trigger>[:count]\n"
4896         "\t     trigger: traceon, traceoff\n"
4897         "\t\t      enable_event:<system>:<event>\n"
4898         "\t\t      disable_event:<system>:<event>\n"
4899 #ifdef CONFIG_STACKTRACE
4900         "\t\t      stacktrace\n"
4901 #endif
4902 #ifdef CONFIG_TRACER_SNAPSHOT
4903         "\t\t      snapshot\n"
4904 #endif
4905         "\t\t      dump\n"
4906         "\t\t      cpudump\n"
4907         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4908         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4909         "\t     The first one will disable tracing every time do_fault is hit\n"
4910         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4911         "\t       The first time do_trap is hit and it disables tracing, the\n"
4912         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4913         "\t       the counter will not decrement. It only decrements when the\n"
4914         "\t       trigger did work\n"
4915         "\t     To remove a trigger without a count:\n"
4916         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4917         "\t     To remove a trigger with a count:\n"
4918         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4919         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4920         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4921         "\t    modules: Can select a group via module command :mod:\n"
4922         "\t    Does not accept triggers\n"
4923 #endif /* CONFIG_DYNAMIC_FTRACE */
4924 #ifdef CONFIG_FUNCTION_TRACER
4925         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4926         "\t\t    (function)\n"
4927 #endif
4928 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4929         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4930         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4931         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4932 #endif
4933 #ifdef CONFIG_TRACER_SNAPSHOT
4934         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4935         "\t\t\t  snapshot buffer. Read the contents for more\n"
4936         "\t\t\t  information\n"
4937 #endif
4938 #ifdef CONFIG_STACK_TRACER
4939         "  stack_trace\t\t- Shows the max stack trace when active\n"
4940         "  stack_max_size\t- Shows current max stack size that was traced\n"
4941         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4942         "\t\t\t  new trace)\n"
4943 #ifdef CONFIG_DYNAMIC_FTRACE
4944         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4945         "\t\t\t  traces\n"
4946 #endif
4947 #endif /* CONFIG_STACK_TRACER */
4948 #ifdef CONFIG_DYNAMIC_EVENTS
4949         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4950         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4951 #endif
4952 #ifdef CONFIG_KPROBE_EVENTS
4953         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4954         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4955 #endif
4956 #ifdef CONFIG_UPROBE_EVENTS
4957         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4958         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4959 #endif
4960 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4961         "\t  accepts: event-definitions (one definition per line)\n"
4962         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4963         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4964 #ifdef CONFIG_HIST_TRIGGERS
4965         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4966 #endif
4967         "\t           -:[<group>/]<event>\n"
4968 #ifdef CONFIG_KPROBE_EVENTS
4969         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4970   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4971 #endif
4972 #ifdef CONFIG_UPROBE_EVENTS
4973   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4974 #endif
4975         "\t     args: <name>=fetcharg[:type]\n"
4976         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4977 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4978         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4979 #else
4980         "\t           $stack<index>, $stack, $retval, $comm,\n"
4981 #endif
4982         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4983         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4984         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4985         "\t           <type>\\[<array-size>\\]\n"
4986 #ifdef CONFIG_HIST_TRIGGERS
4987         "\t    field: <stype> <name>;\n"
4988         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4989         "\t           [unsigned] char/int/long\n"
4990 #endif
4991 #endif
4992         "  events/\t\t- Directory containing all trace event subsystems:\n"
4993         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4994         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4995         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4996         "\t\t\t  events\n"
4997         "      filter\t\t- If set, only events passing filter are traced\n"
4998         "  events/<system>/<event>/\t- Directory containing control files for\n"
4999         "\t\t\t  <event>:\n"
5000         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5001         "      filter\t\t- If set, only events passing filter are traced\n"
5002         "      trigger\t\t- If set, a command to perform when event is hit\n"
5003         "\t    Format: <trigger>[:count][if <filter>]\n"
5004         "\t   trigger: traceon, traceoff\n"
5005         "\t            enable_event:<system>:<event>\n"
5006         "\t            disable_event:<system>:<event>\n"
5007 #ifdef CONFIG_HIST_TRIGGERS
5008         "\t            enable_hist:<system>:<event>\n"
5009         "\t            disable_hist:<system>:<event>\n"
5010 #endif
5011 #ifdef CONFIG_STACKTRACE
5012         "\t\t    stacktrace\n"
5013 #endif
5014 #ifdef CONFIG_TRACER_SNAPSHOT
5015         "\t\t    snapshot\n"
5016 #endif
5017 #ifdef CONFIG_HIST_TRIGGERS
5018         "\t\t    hist (see below)\n"
5019 #endif
5020         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5021         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5022         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5023         "\t                  events/block/block_unplug/trigger\n"
5024         "\t   The first disables tracing every time block_unplug is hit.\n"
5025         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5026         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5027         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5028         "\t   Like function triggers, the counter is only decremented if it\n"
5029         "\t    enabled or disabled tracing.\n"
5030         "\t   To remove a trigger without a count:\n"
5031         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5032         "\t   To remove a trigger with a count:\n"
5033         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5034         "\t   The filter, if any, can be omitted when removing a trigger.\n"
5035 #ifdef CONFIG_HIST_TRIGGERS
5036         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5037         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5038         "\t            [:values=<field1[,field2,...]>]\n"
5039         "\t            [:sort=<field1[,field2,...]>]\n"
5040         "\t            [:size=#entries]\n"
5041         "\t            [:pause][:continue][:clear]\n"
5042         "\t            [:name=histname1]\n"
5043         "\t            [:<handler>.<action>]\n"
5044         "\t            [if <filter>]\n\n"
5045         "\t    When a matching event is hit, an entry is added to a hash\n"
5046         "\t    table using the key(s) and value(s) named, and the value of a\n"
5047         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5048         "\t    correspond to fields in the event's format description.  Keys\n"
5049         "\t    can be any field, or the special string 'stacktrace'.\n"
5050         "\t    Compound keys consisting of up to two fields can be specified\n"
5051         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5052         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5053         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5054         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5055         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5056         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5057         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5058         "\t    its histogram data will be shared with other triggers of the\n"
5059         "\t    same name, and trigger hits will update this common data.\n\n"
5060         "\t    Reading the 'hist' file for the event will dump the hash\n"
5061         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5062         "\t    triggers attached to an event, there will be a table for each\n"
5063         "\t    trigger in the output.  The table displayed for a named\n"
5064         "\t    trigger will be the same as any other instance having the\n"
5065         "\t    same name.  The default format used to display a given field\n"
5066         "\t    can be modified by appending any of the following modifiers\n"
5067         "\t    to the field name, as applicable:\n\n"
5068         "\t            .hex        display a number as a hex value\n"
5069         "\t            .sym        display an address as a symbol\n"
5070         "\t            .sym-offset display an address as a symbol and offset\n"
5071         "\t            .execname   display a common_pid as a program name\n"
5072         "\t            .syscall    display a syscall id as a syscall name\n"
5073         "\t            .log2       display log2 value rather than raw number\n"
5074         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5075         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5076         "\t    trigger or to start a hist trigger but not log any events\n"
5077         "\t    until told to do so.  'continue' can be used to start or\n"
5078         "\t    restart a paused hist trigger.\n\n"
5079         "\t    The 'clear' parameter will clear the contents of a running\n"
5080         "\t    hist trigger and leave its current paused/active state\n"
5081         "\t    unchanged.\n\n"
5082         "\t    The enable_hist and disable_hist triggers can be used to\n"
5083         "\t    have one event conditionally start and stop another event's\n"
5084         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5085         "\t    the enable_event and disable_event triggers.\n\n"
5086         "\t    Hist trigger handlers and actions are executed whenever a\n"
5087         "\t    histogram entry is added or updated.  They take the form:\n\n"
5088         "\t        <handler>.<action>\n\n"
5089         "\t    The available handlers are:\n\n"
5090         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5091         "\t        onmax(var)               - invoke if var exceeds current max\n"
5092         "\t        onchange(var)            - invoke action if var changes\n\n"
5093         "\t    The available actions are:\n\n"
5094         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5095         "\t        save(field,...)                      - save current event fields\n"
5096 #ifdef CONFIG_TRACER_SNAPSHOT
5097         "\t        snapshot()                           - snapshot the trace buffer\n"
5098 #endif
5099 #endif
5100 ;
5101
5102 static ssize_t
5103 tracing_readme_read(struct file *filp, char __user *ubuf,
5104                        size_t cnt, loff_t *ppos)
5105 {
5106         return simple_read_from_buffer(ubuf, cnt, ppos,
5107                                         readme_msg, strlen(readme_msg));
5108 }
5109
5110 static const struct file_operations tracing_readme_fops = {
5111         .open           = tracing_open_generic,
5112         .read           = tracing_readme_read,
5113         .llseek         = generic_file_llseek,
5114 };
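
/*
 * The readme_msg text above is exposed read-only through these fops
 * (the tracefs README file), served verbatim by tracing_readme_read()
 * with simple_read_from_buffer().
 */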
5115
5116 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5117 {
5118         int *ptr = v;
5119
5120         if (*pos || m->count)
5121                 ptr++;
5122
5123         (*pos)++;
5124
5125         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5126                 if (trace_find_tgid(*ptr))
5127                         return ptr;
5128         }
5129
5130         return NULL;
5131 }
5132
5133 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5134 {
5135         void *v;
5136         loff_t l = 0;
5137
5138         if (!tgid_map)
5139                 return NULL;
5140
5141         v = &tgid_map[0];
5142         while (l <= *pos) {
5143                 v = saved_tgids_next(m, v, &l);
5144                 if (!v)
5145                         return NULL;
5146         }
5147
5148         return v;
5149 }
5150
5151 static void saved_tgids_stop(struct seq_file *m, void *v)
5152 {
5153 }
5154
5155 static int saved_tgids_show(struct seq_file *m, void *v)
5156 {
5157         int pid = (int *)v - tgid_map;
5158
5159         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5160         return 0;
5161 }
5162
5163 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5164         .start          = saved_tgids_start,
5165         .stop           = saved_tgids_stop,
5166         .next           = saved_tgids_next,
5167         .show           = saved_tgids_show,
5168 };
5169
5170 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5171 {
5172         int ret;
5173
5174         ret = tracing_check_open_get_tr(NULL);
5175         if (ret)
5176                 return ret;
5177
5178         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5179 }
5180
5181
5182 static const struct file_operations tracing_saved_tgids_fops = {
5183         .open           = tracing_saved_tgids_open,
5184         .read           = seq_read,
5185         .llseek         = seq_lseek,
5186         .release        = seq_release,
5187 };
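
/*
 * Output format of the file backed by these fops: one "<pid> <tgid>"
 * pair per line (see saved_tgids_show()).  Nothing is listed until
 * tgid recording has allocated and populated tgid_map.
 */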
5188
5189 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5190 {
5191         unsigned int *ptr = v;
5192
5193         if (*pos || m->count)
5194                 ptr++;
5195
5196         (*pos)++;
5197
5198         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5199              ptr++) {
5200                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5201                         continue;
5202
5203                 return ptr;
5204         }
5205
5206         return NULL;
5207 }
5208
5209 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5210 {
5211         void *v;
5212         loff_t l = 0;
5213
5214         preempt_disable();
5215         arch_spin_lock(&trace_cmdline_lock);
5216
5217         v = &savedcmd->map_cmdline_to_pid[0];
5218         while (l <= *pos) {
5219                 v = saved_cmdlines_next(m, v, &l);
5220                 if (!v)
5221                         return NULL;
5222         }
5223
5224         return v;
5225 }
5226
5227 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5228 {
5229         arch_spin_unlock(&trace_cmdline_lock);
5230         preempt_enable();
5231 }
5232
5233 static int saved_cmdlines_show(struct seq_file *m, void *v)
5234 {
5235         char buf[TASK_COMM_LEN];
5236         unsigned int *pid = v;
5237
5238         __trace_find_cmdline(*pid, buf);
5239         seq_printf(m, "%d %s\n", *pid, buf);
5240         return 0;
5241 }
5242
5243 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5244         .start          = saved_cmdlines_start,
5245         .next           = saved_cmdlines_next,
5246         .stop           = saved_cmdlines_stop,
5247         .show           = saved_cmdlines_show,
5248 };
5249
5250 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5251 {
5252         int ret;
5253
5254         ret = tracing_check_open_get_tr(NULL);
5255         if (ret)
5256                 return ret;
5257
5258         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5259 }
5260
5261 static const struct file_operations tracing_saved_cmdlines_fops = {
5262         .open           = tracing_saved_cmdlines_open,
5263         .read           = seq_read,
5264         .llseek         = seq_lseek,
5265         .release        = seq_release,
5266 };
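
/*
 * Output format of the file backed by these fops: one "<pid> <comm>"
 * pair per line (see saved_cmdlines_show()), read from the pid->comm
 * cache under trace_cmdline_lock.
 */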
5267
5268 static ssize_t
5269 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5270                                  size_t cnt, loff_t *ppos)
5271 {
5272         char buf[64];
5273         int r;
5274
5275         arch_spin_lock(&trace_cmdline_lock);
5276         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5277         arch_spin_unlock(&trace_cmdline_lock);
5278
5279         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5280 }
5281
5282 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5283 {
5284         kfree(s->saved_cmdlines);
5285         kfree(s->map_cmdline_to_pid);
5286         kfree(s);
5287 }
5288
5289 static int tracing_resize_saved_cmdlines(unsigned int val)
5290 {
5291         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5292
5293         s = kmalloc(sizeof(*s), GFP_KERNEL);
5294         if (!s)
5295                 return -ENOMEM;
5296
5297         if (allocate_cmdlines_buffer(val, s) < 0) {
5298                 kfree(s);
5299                 return -ENOMEM;
5300         }
5301
5302         arch_spin_lock(&trace_cmdline_lock);
5303         savedcmd_temp = savedcmd;
5304         savedcmd = s;
5305         arch_spin_unlock(&trace_cmdline_lock);
5306         free_saved_cmdlines_buffer(savedcmd_temp);
5307
5308         return 0;
5309 }
5310
5311 static ssize_t
5312 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5313                                   size_t cnt, loff_t *ppos)
5314 {
5315         unsigned long val;
5316         int ret;
5317
5318         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5319         if (ret)
5320                 return ret;
5321
5322         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5323         if (!val || val > PID_MAX_DEFAULT)
5324                 return -EINVAL;
5325
5326         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5327         if (ret < 0)
5328                 return ret;
5329
5330         *ppos += cnt;
5331
5332         return cnt;
5333 }
5334
5335 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5336         .open           = tracing_open_generic,
5337         .read           = tracing_saved_cmdlines_size_read,
5338         .write          = tracing_saved_cmdlines_size_write,
5339 };
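
/*
 * Illustrative resize of the saved cmdlines cache through the file
 * backed by these fops (the value is a number of entries, between 1
 * and PID_MAX_DEFAULT), assuming the usual tracefs mount point:
 *
 *   echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The new buffer is swapped in under trace_cmdline_lock and the old
 * one is then freed.
 */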
5340
5341 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5342 static union trace_eval_map_item *
5343 update_eval_map(union trace_eval_map_item *ptr)
5344 {
5345         if (!ptr->map.eval_string) {
5346                 if (ptr->tail.next) {
5347                         ptr = ptr->tail.next;
5348                         /* Set ptr to the next real item (skip head) */
5349                         ptr++;
5350                 } else
5351                         return NULL;
5352         }
5353         return ptr;
5354 }
5355
5356 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5357 {
5358         union trace_eval_map_item *ptr = v;
5359
5360         /*
5361          * Paranoid! If ptr points to end, we don't want to increment past it.
5362          * This really should never happen.
5363          */
5364         ptr = update_eval_map(ptr);
5365         if (WARN_ON_ONCE(!ptr))
5366                 return NULL;
5367
5368         ptr++;
5369
5370         (*pos)++;
5371
5372         ptr = update_eval_map(ptr);
5373
5374         return ptr;
5375 }
5376
5377 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5378 {
5379         union trace_eval_map_item *v;
5380         loff_t l = 0;
5381
5382         mutex_lock(&trace_eval_mutex);
5383
5384         v = trace_eval_maps;
5385         if (v)
5386                 v++;
5387
5388         while (v && l < *pos) {
5389                 v = eval_map_next(m, v, &l);
5390         }
5391
5392         return v;
5393 }
5394
5395 static void eval_map_stop(struct seq_file *m, void *v)
5396 {
5397         mutex_unlock(&trace_eval_mutex);
5398 }
5399
5400 static int eval_map_show(struct seq_file *m, void *v)
5401 {
5402         union trace_eval_map_item *ptr = v;
5403
5404         seq_printf(m, "%s %ld (%s)\n",
5405                    ptr->map.eval_string, ptr->map.eval_value,
5406                    ptr->map.system);
5407
5408         return 0;
5409 }
5410
5411 static const struct seq_operations tracing_eval_map_seq_ops = {
5412         .start          = eval_map_start,
5413         .next           = eval_map_next,
5414         .stop           = eval_map_stop,
5415         .show           = eval_map_show,
5416 };
5417
5418 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5419 {
5420         int ret;
5421
5422         ret = tracing_check_open_get_tr(NULL);
5423         if (ret)
5424                 return ret;
5425
5426         return seq_open(filp, &tracing_eval_map_seq_ops);
5427 }
5428
5429 static const struct file_operations tracing_eval_map_fops = {
5430         .open           = tracing_eval_map_open,
5431         .read           = seq_read,
5432         .llseek         = seq_lseek,
5433         .release        = seq_release,
5434 };
5435
5436 static inline union trace_eval_map_item *
5437 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5438 {
5439         /* Return tail of array given the head */
5440         return ptr + ptr->head.length + 1;
5441 }
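
/*
 * Layout sketch of one trace_eval_maps chunk for a module with N maps
 * (allocated in trace_insert_eval_map_file() below):
 *
 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
 *
 * The tail is therefore head.length + 1 items past the head, which is
 * what trace_eval_jmp_to_tail() computes.  Walking all chunks looks
 * like (illustrative only):
 *
 *   for (ptr = trace_eval_maps; ptr; ptr = ptr->tail.next)
 *       ptr = trace_eval_jmp_to_tail(ptr);  // leaves ptr at this chunk's tail
 */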
5442
5443 static void
5444 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5445                            int len)
5446 {
5447         struct trace_eval_map **stop;
5448         struct trace_eval_map **map;
5449         union trace_eval_map_item *map_array;
5450         union trace_eval_map_item *ptr;
5451
5452         stop = start + len;
5453
5454         /*
5455          * The trace_eval_maps contains the map plus a head and tail item,
5456          * where the head holds the module and length of array, and the
5457          * tail holds a pointer to the next list.
5458          */
5459         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5460         if (!map_array) {
5461                 pr_warn("Unable to allocate trace eval mapping\n");
5462                 return;
5463         }
5464
5465         mutex_lock(&trace_eval_mutex);
5466
5467         if (!trace_eval_maps)
5468                 trace_eval_maps = map_array;
5469         else {
5470                 ptr = trace_eval_maps;
5471                 for (;;) {
5472                         ptr = trace_eval_jmp_to_tail(ptr);
5473                         if (!ptr->tail.next)
5474                                 break;
5475                         ptr = ptr->tail.next;
5476
5477                 }
5478                 ptr->tail.next = map_array;
5479         }
5480         map_array->head.mod = mod;
5481         map_array->head.length = len;
5482         map_array++;
5483
5484         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5485                 map_array->map = **map;
5486                 map_array++;
5487         }
5488         memset(map_array, 0, sizeof(*map_array));
5489
5490         mutex_unlock(&trace_eval_mutex);
5491 }
5492
5493 static void trace_create_eval_file(struct dentry *d_tracer)
5494 {
5495         trace_create_file("eval_map", 0444, d_tracer,
5496                           NULL, &tracing_eval_map_fops);
5497 }
5498
5499 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5500 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5501 static inline void trace_insert_eval_map_file(struct module *mod,
5502                               struct trace_eval_map **start, int len) { }
5503 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5504
5505 static void trace_insert_eval_map(struct module *mod,
5506                                   struct trace_eval_map **start, int len)
5507 {
5508         struct trace_eval_map **map;
5509
5510         if (len <= 0)
5511                 return;
5512
5513         map = start;
5514
5515         trace_event_eval_update(map, len);
5516
5517         trace_insert_eval_map_file(mod, start, len);
5518 }
5519
5520 static ssize_t
5521 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5522                        size_t cnt, loff_t *ppos)
5523 {
5524         struct trace_array *tr = filp->private_data;
5525         char buf[MAX_TRACER_SIZE+2];
5526         int r;
5527
5528         mutex_lock(&trace_types_lock);
5529         r = sprintf(buf, "%s\n", tr->current_trace->name);
5530         mutex_unlock(&trace_types_lock);
5531
5532         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5533 }
5534
5535 int tracer_init(struct tracer *t, struct trace_array *tr)
5536 {
5537         tracing_reset_online_cpus(&tr->trace_buffer);
5538         return t->init(tr);
5539 }
5540
5541 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5542 {
5543         int cpu;
5544
5545         for_each_tracing_cpu(cpu)
5546                 per_cpu_ptr(buf->data, cpu)->entries = val;
5547 }
5548
5549 #ifdef CONFIG_TRACER_MAX_TRACE
5550 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5551 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5552                                         struct trace_buffer *size_buf, int cpu_id)
5553 {
5554         int cpu, ret = 0;
5555
5556         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5557                 for_each_tracing_cpu(cpu) {
5558                         ret = ring_buffer_resize(trace_buf->buffer,
5559                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5560                         if (ret < 0)
5561                                 break;
5562                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5563                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5564                 }
5565         } else {
5566                 ret = ring_buffer_resize(trace_buf->buffer,
5567                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5568                 if (ret == 0)
5569                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5570                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5571         }
5572
5573         return ret;
5574 }
5575 #endif /* CONFIG_TRACER_MAX_TRACE */
5576
5577 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5578                                         unsigned long size, int cpu)
5579 {
5580         int ret;
5581
5582         /*
5583          * If kernel or user changes the size of the ring buffer
5584          * we use the size that was given, and we can forget about
5585          * expanding it later.
5586          */
5587         ring_buffer_expanded = true;
5588
5589         /* May be called before buffers are initialized */
5590         if (!tr->trace_buffer.buffer)
5591                 return 0;
5592
5593         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5594         if (ret < 0)
5595                 return ret;
5596
5597 #ifdef CONFIG_TRACER_MAX_TRACE
5598         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5599             !tr->current_trace->use_max_tr)
5600                 goto out;
5601
5602         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5603         if (ret < 0) {
5604                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5605                                                      &tr->trace_buffer, cpu);
5606                 if (r < 0) {
5607                         /*
5608                          * AARGH! We are left with a max buffer of a
5609                          * different size!
5610                          * The max buffer is our "snapshot" buffer.
5611                          * When a tracer needs a snapshot (one of the
5612                          * latency tracers), it swaps the max buffer
5613                          * with the saved snapshot. We managed to
5614                          * update the size of the main buffer, but failed to
5615                          * update the size of the max buffer. But when we tried
5616                          * to reset the main buffer to the original size, we
5617                          * failed there too. This is very unlikely to
5618                          * happen, but if it does, warn and kill all
5619                          * tracing.
5620                          */
5621                         WARN_ON(1);
5622                         tracing_disabled = 1;
5623                 }
5624                 return ret;
5625         }
5626
5627         if (cpu == RING_BUFFER_ALL_CPUS)
5628                 set_buffer_entries(&tr->max_buffer, size);
5629         else
5630                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5631
5632  out:
5633 #endif /* CONFIG_TRACER_MAX_TRACE */
5634
5635         if (cpu == RING_BUFFER_ALL_CPUS)
5636                 set_buffer_entries(&tr->trace_buffer, size);
5637         else
5638                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5639
5640         return ret;
5641 }
5642
5643 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5644                                           unsigned long size, int cpu_id)
5645 {
5646         int ret = size;
5647
5648         mutex_lock(&trace_types_lock);
5649
5650         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5651                 /* make sure this cpu is enabled in the mask */
5652                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5653                         ret = -EINVAL;
5654                         goto out;
5655                 }
5656         }
5657
5658         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5659         if (ret < 0)
5660                 ret = -ENOMEM;
5661
5662 out:
5663         mutex_unlock(&trace_types_lock);
5664
5665         return ret;
5666 }
5667
5668
5669 /**
5670  * tracing_update_buffers - used by tracing facility to expand ring buffers
5671  *
5672  * To save memory when tracing is never used on a system with it
5673  * configured in, the ring buffers are set to a minimum size. But once
5674  * a user starts to use the tracing facility, they need to grow
5675  * to their default size.
5676  *
5677  * This function is to be called when a tracer is about to be used.
5678  */
5679 int tracing_update_buffers(void)
5680 {
5681         int ret = 0;
5682
5683         mutex_lock(&trace_types_lock);
5684         if (!ring_buffer_expanded)
5685                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5686                                                 RING_BUFFER_ALL_CPUS);
5687         mutex_unlock(&trace_types_lock);
5688
5689         return ret;
5690 }
5691
5692 struct trace_option_dentry;
5693
5694 static void
5695 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5696
5697 /*
5698  * Used to clear out the tracer before deletion of an instance.
5699  * Must have trace_types_lock held.
5700  */
5701 static void tracing_set_nop(struct trace_array *tr)
5702 {
5703         if (tr->current_trace == &nop_trace)
5704                 return;
5705
5706         tr->current_trace->enabled--;
5707
5708         if (tr->current_trace->reset)
5709                 tr->current_trace->reset(tr);
5710
5711         tr->current_trace = &nop_trace;
5712 }
5713
5714 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5715 {
5716         /* Only enable if the directory has been created already. */
5717         if (!tr->dir)
5718                 return;
5719
5720         create_trace_option_files(tr, t);
5721 }
5722
5723 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5724 {
5725         struct tracer *t;
5726 #ifdef CONFIG_TRACER_MAX_TRACE
5727         bool had_max_tr;
5728 #endif
5729         int ret = 0;
5730
5731         mutex_lock(&trace_types_lock);
5732
5733         if (!ring_buffer_expanded) {
5734                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5735                                                 RING_BUFFER_ALL_CPUS);
5736                 if (ret < 0)
5737                         goto out;
5738                 ret = 0;
5739         }
5740
5741         for (t = trace_types; t; t = t->next) {
5742                 if (strcmp(t->name, buf) == 0)
5743                         break;
5744         }
5745         if (!t) {
5746                 ret = -EINVAL;
5747                 goto out;
5748         }
5749         if (t == tr->current_trace)
5750                 goto out;
5751
5752 #ifdef CONFIG_TRACER_SNAPSHOT
5753         if (t->use_max_tr) {
5754                 arch_spin_lock(&tr->max_lock);
5755                 if (tr->cond_snapshot)
5756                         ret = -EBUSY;
5757                 arch_spin_unlock(&tr->max_lock);
5758                 if (ret)
5759                         goto out;
5760         }
5761 #endif
5762         /* Some tracers won't work on kernel command line */
5763         if (system_state < SYSTEM_RUNNING && t->noboot) {
5764                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5765                         t->name);
5766                 goto out;
5767         }
5768
5769         /* Some tracers are only allowed for the top level buffer */
5770         if (!trace_ok_for_array(t, tr)) {
5771                 ret = -EINVAL;
5772                 goto out;
5773         }
5774
5775         /* If trace pipe files are being read, we can't change the tracer */
5776         if (tr->current_trace->ref) {
5777                 ret = -EBUSY;
5778                 goto out;
5779         }
5780
5781         trace_branch_disable();
5782
5783         tr->current_trace->enabled--;
5784
5785         if (tr->current_trace->reset)
5786                 tr->current_trace->reset(tr);
5787
5788         /* Current trace needs to be nop_trace before synchronize_rcu */
5789         tr->current_trace = &nop_trace;
5790
5791 #ifdef CONFIG_TRACER_MAX_TRACE
5792         had_max_tr = tr->allocated_snapshot;
5793
5794         if (had_max_tr && !t->use_max_tr) {
5795                 /*
5796                  * We need to make sure that the update_max_tr sees that
5797                  * current_trace changed to nop_trace to keep it from
5798                  * swapping the buffers after we resize it.
5799                  * update_max_tr() is called with interrupts disabled,
5800                  * so a synchronize_rcu() is sufficient.
5801                  */
5802                 synchronize_rcu();
5803                 free_snapshot(tr);
5804         }
5805 #endif
5806
5807 #ifdef CONFIG_TRACER_MAX_TRACE
5808         if (t->use_max_tr && !had_max_tr) {
5809                 ret = tracing_alloc_snapshot_instance(tr);
5810                 if (ret < 0)
5811                         goto out;
5812         }
5813 #endif
5814
5815         if (t->init) {
5816                 ret = tracer_init(t, tr);
5817                 if (ret)
5818                         goto out;
5819         }
5820
5821         tr->current_trace = t;
5822         tr->current_trace->enabled++;
5823         trace_branch_enable(tr);
5824  out:
5825         mutex_unlock(&trace_types_lock);
5826
5827         return ret;
5828 }
5829
5830 static ssize_t
5831 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5832                         size_t cnt, loff_t *ppos)
5833 {
5834         struct trace_array *tr = filp->private_data;
5835         char buf[MAX_TRACER_SIZE+1];
5836         int i;
5837         size_t ret;
5838         int err;
5839
5840         ret = cnt;
5841
5842         if (cnt > MAX_TRACER_SIZE)
5843                 cnt = MAX_TRACER_SIZE;
5844
5845         if (copy_from_user(buf, ubuf, cnt))
5846                 return -EFAULT;
5847
5848         buf[cnt] = 0;
5849
5850         /* strip trailing whitespace. */
5851         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5852                 buf[i] = 0;
5853
5854         err = tracing_set_tracer(tr, buf);
5855         if (err)
5856                 return err;
5857
5858         *ppos += ret;
5859
5860         return ret;
5861 }
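
/*
 * Illustrative use of the current_tracer file this handler backs
 * (paths assume the usual tracefs mount at /sys/kernel/tracing):
 *
 *   cat  available_tracers          # e.g. "function nop"
 *   echo function > current_tracer  # switch tracers
 *   echo nop      > current_tracer  # back to no tracer
 *
 * Trailing whitespace (including echo's newline) is stripped above
 * before tracing_set_tracer() is called.
 */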
5862
5863 static ssize_t
5864 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5865                    size_t cnt, loff_t *ppos)
5866 {
5867         char buf[64];
5868         int r;
5869
5870         r = snprintf(buf, sizeof(buf), "%ld\n",
5871                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5872         if (r > sizeof(buf))
5873                 r = sizeof(buf);
5874         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5875 }
5876
5877 static ssize_t
5878 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5879                     size_t cnt, loff_t *ppos)
5880 {
5881         unsigned long val;
5882         int ret;
5883
5884         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5885         if (ret)
5886                 return ret;
5887
5888         *ptr = val * 1000;
5889
5890         return cnt;
5891 }
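
/*
 * Units for the two helpers above: _read() reports microseconds (or -1
 * if the value is unset), _write() takes microseconds from user space
 * and stores nanoseconds (val * 1000).  tracing_thresh and the max
 * latency file below share these helpers.
 */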
5892
5893 static ssize_t
5894 tracing_thresh_read(struct file *filp, char __user *ubuf,
5895                     size_t cnt, loff_t *ppos)
5896 {
5897         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5898 }
5899
5900 static ssize_t
5901 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5902                      size_t cnt, loff_t *ppos)
5903 {
5904         struct trace_array *tr = filp->private_data;
5905         int ret;
5906
5907         mutex_lock(&trace_types_lock);
5908         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5909         if (ret < 0)
5910                 goto out;
5911
5912         if (tr->current_trace->update_thresh) {
5913                 ret = tr->current_trace->update_thresh(tr);
5914                 if (ret < 0)
5915                         goto out;
5916         }
5917
5918         ret = cnt;
5919 out:
5920         mutex_unlock(&trace_types_lock);
5921
5922         return ret;
5923 }
5924
5925 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5926
5927 static ssize_t
5928 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5929                      size_t cnt, loff_t *ppos)
5930 {
5931         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5932 }
5933
5934 static ssize_t
5935 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5936                       size_t cnt, loff_t *ppos)
5937 {
5938         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5939 }
5940
5941 #endif
5942
5943 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5944 {
5945         struct trace_array *tr = inode->i_private;
5946         struct trace_iterator *iter;
5947         int ret;
5948
5949         ret = tracing_check_open_get_tr(tr);
5950         if (ret)
5951                 return ret;
5952
5953         mutex_lock(&trace_types_lock);
5954
5955         /* create a buffer to store the information to pass to userspace */
5956         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5957         if (!iter) {
5958                 ret = -ENOMEM;
5959                 __trace_array_put(tr);
5960                 goto out;
5961         }
5962
5963         trace_seq_init(&iter->seq);
5964         iter->trace = tr->current_trace;
5965
5966         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5967                 ret = -ENOMEM;
5968                 goto fail;
5969         }
5970
5971         /* trace pipe does not show start of buffer */
5972         cpumask_setall(iter->started);
5973
5974         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5975                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5976
5977         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5978         if (trace_clocks[tr->clock_id].in_ns)
5979                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5980
5981         iter->tr = tr;
5982         iter->trace_buffer = &tr->trace_buffer;
5983         iter->cpu_file = tracing_get_cpu(inode);
5984         mutex_init(&iter->mutex);
5985         filp->private_data = iter;
5986
5987         if (iter->trace->pipe_open)
5988                 iter->trace->pipe_open(iter);
5989
5990         nonseekable_open(inode, filp);
5991
5992         tr->current_trace->ref++;
5993 out:
5994         mutex_unlock(&trace_types_lock);
5995         return ret;
5996
5997 fail:
5998         kfree(iter);
5999         __trace_array_put(tr);
6000         mutex_unlock(&trace_types_lock);
6001         return ret;
6002 }
6003
6004 static int tracing_release_pipe(struct inode *inode, struct file *file)
6005 {
6006         struct trace_iterator *iter = file->private_data;
6007         struct trace_array *tr = inode->i_private;
6008
6009         mutex_lock(&trace_types_lock);
6010
6011         tr->current_trace->ref--;
6012
6013         if (iter->trace->pipe_close)
6014                 iter->trace->pipe_close(iter);
6015
6016         mutex_unlock(&trace_types_lock);
6017
6018         free_cpumask_var(iter->started);
6019         mutex_destroy(&iter->mutex);
6020         kfree(iter);
6021
6022         trace_array_put(tr);
6023
6024         return 0;
6025 }
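
/*
 * The current_trace->ref taken in tracing_open_pipe() and dropped here
 * is what makes tracing_set_tracer() return -EBUSY when asked to switch
 * tracers while a trace_pipe reader is still open.
 */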
6026
6027 static __poll_t
6028 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6029 {
6030         struct trace_array *tr = iter->tr;
6031
6032         /* Iterators are static, they should be filled or empty */
6033         if (trace_buffer_iter(iter, iter->cpu_file))
6034                 return EPOLLIN | EPOLLRDNORM;
6035
6036         if (tr->trace_flags & TRACE_ITER_BLOCK)
6037                 /*
6038                  * Always select as readable when in blocking mode
6039                  */
6040                 return EPOLLIN | EPOLLRDNORM;
6041         else
6042                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6043                                              filp, poll_table);
6044 }
6045
6046 static __poll_t
6047 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6048 {
6049         struct trace_iterator *iter = filp->private_data;
6050
6051         return trace_poll(iter, filp, poll_table);
6052 }
6053
6054 /* Must be called with iter->mutex held. */
6055 static int tracing_wait_pipe(struct file *filp)
6056 {
6057         struct trace_iterator *iter = filp->private_data;
6058         int ret;
6059
6060         while (trace_empty(iter)) {
6061
6062                 if (filp->f_flags & O_NONBLOCK) {
6063                         return -EAGAIN;
6064                 }
6065
6066                 /*
6067                  * We only return EOF after something has been read and tracing
6068                  * is disabled. If tracing is disabled but nothing has been read
6069                  * yet, we keep blocking. This allows a user to cat this file, and
6070                  * then enable tracing. But after we have read something,
6071                  * we give an EOF when tracing is again disabled.
6072                  *
6073                  * iter->pos will be 0 if we haven't read anything.
6074                  */
6075                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6076                         break;
6077
6078                 mutex_unlock(&iter->mutex);
6079
6080                 ret = wait_on_pipe(iter, 0);
6081
6082                 mutex_lock(&iter->mutex);
6083
6084                 if (ret)
6085                         return ret;
6086         }
6087
6088         return 1;
6089 }
6090
6091 /*
6092  * Consumer reader.
6093  */
6094 static ssize_t
6095 tracing_read_pipe(struct file *filp, char __user *ubuf,
6096                   size_t cnt, loff_t *ppos)
6097 {
6098         struct trace_iterator *iter = filp->private_data;
6099         ssize_t sret;
6100
6101         /*
6102          * Avoid more than one consumer on a single file descriptor
6103          * This is just a matter of trace coherency; the ring buffer itself
6104          * is protected.
6105          */
6106         mutex_lock(&iter->mutex);
6107
6108         /* return any leftover data */
6109         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6110         if (sret != -EBUSY)
6111                 goto out;
6112
6113         trace_seq_init(&iter->seq);
6114
6115         if (iter->trace->read) {
6116                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6117                 if (sret)
6118                         goto out;
6119         }
6120
6121 waitagain:
6122         sret = tracing_wait_pipe(filp);
6123         if (sret <= 0)
6124                 goto out;
6125
6126         /* stop when tracing is finished */
6127         if (trace_empty(iter)) {
6128                 sret = 0;
6129                 goto out;
6130         }
6131
6132         if (cnt >= PAGE_SIZE)
6133                 cnt = PAGE_SIZE - 1;
6134
6135         /* reset all but tr, trace, and overruns */
6136         memset(&iter->seq, 0,
6137                sizeof(struct trace_iterator) -
6138                offsetof(struct trace_iterator, seq));
6139         cpumask_clear(iter->started);
6140         trace_seq_init(&iter->seq);
6141         iter->pos = -1;
6142
6143         trace_event_read_lock();
6144         trace_access_lock(iter->cpu_file);
6145         while (trace_find_next_entry_inc(iter) != NULL) {
6146                 enum print_line_t ret;
6147                 int save_len = iter->seq.seq.len;
6148
6149                 ret = print_trace_line(iter);
6150                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6151                         /* don't print partial lines */
6152                         iter->seq.seq.len = save_len;
6153                         break;
6154                 }
6155                 if (ret != TRACE_TYPE_NO_CONSUME)
6156                         trace_consume(iter);
6157
6158                 if (trace_seq_used(&iter->seq) >= cnt)
6159                         break;
6160
6161                 /*
6162                  * Setting the full flag means we reached the trace_seq buffer
6163                  * size and we should have left via the partial-line condition above.
6164                  * If we get here, one of the trace_seq_*() functions was not used properly.
6165                  */
6166                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6167                           iter->ent->type);
6168         }
6169         trace_access_unlock(iter->cpu_file);
6170         trace_event_read_unlock();
6171
6172         /* Now copy what we have to the user */
6173         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6174         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6175                 trace_seq_init(&iter->seq);
6176
6177         /*
6178          * If there was nothing to send to user, in spite of consuming trace
6179          * entries, go back to wait for more entries.
6180          */
6181         if (sret == -EBUSY)
6182                 goto waitagain;
6183
6184 out:
6185         mutex_unlock(&iter->mutex);
6186
6187         return sret;
6188 }
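
/*
 * Because this is a consuming read, something as simple as
 *
 *   cat /sys/kernel/tracing/trace_pipe
 *
 * (path assumes the usual tracefs mount) removes entries as it prints
 * them, blocks in tracing_wait_pipe() until data is available, and
 * only sees EOF once something has been read and tracing is disabled.
 */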
6189
6190 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6191                                      unsigned int idx)
6192 {
6193         __free_page(spd->pages[idx]);
6194 }
6195
6196 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6197         .confirm                = generic_pipe_buf_confirm,
6198         .release                = generic_pipe_buf_release,
6199         .steal                  = generic_pipe_buf_steal,
6200         .get                    = generic_pipe_buf_get,
6201 };
6202
6203 static size_t
6204 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6205 {
6206         size_t count;
6207         int save_len;
6208         int ret;
6209
6210         /* Seq buffer is page-sized, exactly what we need. */
6211         for (;;) {
6212                 save_len = iter->seq.seq.len;
6213                 ret = print_trace_line(iter);
6214
6215                 if (trace_seq_has_overflowed(&iter->seq)) {
6216                         iter->seq.seq.len = save_len;
6217                         break;
6218                 }
6219
6220                 /*
6221                  * This should not be hit, because it should only
6222                  * be returned if the iter->seq overflowed. But check it
6223                  * anyway to be safe.
6224                  */
6225                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6226                         iter->seq.seq.len = save_len;
6227                         break;
6228                 }
6229
6230                 count = trace_seq_used(&iter->seq) - save_len;
6231                 if (rem < count) {
6232                         rem = 0;
6233                         iter->seq.seq.len = save_len;
6234                         break;
6235                 }
6236
6237                 if (ret != TRACE_TYPE_NO_CONSUME)
6238                         trace_consume(iter);
6239                 rem -= count;
6240                 if (!trace_find_next_entry_inc(iter)) {
6241                         rem = 0;
6242                         iter->ent = NULL;
6243                         break;
6244                 }
6245         }
6246
6247         return rem;
6248 }
6249
6250 static ssize_t tracing_splice_read_pipe(struct file *filp,
6251                                         loff_t *ppos,
6252                                         struct pipe_inode_info *pipe,
6253                                         size_t len,
6254                                         unsigned int flags)
6255 {
6256         struct page *pages_def[PIPE_DEF_BUFFERS];
6257         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6258         struct trace_iterator *iter = filp->private_data;
6259         struct splice_pipe_desc spd = {
6260                 .pages          = pages_def,
6261                 .partial        = partial_def,
6262                 .nr_pages       = 0, /* This gets updated below. */
6263                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6264                 .ops            = &tracing_pipe_buf_ops,
6265                 .spd_release    = tracing_spd_release_pipe,
6266         };
6267         ssize_t ret;
6268         size_t rem;
6269         unsigned int i;
6270
6271         if (splice_grow_spd(pipe, &spd))
6272                 return -ENOMEM;
6273
6274         mutex_lock(&iter->mutex);
6275
6276         if (iter->trace->splice_read) {
6277                 ret = iter->trace->splice_read(iter, filp,
6278                                                ppos, pipe, len, flags);
6279                 if (ret)
6280                         goto out_err;
6281         }
6282
6283         ret = tracing_wait_pipe(filp);
6284         if (ret <= 0)
6285                 goto out_err;
6286
6287         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6288                 ret = -EFAULT;
6289                 goto out_err;
6290         }
6291
6292         trace_event_read_lock();
6293         trace_access_lock(iter->cpu_file);
6294
6295         /* Fill as many pages as possible. */
6296         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6297                 spd.pages[i] = alloc_page(GFP_KERNEL);
6298                 if (!spd.pages[i])
6299                         break;
6300
6301                 rem = tracing_fill_pipe_page(rem, iter);
6302
6303                 /* Copy the data into the page, so we can start over. */
6304                 ret = trace_seq_to_buffer(&iter->seq,
6305                                           page_address(spd.pages[i]),
6306                                           trace_seq_used(&iter->seq));
6307                 if (ret < 0) {
6308                         __free_page(spd.pages[i]);
6309                         break;
6310                 }
6311                 spd.partial[i].offset = 0;
6312                 spd.partial[i].len = trace_seq_used(&iter->seq);
6313
6314                 trace_seq_init(&iter->seq);
6315         }
6316
6317         trace_access_unlock(iter->cpu_file);
6318         trace_event_read_unlock();
6319         mutex_unlock(&iter->mutex);
6320
6321         spd.nr_pages = i;
6322
6323         if (i)
6324                 ret = splice_to_pipe(pipe, &spd);
6325         else
6326                 ret = 0;
6327 out:
6328         splice_shrink_spd(&spd);
6329         return ret;
6330
6331 out_err:
6332         mutex_unlock(&iter->mutex);
6333         goto out;
6334 }
6335
6336 static ssize_t
6337 tracing_entries_read(struct file *filp, char __user *ubuf,
6338                      size_t cnt, loff_t *ppos)
6339 {
6340         struct inode *inode = file_inode(filp);
6341         struct trace_array *tr = inode->i_private;
6342         int cpu = tracing_get_cpu(inode);
6343         char buf[64];
6344         int r = 0;
6345         ssize_t ret;
6346
6347         mutex_lock(&trace_types_lock);
6348
6349         if (cpu == RING_BUFFER_ALL_CPUS) {
6350                 int cpu, buf_size_same;
6351                 unsigned long size;
6352
6353                 size = 0;
6354                 buf_size_same = 1;
6355                 /* check if all per-CPU buffer sizes are the same */
6356                 for_each_tracing_cpu(cpu) {
6357                         /* fill in the size from the first enabled CPU */
6358                         if (size == 0)
6359                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6360                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6361                                 buf_size_same = 0;
6362                                 break;
6363                         }
6364                 }
6365
6366                 if (buf_size_same) {
6367                         if (!ring_buffer_expanded)
6368                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6369                                             size >> 10,
6370                                             trace_buf_size >> 10);
6371                         else
6372                                 r = sprintf(buf, "%lu\n", size >> 10);
6373                 } else
6374                         r = sprintf(buf, "X\n");
6375         } else
6376                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6377
6378         mutex_unlock(&trace_types_lock);
6379
6380         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6381         return ret;
6382 }
6383
6384 static ssize_t
6385 tracing_entries_write(struct file *filp, const char __user *ubuf,
6386                       size_t cnt, loff_t *ppos)
6387 {
6388         struct inode *inode = file_inode(filp);
6389         struct trace_array *tr = inode->i_private;
6390         unsigned long val;
6391         int ret;
6392
6393         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6394         if (ret)
6395                 return ret;
6396
6397         /* must have at least 1 entry */
6398         if (!val)
6399                 return -EINVAL;
6400
6401         /* value is in KB */
6402         val <<= 10;
6403         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6404         if (ret < 0)
6405                 return ret;
6406
6407         *ppos += cnt;
6408
6409         return cnt;
6410 }
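
/*
 * Illustrative sketch, not part of this file: driving the writable
 * buffer_size_kb file backed by tracing_entries_write() from user
 * space.  The value is interpreted in kilobytes (val <<= 10 above)
 * and resizes the buffer(s) selected by the file that was opened.
 * The tracefs mount point below is an assumption.
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096", 4);	// request 4096 KB per CPU
 *		close(fd);
 *	}
 */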
6411
6412 static ssize_t
6413 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6414                                 size_t cnt, loff_t *ppos)
6415 {
6416         struct trace_array *tr = filp->private_data;
6417         char buf[64];
6418         int r, cpu;
6419         unsigned long size = 0, expanded_size = 0;
6420
6421         mutex_lock(&trace_types_lock);
6422         for_each_tracing_cpu(cpu) {
6423                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6424                 if (!ring_buffer_expanded)
6425                         expanded_size += trace_buf_size >> 10;
6426         }
6427         if (ring_buffer_expanded)
6428                 r = sprintf(buf, "%lu\n", size);
6429         else
6430                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6431         mutex_unlock(&trace_types_lock);
6432
6433         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6434 }
6435
6436 static ssize_t
6437 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6438                           size_t cnt, loff_t *ppos)
6439 {
6440         /*
6441          * There is no need to read what the user has written; this function
6442          * exists only so that using "echo" on this file does not return an error.
6443          */
6444
6445         *ppos += cnt;
6446
6447         return cnt;
6448 }
6449
6450 static int
6451 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6452 {
6453         struct trace_array *tr = inode->i_private;
6454
6455         /* disable tracing ? */
6456         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6457                 tracer_tracing_off(tr);
6458         /* resize the ring buffer to 0 */
6459         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6460
6461         trace_array_put(tr);
6462
6463         return 0;
6464 }
6465
6466 static ssize_t
6467 tracing_mark_write(struct file *filp, const char __user *ubuf,
6468                                         size_t cnt, loff_t *fpos)
6469 {
6470         struct trace_array *tr = filp->private_data;
6471         struct ring_buffer_event *event;
6472         enum event_trigger_type tt = ETT_NONE;
6473         struct ring_buffer *buffer;
6474         struct print_entry *entry;
6475         unsigned long irq_flags;
6476         ssize_t written;
6477         int size;
6478         int len;
6479
6480 /* Used in tracing_mark_raw_write() as well */
6481 #define FAULTED_STR "<faulted>"
6482 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6483
6484         if (tracing_disabled)
6485                 return -EINVAL;
6486
6487         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6488                 return -EINVAL;
6489
6490         if (cnt > TRACE_BUF_SIZE)
6491                 cnt = TRACE_BUF_SIZE;
6492
6493         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6494
6495         local_save_flags(irq_flags);
6496         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6497
6498         /* If less than "<faulted>", then make sure we can still add that */
6499         if (cnt < FAULTED_SIZE)
6500                 size += FAULTED_SIZE - cnt;
6501
6502         buffer = tr->trace_buffer.buffer;
6503         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6504                                             irq_flags, preempt_count());
6505         if (unlikely(!event))
6506                 /* Ring buffer disabled, return as if not open for write */
6507                 return -EBADF;
6508
6509         entry = ring_buffer_event_data(event);
6510         entry->ip = _THIS_IP_;
6511
6512         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6513         if (len) {
6514                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6515                 cnt = FAULTED_SIZE;
6516                 written = -EFAULT;
6517         } else
6518                 written = cnt;
6519         len = cnt;
6520
6521         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6522                 /* do not add \n before testing triggers, but add \0 */
6523                 entry->buf[cnt] = '\0';
6524                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6525         }
6526
6527         if (entry->buf[cnt - 1] != '\n') {
6528                 entry->buf[cnt] = '\n';
6529                 entry->buf[cnt + 1] = '\0';
6530         } else
6531                 entry->buf[cnt] = '\0';
6532
6533         __buffer_unlock_commit(buffer, event);
6534
6535         if (tt)
6536                 event_triggers_post_call(tr->trace_marker_file, tt);
6537
6538         if (written > 0)
6539                 *fpos += written;
6540
6541         return written;
6542 }
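
/*
 * Illustrative sketch, not part of this file: writing to the
 * trace_marker file served by tracing_mark_write().  At most
 * TRACE_BUF_SIZE bytes are recorded per write, a '\n' is appended when
 * missing, and a faulted copy shows up as "<faulted>" in the trace.
 * The tracefs path is an assumption about where tracefs is mounted.
 *
 *	const char msg[] = "hello from user space";
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, msg, sizeof(msg) - 1);
 *		close(fd);
 *	}
 */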
6543
6544 /* Limit it for now to 3K (including tag) */
6545 #define RAW_DATA_MAX_SIZE (1024*3)
6546
6547 static ssize_t
6548 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6549                                         size_t cnt, loff_t *fpos)
6550 {
6551         struct trace_array *tr = filp->private_data;
6552         struct ring_buffer_event *event;
6553         struct ring_buffer *buffer;
6554         struct raw_data_entry *entry;
6555         unsigned long irq_flags;
6556         ssize_t written;
6557         int size;
6558         int len;
6559
6560 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6561
6562         if (tracing_disabled)
6563                 return -EINVAL;
6564
6565         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6566                 return -EINVAL;
6567
6568         /* The marker must at least have a tag id */
6569         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6570                 return -EINVAL;
6571
6572         if (cnt > TRACE_BUF_SIZE)
6573                 cnt = TRACE_BUF_SIZE;
6574
6575         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6576
6577         local_save_flags(irq_flags);
6578         size = sizeof(*entry) + cnt;
6579         if (cnt < FAULT_SIZE_ID)
6580                 size += FAULT_SIZE_ID - cnt;
6581
6582         buffer = tr->trace_buffer.buffer;
6583         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6584                                             irq_flags, preempt_count());
6585         if (!event)
6586                 /* Ring buffer disabled, return as if not open for write */
6587                 return -EBADF;
6588
6589         entry = ring_buffer_event_data(event);
6590
6591         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6592         if (len) {
6593                 entry->id = -1;
6594                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6595                 written = -EFAULT;
6596         } else
6597                 written = cnt;
6598
6599         __buffer_unlock_commit(buffer, event);
6600
6601         if (written > 0)
6602                 *fpos += written;
6603
6604         return written;
6605 }
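
/*
 * Illustrative sketch, not part of this file: trace_marker_raw takes a
 * binary record that starts with an unsigned int tag id, is at least
 * sizeof(unsigned int) bytes and at most RAW_DATA_MAX_SIZE bytes.  The
 * path and tag value below are assumptions.
 *
 *	struct {
 *		unsigned int id;
 *		char payload[8];
 *	} rec = { .id = 42, .payload = "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */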
6606
6607 static int tracing_clock_show(struct seq_file *m, void *v)
6608 {
6609         struct trace_array *tr = m->private;
6610         int i;
6611
6612         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6613                 seq_printf(m,
6614                         "%s%s%s%s", i ? " " : "",
6615                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6616                         i == tr->clock_id ? "]" : "");
6617         seq_putc(m, '\n');
6618
6619         return 0;
6620 }
6621
6622 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6623 {
6624         int i;
6625
6626         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6627                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6628                         break;
6629         }
6630         if (i == ARRAY_SIZE(trace_clocks))
6631                 return -EINVAL;
6632
6633         mutex_lock(&trace_types_lock);
6634
6635         tr->clock_id = i;
6636
6637         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6638
6639         /*
6640          * New clock may not be consistent with the previous clock.
6641          * Reset the buffer so that it doesn't have incomparable timestamps.
6642          */
6643         tracing_reset_online_cpus(&tr->trace_buffer);
6644
6645 #ifdef CONFIG_TRACER_MAX_TRACE
6646         if (tr->max_buffer.buffer)
6647                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6648         tracing_reset_online_cpus(&tr->max_buffer);
6649 #endif
6650
6651         mutex_unlock(&trace_types_lock);
6652
6653         return 0;
6654 }
6655
6656 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6657                                    size_t cnt, loff_t *fpos)
6658 {
6659         struct seq_file *m = filp->private_data;
6660         struct trace_array *tr = m->private;
6661         char buf[64];
6662         const char *clockstr;
6663         int ret;
6664
6665         if (cnt >= sizeof(buf))
6666                 return -EINVAL;
6667
6668         if (copy_from_user(buf, ubuf, cnt))
6669                 return -EFAULT;
6670
6671         buf[cnt] = 0;
6672
6673         clockstr = strstrip(buf);
6674
6675         ret = tracing_set_clock(tr, clockstr);
6676         if (ret)
6677                 return ret;
6678
6679         *fpos += cnt;
6680
6681         return cnt;
6682 }
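
/*
 * Illustrative sketch, not part of this file: selecting a clock by
 * writing one of the names listed by tracing_clock_show() (the current
 * one is shown in brackets) to the trace_clock file.  Note that the
 * ring buffer is reset when the clock changes.  The path and the
 * "mono" clock name are assumptions.
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono", 4);
 *		close(fd);
 *	}
 */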
6683
6684 static int tracing_clock_open(struct inode *inode, struct file *file)
6685 {
6686         struct trace_array *tr = inode->i_private;
6687         int ret;
6688
6689         ret = tracing_check_open_get_tr(tr);
6690         if (ret)
6691                 return ret;
6692
6693         ret = single_open(file, tracing_clock_show, inode->i_private);
6694         if (ret < 0)
6695                 trace_array_put(tr);
6696
6697         return ret;
6698 }
6699
6700 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6701 {
6702         struct trace_array *tr = m->private;
6703
6704         mutex_lock(&trace_types_lock);
6705
6706         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6707                 seq_puts(m, "delta [absolute]\n");
6708         else
6709                 seq_puts(m, "[delta] absolute\n");
6710
6711         mutex_unlock(&trace_types_lock);
6712
6713         return 0;
6714 }
6715
6716 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6717 {
6718         struct trace_array *tr = inode->i_private;
6719         int ret;
6720
6721         ret = tracing_check_open_get_tr(tr);
6722         if (ret)
6723                 return ret;
6724
6725         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6726         if (ret < 0)
6727                 trace_array_put(tr);
6728
6729         return ret;
6730 }
6731
6732 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6733 {
6734         int ret = 0;
6735
6736         mutex_lock(&trace_types_lock);
6737
6738         if (abs && tr->time_stamp_abs_ref++)
6739                 goto out;
6740
6741         if (!abs) {
6742                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6743                         ret = -EINVAL;
6744                         goto out;
6745                 }
6746
6747                 if (--tr->time_stamp_abs_ref)
6748                         goto out;
6749         }
6750
6751         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6752
6753 #ifdef CONFIG_TRACER_MAX_TRACE
6754         if (tr->max_buffer.buffer)
6755                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6756 #endif
6757  out:
6758         mutex_unlock(&trace_types_lock);
6759
6760         return ret;
6761 }
6762
6763 struct ftrace_buffer_info {
6764         struct trace_iterator   iter;
6765         void                    *spare;
6766         unsigned int            spare_cpu;
6767         unsigned int            read;
6768 };
6769
6770 #ifdef CONFIG_TRACER_SNAPSHOT
6771 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6772 {
6773         struct trace_array *tr = inode->i_private;
6774         struct trace_iterator *iter;
6775         struct seq_file *m;
6776         int ret;
6777
6778         ret = tracing_check_open_get_tr(tr);
6779         if (ret)
6780                 return ret;
6781
6782         if (file->f_mode & FMODE_READ) {
6783                 iter = __tracing_open(inode, file, true);
6784                 if (IS_ERR(iter))
6785                         ret = PTR_ERR(iter);
6786         } else {
6787                 /* Writes still need the seq_file to hold the private data */
6788                 ret = -ENOMEM;
6789                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6790                 if (!m)
6791                         goto out;
6792                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6793                 if (!iter) {
6794                         kfree(m);
6795                         goto out;
6796                 }
6797                 ret = 0;
6798
6799                 iter->tr = tr;
6800                 iter->trace_buffer = &tr->max_buffer;
6801                 iter->cpu_file = tracing_get_cpu(inode);
6802                 m->private = iter;
6803                 file->private_data = m;
6804         }
6805 out:
6806         if (ret < 0)
6807                 trace_array_put(tr);
6808
6809         return ret;
6810 }
6811
6812 static ssize_t
6813 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6814                        loff_t *ppos)
6815 {
6816         struct seq_file *m = filp->private_data;
6817         struct trace_iterator *iter = m->private;
6818         struct trace_array *tr = iter->tr;
6819         unsigned long val;
6820         int ret;
6821
6822         ret = tracing_update_buffers();
6823         if (ret < 0)
6824                 return ret;
6825
6826         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6827         if (ret)
6828                 return ret;
6829
6830         mutex_lock(&trace_types_lock);
6831
6832         if (tr->current_trace->use_max_tr) {
6833                 ret = -EBUSY;
6834                 goto out;
6835         }
6836
6837         arch_spin_lock(&tr->max_lock);
6838         if (tr->cond_snapshot)
6839                 ret = -EBUSY;
6840         arch_spin_unlock(&tr->max_lock);
6841         if (ret)
6842                 goto out;
6843
6844         switch (val) {
6845         case 0:
6846                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6847                         ret = -EINVAL;
6848                         break;
6849                 }
6850                 if (tr->allocated_snapshot)
6851                         free_snapshot(tr);
6852                 break;
6853         case 1:
6854 /* Only allow per-cpu swap if the ring buffer supports it */
6855 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6856                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6857                         ret = -EINVAL;
6858                         break;
6859                 }
6860 #endif
6861                 if (tr->allocated_snapshot)
6862                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6863                                         &tr->trace_buffer, iter->cpu_file);
6864                 else
6865                         ret = tracing_alloc_snapshot_instance(tr);
6866                 if (ret < 0)
6867                         break;
6868                 local_irq_disable();
6869                 /* Now, we're going to swap */
6870                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6871                         update_max_tr(tr, current, smp_processor_id(), NULL);
6872                 else
6873                         update_max_tr_single(tr, current, iter->cpu_file);
6874                 local_irq_enable();
6875                 break;
6876         default:
6877                 if (tr->allocated_snapshot) {
6878                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6879                                 tracing_reset_online_cpus(&tr->max_buffer);
6880                         else
6881                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6882                 }
6883                 break;
6884         }
6885
6886         if (ret >= 0) {
6887                 *ppos += cnt;
6888                 ret = cnt;
6889         }
6890 out:
6891         mutex_unlock(&trace_types_lock);
6892         return ret;
6893 }
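
/*
 * Illustrative sketch, not part of this file: the snapshot file
 * handled above accepts three kinds of writes - "0" frees the snapshot
 * buffer, "1" allocates it if needed and swaps it with the live
 * buffer, and any other number just clears the snapshot buffer.  The
 * tracefs path is an assumption.
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot now
 *		close(fd);
 *	}
 */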
6894
6895 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6896 {
6897         struct seq_file *m = file->private_data;
6898         int ret;
6899
6900         ret = tracing_release(inode, file);
6901
6902         if (file->f_mode & FMODE_READ)
6903                 return ret;
6904
6905         /* If write only, the seq_file is just a stub */
6906         if (m)
6907                 kfree(m->private);
6908         kfree(m);
6909
6910         return 0;
6911 }
6912
6913 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6914 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6915                                     size_t count, loff_t *ppos);
6916 static int tracing_buffers_release(struct inode *inode, struct file *file);
6917 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6918                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6919
6920 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6921 {
6922         struct ftrace_buffer_info *info;
6923         int ret;
6924
6925         /* The following checks for tracefs lockdown */
6926         ret = tracing_buffers_open(inode, filp);
6927         if (ret < 0)
6928                 return ret;
6929
6930         info = filp->private_data;
6931
6932         if (info->iter.trace->use_max_tr) {
6933                 tracing_buffers_release(inode, filp);
6934                 return -EBUSY;
6935         }
6936
6937         info->iter.snapshot = true;
6938         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6939
6940         return ret;
6941 }
6942
6943 #endif /* CONFIG_TRACER_SNAPSHOT */
6944
6945
6946 static const struct file_operations tracing_thresh_fops = {
6947         .open           = tracing_open_generic,
6948         .read           = tracing_thresh_read,
6949         .write          = tracing_thresh_write,
6950         .llseek         = generic_file_llseek,
6951 };
6952
6953 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6954 static const struct file_operations tracing_max_lat_fops = {
6955         .open           = tracing_open_generic,
6956         .read           = tracing_max_lat_read,
6957         .write          = tracing_max_lat_write,
6958         .llseek         = generic_file_llseek,
6959 };
6960 #endif
6961
6962 static const struct file_operations set_tracer_fops = {
6963         .open           = tracing_open_generic,
6964         .read           = tracing_set_trace_read,
6965         .write          = tracing_set_trace_write,
6966         .llseek         = generic_file_llseek,
6967 };
6968
6969 static const struct file_operations tracing_pipe_fops = {
6970         .open           = tracing_open_pipe,
6971         .poll           = tracing_poll_pipe,
6972         .read           = tracing_read_pipe,
6973         .splice_read    = tracing_splice_read_pipe,
6974         .release        = tracing_release_pipe,
6975         .llseek         = no_llseek,
6976 };
6977
6978 static const struct file_operations tracing_entries_fops = {
6979         .open           = tracing_open_generic_tr,
6980         .read           = tracing_entries_read,
6981         .write          = tracing_entries_write,
6982         .llseek         = generic_file_llseek,
6983         .release        = tracing_release_generic_tr,
6984 };
6985
6986 static const struct file_operations tracing_total_entries_fops = {
6987         .open           = tracing_open_generic_tr,
6988         .read           = tracing_total_entries_read,
6989         .llseek         = generic_file_llseek,
6990         .release        = tracing_release_generic_tr,
6991 };
6992
6993 static const struct file_operations tracing_free_buffer_fops = {
6994         .open           = tracing_open_generic_tr,
6995         .write          = tracing_free_buffer_write,
6996         .release        = tracing_free_buffer_release,
6997 };
6998
6999 static const struct file_operations tracing_mark_fops = {
7000         .open           = tracing_open_generic_tr,
7001         .write          = tracing_mark_write,
7002         .llseek         = generic_file_llseek,
7003         .release        = tracing_release_generic_tr,
7004 };
7005
7006 static const struct file_operations tracing_mark_raw_fops = {
7007         .open           = tracing_open_generic_tr,
7008         .write          = tracing_mark_raw_write,
7009         .llseek         = generic_file_llseek,
7010         .release        = tracing_release_generic_tr,
7011 };
7012
7013 static const struct file_operations trace_clock_fops = {
7014         .open           = tracing_clock_open,
7015         .read           = seq_read,
7016         .llseek         = seq_lseek,
7017         .release        = tracing_single_release_tr,
7018         .write          = tracing_clock_write,
7019 };
7020
7021 static const struct file_operations trace_time_stamp_mode_fops = {
7022         .open           = tracing_time_stamp_mode_open,
7023         .read           = seq_read,
7024         .llseek         = seq_lseek,
7025         .release        = tracing_single_release_tr,
7026 };
7027
7028 #ifdef CONFIG_TRACER_SNAPSHOT
7029 static const struct file_operations snapshot_fops = {
7030         .open           = tracing_snapshot_open,
7031         .read           = seq_read,
7032         .write          = tracing_snapshot_write,
7033         .llseek         = tracing_lseek,
7034         .release        = tracing_snapshot_release,
7035 };
7036
7037 static const struct file_operations snapshot_raw_fops = {
7038         .open           = snapshot_raw_open,
7039         .read           = tracing_buffers_read,
7040         .release        = tracing_buffers_release,
7041         .splice_read    = tracing_buffers_splice_read,
7042         .llseek         = no_llseek,
7043 };
7044
7045 #endif /* CONFIG_TRACER_SNAPSHOT */
7046
7047 #define TRACING_LOG_ERRS_MAX    8
7048 #define TRACING_LOG_LOC_MAX     128
7049
7050 #define CMD_PREFIX "  Command: "
7051
7052 struct err_info {
7053         const char      **errs; /* ptr to loc-specific array of err strings */
7054         u8              type;   /* index into errs -> specific err string */
7055         u8              pos;    /* caret position in cmd; MAX_FILTER_STR_VAL = 256 */
7056         u64             ts;
7057 };
7058
7059 struct tracing_log_err {
7060         struct list_head        list;
7061         struct err_info         info;
7062         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7063         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7064 };
7065
7066 static DEFINE_MUTEX(tracing_err_log_lock);
7067
7068 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7069 {
7070         struct tracing_log_err *err;
7071
7072         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7073                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7074                 if (!err)
7075                         err = ERR_PTR(-ENOMEM);
7076                 tr->n_err_log_entries++;
7077
7078                 return err;
7079         }
7080
7081         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7082         list_del(&err->list);
7083
7084         return err;
7085 }
7086
7087 /**
7088  * err_pos - find the position of a string within a command for error careting
7089  * @cmd: The tracing command that caused the error
7090  * @str: The string to position the caret at within @cmd
7091  *
7092  * Finds the position of the first occurrence of @str within @cmd.  The
7093  * return value can be passed to tracing_log_err() for caret placement
7094  * within @cmd.
7095  *
7096  * Returns the index within @cmd of the first occurrence of @str or 0
7097  * if @str was not found.
7098  */
7099 unsigned int err_pos(char *cmd, const char *str)
7100 {
7101         char *found;
7102
7103         if (WARN_ON(!strlen(cmd)))
7104                 return 0;
7105
7106         found = strstr(cmd, str);
7107         if (found)
7108                 return found - cmd;
7109
7110         return 0;
7111 }
7112
7113 /**
7114  * tracing_log_err - write an error to the tracing error log
7115  * @tr: The associated trace array for the error (NULL for top level array)
7116  * @loc: A string describing where the error occurred
7117  * @cmd: The tracing command that caused the error
7118  * @errs: The array of loc-specific static error strings
7119  * @type: The index into errs[], which produces the specific static err string
7120  * @pos: The position the caret should be placed in the cmd
7121  *
7122  * Writes an error into tracing/error_log of the form:
7123  *
7124  * <loc>: error: <text>
7125  *   Command: <cmd>
7126  *              ^
7127  *
7128  * tracing/error_log is a small log file containing the last
7129  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7130  * unless there has been a tracing error, and the error log can be
7131  * cleared and have its memory freed by writing the empty string in
7132  * truncation mode to it, i.e. echo > tracing/error_log.
7133  *
7134  * NOTE: the @errs array along with the @type param are used to
7135  * produce a static error string - this string is not copied and saved
7136  * when the error is logged - only a pointer to it is saved.  See
7137  * existing callers for examples of how static strings are typically
7138  * defined for use with tracing_log_err().
7139  */
7140 void tracing_log_err(struct trace_array *tr,
7141                      const char *loc, const char *cmd,
7142                      const char **errs, u8 type, u8 pos)
7143 {
7144         struct tracing_log_err *err;
7145
7146         if (!tr)
7147                 tr = &global_trace;
7148
7149         mutex_lock(&tracing_err_log_lock);
7150         err = get_tracing_log_err(tr);
7151         if (PTR_ERR(err) == -ENOMEM) {
7152                 mutex_unlock(&tracing_err_log_lock);
7153                 return;
7154         }
7155
7156         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7157         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7158
7159         err->info.errs = errs;
7160         err->info.type = type;
7161         err->info.pos = pos;
7162         err->info.ts = local_clock();
7163
7164         list_add_tail(&err->list, &tr->err_log);
7165         mutex_unlock(&tracing_err_log_lock);
7166 }
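
/*
 * Minimal caller sketch, not taken from this file, of the pattern
 * described above: the error strings live in a static array, only the
 * array pointer and an index are saved with the entry, and err_pos()
 * supplies the caret position.  All names below are hypothetical.
 *
 *	static const char *foo_errs[] = {
 *		"Duplicate field name",		// type 0
 *		"Unknown field name",		// type 1
 *	};
 *
 *	static void foo_report_unknown(struct trace_array *tr,
 *				       char *cmd, const char *field)
 *	{
 *		tracing_log_err(tr, "foo: parse error", cmd, foo_errs,
 *				1, err_pos(cmd, field));
 *	}
 */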
7167
7168 static void clear_tracing_err_log(struct trace_array *tr)
7169 {
7170         struct tracing_log_err *err, *next;
7171
7172         mutex_lock(&tracing_err_log_lock);
7173         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7174                 list_del(&err->list);
7175                 kfree(err);
7176         }
7177
7178         tr->n_err_log_entries = 0;
7179         mutex_unlock(&tracing_err_log_lock);
7180 }
7181
7182 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7183 {
7184         struct trace_array *tr = m->private;
7185
7186         mutex_lock(&tracing_err_log_lock);
7187
7188         return seq_list_start(&tr->err_log, *pos);
7189 }
7190
7191 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7192 {
7193         struct trace_array *tr = m->private;
7194
7195         return seq_list_next(v, &tr->err_log, pos);
7196 }
7197
7198 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7199 {
7200         mutex_unlock(&tracing_err_log_lock);
7201 }
7202
7203 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7204 {
7205         u8 i;
7206
7207         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7208                 seq_putc(m, ' ');
7209         for (i = 0; i < pos; i++)
7210                 seq_putc(m, ' ');
7211         seq_puts(m, "^\n");
7212 }
7213
7214 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7215 {
7216         struct tracing_log_err *err = v;
7217
7218         if (err) {
7219                 const char *err_text = err->info.errs[err->info.type];
7220                 u64 sec = err->info.ts;
7221                 u32 nsec;
7222
7223                 nsec = do_div(sec, NSEC_PER_SEC);
7224                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7225                            err->loc, err_text);
7226                 seq_printf(m, "%s", err->cmd);
7227                 tracing_err_log_show_pos(m, err->info.pos);
7228         }
7229
7230         return 0;
7231 }
7232
7233 static const struct seq_operations tracing_err_log_seq_ops = {
7234         .start  = tracing_err_log_seq_start,
7235         .next   = tracing_err_log_seq_next,
7236         .stop   = tracing_err_log_seq_stop,
7237         .show   = tracing_err_log_seq_show
7238 };
7239
7240 static int tracing_err_log_open(struct inode *inode, struct file *file)
7241 {
7242         struct trace_array *tr = inode->i_private;
7243         int ret = 0;
7244
7245         ret = tracing_check_open_get_tr(tr);
7246         if (ret)
7247                 return ret;
7248
7249         /* If this file was opened for write, then erase contents */
7250         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7251                 clear_tracing_err_log(tr);
7252
7253         if (file->f_mode & FMODE_READ) {
7254                 ret = seq_open(file, &tracing_err_log_seq_ops);
7255                 if (!ret) {
7256                         struct seq_file *m = file->private_data;
7257                         m->private = tr;
7258                 } else {
7259                         trace_array_put(tr);
7260                 }
7261         }
7262         return ret;
7263 }
7264
7265 static ssize_t tracing_err_log_write(struct file *file,
7266                                      const char __user *buffer,
7267                                      size_t count, loff_t *ppos)
7268 {
7269         return count;
7270 }
7271
7272 static int tracing_err_log_release(struct inode *inode, struct file *file)
7273 {
7274         struct trace_array *tr = inode->i_private;
7275
7276         trace_array_put(tr);
7277
7278         if (file->f_mode & FMODE_READ)
7279                 seq_release(inode, file);
7280
7281         return 0;
7282 }
7283
7284 static const struct file_operations tracing_err_log_fops = {
7285         .open           = tracing_err_log_open,
7286         .write          = tracing_err_log_write,
7287         .read           = seq_read,
7288         .llseek         = seq_lseek,
7289         .release        = tracing_err_log_release,
7290 };
7291
7292 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7293 {
7294         struct trace_array *tr = inode->i_private;
7295         struct ftrace_buffer_info *info;
7296         int ret;
7297
7298         ret = tracing_check_open_get_tr(tr);
7299         if (ret)
7300                 return ret;
7301
7302         info = kzalloc(sizeof(*info), GFP_KERNEL);
7303         if (!info) {
7304                 trace_array_put(tr);
7305                 return -ENOMEM;
7306         }
7307
7308         mutex_lock(&trace_types_lock);
7309
7310         info->iter.tr           = tr;
7311         info->iter.cpu_file     = tracing_get_cpu(inode);
7312         info->iter.trace        = tr->current_trace;
7313         info->iter.trace_buffer = &tr->trace_buffer;
7314         info->spare             = NULL;
7315         /* Force reading ring buffer for first read */
7316         info->read              = (unsigned int)-1;
7317
7318         filp->private_data = info;
7319
7320         tr->current_trace->ref++;
7321
7322         mutex_unlock(&trace_types_lock);
7323
7324         ret = nonseekable_open(inode, filp);
7325         if (ret < 0)
7326                 trace_array_put(tr);
7327
7328         return ret;
7329 }
7330
7331 static __poll_t
7332 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7333 {
7334         struct ftrace_buffer_info *info = filp->private_data;
7335         struct trace_iterator *iter = &info->iter;
7336
7337         return trace_poll(iter, filp, poll_table);
7338 }
7339
7340 static ssize_t
7341 tracing_buffers_read(struct file *filp, char __user *ubuf,
7342                      size_t count, loff_t *ppos)
7343 {
7344         struct ftrace_buffer_info *info = filp->private_data;
7345         struct trace_iterator *iter = &info->iter;
7346         ssize_t ret = 0;
7347         ssize_t size;
7348
7349         if (!count)
7350                 return 0;
7351
7352 #ifdef CONFIG_TRACER_MAX_TRACE
7353         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7354                 return -EBUSY;
7355 #endif
7356
7357         if (!info->spare) {
7358                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7359                                                           iter->cpu_file);
7360                 if (IS_ERR(info->spare)) {
7361                         ret = PTR_ERR(info->spare);
7362                         info->spare = NULL;
7363                 } else {
7364                         info->spare_cpu = iter->cpu_file;
7365                 }
7366         }
7367         if (!info->spare)
7368                 return ret;
7369
7370         /* Do we have previous read data to read? */
7371         if (info->read < PAGE_SIZE)
7372                 goto read;
7373
7374  again:
7375         trace_access_lock(iter->cpu_file);
7376         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7377                                     &info->spare,
7378                                     count,
7379                                     iter->cpu_file, 0);
7380         trace_access_unlock(iter->cpu_file);
7381
7382         if (ret < 0) {
7383                 if (trace_empty(iter)) {
7384                         if ((filp->f_flags & O_NONBLOCK))
7385                                 return -EAGAIN;
7386
7387                         ret = wait_on_pipe(iter, 0);
7388                         if (ret)
7389                                 return ret;
7390
7391                         goto again;
7392                 }
7393                 return 0;
7394         }
7395
7396         info->read = 0;
7397  read:
7398         size = PAGE_SIZE - info->read;
7399         if (size > count)
7400                 size = count;
7401
7402         ret = copy_to_user(ubuf, info->spare + info->read, size);
7403         if (ret == size)
7404                 return -EFAULT;
7405
7406         size -= ret;
7407
7408         *ppos += size;
7409         info->read += size;
7410
7411         return size;
7412 }
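
/*
 * Illustrative sketch, not part of this file: per_cpu/cpuN/trace_pipe_raw
 * hands out whole ring-buffer pages, so user space usually reads
 * PAGE_SIZE chunks and passes them to a binary consumer such as
 * trace-cmd.  The path, the 4096 byte page size and consume() are
 * assumptions.
 *
 *	char page[4096];
 *	ssize_t r;
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	while (fd >= 0 && (r = read(fd, page, sizeof(page))) > 0)
 *		consume(page, r);	// hypothetical page consumer
 */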
7413
7414 static int tracing_buffers_release(struct inode *inode, struct file *file)
7415 {
7416         struct ftrace_buffer_info *info = file->private_data;
7417         struct trace_iterator *iter = &info->iter;
7418
7419         mutex_lock(&trace_types_lock);
7420
7421         iter->tr->current_trace->ref--;
7422
7423         __trace_array_put(iter->tr);
7424
7425         if (info->spare)
7426                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7427                                            info->spare_cpu, info->spare);
7428         kfree(info);
7429
7430         mutex_unlock(&trace_types_lock);
7431
7432         return 0;
7433 }
7434
7435 struct buffer_ref {
7436         struct ring_buffer      *buffer;
7437         void                    *page;
7438         int                     cpu;
7439         refcount_t              refcount;
7440 };
7441
7442 static void buffer_ref_release(struct buffer_ref *ref)
7443 {
7444         if (!refcount_dec_and_test(&ref->refcount))
7445                 return;
7446         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7447         kfree(ref);
7448 }
7449
7450 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7451                                     struct pipe_buffer *buf)
7452 {
7453         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7454
7455         buffer_ref_release(ref);
7456         buf->private = 0;
7457 }
7458
7459 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7460                                 struct pipe_buffer *buf)
7461 {
7462         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7463
7464         if (refcount_read(&ref->refcount) > INT_MAX/2)
7465                 return false;
7466
7467         refcount_inc(&ref->refcount);
7468         return true;
7469 }
7470
7471 /* Pipe buffer operations for a buffer. */
7472 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7473         .confirm                = generic_pipe_buf_confirm,
7474         .release                = buffer_pipe_buf_release,
7475         .steal                  = generic_pipe_buf_nosteal,
7476         .get                    = buffer_pipe_buf_get,
7477 };
7478
7479 /*
7480  * Callback from splice_to_pipe(); used when we need to release some pages
7481  * at the end of the spd in case we errored out while filling the pipe.
7482  */
7483 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7484 {
7485         struct buffer_ref *ref =
7486                 (struct buffer_ref *)spd->partial[i].private;
7487
7488         buffer_ref_release(ref);
7489         spd->partial[i].private = 0;
7490 }
7491
7492 static ssize_t
7493 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7494                             struct pipe_inode_info *pipe, size_t len,
7495                             unsigned int flags)
7496 {
7497         struct ftrace_buffer_info *info = file->private_data;
7498         struct trace_iterator *iter = &info->iter;
7499         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7500         struct page *pages_def[PIPE_DEF_BUFFERS];
7501         struct splice_pipe_desc spd = {
7502                 .pages          = pages_def,
7503                 .partial        = partial_def,
7504                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7505                 .ops            = &buffer_pipe_buf_ops,
7506                 .spd_release    = buffer_spd_release,
7507         };
7508         struct buffer_ref *ref;
7509         int entries, i;
7510         ssize_t ret = 0;
7511
7512 #ifdef CONFIG_TRACER_MAX_TRACE
7513         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7514                 return -EBUSY;
7515 #endif
7516
7517         if (*ppos & (PAGE_SIZE - 1))
7518                 return -EINVAL;
7519
7520         if (len & (PAGE_SIZE - 1)) {
7521                 if (len < PAGE_SIZE)
7522                         return -EINVAL;
7523                 len &= PAGE_MASK;
7524         }
7525
7526         if (splice_grow_spd(pipe, &spd))
7527                 return -ENOMEM;
7528
7529  again:
7530         trace_access_lock(iter->cpu_file);
7531         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7532
7533         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7534                 struct page *page;
7535                 int r;
7536
7537                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7538                 if (!ref) {
7539                         ret = -ENOMEM;
7540                         break;
7541                 }
7542
7543                 refcount_set(&ref->refcount, 1);
7544                 ref->buffer = iter->trace_buffer->buffer;
7545                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7546                 if (IS_ERR(ref->page)) {
7547                         ret = PTR_ERR(ref->page);
7548                         ref->page = NULL;
7549                         kfree(ref);
7550                         break;
7551                 }
7552                 ref->cpu = iter->cpu_file;
7553
7554                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7555                                           len, iter->cpu_file, 1);
7556                 if (r < 0) {
7557                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7558                                                    ref->page);
7559                         kfree(ref);
7560                         break;
7561                 }
7562
7563                 page = virt_to_page(ref->page);
7564
7565                 spd.pages[i] = page;
7566                 spd.partial[i].len = PAGE_SIZE;
7567                 spd.partial[i].offset = 0;
7568                 spd.partial[i].private = (unsigned long)ref;
7569                 spd.nr_pages++;
7570                 *ppos += PAGE_SIZE;
7571
7572                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7573         }
7574
7575         trace_access_unlock(iter->cpu_file);
7576         spd.nr_pages = i;
7577
7578         /* did we read anything? */
7579         if (!spd.nr_pages) {
7580                 if (ret)
7581                         goto out;
7582
7583                 ret = -EAGAIN;
7584                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7585                         goto out;
7586
7587                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7588                 if (ret)
7589                         goto out;
7590
7591                 goto again;
7592         }
7593
7594         ret = splice_to_pipe(pipe, &spd);
7595 out:
7596         splice_shrink_spd(&spd);
7597
7598         return ret;
7599 }
7600
7601 static const struct file_operations tracing_buffers_fops = {
7602         .open           = tracing_buffers_open,
7603         .read           = tracing_buffers_read,
7604         .poll           = tracing_buffers_poll,
7605         .release        = tracing_buffers_release,
7606         .splice_read    = tracing_buffers_splice_read,
7607         .llseek         = no_llseek,
7608 };
7609
7610 static ssize_t
7611 tracing_stats_read(struct file *filp, char __user *ubuf,
7612                    size_t count, loff_t *ppos)
7613 {
7614         struct inode *inode = file_inode(filp);
7615         struct trace_array *tr = inode->i_private;
7616         struct trace_buffer *trace_buf = &tr->trace_buffer;
7617         int cpu = tracing_get_cpu(inode);
7618         struct trace_seq *s;
7619         unsigned long cnt;
7620         unsigned long long t;
7621         unsigned long usec_rem;
7622
7623         s = kmalloc(sizeof(*s), GFP_KERNEL);
7624         if (!s)
7625                 return -ENOMEM;
7626
7627         trace_seq_init(s);
7628
7629         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7630         trace_seq_printf(s, "entries: %ld\n", cnt);
7631
7632         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7633         trace_seq_printf(s, "overrun: %ld\n", cnt);
7634
7635         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7636         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7637
7638         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7639         trace_seq_printf(s, "bytes: %ld\n", cnt);
7640
7641         if (trace_clocks[tr->clock_id].in_ns) {
7642                 /* local or global for trace_clock */
7643                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7644                 usec_rem = do_div(t, USEC_PER_SEC);
7645                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7646                                                                 t, usec_rem);
7647
7648                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7649                 usec_rem = do_div(t, USEC_PER_SEC);
7650                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7651         } else {
7652                 /* counter or tsc mode for trace_clock */
7653                 trace_seq_printf(s, "oldest event ts: %llu\n",
7654                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7655
7656                 trace_seq_printf(s, "now ts: %llu\n",
7657                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7658         }
7659
7660         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7661         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7662
7663         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7664         trace_seq_printf(s, "read events: %ld\n", cnt);
7665
7666         count = simple_read_from_buffer(ubuf, count, ppos,
7667                                         s->buffer, trace_seq_used(s));
7668
7669         kfree(s);
7670
7671         return count;
7672 }
7673
7674 static const struct file_operations tracing_stats_fops = {
7675         .open           = tracing_open_generic_tr,
7676         .read           = tracing_stats_read,
7677         .llseek         = generic_file_llseek,
7678         .release        = tracing_release_generic_tr,
7679 };
7680
7681 #ifdef CONFIG_DYNAMIC_FTRACE
7682
7683 static ssize_t
7684 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7685                   size_t cnt, loff_t *ppos)
7686 {
7687         ssize_t ret;
7688         char *buf;
7689         int r;
7690
7691         /* 256 should be plenty to hold the amount needed */
7692         buf = kmalloc(256, GFP_KERNEL);
7693         if (!buf)
7694                 return -ENOMEM;
7695
7696         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7697                       ftrace_update_tot_cnt,
7698                       ftrace_number_of_pages,
7699                       ftrace_number_of_groups);
7700
7701         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7702         kfree(buf);
7703         return ret;
7704 }
7705
7706 static const struct file_operations tracing_dyn_info_fops = {
7707         .open           = tracing_open_generic,
7708         .read           = tracing_read_dyn_info,
7709         .llseek         = generic_file_llseek,
7710 };
7711 #endif /* CONFIG_DYNAMIC_FTRACE */
7712
7713 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7714 static void
7715 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7716                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7717                 void *data)
7718 {
7719         tracing_snapshot_instance(tr);
7720 }
7721
7722 static void
7723 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7724                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7725                       void *data)
7726 {
7727         struct ftrace_func_mapper *mapper = data;
7728         long *count = NULL;
7729
7730         if (mapper)
7731                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7732
7733         if (count) {
7734
7735                 if (*count <= 0)
7736                         return;
7737
7738                 (*count)--;
7739         }
7740
7741         tracing_snapshot_instance(tr);
7742 }
7743
7744 static int
7745 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7746                       struct ftrace_probe_ops *ops, void *data)
7747 {
7748         struct ftrace_func_mapper *mapper = data;
7749         long *count = NULL;
7750
7751         seq_printf(m, "%ps:", (void *)ip);
7752
7753         seq_puts(m, "snapshot");
7754
7755         if (mapper)
7756                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7757
7758         if (count)
7759                 seq_printf(m, ":count=%ld\n", *count);
7760         else
7761                 seq_puts(m, ":unlimited\n");
7762
7763         return 0;
7764 }
7765
7766 static int
7767 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7768                      unsigned long ip, void *init_data, void **data)
7769 {
7770         struct ftrace_func_mapper *mapper = *data;
7771
7772         if (!mapper) {
7773                 mapper = allocate_ftrace_func_mapper();
7774                 if (!mapper)
7775                         return -ENOMEM;
7776                 *data = mapper;
7777         }
7778
7779         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7780 }
7781
7782 static void
7783 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7784                      unsigned long ip, void *data)
7785 {
7786         struct ftrace_func_mapper *mapper = data;
7787
7788         if (!ip) {
7789                 if (!mapper)
7790                         return;
7791                 free_ftrace_func_mapper(mapper, NULL);
7792                 return;
7793         }
7794
7795         ftrace_func_mapper_remove_ip(mapper, ip);
7796 }
7797
7798 static struct ftrace_probe_ops snapshot_probe_ops = {
7799         .func                   = ftrace_snapshot,
7800         .print                  = ftrace_snapshot_print,
7801 };
7802
7803 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7804         .func                   = ftrace_count_snapshot,
7805         .print                  = ftrace_snapshot_print,
7806         .init                   = ftrace_snapshot_init,
7807         .free                   = ftrace_snapshot_free,
7808 };
7809
7810 static int
7811 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7812                                char *glob, char *cmd, char *param, int enable)
7813 {
7814         struct ftrace_probe_ops *ops;
7815         void *count = (void *)-1;
7816         char *number;
7817         int ret;
7818
7819         if (!tr)
7820                 return -ENODEV;
7821
7822         /* hash funcs only work with set_ftrace_filter */
7823         if (!enable)
7824                 return -EINVAL;
7825
7826         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7827
7828         if (glob[0] == '!')
7829                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7830
7831         if (!param)
7832                 goto out_reg;
7833
7834         number = strsep(&param, ":");
7835
7836         if (!strlen(number))
7837                 goto out_reg;
7838
7839         /*
7840          * We use the callback data field (which is a pointer)
7841          * as our counter.
7842          */
7843         ret = kstrtoul(number, 0, (unsigned long *)&count);
7844         if (ret)
7845                 return ret;
7846
7847  out_reg:
7848         ret = tracing_alloc_snapshot_instance(tr);
7849         if (ret < 0)
7850                 goto out;
7851
7852         ret = register_ftrace_function_probe(glob, tr, ops, count);
7853
7854  out:
7855         return ret < 0 ? ret : 0;
7856 }
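
/*
 * Illustrative sketch, not part of this file: the "snapshot" command
 * registered below is driven through set_ftrace_filter using the usual
 * "<function>:<command>[:<count>]" syntax; prefixing the line with '!'
 * removes the probe again.  The tracefs path and function name are
 * assumptions.
 *
 *	const char arm[] = "schedule:snapshot:1";
 *	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, arm, sizeof(arm) - 1);
 *		close(fd);
 *	}
 */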
7857
7858 static struct ftrace_func_command ftrace_snapshot_cmd = {
7859         .name                   = "snapshot",
7860         .func                   = ftrace_trace_snapshot_callback,
7861 };
7862
7863 static __init int register_snapshot_cmd(void)
7864 {
7865         return register_ftrace_command(&ftrace_snapshot_cmd);
7866 }
7867 #else
7868 static inline __init int register_snapshot_cmd(void) { return 0; }
7869 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7870
7871 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7872 {
7873         if (WARN_ON(!tr->dir))
7874                 return ERR_PTR(-ENODEV);
7875
7876         /* Top directory uses NULL as the parent */
7877         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7878                 return NULL;
7879
7880         /* All sub buffers have a descriptor */
7881         return tr->dir;
7882 }
7883
7884 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7885 {
7886         struct dentry *d_tracer;
7887
7888         if (tr->percpu_dir)
7889                 return tr->percpu_dir;
7890
7891         d_tracer = tracing_get_dentry(tr);
7892         if (IS_ERR(d_tracer))
7893                 return NULL;
7894
7895         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7896
7897         WARN_ONCE(!tr->percpu_dir,
7898                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7899
7900         return tr->percpu_dir;
7901 }
7902
7903 static struct dentry *
7904 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7905                       void *data, long cpu, const struct file_operations *fops)
7906 {
7907         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7908
7909         if (ret) /* See tracing_get_cpu() */
7910                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7911         return ret;
7912 }
7913
7914 static void
7915 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7916 {
7917         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7918         struct dentry *d_cpu;
7919         char cpu_dir[30]; /* 30 characters should be more than enough */
7920
7921         if (!d_percpu)
7922                 return;
7923
7924         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7925         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7926         if (!d_cpu) {
7927                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7928                 return;
7929         }
7930
7931         /* per cpu trace_pipe */
7932         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7933                                 tr, cpu, &tracing_pipe_fops);
7934
7935         /* per cpu trace */
7936         trace_create_cpu_file("trace", 0644, d_cpu,
7937                                 tr, cpu, &tracing_fops);
7938
7939         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7940                                 tr, cpu, &tracing_buffers_fops);
7941
7942         trace_create_cpu_file("stats", 0444, d_cpu,
7943                                 tr, cpu, &tracing_stats_fops);
7944
7945         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7946                                 tr, cpu, &tracing_entries_fops);
7947
7948 #ifdef CONFIG_TRACER_SNAPSHOT
7949         trace_create_cpu_file("snapshot", 0644, d_cpu,
7950                                 tr, cpu, &snapshot_fops);
7951
7952         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7953                                 tr, cpu, &snapshot_raw_fops);
7954 #endif
7955 }
7956
7957 #ifdef CONFIG_FTRACE_SELFTEST
7958 /* Let selftest have access to static functions in this file */
7959 #include "trace_selftest.c"
7960 #endif
7961
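/*
 * Read/write handlers for the per-tracer option files created under
 * <tracefs>/options/ by create_trace_option_file().  Reading returns
 * "0\n" or "1\n"; writing 0 or 1 toggles the corresponding tracer_opt
 * bit via __set_tracer_option().
 */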
7962 static ssize_t
7963 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7964                         loff_t *ppos)
7965 {
7966         struct trace_option_dentry *topt = filp->private_data;
7967         char *buf;
7968
7969         if (topt->flags->val & topt->opt->bit)
7970                 buf = "1\n";
7971         else
7972                 buf = "0\n";
7973
7974         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7975 }
7976
7977 static ssize_t
7978 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7979                          loff_t *ppos)
7980 {
7981         struct trace_option_dentry *topt = filp->private_data;
7982         unsigned long val;
7983         int ret;
7984
7985         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7986         if (ret)
7987                 return ret;
7988
7989         if (val != 0 && val != 1)
7990                 return -EINVAL;
7991
7992         if (!!(topt->flags->val & topt->opt->bit) != val) {
7993                 mutex_lock(&trace_types_lock);
7994                 ret = __set_tracer_option(topt->tr, topt->flags,
7995                                           topt->opt, !val);
7996                 mutex_unlock(&trace_types_lock);
7997                 if (ret)
7998                         return ret;
7999         }
8000
8001         *ppos += cnt;
8002
8003         return cnt;
8004 }
8005
8007 static const struct file_operations trace_options_fops = {
8008         .open = tracing_open_generic,
8009         .read = trace_options_read,
8010         .write = trace_options_write,
8011         .llseek = generic_file_llseek,
8012 };
8013
8014 /*
8015  * In order to pass in both the trace_array descriptor as well as the index
8016  * to the flag that the trace option file represents, the trace_array
8017  * has a character array of trace_flags_index[], which holds the index
8018  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8019  * The address of the flag's element in that array is passed to the
8020  * flag option file's read/write callbacks.
8021  *
8022  * In order to extract both the index and the trace_array descriptor,
8023  * get_tr_index() uses the following algorithm.
8024  *
8025  *   idx = *ptr;
8026  *
8027  * The pointer points at the flag's slot in the index array, and the
8028  * value stored there is that slot's index (remember index[1] == 1).
8029  *
8030  * To get the trace_array descriptor, subtract that index from the
8031  * pointer to get back to the start of the array:
8032  *
8033  *   ptr - idx == &index[0]
8034  *
8035  * Then a simple container_of() from that pointer gets us to the
8036  * trace_array descriptor.
8037  */
8038 static void get_tr_index(void *data, struct trace_array **ptr,
8039                          unsigned int *pindex)
8040 {
8041         *pindex = *(unsigned char *)data;
8042
8043         *ptr = container_of(data - *pindex, struct trace_array,
8044                             trace_flags_index);
8045 }
8046
8047 static ssize_t
8048 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8049                         loff_t *ppos)
8050 {
8051         void *tr_index = filp->private_data;
8052         struct trace_array *tr;
8053         unsigned int index;
8054         char *buf;
8055
8056         get_tr_index(tr_index, &tr, &index);
8057
8058         if (tr->trace_flags & (1 << index))
8059                 buf = "1\n";
8060         else
8061                 buf = "0\n";
8062
8063         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8064 }
8065
8066 static ssize_t
8067 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8068                          loff_t *ppos)
8069 {
8070         void *tr_index = filp->private_data;
8071         struct trace_array *tr;
8072         unsigned int index;
8073         unsigned long val;
8074         int ret;
8075
8076         get_tr_index(tr_index, &tr, &index);
8077
8078         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8079         if (ret)
8080                 return ret;
8081
8082         if (val != 0 && val != 1)
8083                 return -EINVAL;
8084
8085         mutex_lock(&event_mutex);
8086         mutex_lock(&trace_types_lock);
8087         ret = set_tracer_flag(tr, 1 << index, val);
8088         mutex_unlock(&trace_types_lock);
8089         mutex_unlock(&event_mutex);
8090
8091         if (ret < 0)
8092                 return ret;
8093
8094         *ppos += cnt;
8095
8096         return cnt;
8097 }
8098
8099 static const struct file_operations trace_options_core_fops = {
8100         .open = tracing_open_generic,
8101         .read = trace_options_core_read,
8102         .write = trace_options_core_write,
8103         .llseek = generic_file_llseek,
8104 };
8105
8106 struct dentry *trace_create_file(const char *name,
8107                                  umode_t mode,
8108                                  struct dentry *parent,
8109                                  void *data,
8110                                  const struct file_operations *fops)
8111 {
8112         struct dentry *ret;
8113
8114         ret = tracefs_create_file(name, mode, parent, data, fops);
8115         if (!ret)
8116                 pr_warn("Could not create tracefs '%s' entry\n", name);
8117
8118         return ret;
8119 }
8120
8122 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8123 {
8124         struct dentry *d_tracer;
8125
8126         if (tr->options)
8127                 return tr->options;
8128
8129         d_tracer = tracing_get_dentry(tr);
8130         if (IS_ERR(d_tracer))
8131                 return NULL;
8132
8133         tr->options = tracefs_create_dir("options", d_tracer);
8134         if (!tr->options) {
8135                 pr_warn("Could not create tracefs directory 'options'\n");
8136                 return NULL;
8137         }
8138
8139         return tr->options;
8140 }
8141
8142 static void
8143 create_trace_option_file(struct trace_array *tr,
8144                          struct trace_option_dentry *topt,
8145                          struct tracer_flags *flags,
8146                          struct tracer_opt *opt)
8147 {
8148         struct dentry *t_options;
8149
8150         t_options = trace_options_init_dentry(tr);
8151         if (!t_options)
8152                 return;
8153
8154         topt->flags = flags;
8155         topt->opt = opt;
8156         topt->tr = tr;
8157
8158         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8159                                     &trace_options_fops);
8161 }
8162
8163 static void
8164 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8165 {
8166         struct trace_option_dentry *topts;
8167         struct trace_options *tr_topts;
8168         struct tracer_flags *flags;
8169         struct tracer_opt *opts;
8170         int cnt;
8171         int i;
8172
8173         if (!tracer)
8174                 return;
8175
8176         flags = tracer->flags;
8177
8178         if (!flags || !flags->opts)
8179                 return;
8180
8181         /*
8182          * If this is an instance, only create flags for tracers
8183          * the instance may have.
8184          */
8185         if (!trace_ok_for_array(tracer, tr))
8186                 return;
8187
8188         for (i = 0; i < tr->nr_topts; i++) {
8189                 /* Make sure there are no duplicate flags. */
8190                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8191                         return;
8192         }
8193
8194         opts = flags->opts;
8195
8196         for (cnt = 0; opts[cnt].name; cnt++)
8197                 ;
8198
8199         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8200         if (!topts)
8201                 return;
8202
8203         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8204                             GFP_KERNEL);
8205         if (!tr_topts) {
8206                 kfree(topts);
8207                 return;
8208         }
8209
8210         tr->topts = tr_topts;
8211         tr->topts[tr->nr_topts].tracer = tracer;
8212         tr->topts[tr->nr_topts].topts = topts;
8213         tr->nr_topts++;
8214
8215         for (cnt = 0; opts[cnt].name; cnt++) {
8216                 create_trace_option_file(tr, &topts[cnt], flags,
8217                                          &opts[cnt]);
8218                 WARN_ONCE(topts[cnt].entry == NULL,
8219                           "Failed to create trace option: %s",
8220                           opts[cnt].name);
8221         }
8222 }
8223
8224 static struct dentry *
8225 create_trace_option_core_file(struct trace_array *tr,
8226                               const char *option, long index)
8227 {
8228         struct dentry *t_options;
8229
8230         t_options = trace_options_init_dentry(tr);
8231         if (!t_options)
8232                 return NULL;
8233
8234         return trace_create_file(option, 0644, t_options,
8235                                  (void *)&tr->trace_flags_index[index],
8236                                  &trace_options_core_fops);
8237 }
8238
8239 static void create_trace_options_dir(struct trace_array *tr)
8240 {
8241         struct dentry *t_options;
8242         bool top_level = tr == &global_trace;
8243         int i;
8244
8245         t_options = trace_options_init_dentry(tr);
8246         if (!t_options)
8247                 return;
8248
8249         for (i = 0; trace_options[i]; i++) {
8250                 if (top_level ||
8251                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8252                         create_trace_option_core_file(tr, trace_options[i], i);
8253         }
8254 }
8255
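/*
 * Backing implementation of the per-instance "tracing_on" file, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *
 * Writes also call the current tracer's start()/stop() callbacks.
 */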
8256 static ssize_t
8257 rb_simple_read(struct file *filp, char __user *ubuf,
8258                size_t cnt, loff_t *ppos)
8259 {
8260         struct trace_array *tr = filp->private_data;
8261         char buf[64];
8262         int r;
8263
8264         r = tracer_tracing_is_on(tr);
8265         r = sprintf(buf, "%d\n", r);
8266
8267         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8268 }
8269
8270 static ssize_t
8271 rb_simple_write(struct file *filp, const char __user *ubuf,
8272                 size_t cnt, loff_t *ppos)
8273 {
8274         struct trace_array *tr = filp->private_data;
8275         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8276         unsigned long val;
8277         int ret;
8278
8279         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8280         if (ret)
8281                 return ret;
8282
8283         if (buffer) {
8284                 mutex_lock(&trace_types_lock);
8285                 if (!!val == tracer_tracing_is_on(tr)) {
8286                         val = 0; /* do nothing */
8287                 } else if (val) {
8288                         tracer_tracing_on(tr);
8289                         if (tr->current_trace->start)
8290                                 tr->current_trace->start(tr);
8291                 } else {
8292                         tracer_tracing_off(tr);
8293                         if (tr->current_trace->stop)
8294                                 tr->current_trace->stop(tr);
8295                 }
8296                 mutex_unlock(&trace_types_lock);
8297         }
8298
8299         (*ppos)++;
8300
8301         return cnt;
8302 }
8303
8304 static const struct file_operations rb_simple_fops = {
8305         .open           = tracing_open_generic_tr,
8306         .read           = rb_simple_read,
8307         .write          = rb_simple_write,
8308         .release        = tracing_release_generic_tr,
8309         .llseek         = default_llseek,
8310 };
8311
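/*
 * "buffer_percent" controls how full the ring buffer must be before a
 * blocked reader (e.g. on trace_pipe_raw) is woken up.  Note that a
 * write of 0 is treated as 1 here, and the default set at instance
 * creation time is 50.
 */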
8312 static ssize_t
8313 buffer_percent_read(struct file *filp, char __user *ubuf,
8314                     size_t cnt, loff_t *ppos)
8315 {
8316         struct trace_array *tr = filp->private_data;
8317         char buf[64];
8318         int r;
8319
8320         r = tr->buffer_percent;
8321         r = sprintf(buf, "%d\n", r);
8322
8323         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8324 }
8325
8326 static ssize_t
8327 buffer_percent_write(struct file *filp, const char __user *ubuf,
8328                      size_t cnt, loff_t *ppos)
8329 {
8330         struct trace_array *tr = filp->private_data;
8331         unsigned long val;
8332         int ret;
8333
8334         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8335         if (ret)
8336                 return ret;
8337
8338         if (val > 100)
8339                 return -EINVAL;
8340
8341         if (!val)
8342                 val = 1;
8343
8344         tr->buffer_percent = val;
8345
8346         (*ppos)++;
8347
8348         return cnt;
8349 }
8350
8351 static const struct file_operations buffer_percent_fops = {
8352         .open           = tracing_open_generic_tr,
8353         .read           = buffer_percent_read,
8354         .write          = buffer_percent_write,
8355         .release        = tracing_release_generic_tr,
8356         .llseek         = default_llseek,
8357 };
8358
8359 static struct dentry *trace_instance_dir;
8360
8361 static void
8362 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8363
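/*
 * Allocate the ring buffer and per-CPU trace_array_cpu data for one
 * trace_buffer.  allocate_trace_buffers() below also sets up a minimal
 * max/snapshot buffer under CONFIG_TRACER_MAX_TRACE, unless a full
 * snapshot was requested on the kernel command line.
 */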
8364 static int
8365 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8366 {
8367         enum ring_buffer_flags rb_flags;
8368
8369         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8370
8371         buf->tr = tr;
8372
8373         buf->buffer = ring_buffer_alloc(size, rb_flags);
8374         if (!buf->buffer)
8375                 return -ENOMEM;
8376
8377         buf->data = alloc_percpu(struct trace_array_cpu);
8378         if (!buf->data) {
8379                 ring_buffer_free(buf->buffer);
8380                 buf->buffer = NULL;
8381                 return -ENOMEM;
8382         }
8383
8384         /* Allocate the first page for all buffers */
8385         set_buffer_entries(&tr->trace_buffer,
8386                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8387
8388         return 0;
8389 }
8390
8391 static int allocate_trace_buffers(struct trace_array *tr, int size)
8392 {
8393         int ret;
8394
8395         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8396         if (ret)
8397                 return ret;
8398
8399 #ifdef CONFIG_TRACER_MAX_TRACE
8400         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8401                                     allocate_snapshot ? size : 1);
8402         if (WARN_ON(ret)) {
8403                 ring_buffer_free(tr->trace_buffer.buffer);
8404                 tr->trace_buffer.buffer = NULL;
8405                 free_percpu(tr->trace_buffer.data);
8406                 tr->trace_buffer.data = NULL;
8407                 return -ENOMEM;
8408         }
8409         tr->allocated_snapshot = allocate_snapshot;
8410
8411         /*
8412          * Only the top level trace array gets its snapshot allocated
8413          * from the kernel command line.
8414          */
8415         allocate_snapshot = false;
8416 #endif
8417         return 0;
8418 }
8419
8420 static void free_trace_buffer(struct trace_buffer *buf)
8421 {
8422         if (buf->buffer) {
8423                 ring_buffer_free(buf->buffer);
8424                 buf->buffer = NULL;
8425                 free_percpu(buf->data);
8426                 buf->data = NULL;
8427         }
8428 }
8429
8430 static void free_trace_buffers(struct trace_array *tr)
8431 {
8432         if (!tr)
8433                 return;
8434
8435         free_trace_buffer(&tr->trace_buffer);
8436
8437 #ifdef CONFIG_TRACER_MAX_TRACE
8438         free_trace_buffer(&tr->max_buffer);
8439 #endif
8440 }
8441
8442 static void init_trace_flags_index(struct trace_array *tr)
8443 {
8444         int i;
8445
8446         /* Used by the trace options files */
8447         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8448                 tr->trace_flags_index[i] = i;
8449 }
8450
8451 static void __update_tracer_options(struct trace_array *tr)
8452 {
8453         struct tracer *t;
8454
8455         for (t = trace_types; t; t = t->next)
8456                 add_tracer_options(tr, t);
8457 }
8458
8459 static void update_tracer_options(struct trace_array *tr)
8460 {
8461         mutex_lock(&trace_types_lock);
8462         __update_tracer_options(tr);
8463         mutex_unlock(&trace_types_lock);
8464 }
8465
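/*
 * Allocate and set up a new trace_array (a tracing instance): buffers,
 * tracefs directory, event files and option files, then add it to
 * ftrace_trace_arrays.  Returns the new array or an ERR_PTR() on
 * failure.  Called with event_mutex and trace_types_lock held.
 */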
8466 static struct trace_array *trace_array_create(const char *name)
8467 {
8468         struct trace_array *tr;
8469         int ret;
8470
8471         ret = -ENOMEM;
8472         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8473         if (!tr)
8474                 return ERR_PTR(ret);
8475
8476         tr->name = kstrdup(name, GFP_KERNEL);
8477         if (!tr->name)
8478                 goto out_free_tr;
8479
8480         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8481                 goto out_free_tr;
8482
8483         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8484
8485         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8486
8487         raw_spin_lock_init(&tr->start_lock);
8488
8489         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8490
8491         tr->current_trace = &nop_trace;
8492
8493         INIT_LIST_HEAD(&tr->systems);
8494         INIT_LIST_HEAD(&tr->events);
8495         INIT_LIST_HEAD(&tr->hist_vars);
8496         INIT_LIST_HEAD(&tr->err_log);
8497
8498         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8499                 goto out_free_tr;
8500
8501         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8502         if (!tr->dir)
8503                 goto out_free_tr;
8504
8505         ret = event_trace_add_tracer(tr->dir, tr);
8506         if (ret) {
8507                 tracefs_remove_recursive(tr->dir);
8508                 goto out_free_tr;
8509         }
8510
8511         ftrace_init_trace_array(tr);
8512
8513         init_tracer_tracefs(tr, tr->dir);
8514         init_trace_flags_index(tr);
8515         __update_tracer_options(tr);
8516
8517         list_add(&tr->list, &ftrace_trace_arrays);
8518
8519         tr->ref++;
8520
8522         return tr;
8523
8524  out_free_tr:
8525         free_trace_buffers(tr);
8526         free_cpumask_var(tr->tracing_cpumask);
8527         kfree(tr->name);
8528         kfree(tr);
8529
8530         return ERR_PTR(ret);
8531 }
8532
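/*
 * Backend for mkdir in the tracefs "instances" directory, e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *
 * creates a new trace instance named "foo" with its own set of buffers
 * and control files.
 */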
8533 static int instance_mkdir(const char *name)
8534 {
8535         struct trace_array *tr;
8536         int ret;
8537
8538         mutex_lock(&event_mutex);
8539         mutex_lock(&trace_types_lock);
8540
8541         ret = -EEXIST;
8542         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8543                 if (tr->name && strcmp(tr->name, name) == 0)
8544                         goto out_unlock;
8545         }
8546
8547         tr = trace_array_create(name);
8548
8549         ret = PTR_ERR_OR_ZERO(tr);
8550
8551 out_unlock:
8552         mutex_unlock(&trace_types_lock);
8553         mutex_unlock(&event_mutex);
8554         return ret;
8555 }
8556
8557 /**
8558  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8559  * @name: The name of the trace array to be looked up/created.
8560  *
8561  * Returns a pointer to the trace array with the given name, or
8562  * NULL if it cannot be created.
8563  *
8564  * NOTE: This function increments the reference counter associated with the
8565  * trace array returned. This makes sure it cannot be freed while in use.
8566  * Use trace_array_put() once the trace array is no longer needed.
8567  *
8568  */
8569 struct trace_array *trace_array_get_by_name(const char *name)
8570 {
8571         struct trace_array *tr;
8572
8573         mutex_lock(&event_mutex);
8574         mutex_lock(&trace_types_lock);
8575
8576         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8577                 if (tr->name && strcmp(tr->name, name) == 0)
8578                         goto out_unlock;
8579         }
8580
8581         tr = trace_array_create(name);
8582
8583         if (IS_ERR(tr))
8584                 tr = NULL;
8585 out_unlock:
8586         if (tr)
8587                 tr->ref++;
8588
8589         mutex_unlock(&trace_types_lock);
8590         mutex_unlock(&event_mutex);
8591         return tr;
8592 }
8593 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8594
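/*
 * Tear down one trace instance: detach it from events, ftrace and the
 * option files, remove its tracefs directory and free its buffers.
 * Called with event_mutex and trace_types_lock held.  Fails with
 * -EBUSY while the instance (or its current tracer) is still in use.
 */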
8595 static int __remove_instance(struct trace_array *tr)
8596 {
8597         int i;
8598
8599         /* Reference counter for a newly created trace array = 1. */
8600         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8601                 return -EBUSY;
8602
8603         list_del(&tr->list);
8604
8605         /* Disable all the flags that were enabled coming in */
8606         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8607                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8608                         set_tracer_flag(tr, 1 << i, 0);
8609         }
8610
8611         tracing_set_nop(tr);
8612         clear_ftrace_function_probes(tr);
8613         event_trace_del_tracer(tr);
8614         ftrace_clear_pids(tr);
8615         ftrace_destroy_function_files(tr);
8616         tracefs_remove_recursive(tr->dir);
8617         free_trace_buffers(tr);
8618
8619         for (i = 0; i < tr->nr_topts; i++) {
8620                 kfree(tr->topts[i].topts);
8621         }
8622         kfree(tr->topts);
8623
8624         free_cpumask_var(tr->tracing_cpumask);
8625         kfree(tr->name);
8626         kfree(tr);
8627         tr = NULL;
8628
8629         return 0;
8630 }
8631
8632 int trace_array_destroy(struct trace_array *this_tr)
8633 {
8634         struct trace_array *tr;
8635         int ret;
8636
8637         if (!this_tr)
8638                 return -EINVAL;
8639
8640         mutex_lock(&event_mutex);
8641         mutex_lock(&trace_types_lock);
8642
8643         ret = -ENODEV;
8644
8645         /* Making sure trace array exists before destroying it. */
8646         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8647                 if (tr == this_tr) {
8648                         ret = __remove_instance(tr);
8649                         break;
8650                 }
8651         }
8652
8653         mutex_unlock(&trace_types_lock);
8654         mutex_unlock(&event_mutex);
8655
8656         return ret;
8657 }
8658 EXPORT_SYMBOL_GPL(trace_array_destroy);
8659
8660 static int instance_rmdir(const char *name)
8661 {
8662         struct trace_array *tr;
8663         int ret;
8664
8665         mutex_lock(&event_mutex);
8666         mutex_lock(&trace_types_lock);
8667
8668         ret = -ENODEV;
8669         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8670                 if (tr->name && strcmp(tr->name, name) == 0) {
8671                         ret = __remove_instance(tr);
8672                         break;
8673                 }
8674         }
8675
8676         mutex_unlock(&trace_types_lock);
8677         mutex_unlock(&event_mutex);
8678
8679         return ret;
8680 }
8681
8682 static __init void create_trace_instances(struct dentry *d_tracer)
8683 {
8684         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8685                                                          instance_mkdir,
8686                                                          instance_rmdir);
8687         if (WARN_ON(!trace_instance_dir))
8688                 return;
8689 }
8690
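/*
 * Create the standard set of control files (trace, trace_pipe,
 * tracing_on, buffer_size_kb, per_cpu/*, ...) for a trace array in the
 * given tracefs directory.  Used for both the top level directory and
 * each instance.
 */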
8691 static void
8692 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8693 {
8694         struct trace_event_file *file;
8695         int cpu;
8696
8697         trace_create_file("available_tracers", 0444, d_tracer,
8698                         tr, &show_traces_fops);
8699
8700         trace_create_file("current_tracer", 0644, d_tracer,
8701                         tr, &set_tracer_fops);
8702
8703         trace_create_file("tracing_cpumask", 0644, d_tracer,
8704                           tr, &tracing_cpumask_fops);
8705
8706         trace_create_file("trace_options", 0644, d_tracer,
8707                           tr, &tracing_iter_fops);
8708
8709         trace_create_file("trace", 0644, d_tracer,
8710                           tr, &tracing_fops);
8711
8712         trace_create_file("trace_pipe", 0444, d_tracer,
8713                           tr, &tracing_pipe_fops);
8714
8715         trace_create_file("buffer_size_kb", 0644, d_tracer,
8716                           tr, &tracing_entries_fops);
8717
8718         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8719                           tr, &tracing_total_entries_fops);
8720
8721         trace_create_file("free_buffer", 0200, d_tracer,
8722                           tr, &tracing_free_buffer_fops);
8723
8724         trace_create_file("trace_marker", 0220, d_tracer,
8725                           tr, &tracing_mark_fops);
8726
8727         file = __find_event_file(tr, "ftrace", "print");
8728         if (file && file->dir)
8729                 trace_create_file("trigger", 0644, file->dir, file,
8730                                   &event_trigger_fops);
8731         tr->trace_marker_file = file;
8732
8733         trace_create_file("trace_marker_raw", 0220, d_tracer,
8734                           tr, &tracing_mark_raw_fops);
8735
8736         trace_create_file("trace_clock", 0644, d_tracer, tr,
8737                           &trace_clock_fops);
8738
8739         trace_create_file("tracing_on", 0644, d_tracer,
8740                           tr, &rb_simple_fops);
8741
8742         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8743                           &trace_time_stamp_mode_fops);
8744
8745         tr->buffer_percent = 50;
8746
8747         trace_create_file("buffer_percent", 0444, d_tracer,
8748                         tr, &buffer_percent_fops);
8749
8750         create_trace_options_dir(tr);
8751
8752 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8753         trace_create_maxlat_file(tr, d_tracer);
8754 #endif
8755
8756         if (ftrace_create_function_files(tr, d_tracer))
8757                 WARN(1, "Could not allocate function filter files");
8758
8759 #ifdef CONFIG_TRACER_SNAPSHOT
8760         trace_create_file("snapshot", 0644, d_tracer,
8761                           tr, &snapshot_fops);
8762 #endif
8763
8764         trace_create_file("error_log", 0644, d_tracer,
8765                           tr, &tracing_err_log_fops);
8766
8767         for_each_tracing_cpu(cpu)
8768                 tracing_init_tracefs_percpu(tr, cpu);
8769
8770         ftrace_init_tracefs(tr, d_tracer);
8771 }
8772
8773 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8774 {
8775         struct vfsmount *mnt;
8776         struct file_system_type *type;
8777
8778         /*
8779          * To maintain backward compatibility for tools that mount
8780          * debugfs to get to the tracing facility, tracefs is automatically
8781          * mounted to the debugfs/tracing directory.
8782          */
8783         type = get_fs_type("tracefs");
8784         if (!type)
8785                 return NULL;
8786         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8787         put_filesystem(type);
8788         if (IS_ERR(mnt))
8789                 return NULL;
8790         mntget(mnt);
8791
8792         return mnt;
8793 }
8794
8795 /**
8796  * tracing_init_dentry - initialize top level trace array
8797  *
8798  * This is called when creating files or directories in the tracing
8799  * directory. It is called via fs_initcall() by any of the boot up code
8800  * and expects to return the dentry of the top level tracing directory.
8801  */
8802 struct dentry *tracing_init_dentry(void)
8803 {
8804         struct trace_array *tr = &global_trace;
8805
8806         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8807                 pr_warn("Tracing disabled due to lockdown\n");
8808                 return ERR_PTR(-EPERM);
8809         }
8810
8811         /* The top level trace array uses NULL as the parent */
8812         if (tr->dir)
8813                 return NULL;
8814
8815         if (WARN_ON(!tracefs_initialized()) ||
8816                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8817                  WARN_ON(!debugfs_initialized())))
8818                 return ERR_PTR(-ENODEV);
8819
8820         /*
8821          * As there may still be users that expect the tracing
8822          * files to exist in debugfs/tracing, we must automount
8823          * the tracefs file system there, so older tools still
8824          * work with the newer kernel.
8825          */
8826         tr->dir = debugfs_create_automount("tracing", NULL,
8827                                            trace_automount, NULL);
8828
8829         return NULL;
8830 }
8831
8832 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8833 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8834
8835 static void __init trace_eval_init(void)
8836 {
8837         int len;
8838
8839         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8840         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8841 }
8842
8843 #ifdef CONFIG_MODULES
8844 static void trace_module_add_evals(struct module *mod)
8845 {
8846         if (!mod->num_trace_evals)
8847                 return;
8848
8849         /*
8850          * Modules with bad taint do not have events created; do
8851          * not bother with their eval maps either.
8852          */
8853         if (trace_module_has_bad_taint(mod))
8854                 return;
8855
8856         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8857 }
8858
8859 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8860 static void trace_module_remove_evals(struct module *mod)
8861 {
8862         union trace_eval_map_item *map;
8863         union trace_eval_map_item **last = &trace_eval_maps;
8864
8865         if (!mod->num_trace_evals)
8866                 return;
8867
8868         mutex_lock(&trace_eval_mutex);
8869
8870         map = trace_eval_maps;
8871
8872         while (map) {
8873                 if (map->head.mod == mod)
8874                         break;
8875                 map = trace_eval_jmp_to_tail(map);
8876                 last = &map->tail.next;
8877                 map = map->tail.next;
8878         }
8879         if (!map)
8880                 goto out;
8881
8882         *last = trace_eval_jmp_to_tail(map)->tail.next;
8883         kfree(map);
8884  out:
8885         mutex_unlock(&trace_eval_mutex);
8886 }
8887 #else
8888 static inline void trace_module_remove_evals(struct module *mod) { }
8889 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8890
8891 static int trace_module_notify(struct notifier_block *self,
8892                                unsigned long val, void *data)
8893 {
8894         struct module *mod = data;
8895
8896         switch (val) {
8897         case MODULE_STATE_COMING:
8898                 trace_module_add_evals(mod);
8899                 break;
8900         case MODULE_STATE_GOING:
8901                 trace_module_remove_evals(mod);
8902                 break;
8903         }
8904
8905         return 0;
8906 }
8907
8908 static struct notifier_block trace_module_nb = {
8909         .notifier_call = trace_module_notify,
8910         .priority = 0,
8911 };
8912 #endif /* CONFIG_MODULES */
8913
8914 static __init int tracer_init_tracefs(void)
8915 {
8916         struct dentry *d_tracer;
8917
8918         trace_access_lock_init();
8919
8920         d_tracer = tracing_init_dentry();
8921         if (IS_ERR(d_tracer))
8922                 return 0;
8923
8924         event_trace_init();
8925
8926         init_tracer_tracefs(&global_trace, d_tracer);
8927         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8928
8929         trace_create_file("tracing_thresh", 0644, d_tracer,
8930                         &global_trace, &tracing_thresh_fops);
8931
8932         trace_create_file("README", 0444, d_tracer,
8933                         NULL, &tracing_readme_fops);
8934
8935         trace_create_file("saved_cmdlines", 0444, d_tracer,
8936                         NULL, &tracing_saved_cmdlines_fops);
8937
8938         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8939                           NULL, &tracing_saved_cmdlines_size_fops);
8940
8941         trace_create_file("saved_tgids", 0444, d_tracer,
8942                         NULL, &tracing_saved_tgids_fops);
8943
8944         trace_eval_init();
8945
8946         trace_create_eval_file(d_tracer);
8947
8948 #ifdef CONFIG_MODULES
8949         register_module_notifier(&trace_module_nb);
8950 #endif
8951
8952 #ifdef CONFIG_DYNAMIC_FTRACE
8953         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8954                         NULL, &tracing_dyn_info_fops);
8955 #endif
8956
8957         create_trace_instances(d_tracer);
8958
8959         update_tracer_options(&global_trace);
8960
8961         return 0;
8962 }
8963
8964 static int trace_panic_handler(struct notifier_block *this,
8965                                unsigned long event, void *unused)
8966 {
8967         if (ftrace_dump_on_oops)
8968                 ftrace_dump(ftrace_dump_on_oops);
8969         return NOTIFY_OK;
8970 }
8971
8972 static struct notifier_block trace_panic_notifier = {
8973         .notifier_call  = trace_panic_handler,
8974         .next           = NULL,
8975         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8976 };
8977
8978 static int trace_die_handler(struct notifier_block *self,
8979                              unsigned long val,
8980                              void *data)
8981 {
8982         switch (val) {
8983         case DIE_OOPS:
8984                 if (ftrace_dump_on_oops)
8985                         ftrace_dump(ftrace_dump_on_oops);
8986                 break;
8987         default:
8988                 break;
8989         }
8990         return NOTIFY_OK;
8991 }
8992
8993 static struct notifier_block trace_die_notifier = {
8994         .notifier_call = trace_die_handler,
8995         .priority = 200
8996 };
8997
8998 /*
8999  * printk is set to a max of 1024, but we really don't need it that big.
9000  * Nothing should be printing 1000 characters anyway.
9001  */
9002 #define TRACE_MAX_PRINT         1000
9003
9004 /*
9005  * Define here KERN_TRACE so that we have one place to modify
9006  * it if we decide to change what log level the ftrace dump
9007  * should be at.
9008  */
9009 #define KERN_TRACE              KERN_EMERG
9010
9011 void
9012 trace_printk_seq(struct trace_seq *s)
9013 {
9014         /* Probably should print a warning here. */
9015         if (s->seq.len >= TRACE_MAX_PRINT)
9016                 s->seq.len = TRACE_MAX_PRINT;
9017
9018         /*
9019          * More paranoid code. Although the buffer size is set to
9020          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9021          * an extra layer of protection.
9022          */
9023         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9024                 s->seq.len = s->seq.size - 1;
9025
9026         /* should be nul-terminated, but we are paranoid. */
9027         s->buffer[s->seq.len] = 0;
9028
9029         printk(KERN_TRACE "%s", s->buffer);
9030
9031         trace_seq_init(s);
9032 }
9033
9034 void trace_init_global_iter(struct trace_iterator *iter)
9035 {
9036         iter->tr = &global_trace;
9037         iter->trace = iter->tr->current_trace;
9038         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9039         iter->trace_buffer = &global_trace.trace_buffer;
9040
9041         if (iter->trace && iter->trace->open)
9042                 iter->trace->open(iter);
9043
9044         /* Annotate start of buffers if we had overruns */
9045         if (ring_buffer_overruns(iter->trace_buffer->buffer))
9046                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9047
9048         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9049         if (trace_clocks[iter->tr->clock_id].in_ns)
9050                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9051 }
9052
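/*
 * Dump the contents of the ring buffer to the console, e.g. from the
 * panic/die notifiers below or via sysrq-z.  @oops_dump_mode selects
 * whether all CPUs (DUMP_ALL) or only the current CPU (DUMP_ORIG) gets
 * dumped.
 */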
9053 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9054 {
9055         /* use static because iter can be a bit big for the stack */
9056         static struct trace_iterator iter;
9057         static atomic_t dump_running;
9058         struct trace_array *tr = &global_trace;
9059         unsigned int old_userobj;
9060         unsigned long flags;
9061         int cnt = 0, cpu;
9062
9063         /* Only allow one dump user at a time. */
9064         if (atomic_inc_return(&dump_running) != 1) {
9065                 atomic_dec(&dump_running);
9066                 return;
9067         }
9068
9069         /*
9070          * Always turn off tracing when we dump.
9071          * We don't need to show trace output of what happens
9072          * between multiple crashes.
9073          *
9074          * If the user does a sysrq-z, then they can re-enable
9075          * tracing with echo 1 > tracing_on.
9076          */
9077         tracing_off();
9078
9079         local_irq_save(flags);
9080         printk_nmi_direct_enter();
9081
9082         /* Simulate the iterator */
9083         trace_init_global_iter(&iter);
9084
9085         for_each_tracing_cpu(cpu) {
9086                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9087         }
9088
9089         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9090
9091         /* don't look at user memory in panic mode */
9092         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9093
9094         switch (oops_dump_mode) {
9095         case DUMP_ALL:
9096                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9097                 break;
9098         case DUMP_ORIG:
9099                 iter.cpu_file = raw_smp_processor_id();
9100                 break;
9101         case DUMP_NONE:
9102                 goto out_enable;
9103         default:
9104                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9105                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9106         }
9107
9108         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9109
9110         /* Did function tracer already get disabled? */
9111         if (ftrace_is_dead()) {
9112                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9113                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9114         }
9115
9116         /*
9117          * We need to stop all tracing on all CPUs to read
9118          * the next buffer. This is a bit expensive, but is
9119          * not done often. We print all that we can read,
9120          * and then release the locks again.
9121          */
9122
9123         while (!trace_empty(&iter)) {
9124
9125                 if (!cnt)
9126                         printk(KERN_TRACE "---------------------------------\n");
9127
9128                 cnt++;
9129
9130                 trace_iterator_reset(&iter);
9131                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9132
9133                 if (trace_find_next_entry_inc(&iter) != NULL) {
9134                         int ret;
9135
9136                         ret = print_trace_line(&iter);
9137                         if (ret != TRACE_TYPE_NO_CONSUME)
9138                                 trace_consume(&iter);
9139                 }
9140                 touch_nmi_watchdog();
9141
9142                 trace_printk_seq(&iter.seq);
9143         }
9144
9145         if (!cnt)
9146                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9147         else
9148                 printk(KERN_TRACE "---------------------------------\n");
9149
9150  out_enable:
9151         tr->trace_flags |= old_userobj;
9152
9153         for_each_tracing_cpu(cpu) {
9154                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9155         }
9156         atomic_dec(&dump_running);
9157         printk_nmi_direct_exit();
9158         local_irq_restore(flags);
9159 }
9160 EXPORT_SYMBOL_GPL(ftrace_dump);
9161
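/*
 * Split @buf into whitespace-separated arguments and pass them to
 * @createfn.  Used by interfaces such as the kprobe/uprobe event
 * parsers via trace_parse_run_command() below.
 */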
9162 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9163 {
9164         char **argv;
9165         int argc, ret;
9166
9167         argc = 0;
9168         ret = 0;
9169         argv = argv_split(GFP_KERNEL, buf, &argc);
9170         if (!argv)
9171                 return -ENOMEM;
9172
9173         if (argc)
9174                 ret = createfn(argc, argv);
9175
9176         argv_free(argv);
9177
9178         return ret;
9179 }
9180
9181 #define WRITE_BUFSIZE  4096
9182
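/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, strip
 * '#' comments, and run each newline-terminated line through
 * trace_run_command().  Returns the number of bytes consumed or a
 * negative error.
 */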
9183 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9184                                 size_t count, loff_t *ppos,
9185                                 int (*createfn)(int, char **))
9186 {
9187         char *kbuf, *buf, *tmp;
9188         int ret = 0;
9189         size_t done = 0;
9190         size_t size;
9191
9192         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9193         if (!kbuf)
9194                 return -ENOMEM;
9195
9196         while (done < count) {
9197                 size = count - done;
9198
9199                 if (size >= WRITE_BUFSIZE)
9200                         size = WRITE_BUFSIZE - 1;
9201
9202                 if (copy_from_user(kbuf, buffer + done, size)) {
9203                         ret = -EFAULT;
9204                         goto out;
9205                 }
9206                 kbuf[size] = '\0';
9207                 buf = kbuf;
9208                 do {
9209                         tmp = strchr(buf, '\n');
9210                         if (tmp) {
9211                                 *tmp = '\0';
9212                                 size = tmp - buf + 1;
9213                         } else {
9214                                 size = strlen(buf);
9215                                 if (done + size < count) {
9216                                         if (buf != kbuf)
9217                                                 break;
9218                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9219                                         pr_warn("Line length is too long: Should be less than %d\n",
9220                                                 WRITE_BUFSIZE - 2);
9221                                         ret = -EINVAL;
9222                                         goto out;
9223                                 }
9224                         }
9225                         done += size;
9226
9227                         /* Remove comments */
9228                         tmp = strchr(buf, '#');
9229
9230                         if (tmp)
9231                                 *tmp = '\0';
9232
9233                         ret = trace_run_command(buf, createfn);
9234                         if (ret)
9235                                 goto out;
9236                         buf += size;
9237
9238                 } while (done < count);
9239         }
9240         ret = done;
9241
9242 out:
9243         kfree(kbuf);
9244
9245         return ret;
9246 }
9247
9248 __init static int tracer_alloc_buffers(void)
9249 {
9250         int ring_buf_size;
9251         int ret = -ENOMEM;
9252
9254         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9255                 pr_warn("Tracing disabled due to lockdown\n");
9256                 return -EPERM;
9257         }
9258
9259         /*
9260          * Make sure we don't accidentally add more trace options
9261          * than we have bits for.
9262          */
9263         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9264
9265         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9266                 goto out;
9267
9268         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9269                 goto out_free_buffer_mask;
9270
9271         /* Only allocate trace_printk buffers if a trace_printk exists */
9272         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9273                 /* Must be called before global_trace.buffer is allocated */
9274                 trace_printk_init_buffers();
9275
9276         /* To save memory, keep the ring buffer size to its minimum */
9277         if (ring_buffer_expanded)
9278                 ring_buf_size = trace_buf_size;
9279         else
9280                 ring_buf_size = 1;
9281
9282         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9283         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9284
9285         raw_spin_lock_init(&global_trace.start_lock);
9286
9287         /*
9288          * The prepare callbacks allocate some memory for the ring buffer. We
9289          * don't free the buffer if the CPU goes down. If we were to free
9290          * the buffer, then the user would lose any trace that was in the
9291          * buffer. The memory will be removed once the "instance" is removed.
9292          */
9293         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9294                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9295                                       NULL);
9296         if (ret < 0)
9297                 goto out_free_cpumask;
9298         /* Used for event triggers */
9299         ret = -ENOMEM;
9300         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9301         if (!temp_buffer)
9302                 goto out_rm_hp_state;
9303
9304         if (trace_create_savedcmd() < 0)
9305                 goto out_free_temp_buffer;
9306
9307         /* TODO: make the number of buffers hot pluggable with CPUs */
9308         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9309                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9310                 WARN_ON(1);
9311                 goto out_free_savedcmd;
9312         }
9313
9314         if (global_trace.buffer_disabled)
9315                 tracing_off();
9316
9317         if (trace_boot_clock) {
9318                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9319                 if (ret < 0)
9320                         pr_warn("Trace clock %s not defined, going back to default\n",
9321                                 trace_boot_clock);
9322         }
9323
9324         /*
9325          * register_tracer() might reference current_trace, so it
9326          * needs to be set before we register anything. This is
9327          * just a bootstrap of current_trace anyway.
9328          */
9329         global_trace.current_trace = &nop_trace;
9330
9331         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9332
9333         ftrace_init_global_array_ops(&global_trace);
9334
9335         init_trace_flags_index(&global_trace);
9336
9337         register_tracer(&nop_trace);
9338
9339         /* Function tracing may start here (via kernel command line) */
9340         init_function_trace();
9341
9342         /* All seems OK, enable tracing */
9343         tracing_disabled = 0;
9344
9345         atomic_notifier_chain_register(&panic_notifier_list,
9346                                        &trace_panic_notifier);
9347
9348         register_die_notifier(&trace_die_notifier);
9349
9350         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9351
9352         INIT_LIST_HEAD(&global_trace.systems);
9353         INIT_LIST_HEAD(&global_trace.events);
9354         INIT_LIST_HEAD(&global_trace.hist_vars);
9355         INIT_LIST_HEAD(&global_trace.err_log);
9356         list_add(&global_trace.list, &ftrace_trace_arrays);
9357
9358         apply_trace_boot_options();
9359
9360         register_snapshot_cmd();
9361
9362         return 0;
9363
9364 out_free_savedcmd:
9365         free_saved_cmdlines_buffer(savedcmd);
9366 out_free_temp_buffer:
9367         ring_buffer_free(temp_buffer);
9368 out_rm_hp_state:
9369         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9370 out_free_cpumask:
9371         free_cpumask_var(global_trace.tracing_cpumask);
9372 out_free_buffer_mask:
9373         free_cpumask_var(tracing_buffer_mask);
9374 out:
9375         return ret;
9376 }
9377
9378 void __init early_trace_init(void)
9379 {
9380         if (tracepoint_printk) {
9381                 tracepoint_print_iter =
9382                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9383                 if (WARN_ON(!tracepoint_print_iter))
9384                         tracepoint_printk = 0;
9385                 else
9386                         static_key_enable(&tracepoint_printk_key.key);
9387         }
9388         tracer_alloc_buffers();
9389 }
9390
9391 void __init trace_init(void)
9392 {
9393         trace_event_init();
9394 }
9395
9396 __init static int clear_boot_tracer(void)
9397 {
9398         /*
9399          * The default bootup tracer name points into an init section.
9400          * This function is called as a late initcall. If we did not
9401          * find the boot tracer, then clear it out, to prevent
9402          * later registration from accessing the buffer that is
9403          * about to be freed.
9404          */
9405         if (!default_bootup_tracer)
9406                 return 0;
9407
9408         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9409                default_bootup_tracer);
9410         default_bootup_tracer = NULL;
9411
9412         return 0;
9413 }
9414
9415 fs_initcall(tracer_init_tracefs);
9416 late_initcall_sync(clear_boot_tracer);
9417
9418 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9419 __init static int tracing_set_default_clock(void)
9420 {
9421         /* sched_clock_stable() is determined in late_initcall */
9422         if (!trace_boot_clock && !sched_clock_stable()) {
9423                 printk(KERN_WARNING
9424                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9425                        "If you want to keep using the local clock, then add:\n"
9426                        "  \"trace_clock=local\"\n"
9427                        "on the kernel command line\n");
9428                 tracing_set_clock(&global_trace, "global");
9429         }
9430
9431         return 0;
9432 }
9433 late_initcall_sync(tracing_set_default_clock);
9434 #endif