arm64: zynqmp: Make zynqmp_firmware driver optional
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
390 {
391         /*
392          * Return false, because if filtered_pids does not exist,
393          * all pids are good to trace.
394          */
395         if (!filtered_pids)
396                 return false;
397
398         return !trace_find_filtered_pid(filtered_pids, task->pid);
399 }
400
401 /**
402  * trace_filter_add_remove_task - Add or remove a task from a pid_list
403  * @pid_list: The list to modify
404  * @self: The current task for fork or NULL for exit
405  * @task: The task to add or remove
406  *
407  * If adding a task, if @self is defined, the task is only added if @self
408  * is also included in @pid_list. This happens on fork and tasks should
409  * only be added when the parent is listed. If @self is NULL, then the
410  * @task pid will be removed from the list, which would happen on exit
411  * of a task.
412  */
413 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
414                                   struct task_struct *self,
415                                   struct task_struct *task)
416 {
417         if (!pid_list)
418                 return;
419
420         /* For forks, we only add if the forking task is listed */
421         if (self) {
422                 if (!trace_find_filtered_pid(pid_list, self->pid))
423                         return;
424         }
425
426         /* Sorry, but we don't support pid_max changing after setting */
427         if (task->pid >= pid_list->pid_max)
428                 return;
429
430         /* "self" is set for forks, and NULL for exits */
431         if (self)
432                 set_bit(task->pid, pid_list->pids);
433         else
434                 clear_bit(task->pid, pid_list->pids);
435 }
436
437 /**
438  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
439  * @pid_list: The pid list to show
440  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
441  * @pos: The position of the file
442  *
443  * This is used by the seq_file "next" operation to iterate the pids
444  * listed in a trace_pid_list structure.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
450 {
451         unsigned long pid = (unsigned long)v;
452
453         (*pos)++;
454
455         /* pid already is +1 of the actual prevous bit */
456         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
457
458         /* Return pid + 1 to allow zero to be represented */
459         if (pid < pid_list->pid_max)
460                 return (void *)(pid + 1);
461
462         return NULL;
463 }
464
465 /**
466  * trace_pid_start - Used for seq_file to start reading pid lists
467  * @pid_list: The pid list to show
468  * @pos: The position of the file
469  *
470  * This is used by seq_file "start" operation to start the iteration
471  * of listing pids.
472  *
473  * Returns the pid+1 as we want to display pid of zero, but NULL would
474  * stop the iteration.
475  */
476 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
477 {
478         unsigned long pid;
479         loff_t l = 0;
480
481         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
482         if (pid >= pid_list->pid_max)
483                 return NULL;
484
485         /* Return pid + 1 so that zero can be the exit value */
486         for (pid++; pid && l < *pos;
487              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
488                 ;
489         return (void *)pid;
490 }
491
492 /**
493  * trace_pid_show - show the current pid in seq_file processing
494  * @m: The seq_file structure to write into
495  * @v: A void pointer of the pid (+1) value to display
496  *
497  * Can be directly used by seq_file operations to display the current
498  * pid value.
499  */
500 int trace_pid_show(struct seq_file *m, void *v)
501 {
502         unsigned long pid = (unsigned long)v - 1;
503
504         seq_printf(m, "%lu\n", pid);
505         return 0;
506 }
507
508 /* 128 should be much more than enough */
509 #define PID_BUF_SIZE            127
510
511 int trace_pid_write(struct trace_pid_list *filtered_pids,
512                     struct trace_pid_list **new_pid_list,
513                     const char __user *ubuf, size_t cnt)
514 {
515         struct trace_pid_list *pid_list;
516         struct trace_parser parser;
517         unsigned long val;
518         int nr_pids = 0;
519         ssize_t read = 0;
520         ssize_t ret = 0;
521         loff_t pos;
522         pid_t pid;
523
524         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
525                 return -ENOMEM;
526
527         /*
528          * Always recreate a new array. The write is an all or nothing
529          * operation. Always create a new array when adding new pids by
530          * the user. If the operation fails, then the current list is
531          * not modified.
532          */
533         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
534         if (!pid_list) {
535                 trace_parser_put(&parser);
536                 return -ENOMEM;
537         }
538
539         pid_list->pid_max = READ_ONCE(pid_max);
540
541         /* Only truncating will shrink pid_max */
542         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
543                 pid_list->pid_max = filtered_pids->pid_max;
544
545         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
546         if (!pid_list->pids) {
547                 trace_parser_put(&parser);
548                 kfree(pid_list);
549                 return -ENOMEM;
550         }
551
552         if (filtered_pids) {
553                 /* copy the current bits to the new max */
554                 for_each_set_bit(pid, filtered_pids->pids,
555                                  filtered_pids->pid_max) {
556                         set_bit(pid, pid_list->pids);
557                         nr_pids++;
558                 }
559         }
560
561         while (cnt > 0) {
562
563                 pos = 0;
564
565                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
566                 if (ret < 0 || !trace_parser_loaded(&parser))
567                         break;
568
569                 read += ret;
570                 ubuf += ret;
571                 cnt -= ret;
572
573                 ret = -EINVAL;
574                 if (kstrtoul(parser.buffer, 0, &val))
575                         break;
576                 if (val >= pid_list->pid_max)
577                         break;
578
579                 pid = (pid_t)val;
580
581                 set_bit(pid, pid_list->pids);
582                 nr_pids++;
583
584                 trace_parser_clear(&parser);
585                 ret = 0;
586         }
587         trace_parser_put(&parser);
588
589         if (ret < 0) {
590                 trace_free_pid_list(pid_list);
591                 return ret;
592         }
593
594         if (!nr_pids) {
595                 /* Cleared the list of pids */
596                 trace_free_pid_list(pid_list);
597                 read = ret;
598                 pid_list = NULL;
599         }
600
601         *new_pid_list = pid_list;
602
603         return read;
604 }
605
606 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
607 {
608         u64 ts;
609
610         /* Early boot up does not have a buffer yet */
611         if (!buf->buffer)
612                 return trace_clock_local();
613
614         ts = ring_buffer_time_stamp(buf->buffer, cpu);
615         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
616
617         return ts;
618 }
619
620 u64 ftrace_now(int cpu)
621 {
622         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
623 }
624
625 /**
626  * tracing_is_enabled - Show if global_trace has been disabled
627  *
628  * Shows if the global trace has been enabled or not. It uses the
629  * mirror flag "buffer_disabled" to be used in fast paths such as for
630  * the irqsoff tracer. But it may be inaccurate due to races. If you
631  * need to know the accurate state, use tracing_is_on() which is a little
632  * slower, but accurate.
633  */
634 int tracing_is_enabled(void)
635 {
636         /*
637          * For quick access (irqsoff uses this in fast path), just
638          * return the mirror variable of the state of the ring buffer.
639          * It's a little racy, but we don't really care.
640          */
641         smp_rmb();
642         return !global_trace.buffer_disabled;
643 }
644
645 /*
646  * trace_buf_size is the size in bytes that is allocated
647  * for a buffer. Note, the number of bytes is always rounded
648  * to page size.
649  *
650  * This number is purposely set to a low number of 16384.
651  * If the dump on oops happens, it will be much appreciated
652  * to not have to wait for all that output. Anyway this can be
653  * boot time and run time configurable.
654  */
655 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
656
657 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
658
659 /* trace_types holds a link list of available tracers. */
660 static struct tracer            *trace_types __read_mostly;
661
662 /*
663  * trace_types_lock is used to protect the trace_types list.
664  */
665 DEFINE_MUTEX(trace_types_lock);
666
667 /*
668  * serialize the access of the ring buffer
669  *
670  * ring buffer serializes readers, but it is low level protection.
671  * The validity of the events (which returns by ring_buffer_peek() ..etc)
672  * are not protected by ring buffer.
673  *
674  * The content of events may become garbage if we allow other process consumes
675  * these events concurrently:
676  *   A) the page of the consumed events may become a normal page
677  *      (not reader page) in ring buffer, and this page will be rewrited
678  *      by events producer.
679  *   B) The page of the consumed events may become a page for splice_read,
680  *      and this page will be returned to system.
681  *
682  * These primitives allow multi process access to different cpu ring buffer
683  * concurrently.
684  *
685  * These primitives don't distinguish read-only and read-consume access.
686  * Multi read-only access are also serialized.
687  */
688
689 #ifdef CONFIG_SMP
690 static DECLARE_RWSEM(all_cpu_access_lock);
691 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         if (cpu == RING_BUFFER_ALL_CPUS) {
696                 /* gain it for accessing the whole ring buffer. */
697                 down_write(&all_cpu_access_lock);
698         } else {
699                 /* gain it for accessing a cpu ring buffer. */
700
701                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
702                 down_read(&all_cpu_access_lock);
703
704                 /* Secondly block other access to this @cpu ring buffer. */
705                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
706         }
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         if (cpu == RING_BUFFER_ALL_CPUS) {
712                 up_write(&all_cpu_access_lock);
713         } else {
714                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
715                 up_read(&all_cpu_access_lock);
716         }
717 }
718
719 static inline void trace_access_lock_init(void)
720 {
721         int cpu;
722
723         for_each_possible_cpu(cpu)
724                 mutex_init(&per_cpu(cpu_access_lock, cpu));
725 }
726
727 #else
728
729 static DEFINE_MUTEX(access_lock);
730
731 static inline void trace_access_lock(int cpu)
732 {
733         (void)cpu;
734         mutex_lock(&access_lock);
735 }
736
737 static inline void trace_access_unlock(int cpu)
738 {
739         (void)cpu;
740         mutex_unlock(&access_lock);
741 }
742
743 static inline void trace_access_lock_init(void)
744 {
745 }
746
747 #endif
748
749 #ifdef CONFIG_STACKTRACE
750 static void __ftrace_trace_stack(struct trace_buffer *buffer,
751                                  unsigned long flags,
752                                  int skip, int pc, struct pt_regs *regs);
753 static inline void ftrace_trace_stack(struct trace_array *tr,
754                                       struct trace_buffer *buffer,
755                                       unsigned long flags,
756                                       int skip, int pc, struct pt_regs *regs);
757
758 #else
759 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
760                                         unsigned long flags,
761                                         int skip, int pc, struct pt_regs *regs)
762 {
763 }
764 static inline void ftrace_trace_stack(struct trace_array *tr,
765                                       struct trace_buffer *buffer,
766                                       unsigned long flags,
767                                       int skip, int pc, struct pt_regs *regs)
768 {
769 }
770
771 #endif
772
773 static __always_inline void
774 trace_event_setup(struct ring_buffer_event *event,
775                   int type, unsigned long flags, int pc)
776 {
777         struct trace_entry *ent = ring_buffer_event_data(event);
778
779         tracing_generic_entry_update(ent, type, flags, pc);
780 }
781
782 static __always_inline struct ring_buffer_event *
783 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
784                           int type,
785                           unsigned long len,
786                           unsigned long flags, int pc)
787 {
788         struct ring_buffer_event *event;
789
790         event = ring_buffer_lock_reserve(buffer, len);
791         if (event != NULL)
792                 trace_event_setup(event, type, flags, pc);
793
794         return event;
795 }
796
797 void tracer_tracing_on(struct trace_array *tr)
798 {
799         if (tr->array_buffer.buffer)
800                 ring_buffer_record_on(tr->array_buffer.buffer);
801         /*
802          * This flag is looked at when buffers haven't been allocated
803          * yet, or by some tracers (like irqsoff), that just want to
804          * know if the ring buffer has been disabled, but it can handle
805          * races of where it gets disabled but we still do a record.
806          * As the check is in the fast path of the tracers, it is more
807          * important to be fast than accurate.
808          */
809         tr->buffer_disabled = 0;
810         /* Make the flag seen by readers */
811         smp_wmb();
812 }
813
814 /**
815  * tracing_on - enable tracing buffers
816  *
817  * This function enables tracing buffers that may have been
818  * disabled with tracing_off.
819  */
820 void tracing_on(void)
821 {
822         tracer_tracing_on(&global_trace);
823 }
824 EXPORT_SYMBOL_GPL(tracing_on);
825
826
827 static __always_inline void
828 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
829 {
830         __this_cpu_write(trace_taskinfo_save, true);
831
832         /* If this is the temp buffer, we need to commit fully */
833         if (this_cpu_read(trace_buffered_event) == event) {
834                 /* Length is in event->array[0] */
835                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
836                 /* Release the temp buffer */
837                 this_cpu_dec(trace_buffered_event_cnt);
838         } else
839                 ring_buffer_unlock_commit(buffer, event);
840 }
841
842 /**
843  * __trace_puts - write a constant string into the trace buffer.
844  * @ip:    The address of the caller
845  * @str:   The constant string to write
846  * @size:  The size of the string.
847  */
848 int __trace_puts(unsigned long ip, const char *str, int size)
849 {
850         struct ring_buffer_event *event;
851         struct trace_buffer *buffer;
852         struct print_entry *entry;
853         unsigned long irq_flags;
854         int alloc;
855         int pc;
856
857         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
858                 return 0;
859
860         pc = preempt_count();
861
862         if (unlikely(tracing_selftest_running || tracing_disabled))
863                 return 0;
864
865         alloc = sizeof(*entry) + size + 2; /* possible \n added */
866
867         local_save_flags(irq_flags);
868         buffer = global_trace.array_buffer.buffer;
869         ring_buffer_nest_start(buffer);
870         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
871                                             irq_flags, pc);
872         if (!event) {
873                 size = 0;
874                 goto out;
875         }
876
877         entry = ring_buffer_event_data(event);
878         entry->ip = ip;
879
880         memcpy(&entry->buf, str, size);
881
882         /* Add a newline if necessary */
883         if (entry->buf[size - 1] != '\n') {
884                 entry->buf[size] = '\n';
885                 entry->buf[size + 1] = '\0';
886         } else
887                 entry->buf[size] = '\0';
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891  out:
892         ring_buffer_nest_end(buffer);
893         return size;
894 }
895 EXPORT_SYMBOL_GPL(__trace_puts);
896
897 /**
898  * __trace_bputs - write the pointer to a constant string into trace buffer
899  * @ip:    The address of the caller
900  * @str:   The constant string to write to the buffer to
901  */
902 int __trace_bputs(unsigned long ip, const char *str)
903 {
904         struct ring_buffer_event *event;
905         struct trace_buffer *buffer;
906         struct bputs_entry *entry;
907         unsigned long irq_flags;
908         int size = sizeof(struct bputs_entry);
909         int ret = 0;
910         int pc;
911
912         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
913                 return 0;
914
915         pc = preempt_count();
916
917         if (unlikely(tracing_selftest_running || tracing_disabled))
918                 return 0;
919
920         local_save_flags(irq_flags);
921         buffer = global_trace.array_buffer.buffer;
922
923         ring_buffer_nest_start(buffer);
924         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
925                                             irq_flags, pc);
926         if (!event)
927                 goto out;
928
929         entry = ring_buffer_event_data(event);
930         entry->ip                       = ip;
931         entry->str                      = str;
932
933         __buffer_unlock_commit(buffer, event);
934         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
935
936         ret = 1;
937  out:
938         ring_buffer_nest_end(buffer);
939         return ret;
940 }
941 EXPORT_SYMBOL_GPL(__trace_bputs);
942
943 #ifdef CONFIG_TRACER_SNAPSHOT
944 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
945 {
946         struct tracer *tracer = tr->current_trace;
947         unsigned long flags;
948
949         if (in_nmi()) {
950                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
951                 internal_trace_puts("*** snapshot is being ignored        ***\n");
952                 return;
953         }
954
955         if (!tr->allocated_snapshot) {
956                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
957                 internal_trace_puts("*** stopping trace here!   ***\n");
958                 tracing_off();
959                 return;
960         }
961
962         /* Note, snapshot can not be used when the tracer uses it */
963         if (tracer->use_max_tr) {
964                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
965                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
966                 return;
967         }
968
969         local_irq_save(flags);
970         update_max_tr(tr, current, smp_processor_id(), cond_data);
971         local_irq_restore(flags);
972 }
973
974 void tracing_snapshot_instance(struct trace_array *tr)
975 {
976         tracing_snapshot_instance_cond(tr, NULL);
977 }
978
979 /**
980  * tracing_snapshot - take a snapshot of the current buffer.
981  *
982  * This causes a swap between the snapshot buffer and the current live
983  * tracing buffer. You can use this to take snapshots of the live
984  * trace when some condition is triggered, but continue to trace.
985  *
986  * Note, make sure to allocate the snapshot with either
987  * a tracing_snapshot_alloc(), or by doing it manually
988  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
989  *
990  * If the snapshot buffer is not allocated, it will stop tracing.
991  * Basically making a permanent snapshot.
992  */
993 void tracing_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996
997         tracing_snapshot_instance(tr);
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000
1001 /**
1002  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1003  * @tr:         The tracing instance to snapshot
1004  * @cond_data:  The data to be tested conditionally, and possibly saved
1005  *
1006  * This is the same as tracing_snapshot() except that the snapshot is
1007  * conditional - the snapshot will only happen if the
1008  * cond_snapshot.update() implementation receiving the cond_data
1009  * returns true, which means that the trace array's cond_snapshot
1010  * update() operation used the cond_data to determine whether the
1011  * snapshot should be taken, and if it was, presumably saved it along
1012  * with the snapshot.
1013  */
1014 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1015 {
1016         tracing_snapshot_instance_cond(tr, cond_data);
1017 }
1018 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1019
1020 /**
1021  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1022  * @tr:         The tracing instance
1023  *
1024  * When the user enables a conditional snapshot using
1025  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1026  * with the snapshot.  This accessor is used to retrieve it.
1027  *
1028  * Should not be called from cond_snapshot.update(), since it takes
1029  * the tr->max_lock lock, which the code calling
1030  * cond_snapshot.update() has already done.
1031  *
1032  * Returns the cond_data associated with the trace array's snapshot.
1033  */
1034 void *tracing_cond_snapshot_data(struct trace_array *tr)
1035 {
1036         void *cond_data = NULL;
1037
1038         arch_spin_lock(&tr->max_lock);
1039
1040         if (tr->cond_snapshot)
1041                 cond_data = tr->cond_snapshot->cond_data;
1042
1043         arch_spin_unlock(&tr->max_lock);
1044
1045         return cond_data;
1046 }
1047 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1048
1049 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1050                                         struct array_buffer *size_buf, int cpu_id);
1051 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1052
1053 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1054 {
1055         int ret;
1056
1057         if (!tr->allocated_snapshot) {
1058
1059                 /* allocate spare buffer */
1060                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1061                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1062                 if (ret < 0)
1063                         return ret;
1064
1065                 tr->allocated_snapshot = true;
1066         }
1067
1068         return 0;
1069 }
1070
1071 static void free_snapshot(struct trace_array *tr)
1072 {
1073         /*
1074          * We don't free the ring buffer. instead, resize it because
1075          * The max_tr ring buffer has some state (e.g. ring->clock) and
1076          * we want preserve it.
1077          */
1078         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1079         set_buffer_entries(&tr->max_buffer, 1);
1080         tracing_reset_online_cpus(&tr->max_buffer);
1081         tr->allocated_snapshot = false;
1082 }
1083
1084 /**
1085  * tracing_alloc_snapshot - allocate snapshot buffer.
1086  *
1087  * This only allocates the snapshot buffer if it isn't already
1088  * allocated - it doesn't also take a snapshot.
1089  *
1090  * This is meant to be used in cases where the snapshot buffer needs
1091  * to be set up for events that can't sleep but need to be able to
1092  * trigger a snapshot.
1093  */
1094 int tracing_alloc_snapshot(void)
1095 {
1096         struct trace_array *tr = &global_trace;
1097         int ret;
1098
1099         ret = tracing_alloc_snapshot_instance(tr);
1100         WARN_ON(ret < 0);
1101
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1105
1106 /**
1107  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1108  *
1109  * This is similar to tracing_snapshot(), but it will allocate the
1110  * snapshot buffer if it isn't already allocated. Use this only
1111  * where it is safe to sleep, as the allocation may sleep.
1112  *
1113  * This causes a swap between the snapshot buffer and the current live
1114  * tracing buffer. You can use this to take snapshots of the live
1115  * trace when some condition is triggered, but continue to trace.
1116  */
1117 void tracing_snapshot_alloc(void)
1118 {
1119         int ret;
1120
1121         ret = tracing_alloc_snapshot();
1122         if (ret < 0)
1123                 return;
1124
1125         tracing_snapshot();
1126 }
1127 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1128
1129 /**
1130  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1131  * @tr:         The tracing instance
1132  * @cond_data:  User data to associate with the snapshot
1133  * @update:     Implementation of the cond_snapshot update function
1134  *
1135  * Check whether the conditional snapshot for the given instance has
1136  * already been enabled, or if the current tracer is already using a
1137  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1138  * save the cond_data and update function inside.
1139  *
1140  * Returns 0 if successful, error otherwise.
1141  */
1142 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1143                                  cond_update_fn_t update)
1144 {
1145         struct cond_snapshot *cond_snapshot;
1146         int ret = 0;
1147
1148         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1149         if (!cond_snapshot)
1150                 return -ENOMEM;
1151
1152         cond_snapshot->cond_data = cond_data;
1153         cond_snapshot->update = update;
1154
1155         mutex_lock(&trace_types_lock);
1156
1157         ret = tracing_alloc_snapshot_instance(tr);
1158         if (ret)
1159                 goto fail_unlock;
1160
1161         if (tr->current_trace->use_max_tr) {
1162                 ret = -EBUSY;
1163                 goto fail_unlock;
1164         }
1165
1166         /*
1167          * The cond_snapshot can only change to NULL without the
1168          * trace_types_lock. We don't care if we race with it going
1169          * to NULL, but we want to make sure that it's not set to
1170          * something other than NULL when we get here, which we can
1171          * do safely with only holding the trace_types_lock and not
1172          * having to take the max_lock.
1173          */
1174         if (tr->cond_snapshot) {
1175                 ret = -EBUSY;
1176                 goto fail_unlock;
1177         }
1178
1179         arch_spin_lock(&tr->max_lock);
1180         tr->cond_snapshot = cond_snapshot;
1181         arch_spin_unlock(&tr->max_lock);
1182
1183         mutex_unlock(&trace_types_lock);
1184
1185         return ret;
1186
1187  fail_unlock:
1188         mutex_unlock(&trace_types_lock);
1189         kfree(cond_snapshot);
1190         return ret;
1191 }
1192 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1193
1194 /**
1195  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1196  * @tr:         The tracing instance
1197  *
1198  * Check whether the conditional snapshot for the given instance is
1199  * enabled; if so, free the cond_snapshot associated with it,
1200  * otherwise return -EINVAL.
1201  *
1202  * Returns 0 if successful, error otherwise.
1203  */
1204 int tracing_snapshot_cond_disable(struct trace_array *tr)
1205 {
1206         int ret = 0;
1207
1208         arch_spin_lock(&tr->max_lock);
1209
1210         if (!tr->cond_snapshot)
1211                 ret = -EINVAL;
1212         else {
1213                 kfree(tr->cond_snapshot);
1214                 tr->cond_snapshot = NULL;
1215         }
1216
1217         arch_spin_unlock(&tr->max_lock);
1218
1219         return ret;
1220 }
1221 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1222 #else
1223 void tracing_snapshot(void)
1224 {
1225         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot);
1228 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1229 {
1230         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1231 }
1232 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1233 int tracing_alloc_snapshot(void)
1234 {
1235         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1236         return -ENODEV;
1237 }
1238 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1239 void tracing_snapshot_alloc(void)
1240 {
1241         /* Give warning */
1242         tracing_snapshot();
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1245 void *tracing_cond_snapshot_data(struct trace_array *tr)
1246 {
1247         return NULL;
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1250 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1251 {
1252         return -ENODEV;
1253 }
1254 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1255 int tracing_snapshot_cond_disable(struct trace_array *tr)
1256 {
1257         return false;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1260 #endif /* CONFIG_TRACER_SNAPSHOT */
1261
1262 void tracer_tracing_off(struct trace_array *tr)
1263 {
1264         if (tr->array_buffer.buffer)
1265                 ring_buffer_record_off(tr->array_buffer.buffer);
1266         /*
1267          * This flag is looked at when buffers haven't been allocated
1268          * yet, or by some tracers (like irqsoff), that just want to
1269          * know if the ring buffer has been disabled, but it can handle
1270          * races of where it gets disabled but we still do a record.
1271          * As the check is in the fast path of the tracers, it is more
1272          * important to be fast than accurate.
1273          */
1274         tr->buffer_disabled = 1;
1275         /* Make the flag seen by readers */
1276         smp_wmb();
1277 }
1278
1279 /**
1280  * tracing_off - turn off tracing buffers
1281  *
1282  * This function stops the tracing buffers from recording data.
1283  * It does not disable any overhead the tracers themselves may
1284  * be causing. This function simply causes all recording to
1285  * the ring buffers to fail.
1286  */
1287 void tracing_off(void)
1288 {
1289         tracer_tracing_off(&global_trace);
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_off);
1292
1293 void disable_trace_on_warning(void)
1294 {
1295         if (__disable_trace_on_warning)
1296                 tracing_off();
1297 }
1298
1299 /**
1300  * tracer_tracing_is_on - show real state of ring buffer enabled
1301  * @tr : the trace array to know if ring buffer is enabled
1302  *
1303  * Shows real state of the ring buffer if it is enabled or not.
1304  */
1305 bool tracer_tracing_is_on(struct trace_array *tr)
1306 {
1307         if (tr->array_buffer.buffer)
1308                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1309         return !tr->buffer_disabled;
1310 }
1311
1312 /**
1313  * tracing_is_on - show state of ring buffers enabled
1314  */
1315 int tracing_is_on(void)
1316 {
1317         return tracer_tracing_is_on(&global_trace);
1318 }
1319 EXPORT_SYMBOL_GPL(tracing_is_on);
1320
1321 static int __init set_buf_size(char *str)
1322 {
1323         unsigned long buf_size;
1324
1325         if (!str)
1326                 return 0;
1327         buf_size = memparse(str, &str);
1328         /* nr_entries can not be zero */
1329         if (buf_size == 0)
1330                 return 0;
1331         trace_buf_size = buf_size;
1332         return 1;
1333 }
1334 __setup("trace_buf_size=", set_buf_size);
1335
1336 static int __init set_tracing_thresh(char *str)
1337 {
1338         unsigned long threshold;
1339         int ret;
1340
1341         if (!str)
1342                 return 0;
1343         ret = kstrtoul(str, 0, &threshold);
1344         if (ret < 0)
1345                 return 0;
1346         tracing_thresh = threshold * 1000;
1347         return 1;
1348 }
1349 __setup("tracing_thresh=", set_tracing_thresh);
1350
1351 unsigned long nsecs_to_usecs(unsigned long nsecs)
1352 {
1353         return nsecs / 1000;
1354 }
1355
1356 /*
1357  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1358  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1359  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1360  * of strings in the order that the evals (enum) were defined.
1361  */
1362 #undef C
1363 #define C(a, b) b
1364
1365 /* These must match the bit postions in trace_iterator_flags */
1366 static const char *trace_options[] = {
1367         TRACE_FLAGS
1368         NULL
1369 };
1370
1371 static struct {
1372         u64 (*func)(void);
1373         const char *name;
1374         int in_ns;              /* is this clock in nanoseconds? */
1375 } trace_clocks[] = {
1376         { trace_clock_local,            "local",        1 },
1377         { trace_clock_global,           "global",       1 },
1378         { trace_clock_counter,          "counter",      0 },
1379         { trace_clock_jiffies,          "uptime",       0 },
1380         { trace_clock,                  "perf",         1 },
1381         { ktime_get_mono_fast_ns,       "mono",         1 },
1382         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1383         { ktime_get_boot_fast_ns,       "boot",         1 },
1384         ARCH_TRACE_CLOCKS
1385 };
1386
1387 bool trace_clock_in_ns(struct trace_array *tr)
1388 {
1389         if (trace_clocks[tr->clock_id].in_ns)
1390                 return true;
1391
1392         return false;
1393 }
1394
1395 /*
1396  * trace_parser_get_init - gets the buffer for trace parser
1397  */
1398 int trace_parser_get_init(struct trace_parser *parser, int size)
1399 {
1400         memset(parser, 0, sizeof(*parser));
1401
1402         parser->buffer = kmalloc(size, GFP_KERNEL);
1403         if (!parser->buffer)
1404                 return 1;
1405
1406         parser->size = size;
1407         return 0;
1408 }
1409
1410 /*
1411  * trace_parser_put - frees the buffer for trace parser
1412  */
1413 void trace_parser_put(struct trace_parser *parser)
1414 {
1415         kfree(parser->buffer);
1416         parser->buffer = NULL;
1417 }
1418
1419 /*
1420  * trace_get_user - reads the user input string separated by  space
1421  * (matched by isspace(ch))
1422  *
1423  * For each string found the 'struct trace_parser' is updated,
1424  * and the function returns.
1425  *
1426  * Returns number of bytes read.
1427  *
1428  * See kernel/trace/trace.h for 'struct trace_parser' details.
1429  */
1430 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1431         size_t cnt, loff_t *ppos)
1432 {
1433         char ch;
1434         size_t read = 0;
1435         ssize_t ret;
1436
1437         if (!*ppos)
1438                 trace_parser_clear(parser);
1439
1440         ret = get_user(ch, ubuf++);
1441         if (ret)
1442                 goto out;
1443
1444         read++;
1445         cnt--;
1446
1447         /*
1448          * The parser is not finished with the last write,
1449          * continue reading the user input without skipping spaces.
1450          */
1451         if (!parser->cont) {
1452                 /* skip white space */
1453                 while (cnt && isspace(ch)) {
1454                         ret = get_user(ch, ubuf++);
1455                         if (ret)
1456                                 goto out;
1457                         read++;
1458                         cnt--;
1459                 }
1460
1461                 parser->idx = 0;
1462
1463                 /* only spaces were written */
1464                 if (isspace(ch) || !ch) {
1465                         *ppos += read;
1466                         ret = read;
1467                         goto out;
1468                 }
1469         }
1470
1471         /* read the non-space input */
1472         while (cnt && !isspace(ch) && ch) {
1473                 if (parser->idx < parser->size - 1)
1474                         parser->buffer[parser->idx++] = ch;
1475                 else {
1476                         ret = -EINVAL;
1477                         goto out;
1478                 }
1479                 ret = get_user(ch, ubuf++);
1480                 if (ret)
1481                         goto out;
1482                 read++;
1483                 cnt--;
1484         }
1485
1486         /* We either got finished input or we have to wait for another call. */
1487         if (isspace(ch) || !ch) {
1488                 parser->buffer[parser->idx] = 0;
1489                 parser->cont = false;
1490         } else if (parser->idx < parser->size - 1) {
1491                 parser->cont = true;
1492                 parser->buffer[parser->idx++] = ch;
1493                 /* Make sure the parsed string always terminates with '\0'. */
1494                 parser->buffer[parser->idx] = 0;
1495         } else {
1496                 ret = -EINVAL;
1497                 goto out;
1498         }
1499
1500         *ppos += read;
1501         ret = read;
1502
1503 out:
1504         return ret;
1505 }
1506
1507 /* TODO add a seq_buf_to_buffer() */
1508 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1509 {
1510         int len;
1511
1512         if (trace_seq_used(s) <= s->seq.readpos)
1513                 return -EBUSY;
1514
1515         len = trace_seq_used(s) - s->seq.readpos;
1516         if (cnt > len)
1517                 cnt = len;
1518         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1519
1520         s->seq.readpos += cnt;
1521         return cnt;
1522 }
1523
1524 unsigned long __read_mostly     tracing_thresh;
1525 static const struct file_operations tracing_max_lat_fops;
1526
1527 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1528         defined(CONFIG_FSNOTIFY)
1529
1530 static struct workqueue_struct *fsnotify_wq;
1531
1532 static void latency_fsnotify_workfn(struct work_struct *work)
1533 {
1534         struct trace_array *tr = container_of(work, struct trace_array,
1535                                               fsnotify_work);
1536         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1537                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1538 }
1539
1540 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1541 {
1542         struct trace_array *tr = container_of(iwork, struct trace_array,
1543                                               fsnotify_irqwork);
1544         queue_work(fsnotify_wq, &tr->fsnotify_work);
1545 }
1546
1547 static void trace_create_maxlat_file(struct trace_array *tr,
1548                                      struct dentry *d_tracer)
1549 {
1550         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1551         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1552         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1553                                               d_tracer, &tr->max_latency,
1554                                               &tracing_max_lat_fops);
1555 }
1556
1557 __init static int latency_fsnotify_init(void)
1558 {
1559         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1560                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1561         if (!fsnotify_wq) {
1562                 pr_err("Unable to allocate tr_max_lat_wq\n");
1563                 return -ENOMEM;
1564         }
1565         return 0;
1566 }
1567
1568 late_initcall_sync(latency_fsnotify_init);
1569
1570 void latency_fsnotify(struct trace_array *tr)
1571 {
1572         if (!fsnotify_wq)
1573                 return;
1574         /*
1575          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1576          * possible that we are called from __schedule() or do_idle(), which
1577          * could cause a deadlock.
1578          */
1579         irq_work_queue(&tr->fsnotify_irqwork);
1580 }
1581
1582 /*
1583  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1584  *  defined(CONFIG_FSNOTIFY)
1585  */
1586 #else
1587
1588 #define trace_create_maxlat_file(tr, d_tracer)                          \
1589         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1590                           &tr->max_latency, &tracing_max_lat_fops)
1591
1592 #endif
1593
1594 #ifdef CONFIG_TRACER_MAX_TRACE
1595 /*
1596  * Copy the new maximum trace into the separate maximum-trace
1597  * structure. (this way the maximum trace is permanently saved,
1598  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1599  */
1600 static void
1601 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1602 {
1603         struct array_buffer *trace_buf = &tr->array_buffer;
1604         struct array_buffer *max_buf = &tr->max_buffer;
1605         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1606         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1607
1608         max_buf->cpu = cpu;
1609         max_buf->time_start = data->preempt_timestamp;
1610
1611         max_data->saved_latency = tr->max_latency;
1612         max_data->critical_start = data->critical_start;
1613         max_data->critical_end = data->critical_end;
1614
1615         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1616         max_data->pid = tsk->pid;
1617         /*
1618          * If tsk == current, then use current_uid(), as that does not use
1619          * RCU. The irq tracer can be called out of RCU scope.
1620          */
1621         if (tsk == current)
1622                 max_data->uid = current_uid();
1623         else
1624                 max_data->uid = task_uid(tsk);
1625
1626         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1627         max_data->policy = tsk->policy;
1628         max_data->rt_priority = tsk->rt_priority;
1629
1630         /* record this tasks comm */
1631         tracing_record_cmdline(tsk);
1632         latency_fsnotify(tr);
1633 }
1634
1635 /**
1636  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1637  * @tr: tracer
1638  * @tsk: the task with the latency
1639  * @cpu: The cpu that initiated the trace.
1640  * @cond_data: User data associated with a conditional snapshot
1641  *
1642  * Flip the buffers between the @tr and the max_tr and record information
1643  * about which task was the cause of this latency.
1644  */
1645 void
1646 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1647               void *cond_data)
1648 {
1649         if (tr->stop_count)
1650                 return;
1651
1652         WARN_ON_ONCE(!irqs_disabled());
1653
1654         if (!tr->allocated_snapshot) {
1655                 /* Only the nop tracer should hit this when disabling */
1656                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1657                 return;
1658         }
1659
1660         arch_spin_lock(&tr->max_lock);
1661
1662         /* Inherit the recordable setting from array_buffer */
1663         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1664                 ring_buffer_record_on(tr->max_buffer.buffer);
1665         else
1666                 ring_buffer_record_off(tr->max_buffer.buffer);
1667
1668 #ifdef CONFIG_TRACER_SNAPSHOT
1669         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1670                 goto out_unlock;
1671 #endif
1672         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1673
1674         __update_max_tr(tr, tsk, cpu);
1675
1676  out_unlock:
1677         arch_spin_unlock(&tr->max_lock);
1678 }
1679
1680 /**
1681  * update_max_tr_single - only copy one trace over, and reset the rest
1682  * @tr: tracer
1683  * @tsk: task with the latency
1684  * @cpu: the cpu of the buffer to copy.
1685  *
1686  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1687  */
1688 void
1689 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1690 {
1691         int ret;
1692
1693         if (tr->stop_count)
1694                 return;
1695
1696         WARN_ON_ONCE(!irqs_disabled());
1697         if (!tr->allocated_snapshot) {
1698                 /* Only the nop tracer should hit this when disabling */
1699                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1700                 return;
1701         }
1702
1703         arch_spin_lock(&tr->max_lock);
1704
1705         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1706
1707         if (ret == -EBUSY) {
1708                 /*
1709                  * We failed to swap the buffer due to a commit taking
1710                  * place on this CPU. We fail to record, but we reset
1711                  * the max trace buffer (no one writes directly to it)
1712                  * and flag that it failed.
1713                  */
1714                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1715                         "Failed to swap buffers due to commit in progress\n");
1716         }
1717
1718         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1719
1720         __update_max_tr(tr, tsk, cpu);
1721         arch_spin_unlock(&tr->max_lock);
1722 }
1723 #endif /* CONFIG_TRACER_MAX_TRACE */
1724
1725 static int wait_on_pipe(struct trace_iterator *iter, int full)
1726 {
1727         /* Iterators are static, they should be filled or empty */
1728         if (trace_buffer_iter(iter, iter->cpu_file))
1729                 return 0;
1730
1731         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1732                                 full);
1733 }
1734
1735 #ifdef CONFIG_FTRACE_STARTUP_TEST
1736 static bool selftests_can_run;
1737
1738 struct trace_selftests {
1739         struct list_head                list;
1740         struct tracer                   *type;
1741 };
1742
1743 static LIST_HEAD(postponed_selftests);
1744
1745 static int save_selftest(struct tracer *type)
1746 {
1747         struct trace_selftests *selftest;
1748
1749         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1750         if (!selftest)
1751                 return -ENOMEM;
1752
1753         selftest->type = type;
1754         list_add(&selftest->list, &postponed_selftests);
1755         return 0;
1756 }
1757
1758 static int run_tracer_selftest(struct tracer *type)
1759 {
1760         struct trace_array *tr = &global_trace;
1761         struct tracer *saved_tracer = tr->current_trace;
1762         int ret;
1763
1764         if (!type->selftest || tracing_selftest_disabled)
1765                 return 0;
1766
1767         /*
1768          * If a tracer registers early in boot up (before scheduling is
1769          * initialized and such), then do not run its selftests yet.
1770          * Instead, run it a little later in the boot process.
1771          */
1772         if (!selftests_can_run)
1773                 return save_selftest(type);
1774
1775         /*
1776          * Run a selftest on this tracer.
1777          * Here we reset the trace buffer, and set the current
1778          * tracer to be this tracer. The tracer can then run some
1779          * internal tracing to verify that everything is in order.
1780          * If we fail, we do not register this tracer.
1781          */
1782         tracing_reset_online_cpus(&tr->array_buffer);
1783
1784         tr->current_trace = type;
1785
1786 #ifdef CONFIG_TRACER_MAX_TRACE
1787         if (type->use_max_tr) {
1788                 /* If we expanded the buffers, make sure the max is expanded too */
1789                 if (ring_buffer_expanded)
1790                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1791                                            RING_BUFFER_ALL_CPUS);
1792                 tr->allocated_snapshot = true;
1793         }
1794 #endif
1795
1796         /* the test is responsible for initializing and enabling */
1797         pr_info("Testing tracer %s: ", type->name);
1798         ret = type->selftest(type, tr);
1799         /* the test is responsible for resetting too */
1800         tr->current_trace = saved_tracer;
1801         if (ret) {
1802                 printk(KERN_CONT "FAILED!\n");
1803                 /* Add the warning after printing 'FAILED' */
1804                 WARN_ON(1);
1805                 return -1;
1806         }
1807         /* Only reset on passing, to avoid touching corrupted buffers */
1808         tracing_reset_online_cpus(&tr->array_buffer);
1809
1810 #ifdef CONFIG_TRACER_MAX_TRACE
1811         if (type->use_max_tr) {
1812                 tr->allocated_snapshot = false;
1813
1814                 /* Shrink the max buffer again */
1815                 if (ring_buffer_expanded)
1816                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1817                                            RING_BUFFER_ALL_CPUS);
1818         }
1819 #endif
1820
1821         printk(KERN_CONT "PASSED\n");
1822         return 0;
1823 }
1824
1825 static __init int init_trace_selftests(void)
1826 {
1827         struct trace_selftests *p, *n;
1828         struct tracer *t, **last;
1829         int ret;
1830
1831         selftests_can_run = true;
1832
1833         mutex_lock(&trace_types_lock);
1834
1835         if (list_empty(&postponed_selftests))
1836                 goto out;
1837
1838         pr_info("Running postponed tracer tests:\n");
1839
1840         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1841                 /* This loop can take minutes when sanitizers are enabled, so
1842                  * lets make sure we allow RCU processing.
1843                  */
1844                 cond_resched();
1845                 ret = run_tracer_selftest(p->type);
1846                 /* If the test fails, then warn and remove from available_tracers */
1847                 if (ret < 0) {
1848                         WARN(1, "tracer: %s failed selftest, disabling\n",
1849                              p->type->name);
1850                         last = &trace_types;
1851                         for (t = trace_types; t; t = t->next) {
1852                                 if (t == p->type) {
1853                                         *last = t->next;
1854                                         break;
1855                                 }
1856                                 last = &t->next;
1857                         }
1858                 }
1859                 list_del(&p->list);
1860                 kfree(p);
1861         }
1862
1863  out:
1864         mutex_unlock(&trace_types_lock);
1865
1866         return 0;
1867 }
1868 core_initcall(init_trace_selftests);
1869 #else
1870 static inline int run_tracer_selftest(struct tracer *type)
1871 {
1872         return 0;
1873 }
1874 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1875
1876 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1877
1878 static void __init apply_trace_boot_options(void);
1879
1880 /**
1881  * register_tracer - register a tracer with the ftrace system.
1882  * @type: the plugin for the tracer
1883  *
1884  * Register a new plugin tracer.
1885  */
1886 int __init register_tracer(struct tracer *type)
1887 {
1888         struct tracer *t;
1889         int ret = 0;
1890
1891         if (!type->name) {
1892                 pr_info("Tracer must have a name\n");
1893                 return -1;
1894         }
1895
1896         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1897                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1898                 return -1;
1899         }
1900
1901         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1902                 pr_warn("Can not register tracer %s due to lockdown\n",
1903                            type->name);
1904                 return -EPERM;
1905         }
1906
1907         mutex_lock(&trace_types_lock);
1908
1909         tracing_selftest_running = true;
1910
1911         for (t = trace_types; t; t = t->next) {
1912                 if (strcmp(type->name, t->name) == 0) {
1913                         /* already found */
1914                         pr_info("Tracer %s already registered\n",
1915                                 type->name);
1916                         ret = -1;
1917                         goto out;
1918                 }
1919         }
1920
1921         if (!type->set_flag)
1922                 type->set_flag = &dummy_set_flag;
1923         if (!type->flags) {
1924                 /*allocate a dummy tracer_flags*/
1925                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1926                 if (!type->flags) {
1927                         ret = -ENOMEM;
1928                         goto out;
1929                 }
1930                 type->flags->val = 0;
1931                 type->flags->opts = dummy_tracer_opt;
1932         } else
1933                 if (!type->flags->opts)
1934                         type->flags->opts = dummy_tracer_opt;
1935
1936         /* store the tracer for __set_tracer_option */
1937         type->flags->trace = type;
1938
1939         ret = run_tracer_selftest(type);
1940         if (ret < 0)
1941                 goto out;
1942
1943         type->next = trace_types;
1944         trace_types = type;
1945         add_tracer_options(&global_trace, type);
1946
1947  out:
1948         tracing_selftest_running = false;
1949         mutex_unlock(&trace_types_lock);
1950
1951         if (ret || !default_bootup_tracer)
1952                 goto out_unlock;
1953
1954         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1955                 goto out_unlock;
1956
1957         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1958         /* Do we want this tracer to start on bootup? */
1959         tracing_set_tracer(&global_trace, type->name);
1960         default_bootup_tracer = NULL;
1961
1962         apply_trace_boot_options();
1963
1964         /* disable other selftests, since this will break it. */
1965         tracing_selftest_disabled = true;
1966 #ifdef CONFIG_FTRACE_STARTUP_TEST
1967         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1968                type->name);
1969 #endif
1970
1971  out_unlock:
1972         return ret;
1973 }
1974
1975 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1976 {
1977         struct trace_buffer *buffer = buf->buffer;
1978
1979         if (!buffer)
1980                 return;
1981
1982         ring_buffer_record_disable(buffer);
1983
1984         /* Make sure all commits have finished */
1985         synchronize_rcu();
1986         ring_buffer_reset_cpu(buffer, cpu);
1987
1988         ring_buffer_record_enable(buffer);
1989 }
1990
1991 void tracing_reset_online_cpus(struct array_buffer *buf)
1992 {
1993         struct trace_buffer *buffer = buf->buffer;
1994         int cpu;
1995
1996         if (!buffer)
1997                 return;
1998
1999         ring_buffer_record_disable(buffer);
2000
2001         /* Make sure all commits have finished */
2002         synchronize_rcu();
2003
2004         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2005
2006         for_each_online_cpu(cpu)
2007                 ring_buffer_reset_cpu(buffer, cpu);
2008
2009         ring_buffer_record_enable(buffer);
2010 }
2011
2012 /* Must have trace_types_lock held */
2013 void tracing_reset_all_online_cpus(void)
2014 {
2015         struct trace_array *tr;
2016
2017         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2018                 if (!tr->clear_trace)
2019                         continue;
2020                 tr->clear_trace = false;
2021                 tracing_reset_online_cpus(&tr->array_buffer);
2022 #ifdef CONFIG_TRACER_MAX_TRACE
2023                 tracing_reset_online_cpus(&tr->max_buffer);
2024 #endif
2025         }
2026 }
2027
2028 static int *tgid_map;
2029
2030 #define SAVED_CMDLINES_DEFAULT 128
2031 #define NO_CMDLINE_MAP UINT_MAX
2032 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2033 struct saved_cmdlines_buffer {
2034         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2035         unsigned *map_cmdline_to_pid;
2036         unsigned cmdline_num;
2037         int cmdline_idx;
2038         char *saved_cmdlines;
2039 };
2040 static struct saved_cmdlines_buffer *savedcmd;
2041
2042 /* temporary disable recording */
2043 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2044
2045 static inline char *get_saved_cmdlines(int idx)
2046 {
2047         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2048 }
2049
2050 static inline void set_cmdline(int idx, const char *cmdline)
2051 {
2052         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2053 }
2054
2055 static int allocate_cmdlines_buffer(unsigned int val,
2056                                     struct saved_cmdlines_buffer *s)
2057 {
2058         s->map_cmdline_to_pid = kmalloc_array(val,
2059                                               sizeof(*s->map_cmdline_to_pid),
2060                                               GFP_KERNEL);
2061         if (!s->map_cmdline_to_pid)
2062                 return -ENOMEM;
2063
2064         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2065         if (!s->saved_cmdlines) {
2066                 kfree(s->map_cmdline_to_pid);
2067                 return -ENOMEM;
2068         }
2069
2070         s->cmdline_idx = 0;
2071         s->cmdline_num = val;
2072         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2073                sizeof(s->map_pid_to_cmdline));
2074         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2075                val * sizeof(*s->map_cmdline_to_pid));
2076
2077         return 0;
2078 }
2079
2080 static int trace_create_savedcmd(void)
2081 {
2082         int ret;
2083
2084         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2085         if (!savedcmd)
2086                 return -ENOMEM;
2087
2088         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2089         if (ret < 0) {
2090                 kfree(savedcmd);
2091                 savedcmd = NULL;
2092                 return -ENOMEM;
2093         }
2094
2095         return 0;
2096 }
2097
2098 int is_tracing_stopped(void)
2099 {
2100         return global_trace.stop_count;
2101 }
2102
2103 /**
2104  * tracing_start - quick start of the tracer
2105  *
2106  * If tracing is enabled but was stopped by tracing_stop,
2107  * this will start the tracer back up.
2108  */
2109 void tracing_start(void)
2110 {
2111         struct trace_buffer *buffer;
2112         unsigned long flags;
2113
2114         if (tracing_disabled)
2115                 return;
2116
2117         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2118         if (--global_trace.stop_count) {
2119                 if (global_trace.stop_count < 0) {
2120                         /* Someone screwed up their debugging */
2121                         WARN_ON_ONCE(1);
2122                         global_trace.stop_count = 0;
2123                 }
2124                 goto out;
2125         }
2126
2127         /* Prevent the buffers from switching */
2128         arch_spin_lock(&global_trace.max_lock);
2129
2130         buffer = global_trace.array_buffer.buffer;
2131         if (buffer)
2132                 ring_buffer_record_enable(buffer);
2133
2134 #ifdef CONFIG_TRACER_MAX_TRACE
2135         buffer = global_trace.max_buffer.buffer;
2136         if (buffer)
2137                 ring_buffer_record_enable(buffer);
2138 #endif
2139
2140         arch_spin_unlock(&global_trace.max_lock);
2141
2142  out:
2143         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2144 }
2145
2146 static void tracing_start_tr(struct trace_array *tr)
2147 {
2148         struct trace_buffer *buffer;
2149         unsigned long flags;
2150
2151         if (tracing_disabled)
2152                 return;
2153
2154         /* If global, we need to also start the max tracer */
2155         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2156                 return tracing_start();
2157
2158         raw_spin_lock_irqsave(&tr->start_lock, flags);
2159
2160         if (--tr->stop_count) {
2161                 if (tr->stop_count < 0) {
2162                         /* Someone screwed up their debugging */
2163                         WARN_ON_ONCE(1);
2164                         tr->stop_count = 0;
2165                 }
2166                 goto out;
2167         }
2168
2169         buffer = tr->array_buffer.buffer;
2170         if (buffer)
2171                 ring_buffer_record_enable(buffer);
2172
2173  out:
2174         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2175 }
2176
2177 /**
2178  * tracing_stop - quick stop of the tracer
2179  *
2180  * Light weight way to stop tracing. Use in conjunction with
2181  * tracing_start.
2182  */
2183 void tracing_stop(void)
2184 {
2185         struct trace_buffer *buffer;
2186         unsigned long flags;
2187
2188         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2189         if (global_trace.stop_count++)
2190                 goto out;
2191
2192         /* Prevent the buffers from switching */
2193         arch_spin_lock(&global_trace.max_lock);
2194
2195         buffer = global_trace.array_buffer.buffer;
2196         if (buffer)
2197                 ring_buffer_record_disable(buffer);
2198
2199 #ifdef CONFIG_TRACER_MAX_TRACE
2200         buffer = global_trace.max_buffer.buffer;
2201         if (buffer)
2202                 ring_buffer_record_disable(buffer);
2203 #endif
2204
2205         arch_spin_unlock(&global_trace.max_lock);
2206
2207  out:
2208         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2209 }
2210
2211 static void tracing_stop_tr(struct trace_array *tr)
2212 {
2213         struct trace_buffer *buffer;
2214         unsigned long flags;
2215
2216         /* If global, we need to also stop the max tracer */
2217         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2218                 return tracing_stop();
2219
2220         raw_spin_lock_irqsave(&tr->start_lock, flags);
2221         if (tr->stop_count++)
2222                 goto out;
2223
2224         buffer = tr->array_buffer.buffer;
2225         if (buffer)
2226                 ring_buffer_record_disable(buffer);
2227
2228  out:
2229         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2230 }
2231
2232 static int trace_save_cmdline(struct task_struct *tsk)
2233 {
2234         unsigned pid, idx;
2235
2236         /* treat recording of idle task as a success */
2237         if (!tsk->pid)
2238                 return 1;
2239
2240         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2241                 return 0;
2242
2243         /*
2244          * It's not the end of the world if we don't get
2245          * the lock, but we also don't want to spin
2246          * nor do we want to disable interrupts,
2247          * so if we miss here, then better luck next time.
2248          */
2249         if (!arch_spin_trylock(&trace_cmdline_lock))
2250                 return 0;
2251
2252         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2253         if (idx == NO_CMDLINE_MAP) {
2254                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2255
2256                 /*
2257                  * Check whether the cmdline buffer at idx has a pid
2258                  * mapped. We are going to overwrite that entry so we
2259                  * need to clear the map_pid_to_cmdline. Otherwise we
2260                  * would read the new comm for the old pid.
2261                  */
2262                 pid = savedcmd->map_cmdline_to_pid[idx];
2263                 if (pid != NO_CMDLINE_MAP)
2264                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2265
2266                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2267                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2268
2269                 savedcmd->cmdline_idx = idx;
2270         }
2271
2272         set_cmdline(idx, tsk->comm);
2273
2274         arch_spin_unlock(&trace_cmdline_lock);
2275
2276         return 1;
2277 }
2278
2279 static void __trace_find_cmdline(int pid, char comm[])
2280 {
2281         unsigned map;
2282
2283         if (!pid) {
2284                 strcpy(comm, "<idle>");
2285                 return;
2286         }
2287
2288         if (WARN_ON_ONCE(pid < 0)) {
2289                 strcpy(comm, "<XXX>");
2290                 return;
2291         }
2292
2293         if (pid > PID_MAX_DEFAULT) {
2294                 strcpy(comm, "<...>");
2295                 return;
2296         }
2297
2298         map = savedcmd->map_pid_to_cmdline[pid];
2299         if (map != NO_CMDLINE_MAP)
2300                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2301         else
2302                 strcpy(comm, "<...>");
2303 }
2304
2305 void trace_find_cmdline(int pid, char comm[])
2306 {
2307         preempt_disable();
2308         arch_spin_lock(&trace_cmdline_lock);
2309
2310         __trace_find_cmdline(pid, comm);
2311
2312         arch_spin_unlock(&trace_cmdline_lock);
2313         preempt_enable();
2314 }
2315
2316 int trace_find_tgid(int pid)
2317 {
2318         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2319                 return 0;
2320
2321         return tgid_map[pid];
2322 }
2323
2324 static int trace_save_tgid(struct task_struct *tsk)
2325 {
2326         /* treat recording of idle task as a success */
2327         if (!tsk->pid)
2328                 return 1;
2329
2330         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2331                 return 0;
2332
2333         tgid_map[tsk->pid] = tsk->tgid;
2334         return 1;
2335 }
2336
2337 static bool tracing_record_taskinfo_skip(int flags)
2338 {
2339         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2340                 return true;
2341         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2342                 return true;
2343         if (!__this_cpu_read(trace_taskinfo_save))
2344                 return true;
2345         return false;
2346 }
2347
2348 /**
2349  * tracing_record_taskinfo - record the task info of a task
2350  *
2351  * @task:  task to record
2352  * @flags: TRACE_RECORD_CMDLINE for recording comm
2353  *         TRACE_RECORD_TGID for recording tgid
2354  */
2355 void tracing_record_taskinfo(struct task_struct *task, int flags)
2356 {
2357         bool done;
2358
2359         if (tracing_record_taskinfo_skip(flags))
2360                 return;
2361
2362         /*
2363          * Record as much task information as possible. If some fail, continue
2364          * to try to record the others.
2365          */
2366         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2367         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2368
2369         /* If recording any information failed, retry again soon. */
2370         if (!done)
2371                 return;
2372
2373         __this_cpu_write(trace_taskinfo_save, false);
2374 }
2375
2376 /**
2377  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2378  *
2379  * @prev: previous task during sched_switch
2380  * @next: next task during sched_switch
2381  * @flags: TRACE_RECORD_CMDLINE for recording comm
2382  *         TRACE_RECORD_TGID for recording tgid
2383  */
2384 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2385                                           struct task_struct *next, int flags)
2386 {
2387         bool done;
2388
2389         if (tracing_record_taskinfo_skip(flags))
2390                 return;
2391
2392         /*
2393          * Record as much task information as possible. If some fail, continue
2394          * to try to record the others.
2395          */
2396         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2397         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2398         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2399         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2400
2401         /* If recording any information failed, retry again soon. */
2402         if (!done)
2403                 return;
2404
2405         __this_cpu_write(trace_taskinfo_save, false);
2406 }
2407
2408 /* Helpers to record a specific task information */
2409 void tracing_record_cmdline(struct task_struct *task)
2410 {
2411         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2412 }
2413
2414 void tracing_record_tgid(struct task_struct *task)
2415 {
2416         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2417 }
2418
2419 /*
2420  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2421  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2422  * simplifies those functions and keeps them in sync.
2423  */
2424 enum print_line_t trace_handle_return(struct trace_seq *s)
2425 {
2426         return trace_seq_has_overflowed(s) ?
2427                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2428 }
2429 EXPORT_SYMBOL_GPL(trace_handle_return);
2430
2431 void
2432 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2433                              unsigned long flags, int pc)
2434 {
2435         struct task_struct *tsk = current;
2436
2437         entry->preempt_count            = pc & 0xff;
2438         entry->pid                      = (tsk) ? tsk->pid : 0;
2439         entry->type                     = type;
2440         entry->flags =
2441 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2442                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2443 #else
2444                 TRACE_FLAG_IRQS_NOSUPPORT |
2445 #endif
2446                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2447                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2448                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2449                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2450                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2451 }
2452 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2453
2454 struct ring_buffer_event *
2455 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2456                           int type,
2457                           unsigned long len,
2458                           unsigned long flags, int pc)
2459 {
2460         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2461 }
2462
2463 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2464 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2465 static int trace_buffered_event_ref;
2466
2467 /**
2468  * trace_buffered_event_enable - enable buffering events
2469  *
2470  * When events are being filtered, it is quicker to use a temporary
2471  * buffer to write the event data into if there's a likely chance
2472  * that it will not be committed. The discard of the ring buffer
2473  * is not as fast as committing, and is much slower than copying
2474  * a commit.
2475  *
2476  * When an event is to be filtered, allocate per cpu buffers to
2477  * write the event data into, and if the event is filtered and discarded
2478  * it is simply dropped, otherwise, the entire data is to be committed
2479  * in one shot.
2480  */
2481 void trace_buffered_event_enable(void)
2482 {
2483         struct ring_buffer_event *event;
2484         struct page *page;
2485         int cpu;
2486
2487         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2488
2489         if (trace_buffered_event_ref++)
2490                 return;
2491
2492         for_each_tracing_cpu(cpu) {
2493                 page = alloc_pages_node(cpu_to_node(cpu),
2494                                         GFP_KERNEL | __GFP_NORETRY, 0);
2495                 if (!page)
2496                         goto failed;
2497
2498                 event = page_address(page);
2499                 memset(event, 0, sizeof(*event));
2500
2501                 per_cpu(trace_buffered_event, cpu) = event;
2502
2503                 preempt_disable();
2504                 if (cpu == smp_processor_id() &&
2505                     this_cpu_read(trace_buffered_event) !=
2506                     per_cpu(trace_buffered_event, cpu))
2507                         WARN_ON_ONCE(1);
2508                 preempt_enable();
2509         }
2510
2511         return;
2512  failed:
2513         trace_buffered_event_disable();
2514 }
2515
2516 static void enable_trace_buffered_event(void *data)
2517 {
2518         /* Probably not needed, but do it anyway */
2519         smp_rmb();
2520         this_cpu_dec(trace_buffered_event_cnt);
2521 }
2522
2523 static void disable_trace_buffered_event(void *data)
2524 {
2525         this_cpu_inc(trace_buffered_event_cnt);
2526 }
2527
2528 /**
2529  * trace_buffered_event_disable - disable buffering events
2530  *
2531  * When a filter is removed, it is faster to not use the buffered
2532  * events, and to commit directly into the ring buffer. Free up
2533  * the temp buffers when there are no more users. This requires
2534  * special synchronization with current events.
2535  */
2536 void trace_buffered_event_disable(void)
2537 {
2538         int cpu;
2539
2540         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2541
2542         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2543                 return;
2544
2545         if (--trace_buffered_event_ref)
2546                 return;
2547
2548         preempt_disable();
2549         /* For each CPU, set the buffer as used. */
2550         smp_call_function_many(tracing_buffer_mask,
2551                                disable_trace_buffered_event, NULL, 1);
2552         preempt_enable();
2553
2554         /* Wait for all current users to finish */
2555         synchronize_rcu();
2556
2557         for_each_tracing_cpu(cpu) {
2558                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2559                 per_cpu(trace_buffered_event, cpu) = NULL;
2560         }
2561         /*
2562          * Make sure trace_buffered_event is NULL before clearing
2563          * trace_buffered_event_cnt.
2564          */
2565         smp_wmb();
2566
2567         preempt_disable();
2568         /* Do the work on each cpu */
2569         smp_call_function_many(tracing_buffer_mask,
2570                                enable_trace_buffered_event, NULL, 1);
2571         preempt_enable();
2572 }
2573
2574 static struct trace_buffer *temp_buffer;
2575
2576 struct ring_buffer_event *
2577 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2578                           struct trace_event_file *trace_file,
2579                           int type, unsigned long len,
2580                           unsigned long flags, int pc)
2581 {
2582         struct ring_buffer_event *entry;
2583         int val;
2584
2585         *current_rb = trace_file->tr->array_buffer.buffer;
2586
2587         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2588              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2589             (entry = this_cpu_read(trace_buffered_event))) {
2590                 /* Try to use the per cpu buffer first */
2591                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2592                 if (val == 1) {
2593                         trace_event_setup(entry, type, flags, pc);
2594                         entry->array[0] = len;
2595                         return entry;
2596                 }
2597                 this_cpu_dec(trace_buffered_event_cnt);
2598         }
2599
2600         entry = __trace_buffer_lock_reserve(*current_rb,
2601                                             type, len, flags, pc);
2602         /*
2603          * If tracing is off, but we have triggers enabled
2604          * we still need to look at the event data. Use the temp_buffer
2605          * to store the trace event for the tigger to use. It's recusive
2606          * safe and will not be recorded anywhere.
2607          */
2608         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2609                 *current_rb = temp_buffer;
2610                 entry = __trace_buffer_lock_reserve(*current_rb,
2611                                                     type, len, flags, pc);
2612         }
2613         return entry;
2614 }
2615 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2616
2617 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2618 static DEFINE_MUTEX(tracepoint_printk_mutex);
2619
2620 static void output_printk(struct trace_event_buffer *fbuffer)
2621 {
2622         struct trace_event_call *event_call;
2623         struct trace_event_file *file;
2624         struct trace_event *event;
2625         unsigned long flags;
2626         struct trace_iterator *iter = tracepoint_print_iter;
2627
2628         /* We should never get here if iter is NULL */
2629         if (WARN_ON_ONCE(!iter))
2630                 return;
2631
2632         event_call = fbuffer->trace_file->event_call;
2633         if (!event_call || !event_call->event.funcs ||
2634             !event_call->event.funcs->trace)
2635                 return;
2636
2637         file = fbuffer->trace_file;
2638         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2639             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2640              !filter_match_preds(file->filter, fbuffer->entry)))
2641                 return;
2642
2643         event = &fbuffer->trace_file->event_call->event;
2644
2645         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2646         trace_seq_init(&iter->seq);
2647         iter->ent = fbuffer->entry;
2648         event_call->event.funcs->trace(iter, 0, event);
2649         trace_seq_putc(&iter->seq, 0);
2650         printk("%s", iter->seq.buffer);
2651
2652         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2653 }
2654
2655 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2656                              void __user *buffer, size_t *lenp,
2657                              loff_t *ppos)
2658 {
2659         int save_tracepoint_printk;
2660         int ret;
2661
2662         mutex_lock(&tracepoint_printk_mutex);
2663         save_tracepoint_printk = tracepoint_printk;
2664
2665         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2666
2667         /*
2668          * This will force exiting early, as tracepoint_printk
2669          * is always zero when tracepoint_printk_iter is not allocated
2670          */
2671         if (!tracepoint_print_iter)
2672                 tracepoint_printk = 0;
2673
2674         if (save_tracepoint_printk == tracepoint_printk)
2675                 goto out;
2676
2677         if (tracepoint_printk)
2678                 static_key_enable(&tracepoint_printk_key.key);
2679         else
2680                 static_key_disable(&tracepoint_printk_key.key);
2681
2682  out:
2683         mutex_unlock(&tracepoint_printk_mutex);
2684
2685         return ret;
2686 }
2687
2688 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2689 {
2690         if (static_key_false(&tracepoint_printk_key.key))
2691                 output_printk(fbuffer);
2692
2693         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2694                                     fbuffer->event, fbuffer->entry,
2695                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2696 }
2697 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2698
2699 /*
2700  * Skip 3:
2701  *
2702  *   trace_buffer_unlock_commit_regs()
2703  *   trace_event_buffer_commit()
2704  *   trace_event_raw_event_xxx()
2705  */
2706 # define STACK_SKIP 3
2707
2708 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2709                                      struct trace_buffer *buffer,
2710                                      struct ring_buffer_event *event,
2711                                      unsigned long flags, int pc,
2712                                      struct pt_regs *regs)
2713 {
2714         __buffer_unlock_commit(buffer, event);
2715
2716         /*
2717          * If regs is not set, then skip the necessary functions.
2718          * Note, we can still get here via blktrace, wakeup tracer
2719          * and mmiotrace, but that's ok if they lose a function or
2720          * two. They are not that meaningful.
2721          */
2722         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2723         ftrace_trace_userstack(buffer, flags, pc);
2724 }
2725
2726 /*
2727  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2728  */
2729 void
2730 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2731                                    struct ring_buffer_event *event)
2732 {
2733         __buffer_unlock_commit(buffer, event);
2734 }
2735
2736 static void
2737 trace_process_export(struct trace_export *export,
2738                struct ring_buffer_event *event)
2739 {
2740         struct trace_entry *entry;
2741         unsigned int size = 0;
2742
2743         entry = ring_buffer_event_data(event);
2744         size = ring_buffer_event_length(event);
2745         export->write(export, entry, size);
2746 }
2747
2748 static DEFINE_MUTEX(ftrace_export_lock);
2749
2750 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2751
2752 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2753
2754 static inline void ftrace_exports_enable(void)
2755 {
2756         static_branch_enable(&ftrace_exports_enabled);
2757 }
2758
2759 static inline void ftrace_exports_disable(void)
2760 {
2761         static_branch_disable(&ftrace_exports_enabled);
2762 }
2763
2764 static void ftrace_exports(struct ring_buffer_event *event)
2765 {
2766         struct trace_export *export;
2767
2768         preempt_disable_notrace();
2769
2770         export = rcu_dereference_raw_check(ftrace_exports_list);
2771         while (export) {
2772                 trace_process_export(export, event);
2773                 export = rcu_dereference_raw_check(export->next);
2774         }
2775
2776         preempt_enable_notrace();
2777 }
2778
2779 static inline void
2780 add_trace_export(struct trace_export **list, struct trace_export *export)
2781 {
2782         rcu_assign_pointer(export->next, *list);
2783         /*
2784          * We are entering export into the list but another
2785          * CPU might be walking that list. We need to make sure
2786          * the export->next pointer is valid before another CPU sees
2787          * the export pointer included into the list.
2788          */
2789         rcu_assign_pointer(*list, export);
2790 }
2791
2792 static inline int
2793 rm_trace_export(struct trace_export **list, struct trace_export *export)
2794 {
2795         struct trace_export **p;
2796
2797         for (p = list; *p != NULL; p = &(*p)->next)
2798                 if (*p == export)
2799                         break;
2800
2801         if (*p != export)
2802                 return -1;
2803
2804         rcu_assign_pointer(*p, (*p)->next);
2805
2806         return 0;
2807 }
2808
2809 static inline void
2810 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2811 {
2812         if (*list == NULL)
2813                 ftrace_exports_enable();
2814
2815         add_trace_export(list, export);
2816 }
2817
2818 static inline int
2819 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2820 {
2821         int ret;
2822
2823         ret = rm_trace_export(list, export);
2824         if (*list == NULL)
2825                 ftrace_exports_disable();
2826
2827         return ret;
2828 }
2829
2830 int register_ftrace_export(struct trace_export *export)
2831 {
2832         if (WARN_ON_ONCE(!export->write))
2833                 return -1;
2834
2835         mutex_lock(&ftrace_export_lock);
2836
2837         add_ftrace_export(&ftrace_exports_list, export);
2838
2839         mutex_unlock(&ftrace_export_lock);
2840
2841         return 0;
2842 }
2843 EXPORT_SYMBOL_GPL(register_ftrace_export);
2844
2845 int unregister_ftrace_export(struct trace_export *export)
2846 {
2847         int ret;
2848
2849         mutex_lock(&ftrace_export_lock);
2850
2851         ret = rm_ftrace_export(&ftrace_exports_list, export);
2852
2853         mutex_unlock(&ftrace_export_lock);
2854
2855         return ret;
2856 }
2857 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2858
2859 void
2860 trace_function(struct trace_array *tr,
2861                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2862                int pc)
2863 {
2864         struct trace_event_call *call = &event_function;
2865         struct trace_buffer *buffer = tr->array_buffer.buffer;
2866         struct ring_buffer_event *event;
2867         struct ftrace_entry *entry;
2868
2869         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2870                                             flags, pc);
2871         if (!event)
2872                 return;
2873         entry   = ring_buffer_event_data(event);
2874         entry->ip                       = ip;
2875         entry->parent_ip                = parent_ip;
2876
2877         if (!call_filter_check_discard(call, entry, buffer, event)) {
2878                 if (static_branch_unlikely(&ftrace_exports_enabled))
2879                         ftrace_exports(event);
2880                 __buffer_unlock_commit(buffer, event);
2881         }
2882 }
2883
2884 #ifdef CONFIG_STACKTRACE
2885
2886 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2887 #define FTRACE_KSTACK_NESTING   4
2888
2889 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2890
2891 struct ftrace_stack {
2892         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2893 };
2894
2895
2896 struct ftrace_stacks {
2897         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2898 };
2899
2900 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2901 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2902
2903 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2904                                  unsigned long flags,
2905                                  int skip, int pc, struct pt_regs *regs)
2906 {
2907         struct trace_event_call *call = &event_kernel_stack;
2908         struct ring_buffer_event *event;
2909         unsigned int size, nr_entries;
2910         struct ftrace_stack *fstack;
2911         struct stack_entry *entry;
2912         int stackidx;
2913
2914         /*
2915          * Add one, for this function and the call to save_stack_trace()
2916          * If regs is set, then these functions will not be in the way.
2917          */
2918 #ifndef CONFIG_UNWINDER_ORC
2919         if (!regs)
2920                 skip++;
2921 #endif
2922
2923         /*
2924          * Since events can happen in NMIs there's no safe way to
2925          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2926          * or NMI comes in, it will just have to use the default
2927          * FTRACE_STACK_SIZE.
2928          */
2929         preempt_disable_notrace();
2930
2931         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2932
2933         /* This should never happen. If it does, yell once and skip */
2934         if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2935                 goto out;
2936
2937         /*
2938          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2939          * interrupt will either see the value pre increment or post
2940          * increment. If the interrupt happens pre increment it will have
2941          * restored the counter when it returns.  We just need a barrier to
2942          * keep gcc from moving things around.
2943          */
2944         barrier();
2945
2946         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2947         size = ARRAY_SIZE(fstack->calls);
2948
2949         if (regs) {
2950                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2951                                                    size, skip);
2952         } else {
2953                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2954         }
2955
2956         size = nr_entries * sizeof(unsigned long);
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2958                                             sizeof(*entry) + size, flags, pc);
2959         if (!event)
2960                 goto out;
2961         entry = ring_buffer_event_data(event);
2962
2963         memcpy(&entry->caller, fstack->calls, size);
2964         entry->size = nr_entries;
2965
2966         if (!call_filter_check_discard(call, entry, buffer, event))
2967                 __buffer_unlock_commit(buffer, event);
2968
2969  out:
2970         /* Again, don't let gcc optimize things here */
2971         barrier();
2972         __this_cpu_dec(ftrace_stack_reserve);
2973         preempt_enable_notrace();
2974
2975 }
2976
2977 static inline void ftrace_trace_stack(struct trace_array *tr,
2978                                       struct trace_buffer *buffer,
2979                                       unsigned long flags,
2980                                       int skip, int pc, struct pt_regs *regs)
2981 {
2982         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2983                 return;
2984
2985         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2986 }
2987
2988 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2989                    int pc)
2990 {
2991         struct trace_buffer *buffer = tr->array_buffer.buffer;
2992
2993         if (rcu_is_watching()) {
2994                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2995                 return;
2996         }
2997
2998         /*
2999          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3000          * but if the above rcu_is_watching() failed, then the NMI
3001          * triggered someplace critical, and rcu_irq_enter() should
3002          * not be called from NMI.
3003          */
3004         if (unlikely(in_nmi()))
3005                 return;
3006
3007         rcu_irq_enter_irqson();
3008         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3009         rcu_irq_exit_irqson();
3010 }
3011
3012 /**
3013  * trace_dump_stack - record a stack back trace in the trace buffer
3014  * @skip: Number of functions to skip (helper handlers)
3015  */
3016 void trace_dump_stack(int skip)
3017 {
3018         unsigned long flags;
3019
3020         if (tracing_disabled || tracing_selftest_running)
3021                 return;
3022
3023         local_save_flags(flags);
3024
3025 #ifndef CONFIG_UNWINDER_ORC
3026         /* Skip 1 to skip this function. */
3027         skip++;
3028 #endif
3029         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3030                              flags, skip, preempt_count(), NULL);
3031 }
3032 EXPORT_SYMBOL_GPL(trace_dump_stack);
3033
3034 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3035 static DEFINE_PER_CPU(int, user_stack_count);
3036
3037 static void
3038 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3039 {
3040         struct trace_event_call *call = &event_user_stack;
3041         struct ring_buffer_event *event;
3042         struct userstack_entry *entry;
3043
3044         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3045                 return;
3046
3047         /*
3048          * NMIs can not handle page faults, even with fix ups.
3049          * The save user stack can (and often does) fault.
3050          */
3051         if (unlikely(in_nmi()))
3052                 return;
3053
3054         /*
3055          * prevent recursion, since the user stack tracing may
3056          * trigger other kernel events.
3057          */
3058         preempt_disable();
3059         if (__this_cpu_read(user_stack_count))
3060                 goto out;
3061
3062         __this_cpu_inc(user_stack_count);
3063
3064         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3065                                             sizeof(*entry), flags, pc);
3066         if (!event)
3067                 goto out_drop_count;
3068         entry   = ring_buffer_event_data(event);
3069
3070         entry->tgid             = current->tgid;
3071         memset(&entry->caller, 0, sizeof(entry->caller));
3072
3073         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3074         if (!call_filter_check_discard(call, entry, buffer, event))
3075                 __buffer_unlock_commit(buffer, event);
3076
3077  out_drop_count:
3078         __this_cpu_dec(user_stack_count);
3079  out:
3080         preempt_enable();
3081 }
3082 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3083 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3084                                    unsigned long flags, int pc)
3085 {
3086 }
3087 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3088
3089 #endif /* CONFIG_STACKTRACE */
3090
3091 /* created for use with alloc_percpu */
3092 struct trace_buffer_struct {
3093         int nesting;
3094         char buffer[4][TRACE_BUF_SIZE];
3095 };
3096
3097 static struct trace_buffer_struct *trace_percpu_buffer;
3098
3099 /*
3100  * Thise allows for lockless recording.  If we're nested too deeply, then
3101  * this returns NULL.
3102  */
3103 static char *get_trace_buf(void)
3104 {
3105         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3106
3107         if (!buffer || buffer->nesting >= 4)
3108                 return NULL;
3109
3110         buffer->nesting++;
3111
3112         /* Interrupts must see nesting incremented before we use the buffer */
3113         barrier();
3114         return &buffer->buffer[buffer->nesting][0];
3115 }
3116
3117 static void put_trace_buf(void)
3118 {
3119         /* Don't let the decrement of nesting leak before this */
3120         barrier();
3121         this_cpu_dec(trace_percpu_buffer->nesting);
3122 }
3123
3124 static int alloc_percpu_trace_buffer(void)
3125 {
3126         struct trace_buffer_struct *buffers;
3127
3128         buffers = alloc_percpu(struct trace_buffer_struct);
3129         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3130                 return -ENOMEM;
3131
3132         trace_percpu_buffer = buffers;
3133         return 0;
3134 }
3135
3136 static int buffers_allocated;
3137
3138 void trace_printk_init_buffers(void)
3139 {
3140         if (buffers_allocated)
3141                 return;
3142
3143         if (alloc_percpu_trace_buffer())
3144                 return;
3145
3146         /* trace_printk() is for debug use only. Don't use it in production. */
3147
3148         pr_warn("\n");
3149         pr_warn("**********************************************************\n");
3150         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3151         pr_warn("**                                                      **\n");
3152         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3153         pr_warn("**                                                      **\n");
3154         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3155         pr_warn("** unsafe for production use.                           **\n");
3156         pr_warn("**                                                      **\n");
3157         pr_warn("** If you see this message and you are not debugging    **\n");
3158         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3159         pr_warn("**                                                      **\n");
3160         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3161         pr_warn("**********************************************************\n");
3162
3163         /* Expand the buffers to set size */
3164         tracing_update_buffers();
3165
3166         buffers_allocated = 1;
3167
3168         /*
3169          * trace_printk_init_buffers() can be called by modules.
3170          * If that happens, then we need to start cmdline recording
3171          * directly here. If the global_trace.buffer is already
3172          * allocated here, then this was called by module code.
3173          */
3174         if (global_trace.array_buffer.buffer)
3175                 tracing_start_cmdline_record();
3176 }
3177 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3178
3179 void trace_printk_start_comm(void)
3180 {
3181         /* Start tracing comms if trace printk is set */
3182         if (!buffers_allocated)
3183                 return;
3184         tracing_start_cmdline_record();
3185 }
3186
3187 static void trace_printk_start_stop_comm(int enabled)
3188 {
3189         if (!buffers_allocated)
3190                 return;
3191
3192         if (enabled)
3193                 tracing_start_cmdline_record();
3194         else
3195                 tracing_stop_cmdline_record();
3196 }
3197
3198 /**
3199  * trace_vbprintk - write binary msg to tracing buffer
3200  * @ip:    The address of the caller
3201  * @fmt:   The string format to write to the buffer
3202  * @args:  Arguments for @fmt
3203  */
3204 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3205 {
3206         struct trace_event_call *call = &event_bprint;
3207         struct ring_buffer_event *event;
3208         struct trace_buffer *buffer;
3209         struct trace_array *tr = &global_trace;
3210         struct bprint_entry *entry;
3211         unsigned long flags;
3212         char *tbuffer;
3213         int len = 0, size, pc;
3214
3215         if (unlikely(tracing_selftest_running || tracing_disabled))
3216                 return 0;
3217
3218         /* Don't pollute graph traces with trace_vprintk internals */
3219         pause_graph_tracing();
3220
3221         pc = preempt_count();
3222         preempt_disable_notrace();
3223
3224         tbuffer = get_trace_buf();
3225         if (!tbuffer) {
3226                 len = 0;
3227                 goto out_nobuffer;
3228         }
3229
3230         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3231
3232         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3233                 goto out_put;
3234
3235         local_save_flags(flags);
3236         size = sizeof(*entry) + sizeof(u32) * len;
3237         buffer = tr->array_buffer.buffer;
3238         ring_buffer_nest_start(buffer);
3239         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3240                                             flags, pc);
3241         if (!event)
3242                 goto out;
3243         entry = ring_buffer_event_data(event);
3244         entry->ip                       = ip;
3245         entry->fmt                      = fmt;
3246
3247         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3248         if (!call_filter_check_discard(call, entry, buffer, event)) {
3249                 __buffer_unlock_commit(buffer, event);
3250                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3251         }
3252
3253 out:
3254         ring_buffer_nest_end(buffer);
3255 out_put:
3256         put_trace_buf();
3257
3258 out_nobuffer:
3259         preempt_enable_notrace();
3260         unpause_graph_tracing();
3261
3262         return len;
3263 }
3264 EXPORT_SYMBOL_GPL(trace_vbprintk);
3265
3266 __printf(3, 0)
3267 static int
3268 __trace_array_vprintk(struct trace_buffer *buffer,
3269                       unsigned long ip, const char *fmt, va_list args)
3270 {
3271         struct trace_event_call *call = &event_print;
3272         struct ring_buffer_event *event;
3273         int len = 0, size, pc;
3274         struct print_entry *entry;
3275         unsigned long flags;
3276         char *tbuffer;
3277
3278         if (tracing_disabled || tracing_selftest_running)
3279                 return 0;
3280
3281         /* Don't pollute graph traces with trace_vprintk internals */
3282         pause_graph_tracing();
3283
3284         pc = preempt_count();
3285         preempt_disable_notrace();
3286
3287
3288         tbuffer = get_trace_buf();
3289         if (!tbuffer) {
3290                 len = 0;
3291                 goto out_nobuffer;
3292         }
3293
3294         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3295
3296         local_save_flags(flags);
3297         size = sizeof(*entry) + len + 1;
3298         ring_buffer_nest_start(buffer);
3299         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3300                                             flags, pc);
3301         if (!event)
3302                 goto out;
3303         entry = ring_buffer_event_data(event);
3304         entry->ip = ip;
3305
3306         memcpy(&entry->buf, tbuffer, len + 1);
3307         if (!call_filter_check_discard(call, entry, buffer, event)) {
3308                 __buffer_unlock_commit(buffer, event);
3309                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3310         }
3311
3312 out:
3313         ring_buffer_nest_end(buffer);
3314         put_trace_buf();
3315
3316 out_nobuffer:
3317         preempt_enable_notrace();
3318         unpause_graph_tracing();
3319
3320         return len;
3321 }
3322
3323 __printf(3, 0)
3324 int trace_array_vprintk(struct trace_array *tr,
3325                         unsigned long ip, const char *fmt, va_list args)
3326 {
3327         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3328 }
3329
3330 __printf(3, 0)
3331 int trace_array_printk(struct trace_array *tr,
3332                        unsigned long ip, const char *fmt, ...)
3333 {
3334         int ret;
3335         va_list ap;
3336
3337         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3338                 return 0;
3339
3340         if (!tr)
3341                 return -ENOENT;
3342
3343         va_start(ap, fmt);
3344         ret = trace_array_vprintk(tr, ip, fmt, ap);
3345         va_end(ap);
3346         return ret;
3347 }
3348 EXPORT_SYMBOL_GPL(trace_array_printk);
3349
3350 __printf(3, 4)
3351 int trace_array_printk_buf(struct trace_buffer *buffer,
3352                            unsigned long ip, const char *fmt, ...)
3353 {
3354         int ret;
3355         va_list ap;
3356
3357         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3358                 return 0;
3359
3360         va_start(ap, fmt);
3361         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3362         va_end(ap);
3363         return ret;
3364 }
3365
3366 __printf(2, 0)
3367 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3368 {
3369         return trace_array_vprintk(&global_trace, ip, fmt, args);
3370 }
3371 EXPORT_SYMBOL_GPL(trace_vprintk);
3372
3373 static void trace_iterator_increment(struct trace_iterator *iter)
3374 {
3375         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3376
3377         iter->idx++;
3378         if (buf_iter)
3379                 ring_buffer_read(buf_iter, NULL);
3380 }
3381
3382 static struct trace_entry *
3383 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3384                 unsigned long *lost_events)
3385 {
3386         struct ring_buffer_event *event;
3387         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3388
3389         if (buf_iter)
3390                 event = ring_buffer_iter_peek(buf_iter, ts);
3391         else
3392                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3393                                          lost_events);
3394
3395         if (event) {
3396                 iter->ent_size = ring_buffer_event_length(event);
3397                 return ring_buffer_event_data(event);
3398         }
3399         iter->ent_size = 0;
3400         return NULL;
3401 }
3402
3403 static struct trace_entry *
3404 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3405                   unsigned long *missing_events, u64 *ent_ts)
3406 {
3407         struct trace_buffer *buffer = iter->array_buffer->buffer;
3408         struct trace_entry *ent, *next = NULL;
3409         unsigned long lost_events = 0, next_lost = 0;
3410         int cpu_file = iter->cpu_file;
3411         u64 next_ts = 0, ts;
3412         int next_cpu = -1;
3413         int next_size = 0;
3414         int cpu;
3415
3416         /*
3417          * If we are in a per_cpu trace file, don't bother by iterating over
3418          * all cpu and peek directly.
3419          */
3420         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3421                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3422                         return NULL;
3423                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3424                 if (ent_cpu)
3425                         *ent_cpu = cpu_file;
3426
3427                 return ent;
3428         }
3429
3430         for_each_tracing_cpu(cpu) {
3431
3432                 if (ring_buffer_empty_cpu(buffer, cpu))
3433                         continue;
3434
3435                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3436
3437                 /*
3438                  * Pick the entry with the smallest timestamp:
3439                  */
3440                 if (ent && (!next || ts < next_ts)) {
3441                         next = ent;
3442                         next_cpu = cpu;
3443                         next_ts = ts;
3444                         next_lost = lost_events;
3445                         next_size = iter->ent_size;
3446                 }
3447         }
3448
3449         iter->ent_size = next_size;
3450
3451         if (ent_cpu)
3452                 *ent_cpu = next_cpu;
3453
3454         if (ent_ts)
3455                 *ent_ts = next_ts;
3456
3457         if (missing_events)
3458                 *missing_events = next_lost;
3459
3460         return next;
3461 }
3462
3463 /* Find the next real entry, without updating the iterator itself */
3464 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3465                                           int *ent_cpu, u64 *ent_ts)
3466 {
3467         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3468 }
3469
3470 /* Find the next real entry, and increment the iterator to the next entry */
3471 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3472 {
3473         iter->ent = __find_next_entry(iter, &iter->cpu,
3474                                       &iter->lost_events, &iter->ts);
3475
3476         if (iter->ent)
3477                 trace_iterator_increment(iter);
3478
3479         return iter->ent ? iter : NULL;
3480 }
3481
3482 static void trace_consume(struct trace_iterator *iter)
3483 {
3484         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3485                             &iter->lost_events);
3486 }
3487
3488 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3489 {
3490         struct trace_iterator *iter = m->private;
3491         int i = (int)*pos;
3492         void *ent;
3493
3494         WARN_ON_ONCE(iter->leftover);
3495
3496         (*pos)++;
3497
3498         /* can't go backwards */
3499         if (iter->idx > i)
3500                 return NULL;
3501
3502         if (iter->idx < 0)
3503                 ent = trace_find_next_entry_inc(iter);
3504         else
3505                 ent = iter;
3506
3507         while (ent && iter->idx < i)
3508                 ent = trace_find_next_entry_inc(iter);
3509
3510         iter->pos = *pos;
3511
3512         return ent;
3513 }
3514
3515 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3516 {
3517         struct ring_buffer_event *event;
3518         struct ring_buffer_iter *buf_iter;
3519         unsigned long entries = 0;
3520         u64 ts;
3521
3522         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3523
3524         buf_iter = trace_buffer_iter(iter, cpu);
3525         if (!buf_iter)
3526                 return;
3527
3528         ring_buffer_iter_reset(buf_iter);
3529
3530         /*
3531          * We could have the case with the max latency tracers
3532          * that a reset never took place on a cpu. This is evident
3533          * by the timestamp being before the start of the buffer.
3534          */
3535         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3536                 if (ts >= iter->array_buffer->time_start)
3537                         break;
3538                 entries++;
3539                 ring_buffer_read(buf_iter, NULL);
3540         }
3541
3542         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3543 }
3544
3545 /*
3546  * The current tracer is copied to avoid a global locking
3547  * all around.
3548  */
3549 static void *s_start(struct seq_file *m, loff_t *pos)
3550 {
3551         struct trace_iterator *iter = m->private;
3552         struct trace_array *tr = iter->tr;
3553         int cpu_file = iter->cpu_file;
3554         void *p = NULL;
3555         loff_t l = 0;
3556         int cpu;
3557
3558         /*
3559          * copy the tracer to avoid using a global lock all around.
3560          * iter->trace is a copy of current_trace, the pointer to the
3561          * name may be used instead of a strcmp(), as iter->trace->name
3562          * will point to the same string as current_trace->name.
3563          */
3564         mutex_lock(&trace_types_lock);
3565         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3566                 *iter->trace = *tr->current_trace;
3567         mutex_unlock(&trace_types_lock);
3568
3569 #ifdef CONFIG_TRACER_MAX_TRACE
3570         if (iter->snapshot && iter->trace->use_max_tr)
3571                 return ERR_PTR(-EBUSY);
3572 #endif
3573
3574         if (!iter->snapshot)
3575                 atomic_inc(&trace_record_taskinfo_disabled);
3576
3577         if (*pos != iter->pos) {
3578                 iter->ent = NULL;
3579                 iter->cpu = 0;
3580                 iter->idx = -1;
3581
3582                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3583                         for_each_tracing_cpu(cpu)
3584                                 tracing_iter_reset(iter, cpu);
3585                 } else
3586                         tracing_iter_reset(iter, cpu_file);
3587
3588                 iter->leftover = 0;
3589                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3590                         ;
3591
3592         } else {
3593                 /*
3594                  * If we overflowed the seq_file before, then we want
3595                  * to just reuse the trace_seq buffer again.
3596                  */
3597                 if (iter->leftover)
3598                         p = iter;
3599                 else {
3600                         l = *pos - 1;
3601                         p = s_next(m, p, &l);
3602                 }
3603         }
3604
3605         trace_event_read_lock();
3606         trace_access_lock(cpu_file);
3607         return p;
3608 }
3609
3610 static void s_stop(struct seq_file *m, void *p)
3611 {
3612         struct trace_iterator *iter = m->private;
3613
3614 #ifdef CONFIG_TRACER_MAX_TRACE
3615         if (iter->snapshot && iter->trace->use_max_tr)
3616                 return;
3617 #endif
3618
3619         if (!iter->snapshot)
3620                 atomic_dec(&trace_record_taskinfo_disabled);
3621
3622         trace_access_unlock(iter->cpu_file);
3623         trace_event_read_unlock();
3624 }
3625
3626 static void
3627 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3628                       unsigned long *entries, int cpu)
3629 {
3630         unsigned long count;
3631
3632         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3633         /*
3634          * If this buffer has skipped entries, then we hold all
3635          * entries for the trace and we need to ignore the
3636          * ones before the time stamp.
3637          */
3638         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3639                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3640                 /* total is the same as the entries */
3641                 *total = count;
3642         } else
3643                 *total = count +
3644                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3645         *entries = count;
3646 }
3647
3648 static void
3649 get_total_entries(struct array_buffer *buf,
3650                   unsigned long *total, unsigned long *entries)
3651 {
3652         unsigned long t, e;
3653         int cpu;
3654
3655         *total = 0;
3656         *entries = 0;
3657
3658         for_each_tracing_cpu(cpu) {
3659                 get_total_entries_cpu(buf, &t, &e, cpu);
3660                 *total += t;
3661                 *entries += e;
3662         }
3663 }
3664
3665 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3666 {
3667         unsigned long total, entries;
3668
3669         if (!tr)
3670                 tr = &global_trace;
3671
3672         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3673
3674         return entries;
3675 }
3676
3677 unsigned long trace_total_entries(struct trace_array *tr)
3678 {
3679         unsigned long total, entries;
3680
3681         if (!tr)
3682                 tr = &global_trace;
3683
3684         get_total_entries(&tr->array_buffer, &total, &entries);
3685
3686         return entries;
3687 }
3688
3689 static void print_lat_help_header(struct seq_file *m)
3690 {
3691         seq_puts(m, "#                  _------=> CPU#            \n"
3692                     "#                 / _-----=> irqs-off        \n"
3693                     "#                | / _----=> need-resched    \n"
3694                     "#                || / _---=> hardirq/softirq \n"
3695                     "#                ||| / _--=> preempt-depth   \n"
3696                     "#                |||| /     delay            \n"
3697                     "#  cmd     pid   ||||| time  |   caller      \n"
3698                     "#     \\   /      |||||  \\    |   /         \n");
3699 }
3700
3701 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3702 {
3703         unsigned long total;
3704         unsigned long entries;
3705
3706         get_total_entries(buf, &total, &entries);
3707         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3708                    entries, total, num_online_cpus());
3709         seq_puts(m, "#\n");
3710 }
3711
3712 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3713                                    unsigned int flags)
3714 {
3715         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3716
3717         print_event_info(buf, m);
3718
3719         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3720         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3721 }
3722
3723 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3724                                        unsigned int flags)
3725 {
3726         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3727         const char *space = "          ";
3728         int prec = tgid ? 10 : 2;
3729
3730         print_event_info(buf, m);
3731
3732         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3733         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3734         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3735         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3736         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3737         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3738         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3739 }
3740
3741 void
3742 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3743 {
3744         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3745         struct array_buffer *buf = iter->array_buffer;
3746         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3747         struct tracer *type = iter->trace;
3748         unsigned long entries;
3749         unsigned long total;
3750         const char *name = "preemption";
3751
3752         name = type->name;
3753
3754         get_total_entries(buf, &total, &entries);
3755
3756         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3757                    name, UTS_RELEASE);
3758         seq_puts(m, "# -----------------------------------"
3759                  "---------------------------------\n");
3760         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3761                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3762                    nsecs_to_usecs(data->saved_latency),
3763                    entries,
3764                    total,
3765                    buf->cpu,
3766 #if defined(CONFIG_PREEMPT_NONE)
3767                    "server",
3768 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3769                    "desktop",
3770 #elif defined(CONFIG_PREEMPT)
3771                    "preempt",
3772 #elif defined(CONFIG_PREEMPT_RT)
3773                    "preempt_rt",
3774 #else
3775                    "unknown",
3776 #endif
3777                    /* These are reserved for later use */
3778                    0, 0, 0, 0);
3779 #ifdef CONFIG_SMP
3780         seq_printf(m, " #P:%d)\n", num_online_cpus());
3781 #else
3782         seq_puts(m, ")\n");
3783 #endif
3784         seq_puts(m, "#    -----------------\n");
3785         seq_printf(m, "#    | task: %.16s-%d "
3786                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3787                    data->comm, data->pid,
3788                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3789                    data->policy, data->rt_priority);
3790         seq_puts(m, "#    -----------------\n");
3791
3792         if (data->critical_start) {
3793                 seq_puts(m, "#  => started at: ");
3794                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3795                 trace_print_seq(m, &iter->seq);
3796                 seq_puts(m, "\n#  => ended at:   ");
3797                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3798                 trace_print_seq(m, &iter->seq);
3799                 seq_puts(m, "\n#\n");
3800         }
3801
3802         seq_puts(m, "#\n");
3803 }
3804
3805 static void test_cpu_buff_start(struct trace_iterator *iter)
3806 {
3807         struct trace_seq *s = &iter->seq;
3808         struct trace_array *tr = iter->tr;
3809
3810         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3811                 return;
3812
3813         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3814                 return;
3815
3816         if (cpumask_available(iter->started) &&
3817             cpumask_test_cpu(iter->cpu, iter->started))
3818                 return;
3819
3820         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3821                 return;
3822
3823         if (cpumask_available(iter->started))
3824                 cpumask_set_cpu(iter->cpu, iter->started);
3825
3826         /* Don't print started cpu buffer for the first entry of the trace */
3827         if (iter->idx > 1)
3828                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3829                                 iter->cpu);
3830 }
3831
3832 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3833 {
3834         struct trace_array *tr = iter->tr;
3835         struct trace_seq *s = &iter->seq;
3836         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3837         struct trace_entry *entry;
3838         struct trace_event *event;
3839
3840         entry = iter->ent;
3841
3842         test_cpu_buff_start(iter);
3843
3844         event = ftrace_find_event(entry->type);
3845
3846         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3847                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3848                         trace_print_lat_context(iter);
3849                 else
3850                         trace_print_context(iter);
3851         }
3852
3853         if (trace_seq_has_overflowed(s))
3854                 return TRACE_TYPE_PARTIAL_LINE;
3855
3856         if (event)
3857                 return event->funcs->trace(iter, sym_flags, event);
3858
3859         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3860
3861         return trace_handle_return(s);
3862 }
3863
3864 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3865 {
3866         struct trace_array *tr = iter->tr;
3867         struct trace_seq *s = &iter->seq;
3868         struct trace_entry *entry;
3869         struct trace_event *event;
3870
3871         entry = iter->ent;
3872
3873         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3874                 trace_seq_printf(s, "%d %d %llu ",
3875                                  entry->pid, iter->cpu, iter->ts);
3876
3877         if (trace_seq_has_overflowed(s))
3878                 return TRACE_TYPE_PARTIAL_LINE;
3879
3880         event = ftrace_find_event(entry->type);
3881         if (event)
3882                 return event->funcs->raw(iter, 0, event);
3883
3884         trace_seq_printf(s, "%d ?\n", entry->type);
3885
3886         return trace_handle_return(s);
3887 }
3888
3889 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3890 {
3891         struct trace_array *tr = iter->tr;
3892         struct trace_seq *s = &iter->seq;
3893         unsigned char newline = '\n';
3894         struct trace_entry *entry;
3895         struct trace_event *event;
3896
3897         entry = iter->ent;
3898
3899         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3900                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3901                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3902                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3903                 if (trace_seq_has_overflowed(s))
3904                         return TRACE_TYPE_PARTIAL_LINE;
3905         }
3906
3907         event = ftrace_find_event(entry->type);
3908         if (event) {
3909                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3910                 if (ret != TRACE_TYPE_HANDLED)
3911                         return ret;
3912         }
3913
3914         SEQ_PUT_FIELD(s, newline);
3915
3916         return trace_handle_return(s);
3917 }
3918
3919 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3920 {
3921         struct trace_array *tr = iter->tr;
3922         struct trace_seq *s = &iter->seq;
3923         struct trace_entry *entry;
3924         struct trace_event *event;
3925
3926         entry = iter->ent;
3927
3928         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3929                 SEQ_PUT_FIELD(s, entry->pid);
3930                 SEQ_PUT_FIELD(s, iter->cpu);
3931                 SEQ_PUT_FIELD(s, iter->ts);
3932                 if (trace_seq_has_overflowed(s))
3933                         return TRACE_TYPE_PARTIAL_LINE;
3934         }
3935
3936         event = ftrace_find_event(entry->type);
3937         return event ? event->funcs->binary(iter, 0, event) :
3938                 TRACE_TYPE_HANDLED;
3939 }
3940
3941 int trace_empty(struct trace_iterator *iter)
3942 {
3943         struct ring_buffer_iter *buf_iter;
3944         int cpu;
3945
3946         /* If we are looking at one CPU buffer, only check that one */
3947         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3948                 cpu = iter->cpu_file;
3949                 buf_iter = trace_buffer_iter(iter, cpu);
3950                 if (buf_iter) {
3951                         if (!ring_buffer_iter_empty(buf_iter))
3952                                 return 0;
3953                 } else {
3954                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3955                                 return 0;
3956                 }
3957                 return 1;
3958         }
3959
3960         for_each_tracing_cpu(cpu) {
3961                 buf_iter = trace_buffer_iter(iter, cpu);
3962                 if (buf_iter) {
3963                         if (!ring_buffer_iter_empty(buf_iter))
3964                                 return 0;
3965                 } else {
3966                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3967                                 return 0;
3968                 }
3969         }
3970
3971         return 1;
3972 }
3973
3974 /*  Called with trace_event_read_lock() held. */
3975 enum print_line_t print_trace_line(struct trace_iterator *iter)
3976 {
3977         struct trace_array *tr = iter->tr;
3978         unsigned long trace_flags = tr->trace_flags;
3979         enum print_line_t ret;
3980
3981         if (iter->lost_events) {
3982                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3983                                  iter->cpu, iter->lost_events);
3984                 if (trace_seq_has_overflowed(&iter->seq))
3985                         return TRACE_TYPE_PARTIAL_LINE;
3986         }
3987
3988         if (iter->trace && iter->trace->print_line) {
3989                 ret = iter->trace->print_line(iter);
3990                 if (ret != TRACE_TYPE_UNHANDLED)
3991                         return ret;
3992         }
3993
3994         if (iter->ent->type == TRACE_BPUTS &&
3995                         trace_flags & TRACE_ITER_PRINTK &&
3996                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3997                 return trace_print_bputs_msg_only(iter);
3998
3999         if (iter->ent->type == TRACE_BPRINT &&
4000                         trace_flags & TRACE_ITER_PRINTK &&
4001                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4002                 return trace_print_bprintk_msg_only(iter);
4003
4004         if (iter->ent->type == TRACE_PRINT &&
4005                         trace_flags & TRACE_ITER_PRINTK &&
4006                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4007                 return trace_print_printk_msg_only(iter);
4008
4009         if (trace_flags & TRACE_ITER_BIN)
4010                 return print_bin_fmt(iter);
4011
4012         if (trace_flags & TRACE_ITER_HEX)
4013                 return print_hex_fmt(iter);
4014
4015         if (trace_flags & TRACE_ITER_RAW)
4016                 return print_raw_fmt(iter);
4017
4018         return print_trace_fmt(iter);
4019 }
4020
4021 void trace_latency_header(struct seq_file *m)
4022 {
4023         struct trace_iterator *iter = m->private;
4024         struct trace_array *tr = iter->tr;
4025
4026         /* print nothing if the buffers are empty */
4027         if (trace_empty(iter))
4028                 return;
4029
4030         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4031                 print_trace_header(m, iter);
4032
4033         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4034                 print_lat_help_header(m);
4035 }
4036
4037 void trace_default_header(struct seq_file *m)
4038 {
4039         struct trace_iterator *iter = m->private;
4040         struct trace_array *tr = iter->tr;
4041         unsigned long trace_flags = tr->trace_flags;
4042
4043         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4044                 return;
4045
4046         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4047                 /* print nothing if the buffers are empty */
4048                 if (trace_empty(iter))
4049                         return;
4050                 print_trace_header(m, iter);
4051                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4052                         print_lat_help_header(m);
4053         } else {
4054                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4055                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4056                                 print_func_help_header_irq(iter->array_buffer,
4057                                                            m, trace_flags);
4058                         else
4059                                 print_func_help_header(iter->array_buffer, m,
4060                                                        trace_flags);
4061                 }
4062         }
4063 }
4064
4065 static void test_ftrace_alive(struct seq_file *m)
4066 {
4067         if (!ftrace_is_dead())
4068                 return;
4069         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4070                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4071 }
4072
4073 #ifdef CONFIG_TRACER_MAX_TRACE
4074 static void show_snapshot_main_help(struct seq_file *m)
4075 {
4076         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4077                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4078                     "#                      Takes a snapshot of the main buffer.\n"
4079                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4080                     "#                      (Doesn't have to be '2' works with any number that\n"
4081                     "#                       is not a '0' or '1')\n");
4082 }
4083
4084 static void show_snapshot_percpu_help(struct seq_file *m)
4085 {
4086         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4087 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4088         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4089                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4090 #else
4091         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4092                     "#                     Must use main snapshot file to allocate.\n");
4093 #endif
4094         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4095                     "#                      (Doesn't have to be '2' works with any number that\n"
4096                     "#                       is not a '0' or '1')\n");
4097 }
4098
4099 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4100 {
4101         if (iter->tr->allocated_snapshot)
4102                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4103         else
4104                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4105
4106         seq_puts(m, "# Snapshot commands:\n");
4107         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4108                 show_snapshot_main_help(m);
4109         else
4110                 show_snapshot_percpu_help(m);
4111 }
4112 #else
4113 /* Should never be called */
4114 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4115 #endif
4116
4117 static int s_show(struct seq_file *m, void *v)
4118 {
4119         struct trace_iterator *iter = v;
4120         int ret;
4121
4122         if (iter->ent == NULL) {
4123                 if (iter->tr) {
4124                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4125                         seq_puts(m, "#\n");
4126                         test_ftrace_alive(m);
4127                 }
4128                 if (iter->snapshot && trace_empty(iter))
4129                         print_snapshot_help(m, iter);
4130                 else if (iter->trace && iter->trace->print_header)
4131                         iter->trace->print_header(m);
4132                 else
4133                         trace_default_header(m);
4134
4135         } else if (iter->leftover) {
4136                 /*
4137                  * If we filled the seq_file buffer earlier, we
4138                  * want to just show it now.
4139                  */
4140                 ret = trace_print_seq(m, &iter->seq);
4141
4142                 /* ret should this time be zero, but you never know */
4143                 iter->leftover = ret;
4144
4145         } else {
4146                 print_trace_line(iter);
4147                 ret = trace_print_seq(m, &iter->seq);
4148                 /*
4149                  * If we overflow the seq_file buffer, then it will
4150                  * ask us for this data again at start up.
4151                  * Use that instead.
4152                  *  ret is 0 if seq_file write succeeded.
4153                  *        -1 otherwise.
4154                  */
4155                 iter->leftover = ret;
4156         }
4157
4158         return 0;
4159 }
4160
4161 /*
4162  * Should be used after trace_array_get(), trace_types_lock
4163  * ensures that i_cdev was already initialized.
4164  */
4165 static inline int tracing_get_cpu(struct inode *inode)
4166 {
4167         if (inode->i_cdev) /* See trace_create_cpu_file() */
4168                 return (long)inode->i_cdev - 1;
4169         return RING_BUFFER_ALL_CPUS;
4170 }
4171
4172 static const struct seq_operations tracer_seq_ops = {
4173         .start          = s_start,
4174         .next           = s_next,
4175         .stop           = s_stop,
4176         .show           = s_show,
4177 };
4178
4179 static struct trace_iterator *
4180 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4181 {
4182         struct trace_array *tr = inode->i_private;
4183         struct trace_iterator *iter;
4184         int cpu;
4185
4186         if (tracing_disabled)
4187                 return ERR_PTR(-ENODEV);
4188
4189         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4190         if (!iter)
4191                 return ERR_PTR(-ENOMEM);
4192
4193         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4194                                     GFP_KERNEL);
4195         if (!iter->buffer_iter)
4196                 goto release;
4197
4198         /*
4199          * We make a copy of the current tracer to avoid concurrent
4200          * changes on it while we are reading.
4201          */
4202         mutex_lock(&trace_types_lock);
4203         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4204         if (!iter->trace)
4205                 goto fail;
4206
4207         *iter->trace = *tr->current_trace;
4208
4209         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4210                 goto fail;
4211
4212         iter->tr = tr;
4213
4214 #ifdef CONFIG_TRACER_MAX_TRACE
4215         /* Currently only the top directory has a snapshot */
4216         if (tr->current_trace->print_max || snapshot)
4217                 iter->array_buffer = &tr->max_buffer;
4218         else
4219 #endif
4220                 iter->array_buffer = &tr->array_buffer;
4221         iter->snapshot = snapshot;
4222         iter->pos = -1;
4223         iter->cpu_file = tracing_get_cpu(inode);
4224         mutex_init(&iter->mutex);
4225
4226         /* Notify the tracer early; before we stop tracing. */
4227         if (iter->trace->open)
4228                 iter->trace->open(iter);
4229
4230         /* Annotate start of buffers if we had overruns */
4231         if (ring_buffer_overruns(iter->array_buffer->buffer))
4232                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4233
4234         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4235         if (trace_clocks[tr->clock_id].in_ns)
4236                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4237
4238         /* stop the trace while dumping if we are not opening "snapshot" */
4239         if (!iter->snapshot)
4240                 tracing_stop_tr(tr);
4241
4242         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4243                 for_each_tracing_cpu(cpu) {
4244                         iter->buffer_iter[cpu] =
4245                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4246                                                          cpu, GFP_KERNEL);
4247                 }
4248                 ring_buffer_read_prepare_sync();
4249                 for_each_tracing_cpu(cpu) {
4250                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4251                         tracing_iter_reset(iter, cpu);
4252                 }
4253         } else {
4254                 cpu = iter->cpu_file;
4255                 iter->buffer_iter[cpu] =
4256                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4257                                                  cpu, GFP_KERNEL);
4258                 ring_buffer_read_prepare_sync();
4259                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4260                 tracing_iter_reset(iter, cpu);
4261         }
4262
4263         mutex_unlock(&trace_types_lock);
4264
4265         return iter;
4266
4267  fail:
4268         mutex_unlock(&trace_types_lock);
4269         kfree(iter->trace);
4270         kfree(iter->buffer_iter);
4271 release:
4272         seq_release_private(inode, file);
4273         return ERR_PTR(-ENOMEM);
4274 }
4275
4276 int tracing_open_generic(struct inode *inode, struct file *filp)
4277 {
4278         int ret;
4279
4280         ret = tracing_check_open_get_tr(NULL);
4281         if (ret)
4282                 return ret;
4283
4284         filp->private_data = inode->i_private;
4285         return 0;
4286 }
4287
4288 bool tracing_is_disabled(void)
4289 {
4290         return (tracing_disabled) ? true: false;
4291 }
4292
4293 /*
4294  * Open and update trace_array ref count.
4295  * Must have the current trace_array passed to it.
4296  */
4297 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4298 {
4299         struct trace_array *tr = inode->i_private;
4300         int ret;
4301
4302         ret = tracing_check_open_get_tr(tr);
4303         if (ret)
4304                 return ret;
4305
4306         filp->private_data = inode->i_private;
4307
4308         return 0;
4309 }
4310
4311 static int tracing_release(struct inode *inode, struct file *file)
4312 {
4313         struct trace_array *tr = inode->i_private;
4314         struct seq_file *m = file->private_data;
4315         struct trace_iterator *iter;
4316         int cpu;
4317
4318         if (!(file->f_mode & FMODE_READ)) {
4319                 trace_array_put(tr);
4320                 return 0;
4321         }
4322
4323         /* Writes do not use seq_file */
4324         iter = m->private;
4325         mutex_lock(&trace_types_lock);
4326
4327         for_each_tracing_cpu(cpu) {
4328                 if (iter->buffer_iter[cpu])
4329                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4330         }
4331
4332         if (iter->trace && iter->trace->close)
4333                 iter->trace->close(iter);
4334
4335         if (!iter->snapshot)
4336                 /* reenable tracing if it was previously enabled */
4337                 tracing_start_tr(tr);
4338
4339         __trace_array_put(tr);
4340
4341         mutex_unlock(&trace_types_lock);
4342
4343         mutex_destroy(&iter->mutex);
4344         free_cpumask_var(iter->started);
4345         kfree(iter->trace);
4346         kfree(iter->buffer_iter);
4347         seq_release_private(inode, file);
4348
4349         return 0;
4350 }
4351
4352 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4353 {
4354         struct trace_array *tr = inode->i_private;
4355
4356         trace_array_put(tr);
4357         return 0;
4358 }
4359
4360 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4361 {
4362         struct trace_array *tr = inode->i_private;
4363
4364         trace_array_put(tr);
4365
4366         return single_release(inode, file);
4367 }
4368
4369 static int tracing_open(struct inode *inode, struct file *file)
4370 {
4371         struct trace_array *tr = inode->i_private;
4372         struct trace_iterator *iter;
4373         int ret;
4374
4375         ret = tracing_check_open_get_tr(tr);
4376         if (ret)
4377                 return ret;
4378
4379         /* If this file was open for write, then erase contents */
4380         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4381                 int cpu = tracing_get_cpu(inode);
4382                 struct array_buffer *trace_buf = &tr->array_buffer;
4383
4384 #ifdef CONFIG_TRACER_MAX_TRACE
4385                 if (tr->current_trace->print_max)
4386                         trace_buf = &tr->max_buffer;
4387 #endif
4388
4389                 if (cpu == RING_BUFFER_ALL_CPUS)
4390                         tracing_reset_online_cpus(trace_buf);
4391                 else
4392                         tracing_reset_cpu(trace_buf, cpu);
4393         }
4394
4395         if (file->f_mode & FMODE_READ) {
4396                 iter = __tracing_open(inode, file, false);
4397                 if (IS_ERR(iter))
4398                         ret = PTR_ERR(iter);
4399                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4400                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4401         }
4402
4403         if (ret < 0)
4404                 trace_array_put(tr);
4405
4406         return ret;
4407 }
4408
4409 /*
4410  * Some tracers are not suitable for instance buffers.
4411  * A tracer is always available for the global array (toplevel)
4412  * or if it explicitly states that it is.
4413  */
4414 static bool
4415 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4416 {
4417         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4418 }
4419
4420 /* Find the next tracer that this trace array may use */
4421 static struct tracer *
4422 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4423 {
4424         while (t && !trace_ok_for_array(t, tr))
4425                 t = t->next;
4426
4427         return t;
4428 }
4429
4430 static void *
4431 t_next(struct seq_file *m, void *v, loff_t *pos)
4432 {
4433         struct trace_array *tr = m->private;
4434         struct tracer *t = v;
4435
4436         (*pos)++;
4437
4438         if (t)
4439                 t = get_tracer_for_array(tr, t->next);
4440
4441         return t;
4442 }
4443
4444 static void *t_start(struct seq_file *m, loff_t *pos)
4445 {
4446         struct trace_array *tr = m->private;
4447         struct tracer *t;
4448         loff_t l = 0;
4449
4450         mutex_lock(&trace_types_lock);
4451
4452         t = get_tracer_for_array(tr, trace_types);
4453         for (; t && l < *pos; t = t_next(m, t, &l))
4454                         ;
4455
4456         return t;
4457 }
4458
4459 static void t_stop(struct seq_file *m, void *p)
4460 {
4461         mutex_unlock(&trace_types_lock);
4462 }
4463
4464 static int t_show(struct seq_file *m, void *v)
4465 {
4466         struct tracer *t = v;
4467
4468         if (!t)
4469                 return 0;
4470
4471         seq_puts(m, t->name);
4472         if (t->next)
4473                 seq_putc(m, ' ');
4474         else
4475                 seq_putc(m, '\n');
4476
4477         return 0;
4478 }
4479
4480 static const struct seq_operations show_traces_seq_ops = {
4481         .start          = t_start,
4482         .next           = t_next,
4483         .stop           = t_stop,
4484         .show           = t_show,
4485 };
4486
4487 static int show_traces_open(struct inode *inode, struct file *file)
4488 {
4489         struct trace_array *tr = inode->i_private;
4490         struct seq_file *m;
4491         int ret;
4492
4493         ret = tracing_check_open_get_tr(tr);
4494         if (ret)
4495                 return ret;
4496
4497         ret = seq_open(file, &show_traces_seq_ops);
4498         if (ret) {
4499                 trace_array_put(tr);
4500                 return ret;
4501         }
4502
4503         m = file->private_data;
4504         m->private = tr;
4505
4506         return 0;
4507 }
4508
4509 static int show_traces_release(struct inode *inode, struct file *file)
4510 {
4511         struct trace_array *tr = inode->i_private;
4512
4513         trace_array_put(tr);
4514         return seq_release(inode, file);
4515 }
4516
4517 static ssize_t
4518 tracing_write_stub(struct file *filp, const char __user *ubuf,
4519                    size_t count, loff_t *ppos)
4520 {
4521         return count;
4522 }
4523
4524 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4525 {
4526         int ret;
4527
4528         if (file->f_mode & FMODE_READ)
4529                 ret = seq_lseek(file, offset, whence);
4530         else
4531                 file->f_pos = ret = 0;
4532
4533         return ret;
4534 }
4535
4536 static const struct file_operations tracing_fops = {
4537         .open           = tracing_open,
4538         .read           = seq_read,
4539         .write          = tracing_write_stub,
4540         .llseek         = tracing_lseek,
4541         .release        = tracing_release,
4542 };
4543
4544 static const struct file_operations show_traces_fops = {
4545         .open           = show_traces_open,
4546         .read           = seq_read,
4547         .llseek         = seq_lseek,
4548         .release        = show_traces_release,
4549 };
4550
4551 static ssize_t
4552 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4553                      size_t count, loff_t *ppos)
4554 {
4555         struct trace_array *tr = file_inode(filp)->i_private;
4556         char *mask_str;
4557         int len;
4558
4559         len = snprintf(NULL, 0, "%*pb\n",
4560                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4561         mask_str = kmalloc(len, GFP_KERNEL);
4562         if (!mask_str)
4563                 return -ENOMEM;
4564
4565         len = snprintf(mask_str, len, "%*pb\n",
4566                        cpumask_pr_args(tr->tracing_cpumask));
4567         if (len >= count) {
4568                 count = -EINVAL;
4569                 goto out_err;
4570         }
4571         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4572
4573 out_err:
4574         kfree(mask_str);
4575
4576         return count;
4577 }
4578
4579 int tracing_set_cpumask(struct trace_array *tr,
4580                         cpumask_var_t tracing_cpumask_new)
4581 {
4582         int cpu;
4583
4584         if (!tr)
4585                 return -EINVAL;
4586
4587         local_irq_disable();
4588         arch_spin_lock(&tr->max_lock);
4589         for_each_tracing_cpu(cpu) {
4590                 /*
4591                  * Increase/decrease the disabled counter if we are
4592                  * about to flip a bit in the cpumask:
4593                  */
4594                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4595                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4596                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4597                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4598                 }
4599                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4600                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4601                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4602                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4603                 }
4604         }
4605         arch_spin_unlock(&tr->max_lock);
4606         local_irq_enable();
4607
4608         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4609
4610         return 0;
4611 }
4612
4613 static ssize_t
4614 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4615                       size_t count, loff_t *ppos)
4616 {
4617         struct trace_array *tr = file_inode(filp)->i_private;
4618         cpumask_var_t tracing_cpumask_new;
4619         int err;
4620
4621         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4622                 return -ENOMEM;
4623
4624         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4625         if (err)
4626                 goto err_free;
4627
4628         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4629         if (err)
4630                 goto err_free;
4631
4632         free_cpumask_var(tracing_cpumask_new);
4633
4634         return count;
4635
4636 err_free:
4637         free_cpumask_var(tracing_cpumask_new);
4638
4639         return err;
4640 }
4641
4642 static const struct file_operations tracing_cpumask_fops = {
4643         .open           = tracing_open_generic_tr,
4644         .read           = tracing_cpumask_read,
4645         .write          = tracing_cpumask_write,
4646         .release        = tracing_release_generic_tr,
4647         .llseek         = generic_file_llseek,
4648 };
4649
4650 static int tracing_trace_options_show(struct seq_file *m, void *v)
4651 {
4652         struct tracer_opt *trace_opts;
4653         struct trace_array *tr = m->private;
4654         u32 tracer_flags;
4655         int i;
4656
4657         mutex_lock(&trace_types_lock);
4658         tracer_flags = tr->current_trace->flags->val;
4659         trace_opts = tr->current_trace->flags->opts;
4660
4661         for (i = 0; trace_options[i]; i++) {
4662                 if (tr->trace_flags & (1 << i))
4663                         seq_printf(m, "%s\n", trace_options[i]);
4664                 else
4665                         seq_printf(m, "no%s\n", trace_options[i]);
4666         }
4667
4668         for (i = 0; trace_opts[i].name; i++) {
4669                 if (tracer_flags & trace_opts[i].bit)
4670                         seq_printf(m, "%s\n", trace_opts[i].name);
4671                 else
4672                         seq_printf(m, "no%s\n", trace_opts[i].name);
4673         }
4674         mutex_unlock(&trace_types_lock);
4675
4676         return 0;
4677 }
4678
4679 static int __set_tracer_option(struct trace_array *tr,
4680                                struct tracer_flags *tracer_flags,
4681                                struct tracer_opt *opts, int neg)
4682 {
4683         struct tracer *trace = tracer_flags->trace;
4684         int ret;
4685
4686         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4687         if (ret)
4688                 return ret;
4689
4690         if (neg)
4691                 tracer_flags->val &= ~opts->bit;
4692         else
4693                 tracer_flags->val |= opts->bit;
4694         return 0;
4695 }
4696
4697 /* Try to assign a tracer specific option */
4698 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4699 {
4700         struct tracer *trace = tr->current_trace;
4701         struct tracer_flags *tracer_flags = trace->flags;
4702         struct tracer_opt *opts = NULL;
4703         int i;
4704
4705         for (i = 0; tracer_flags->opts[i].name; i++) {
4706                 opts = &tracer_flags->opts[i];
4707
4708                 if (strcmp(cmp, opts->name) == 0)
4709                         return __set_tracer_option(tr, trace->flags, opts, neg);
4710         }
4711
4712         return -EINVAL;
4713 }
4714
4715 /* Some tracers require overwrite to stay enabled */
4716 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4717 {
4718         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4719                 return -1;
4720
4721         return 0;
4722 }
4723
4724 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4725 {
4726         if ((mask == TRACE_ITER_RECORD_TGID) ||
4727             (mask == TRACE_ITER_RECORD_CMD))
4728                 lockdep_assert_held(&event_mutex);
4729
4730         /* do nothing if flag is already set */
4731         if (!!(tr->trace_flags & mask) == !!enabled)
4732                 return 0;
4733
4734         /* Give the tracer a chance to approve the change */
4735         if (tr->current_trace->flag_changed)
4736                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4737                         return -EINVAL;
4738
4739         if (enabled)
4740                 tr->trace_flags |= mask;
4741         else
4742                 tr->trace_flags &= ~mask;
4743
4744         if (mask == TRACE_ITER_RECORD_CMD)
4745                 trace_event_enable_cmd_record(enabled);
4746
4747         if (mask == TRACE_ITER_RECORD_TGID) {
4748                 if (!tgid_map)
4749                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4750                                            sizeof(*tgid_map),
4751                                            GFP_KERNEL);
4752                 if (!tgid_map) {
4753                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4754                         return -ENOMEM;
4755                 }
4756
4757                 trace_event_enable_tgid_record(enabled);
4758         }
4759
4760         if (mask == TRACE_ITER_EVENT_FORK)
4761                 trace_event_follow_fork(tr, enabled);
4762
4763         if (mask == TRACE_ITER_FUNC_FORK)
4764                 ftrace_pid_follow_fork(tr, enabled);
4765
4766         if (mask == TRACE_ITER_OVERWRITE) {
4767                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4768 #ifdef CONFIG_TRACER_MAX_TRACE
4769                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4770 #endif
4771         }
4772
4773         if (mask == TRACE_ITER_PRINTK) {
4774                 trace_printk_start_stop_comm(enabled);
4775                 trace_printk_control(enabled);
4776         }
4777
4778         return 0;
4779 }
4780
4781 int trace_set_options(struct trace_array *tr, char *option)
4782 {
4783         char *cmp;
4784         int neg = 0;
4785         int ret;
4786         size_t orig_len = strlen(option);
4787         int len;
4788
4789         cmp = strstrip(option);
4790
4791         len = str_has_prefix(cmp, "no");
4792         if (len)
4793                 neg = 1;
4794
4795         cmp += len;
4796
4797         mutex_lock(&event_mutex);
4798         mutex_lock(&trace_types_lock);
4799
4800         ret = match_string(trace_options, -1, cmp);
4801         /* If no option could be set, test the specific tracer options */
4802         if (ret < 0)
4803                 ret = set_tracer_option(tr, cmp, neg);
4804         else
4805                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4806
4807         mutex_unlock(&trace_types_lock);
4808         mutex_unlock(&event_mutex);
4809
4810         /*
4811          * If the first trailing whitespace is replaced with '\0' by strstrip,
4812          * turn it back into a space.
4813          */
4814         if (orig_len > strlen(option))
4815                 option[strlen(option)] = ' ';
4816
4817         return ret;
4818 }
4819
4820 static void __init apply_trace_boot_options(void)
4821 {
4822         char *buf = trace_boot_options_buf;
4823         char *option;
4824
4825         while (true) {
4826                 option = strsep(&buf, ",");
4827
4828                 if (!option)
4829                         break;
4830
4831                 if (*option)
4832                         trace_set_options(&global_trace, option);
4833
4834                 /* Put back the comma to allow this to be called again */
4835                 if (buf)
4836                         *(buf - 1) = ',';
4837         }
4838 }
4839
4840 static ssize_t
4841 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4842                         size_t cnt, loff_t *ppos)
4843 {
4844         struct seq_file *m = filp->private_data;
4845         struct trace_array *tr = m->private;
4846         char buf[64];
4847         int ret;
4848
4849         if (cnt >= sizeof(buf))
4850                 return -EINVAL;
4851
4852         if (copy_from_user(buf, ubuf, cnt))
4853                 return -EFAULT;
4854
4855         buf[cnt] = 0;
4856
4857         ret = trace_set_options(tr, buf);
4858         if (ret < 0)
4859                 return ret;
4860
4861         *ppos += cnt;
4862
4863         return cnt;
4864 }
4865
4866 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4867 {
4868         struct trace_array *tr = inode->i_private;
4869         int ret;
4870
4871         ret = tracing_check_open_get_tr(tr);
4872         if (ret)
4873                 return ret;
4874
4875         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4876         if (ret < 0)
4877                 trace_array_put(tr);
4878
4879         return ret;
4880 }
4881
4882 static const struct file_operations tracing_iter_fops = {
4883         .open           = tracing_trace_options_open,
4884         .read           = seq_read,
4885         .llseek         = seq_lseek,
4886         .release        = tracing_single_release_tr,
4887         .write          = tracing_trace_options_write,
4888 };
4889
4890 static const char readme_msg[] =
4891         "tracing mini-HOWTO:\n\n"
4892         "# echo 0 > tracing_on : quick way to disable tracing\n"
4893         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4894         " Important files:\n"
4895         "  trace\t\t\t- The static contents of the buffer\n"
4896         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4897         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4898         "  current_tracer\t- function and latency tracers\n"
4899         "  available_tracers\t- list of configured tracers for current_tracer\n"
4900         "  error_log\t- error log for failed commands (that support it)\n"
4901         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4902         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4903         "  trace_clock\t\t-change the clock used to order events\n"
4904         "       local:   Per cpu clock but may not be synced across CPUs\n"
4905         "      global:   Synced across CPUs but slows tracing down.\n"
4906         "     counter:   Not a clock, but just an increment\n"
4907         "      uptime:   Jiffy counter from time of boot\n"
4908         "        perf:   Same clock that perf events use\n"
4909 #ifdef CONFIG_X86_64
4910         "     x86-tsc:   TSC cycle counter\n"
4911 #endif
4912         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4913         "       delta:   Delta difference against a buffer-wide timestamp\n"
4914         "    absolute:   Absolute (standalone) timestamp\n"
4915         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4916         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4917         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4918         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4919         "\t\t\t  Remove sub-buffer with rmdir\n"
4920         "  trace_options\t\t- Set format or modify how tracing happens\n"
4921         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4922         "\t\t\t  option name\n"
4923         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4924 #ifdef CONFIG_DYNAMIC_FTRACE
4925         "\n  available_filter_functions - list of functions that can be filtered on\n"
4926         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4927         "\t\t\t  functions\n"
4928         "\t     accepts: func_full_name or glob-matching-pattern\n"
4929         "\t     modules: Can select a group via module\n"
4930         "\t      Format: :mod:<module-name>\n"
4931         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4932         "\t    triggers: a command to perform when function is hit\n"
4933         "\t      Format: <function>:<trigger>[:count]\n"
4934         "\t     trigger: traceon, traceoff\n"
4935         "\t\t      enable_event:<system>:<event>\n"
4936         "\t\t      disable_event:<system>:<event>\n"
4937 #ifdef CONFIG_STACKTRACE
4938         "\t\t      stacktrace\n"
4939 #endif
4940 #ifdef CONFIG_TRACER_SNAPSHOT
4941         "\t\t      snapshot\n"
4942 #endif
4943         "\t\t      dump\n"
4944         "\t\t      cpudump\n"
4945         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4946         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4947         "\t     The first one will disable tracing every time do_fault is hit\n"
4948         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4949         "\t       The first time do trap is hit and it disables tracing, the\n"
4950         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4951         "\t       the counter will not decrement. It only decrements when the\n"
4952         "\t       trigger did work\n"
4953         "\t     To remove trigger without count:\n"
4954         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4955         "\t     To remove trigger with a count:\n"
4956         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4957         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4958         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4959         "\t    modules: Can select a group via module command :mod:\n"
4960         "\t    Does not accept triggers\n"
4961 #endif /* CONFIG_DYNAMIC_FTRACE */
4962 #ifdef CONFIG_FUNCTION_TRACER
4963         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4964         "\t\t    (function)\n"
4965 #endif
4966 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4967         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4968         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4969         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4970 #endif
4971 #ifdef CONFIG_TRACER_SNAPSHOT
4972         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4973         "\t\t\t  snapshot buffer. Read the contents for more\n"
4974         "\t\t\t  information\n"
4975 #endif
4976 #ifdef CONFIG_STACK_TRACER
4977         "  stack_trace\t\t- Shows the max stack trace when active\n"
4978         "  stack_max_size\t- Shows current max stack size that was traced\n"
4979         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4980         "\t\t\t  new trace)\n"
4981 #ifdef CONFIG_DYNAMIC_FTRACE
4982         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4983         "\t\t\t  traces\n"
4984 #endif
4985 #endif /* CONFIG_STACK_TRACER */
4986 #ifdef CONFIG_DYNAMIC_EVENTS
4987         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4988         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4989 #endif
4990 #ifdef CONFIG_KPROBE_EVENTS
4991         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4992         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4993 #endif
4994 #ifdef CONFIG_UPROBE_EVENTS
4995         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4996         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4997 #endif
4998 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4999         "\t  accepts: event-definitions (one definition per line)\n"
5000         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5001         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5002 #ifdef CONFIG_HIST_TRIGGERS
5003         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5004 #endif
5005         "\t           -:[<group>/]<event>\n"
5006 #ifdef CONFIG_KPROBE_EVENTS
5007         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5008   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5009 #endif
5010 #ifdef CONFIG_UPROBE_EVENTS
5011   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5012 #endif
5013         "\t     args: <name>=fetcharg[:type]\n"
5014         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5015 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5016         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5017 #else
5018         "\t           $stack<index>, $stack, $retval, $comm,\n"
5019 #endif
5020         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5021         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5022         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5023         "\t           <type>\\[<array-size>\\]\n"
5024 #ifdef CONFIG_HIST_TRIGGERS
5025         "\t    field: <stype> <name>;\n"
5026         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5027         "\t           [unsigned] char/int/long\n"
5028 #endif
5029 #endif
5030         "  events/\t\t- Directory containing all trace event subsystems:\n"
5031         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5032         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5033         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5034         "\t\t\t  events\n"
5035         "      filter\t\t- If set, only events passing filter are traced\n"
5036         "  events/<system>/<event>/\t- Directory containing control files for\n"
5037         "\t\t\t  <event>:\n"
5038         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5039         "      filter\t\t- If set, only events passing filter are traced\n"
5040         "      trigger\t\t- If set, a command to perform when event is hit\n"
5041         "\t    Format: <trigger>[:count][if <filter>]\n"
5042         "\t   trigger: traceon, traceoff\n"
5043         "\t            enable_event:<system>:<event>\n"
5044         "\t            disable_event:<system>:<event>\n"
5045 #ifdef CONFIG_HIST_TRIGGERS
5046         "\t            enable_hist:<system>:<event>\n"
5047         "\t            disable_hist:<system>:<event>\n"
5048 #endif
5049 #ifdef CONFIG_STACKTRACE
5050         "\t\t    stacktrace\n"
5051 #endif
5052 #ifdef CONFIG_TRACER_SNAPSHOT
5053         "\t\t    snapshot\n"
5054 #endif
5055 #ifdef CONFIG_HIST_TRIGGERS
5056         "\t\t    hist (see below)\n"
5057 #endif
5058         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5059         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5060         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5061         "\t                  events/block/block_unplug/trigger\n"
5062         "\t   The first disables tracing every time block_unplug is hit.\n"
5063         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5064         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5065         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5066         "\t   Like function triggers, the counter is only decremented if it\n"
5067         "\t    enabled or disabled tracing.\n"
5068         "\t   To remove a trigger without a count:\n"
5069         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5070         "\t   To remove a trigger with a count:\n"
5071         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5072         "\t   Filters can be ignored when removing a trigger.\n"
5073 #ifdef CONFIG_HIST_TRIGGERS
5074         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5075         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5076         "\t            [:values=<field1[,field2,...]>]\n"
5077         "\t            [:sort=<field1[,field2,...]>]\n"
5078         "\t            [:size=#entries]\n"
5079         "\t            [:pause][:continue][:clear]\n"
5080         "\t            [:name=histname1]\n"
5081         "\t            [:<handler>.<action>]\n"
5082         "\t            [if <filter>]\n\n"
5083         "\t    When a matching event is hit, an entry is added to a hash\n"
5084         "\t    table using the key(s) and value(s) named, and the value of a\n"
5085         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5086         "\t    correspond to fields in the event's format description.  Keys\n"
5087         "\t    can be any field, or the special string 'stacktrace'.\n"
5088         "\t    Compound keys consisting of up to two fields can be specified\n"
5089         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5090         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5091         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5092         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5093         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5094         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5095         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5096         "\t    its histogram data will be shared with other triggers of the\n"
5097         "\t    same name, and trigger hits will update this common data.\n\n"
5098         "\t    Reading the 'hist' file for the event will dump the hash\n"
5099         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5100         "\t    triggers attached to an event, there will be a table for each\n"
5101         "\t    trigger in the output.  The table displayed for a named\n"
5102         "\t    trigger will be the same as any other instance having the\n"
5103         "\t    same name.  The default format used to display a given field\n"
5104         "\t    can be modified by appending any of the following modifiers\n"
5105         "\t    to the field name, as applicable:\n\n"
5106         "\t            .hex        display a number as a hex value\n"
5107         "\t            .sym        display an address as a symbol\n"
5108         "\t            .sym-offset display an address as a symbol and offset\n"
5109         "\t            .execname   display a common_pid as a program name\n"
5110         "\t            .syscall    display a syscall id as a syscall name\n"
5111         "\t            .log2       display log2 value rather than raw number\n"
5112         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5113         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5114         "\t    trigger or to start a hist trigger but not log any events\n"
5115         "\t    until told to do so.  'continue' can be used to start or\n"
5116         "\t    restart a paused hist trigger.\n\n"
5117         "\t    The 'clear' parameter will clear the contents of a running\n"
5118         "\t    hist trigger and leave its current paused/active state\n"
5119         "\t    unchanged.\n\n"
5120         "\t    The enable_hist and disable_hist triggers can be used to\n"
5121         "\t    have one event conditionally start and stop another event's\n"
5122         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5123         "\t    the enable_event and disable_event triggers.\n\n"
5124         "\t    Hist trigger handlers and actions are executed whenever a\n"
5125         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5126         "\t        <handler>.<action>\n\n"
5127         "\t    The available handlers are:\n\n"
5128         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5129         "\t        onmax(var)               - invoke if var exceeds current max\n"
5130         "\t        onchange(var)            - invoke action if var changes\n\n"
5131         "\t    The available actions are:\n\n"
5132         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5133         "\t        save(field,...)                      - save current event fields\n"
5134 #ifdef CONFIG_TRACER_SNAPSHOT
5135         "\t        snapshot()                           - snapshot the trace buffer\n"
5136 #endif
5137 #endif
5138 ;
5139
5140 static ssize_t
5141 tracing_readme_read(struct file *filp, char __user *ubuf,
5142                        size_t cnt, loff_t *ppos)
5143 {
5144         return simple_read_from_buffer(ubuf, cnt, ppos,
5145                                         readme_msg, strlen(readme_msg));
5146 }
5147
5148 static const struct file_operations tracing_readme_fops = {
5149         .open           = tracing_open_generic,
5150         .read           = tracing_readme_read,
5151         .llseek         = generic_file_llseek,
5152 };
5153
5154 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5155 {
5156         int *ptr = v;
5157
5158         if (*pos || m->count)
5159                 ptr++;
5160
5161         (*pos)++;
5162
5163         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5164                 if (trace_find_tgid(*ptr))
5165                         return ptr;
5166         }
5167
5168         return NULL;
5169 }
5170
5171 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5172 {
5173         void *v;
5174         loff_t l = 0;
5175
5176         if (!tgid_map)
5177                 return NULL;
5178
5179         v = &tgid_map[0];
5180         while (l <= *pos) {
5181                 v = saved_tgids_next(m, v, &l);
5182                 if (!v)
5183                         return NULL;
5184         }
5185
5186         return v;
5187 }
5188
5189 static void saved_tgids_stop(struct seq_file *m, void *v)
5190 {
5191 }
5192
5193 static int saved_tgids_show(struct seq_file *m, void *v)
5194 {
5195         int pid = (int *)v - tgid_map;
5196
5197         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5198         return 0;
5199 }
5200
5201 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5202         .start          = saved_tgids_start,
5203         .stop           = saved_tgids_stop,
5204         .next           = saved_tgids_next,
5205         .show           = saved_tgids_show,
5206 };
5207
5208 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5209 {
5210         int ret;
5211
5212         ret = tracing_check_open_get_tr(NULL);
5213         if (ret)
5214                 return ret;
5215
5216         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5217 }
5218
5219
5220 static const struct file_operations tracing_saved_tgids_fops = {
5221         .open           = tracing_saved_tgids_open,
5222         .read           = seq_read,
5223         .llseek         = seq_lseek,
5224         .release        = seq_release,
5225 };
5226
5227 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5228 {
5229         unsigned int *ptr = v;
5230
5231         if (*pos || m->count)
5232                 ptr++;
5233
5234         (*pos)++;
5235
5236         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5237              ptr++) {
5238                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5239                         continue;
5240
5241                 return ptr;
5242         }
5243
5244         return NULL;
5245 }
5246
5247 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5248 {
5249         void *v;
5250         loff_t l = 0;
5251
5252         preempt_disable();
5253         arch_spin_lock(&trace_cmdline_lock);
5254
5255         v = &savedcmd->map_cmdline_to_pid[0];
5256         while (l <= *pos) {
5257                 v = saved_cmdlines_next(m, v, &l);
5258                 if (!v)
5259                         return NULL;
5260         }
5261
5262         return v;
5263 }
5264
5265 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5266 {
5267         arch_spin_unlock(&trace_cmdline_lock);
5268         preempt_enable();
5269 }
5270
5271 static int saved_cmdlines_show(struct seq_file *m, void *v)
5272 {
5273         char buf[TASK_COMM_LEN];
5274         unsigned int *pid = v;
5275
5276         __trace_find_cmdline(*pid, buf);
5277         seq_printf(m, "%d %s\n", *pid, buf);
5278         return 0;
5279 }
5280
5281 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5282         .start          = saved_cmdlines_start,
5283         .next           = saved_cmdlines_next,
5284         .stop           = saved_cmdlines_stop,
5285         .show           = saved_cmdlines_show,
5286 };
5287
5288 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5289 {
5290         int ret;
5291
5292         ret = tracing_check_open_get_tr(NULL);
5293         if (ret)
5294                 return ret;
5295
5296         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5297 }
5298
5299 static const struct file_operations tracing_saved_cmdlines_fops = {
5300         .open           = tracing_saved_cmdlines_open,
5301         .read           = seq_read,
5302         .llseek         = seq_lseek,
5303         .release        = seq_release,
5304 };
5305
5306 static ssize_t
5307 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5308                                  size_t cnt, loff_t *ppos)
5309 {
5310         char buf[64];
5311         int r;
5312
5313         arch_spin_lock(&trace_cmdline_lock);
5314         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5315         arch_spin_unlock(&trace_cmdline_lock);
5316
5317         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5318 }
5319
5320 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5321 {
5322         kfree(s->saved_cmdlines);
5323         kfree(s->map_cmdline_to_pid);
5324         kfree(s);
5325 }
5326
5327 static int tracing_resize_saved_cmdlines(unsigned int val)
5328 {
5329         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5330
5331         s = kmalloc(sizeof(*s), GFP_KERNEL);
5332         if (!s)
5333                 return -ENOMEM;
5334
5335         if (allocate_cmdlines_buffer(val, s) < 0) {
5336                 kfree(s);
5337                 return -ENOMEM;
5338         }
5339
5340         arch_spin_lock(&trace_cmdline_lock);
5341         savedcmd_temp = savedcmd;
5342         savedcmd = s;
5343         arch_spin_unlock(&trace_cmdline_lock);
5344         free_saved_cmdlines_buffer(savedcmd_temp);
5345
5346         return 0;
5347 }
5348
5349 static ssize_t
5350 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5351                                   size_t cnt, loff_t *ppos)
5352 {
5353         unsigned long val;
5354         int ret;
5355
5356         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5357         if (ret)
5358                 return ret;
5359
5360         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5361         if (!val || val > PID_MAX_DEFAULT)
5362                 return -EINVAL;
5363
5364         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5365         if (ret < 0)
5366                 return ret;
5367
5368         *ppos += cnt;
5369
5370         return cnt;
5371 }
5372
5373 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5374         .open           = tracing_open_generic,
5375         .read           = tracing_saved_cmdlines_size_read,
5376         .write          = tracing_saved_cmdlines_size_write,
5377 };
5378
5379 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5380 static union trace_eval_map_item *
5381 update_eval_map(union trace_eval_map_item *ptr)
5382 {
5383         if (!ptr->map.eval_string) {
5384                 if (ptr->tail.next) {
5385                         ptr = ptr->tail.next;
5386                         /* Set ptr to the next real item (skip head) */
5387                         ptr++;
5388                 } else
5389                         return NULL;
5390         }
5391         return ptr;
5392 }
5393
5394 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5395 {
5396         union trace_eval_map_item *ptr = v;
5397
5398         /*
5399          * Paranoid! If ptr points to end, we don't want to increment past it.
5400          * This really should never happen.
5401          */
5402         (*pos)++;
5403         ptr = update_eval_map(ptr);
5404         if (WARN_ON_ONCE(!ptr))
5405                 return NULL;
5406
5407         ptr++;
5408         ptr = update_eval_map(ptr);
5409
5410         return ptr;
5411 }
5412
5413 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5414 {
5415         union trace_eval_map_item *v;
5416         loff_t l = 0;
5417
5418         mutex_lock(&trace_eval_mutex);
5419
5420         v = trace_eval_maps;
5421         if (v)
5422                 v++;
5423
5424         while (v && l < *pos) {
5425                 v = eval_map_next(m, v, &l);
5426         }
5427
5428         return v;
5429 }
5430
5431 static void eval_map_stop(struct seq_file *m, void *v)
5432 {
5433         mutex_unlock(&trace_eval_mutex);
5434 }
5435
5436 static int eval_map_show(struct seq_file *m, void *v)
5437 {
5438         union trace_eval_map_item *ptr = v;
5439
5440         seq_printf(m, "%s %ld (%s)\n",
5441                    ptr->map.eval_string, ptr->map.eval_value,
5442                    ptr->map.system);
5443
5444         return 0;
5445 }
5446
5447 static const struct seq_operations tracing_eval_map_seq_ops = {
5448         .start          = eval_map_start,
5449         .next           = eval_map_next,
5450         .stop           = eval_map_stop,
5451         .show           = eval_map_show,
5452 };
5453
5454 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5455 {
5456         int ret;
5457
5458         ret = tracing_check_open_get_tr(NULL);
5459         if (ret)
5460                 return ret;
5461
5462         return seq_open(filp, &tracing_eval_map_seq_ops);
5463 }
5464
5465 static const struct file_operations tracing_eval_map_fops = {
5466         .open           = tracing_eval_map_open,
5467         .read           = seq_read,
5468         .llseek         = seq_lseek,
5469         .release        = seq_release,
5470 };
5471
5472 static inline union trace_eval_map_item *
5473 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5474 {
5475         /* Return tail of array given the head */
5476         return ptr + ptr->head.length + 1;
5477 }
5478
5479 static void
5480 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5481                            int len)
5482 {
5483         struct trace_eval_map **stop;
5484         struct trace_eval_map **map;
5485         union trace_eval_map_item *map_array;
5486         union trace_eval_map_item *ptr;
5487
5488         stop = start + len;
5489
5490         /*
5491          * The trace_eval_maps contains the map plus a head and tail item,
5492          * where the head holds the module and length of array, and the
5493          * tail holds a pointer to the next list.
5494          */
5495         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5496         if (!map_array) {
5497                 pr_warn("Unable to allocate trace eval mapping\n");
5498                 return;
5499         }
5500
5501         mutex_lock(&trace_eval_mutex);
5502
5503         if (!trace_eval_maps)
5504                 trace_eval_maps = map_array;
5505         else {
5506                 ptr = trace_eval_maps;
5507                 for (;;) {
5508                         ptr = trace_eval_jmp_to_tail(ptr);
5509                         if (!ptr->tail.next)
5510                                 break;
5511                         ptr = ptr->tail.next;
5512
5513                 }
5514                 ptr->tail.next = map_array;
5515         }
5516         map_array->head.mod = mod;
5517         map_array->head.length = len;
5518         map_array++;
5519
5520         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5521                 map_array->map = **map;
5522                 map_array++;
5523         }
5524         memset(map_array, 0, sizeof(*map_array));
5525
5526         mutex_unlock(&trace_eval_mutex);
5527 }
5528
5529 static void trace_create_eval_file(struct dentry *d_tracer)
5530 {
5531         trace_create_file("eval_map", 0444, d_tracer,
5532                           NULL, &tracing_eval_map_fops);
5533 }
5534
5535 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5536 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5537 static inline void trace_insert_eval_map_file(struct module *mod,
5538                               struct trace_eval_map **start, int len) { }
5539 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5540
5541 static void trace_insert_eval_map(struct module *mod,
5542                                   struct trace_eval_map **start, int len)
5543 {
5544         struct trace_eval_map **map;
5545
5546         if (len <= 0)
5547                 return;
5548
5549         map = start;
5550
5551         trace_event_eval_update(map, len);
5552
5553         trace_insert_eval_map_file(mod, start, len);
5554 }
5555
5556 static ssize_t
5557 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5558                        size_t cnt, loff_t *ppos)
5559 {
5560         struct trace_array *tr = filp->private_data;
5561         char buf[MAX_TRACER_SIZE+2];
5562         int r;
5563
5564         mutex_lock(&trace_types_lock);
5565         r = sprintf(buf, "%s\n", tr->current_trace->name);
5566         mutex_unlock(&trace_types_lock);
5567
5568         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5569 }
5570
5571 int tracer_init(struct tracer *t, struct trace_array *tr)
5572 {
5573         tracing_reset_online_cpus(&tr->array_buffer);
5574         return t->init(tr);
5575 }
5576
5577 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5578 {
5579         int cpu;
5580
5581         for_each_tracing_cpu(cpu)
5582                 per_cpu_ptr(buf->data, cpu)->entries = val;
5583 }
5584
5585 #ifdef CONFIG_TRACER_MAX_TRACE
5586 /* resize @tr's buffer to the size of @size_tr's entries */
5587 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5588                                         struct array_buffer *size_buf, int cpu_id)
5589 {
5590         int cpu, ret = 0;
5591
5592         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5593                 for_each_tracing_cpu(cpu) {
5594                         ret = ring_buffer_resize(trace_buf->buffer,
5595                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5596                         if (ret < 0)
5597                                 break;
5598                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5599                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5600                 }
5601         } else {
5602                 ret = ring_buffer_resize(trace_buf->buffer,
5603                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5604                 if (ret == 0)
5605                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5606                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5607         }
5608
5609         return ret;
5610 }
5611 #endif /* CONFIG_TRACER_MAX_TRACE */
5612
5613 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5614                                         unsigned long size, int cpu)
5615 {
5616         int ret;
5617
5618         /*
5619          * If kernel or user changes the size of the ring buffer
5620          * we use the size that was given, and we can forget about
5621          * expanding it later.
5622          */
5623         ring_buffer_expanded = true;
5624
5625         /* May be called before buffers are initialized */
5626         if (!tr->array_buffer.buffer)
5627                 return 0;
5628
5629         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5630         if (ret < 0)
5631                 return ret;
5632
5633 #ifdef CONFIG_TRACER_MAX_TRACE
5634         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5635             !tr->current_trace->use_max_tr)
5636                 goto out;
5637
5638         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5639         if (ret < 0) {
5640                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5641                                                      &tr->array_buffer, cpu);
5642                 if (r < 0) {
5643                         /*
5644                          * AARGH! We are left with different
5645                          * size max buffer!!!!
5646                          * The max buffer is our "snapshot" buffer.
5647                          * When a tracer needs a snapshot (one of the
5648                          * latency tracers), it swaps the max buffer
5649                          * with the saved snap shot. We succeeded to
5650                          * update the size of the main buffer, but failed to
5651                          * update the size of the max buffer. But when we tried
5652                          * to reset the main buffer to the original size, we
5653                          * failed there too. This is very unlikely to
5654                          * happen, but if it does, warn and kill all
5655                          * tracing.
5656                          */
5657                         WARN_ON(1);
5658                         tracing_disabled = 1;
5659                 }
5660                 return ret;
5661         }
5662
5663         if (cpu == RING_BUFFER_ALL_CPUS)
5664                 set_buffer_entries(&tr->max_buffer, size);
5665         else
5666                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5667
5668  out:
5669 #endif /* CONFIG_TRACER_MAX_TRACE */
5670
5671         if (cpu == RING_BUFFER_ALL_CPUS)
5672                 set_buffer_entries(&tr->array_buffer, size);
5673         else
5674                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5675
5676         return ret;
5677 }
5678
5679 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5680                                   unsigned long size, int cpu_id)
5681 {
5682         int ret = size;
5683
5684         mutex_lock(&trace_types_lock);
5685
5686         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5687                 /* make sure, this cpu is enabled in the mask */
5688                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5689                         ret = -EINVAL;
5690                         goto out;
5691                 }
5692         }
5693
5694         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5695         if (ret < 0)
5696                 ret = -ENOMEM;
5697
5698 out:
5699         mutex_unlock(&trace_types_lock);
5700
5701         return ret;
5702 }
5703
5704
5705 /**
5706  * tracing_update_buffers - used by tracing facility to expand ring buffers
5707  *
5708  * To save on memory when the tracing is never used on a system with it
5709  * configured in. The ring buffers are set to a minimum size. But once
5710  * a user starts to use the tracing facility, then they need to grow
5711  * to their default size.
5712  *
5713  * This function is to be called when a tracer is about to be used.
5714  */
5715 int tracing_update_buffers(void)
5716 {
5717         int ret = 0;
5718
5719         mutex_lock(&trace_types_lock);
5720         if (!ring_buffer_expanded)
5721                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5722                                                 RING_BUFFER_ALL_CPUS);
5723         mutex_unlock(&trace_types_lock);
5724
5725         return ret;
5726 }
5727
5728 struct trace_option_dentry;
5729
5730 static void
5731 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5732
5733 /*
5734  * Used to clear out the tracer before deletion of an instance.
5735  * Must have trace_types_lock held.
5736  */
5737 static void tracing_set_nop(struct trace_array *tr)
5738 {
5739         if (tr->current_trace == &nop_trace)
5740                 return;
5741         
5742         tr->current_trace->enabled--;
5743
5744         if (tr->current_trace->reset)
5745                 tr->current_trace->reset(tr);
5746
5747         tr->current_trace = &nop_trace;
5748 }
5749
5750 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5751 {
5752         /* Only enable if the directory has been created already. */
5753         if (!tr->dir)
5754                 return;
5755
5756         create_trace_option_files(tr, t);
5757 }
5758
5759 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5760 {
5761         struct tracer *t;
5762 #ifdef CONFIG_TRACER_MAX_TRACE
5763         bool had_max_tr;
5764 #endif
5765         int ret = 0;
5766
5767         mutex_lock(&trace_types_lock);
5768
5769         if (!ring_buffer_expanded) {
5770                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5771                                                 RING_BUFFER_ALL_CPUS);
5772                 if (ret < 0)
5773                         goto out;
5774                 ret = 0;
5775         }
5776
5777         for (t = trace_types; t; t = t->next) {
5778                 if (strcmp(t->name, buf) == 0)
5779                         break;
5780         }
5781         if (!t) {
5782                 ret = -EINVAL;
5783                 goto out;
5784         }
5785         if (t == tr->current_trace)
5786                 goto out;
5787
5788 #ifdef CONFIG_TRACER_SNAPSHOT
5789         if (t->use_max_tr) {
5790                 arch_spin_lock(&tr->max_lock);
5791                 if (tr->cond_snapshot)
5792                         ret = -EBUSY;
5793                 arch_spin_unlock(&tr->max_lock);
5794                 if (ret)
5795                         goto out;
5796         }
5797 #endif
5798         /* Some tracers won't work on kernel command line */
5799         if (system_state < SYSTEM_RUNNING && t->noboot) {
5800                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5801                         t->name);
5802                 goto out;
5803         }
5804
5805         /* Some tracers are only allowed for the top level buffer */
5806         if (!trace_ok_for_array(t, tr)) {
5807                 ret = -EINVAL;
5808                 goto out;
5809         }
5810
5811         /* If trace pipe files are being read, we can't change the tracer */
5812         if (tr->current_trace->ref) {
5813                 ret = -EBUSY;
5814                 goto out;
5815         }
5816
5817         trace_branch_disable();
5818
5819         tr->current_trace->enabled--;
5820
5821         if (tr->current_trace->reset)
5822                 tr->current_trace->reset(tr);
5823
5824         /* Current trace needs to be nop_trace before synchronize_rcu */
5825         tr->current_trace = &nop_trace;
5826
5827 #ifdef CONFIG_TRACER_MAX_TRACE
5828         had_max_tr = tr->allocated_snapshot;
5829
5830         if (had_max_tr && !t->use_max_tr) {
5831                 /*
5832                  * We need to make sure that the update_max_tr sees that
5833                  * current_trace changed to nop_trace to keep it from
5834                  * swapping the buffers after we resize it.
5835                  * The update_max_tr is called from interrupts disabled
5836                  * so a synchronized_sched() is sufficient.
5837                  */
5838                 synchronize_rcu();
5839                 free_snapshot(tr);
5840         }
5841 #endif
5842
5843 #ifdef CONFIG_TRACER_MAX_TRACE
5844         if (t->use_max_tr && !had_max_tr) {
5845                 ret = tracing_alloc_snapshot_instance(tr);
5846                 if (ret < 0)
5847                         goto out;
5848         }
5849 #endif
5850
5851         if (t->init) {
5852                 ret = tracer_init(t, tr);
5853                 if (ret)
5854                         goto out;
5855         }
5856
5857         tr->current_trace = t;
5858         tr->current_trace->enabled++;
5859         trace_branch_enable(tr);
5860  out:
5861         mutex_unlock(&trace_types_lock);
5862
5863         return ret;
5864 }
5865
5866 static ssize_t
5867 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5868                         size_t cnt, loff_t *ppos)
5869 {
5870         struct trace_array *tr = filp->private_data;
5871         char buf[MAX_TRACER_SIZE+1];
5872         int i;
5873         size_t ret;
5874         int err;
5875
5876         ret = cnt;
5877
5878         if (cnt > MAX_TRACER_SIZE)
5879                 cnt = MAX_TRACER_SIZE;
5880
5881         if (copy_from_user(buf, ubuf, cnt))
5882                 return -EFAULT;
5883
5884         buf[cnt] = 0;
5885
5886         /* strip ending whitespace. */
5887         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5888                 buf[i] = 0;
5889
5890         err = tracing_set_tracer(tr, buf);
5891         if (err)
5892                 return err;
5893
5894         *ppos += ret;
5895
5896         return ret;
5897 }
5898
5899 static ssize_t
5900 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5901                    size_t cnt, loff_t *ppos)
5902 {
5903         char buf[64];
5904         int r;
5905
5906         r = snprintf(buf, sizeof(buf), "%ld\n",
5907                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5908         if (r > sizeof(buf))
5909                 r = sizeof(buf);
5910         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5911 }
5912
5913 static ssize_t
5914 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5915                     size_t cnt, loff_t *ppos)
5916 {
5917         unsigned long val;
5918         int ret;
5919
5920         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5921         if (ret)
5922                 return ret;
5923
5924         *ptr = val * 1000;
5925
5926         return cnt;
5927 }
5928
5929 static ssize_t
5930 tracing_thresh_read(struct file *filp, char __user *ubuf,
5931                     size_t cnt, loff_t *ppos)
5932 {
5933         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5934 }
5935
5936 static ssize_t
5937 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5938                      size_t cnt, loff_t *ppos)
5939 {
5940         struct trace_array *tr = filp->private_data;
5941         int ret;
5942
5943         mutex_lock(&trace_types_lock);
5944         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5945         if (ret < 0)
5946                 goto out;
5947
5948         if (tr->current_trace->update_thresh) {
5949                 ret = tr->current_trace->update_thresh(tr);
5950                 if (ret < 0)
5951                         goto out;
5952         }
5953
5954         ret = cnt;
5955 out:
5956         mutex_unlock(&trace_types_lock);
5957
5958         return ret;
5959 }
5960
5961 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5962
5963 static ssize_t
5964 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5965                      size_t cnt, loff_t *ppos)
5966 {
5967         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5968 }
5969
5970 static ssize_t
5971 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5972                       size_t cnt, loff_t *ppos)
5973 {
5974         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5975 }
5976
5977 #endif
5978
5979 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5980 {
5981         struct trace_array *tr = inode->i_private;
5982         struct trace_iterator *iter;
5983         int ret;
5984
5985         ret = tracing_check_open_get_tr(tr);
5986         if (ret)
5987                 return ret;
5988
5989         mutex_lock(&trace_types_lock);
5990
5991         /* create a buffer to store the information to pass to userspace */
5992         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5993         if (!iter) {
5994                 ret = -ENOMEM;
5995                 __trace_array_put(tr);
5996                 goto out;
5997         }
5998
5999         trace_seq_init(&iter->seq);
6000         iter->trace = tr->current_trace;
6001
6002         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6003                 ret = -ENOMEM;
6004                 goto fail;
6005         }
6006
6007         /* trace pipe does not show start of buffer */
6008         cpumask_setall(iter->started);
6009
6010         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6011                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6012
6013         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6014         if (trace_clocks[tr->clock_id].in_ns)
6015                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6016
6017         iter->tr = tr;
6018         iter->array_buffer = &tr->array_buffer;
6019         iter->cpu_file = tracing_get_cpu(inode);
6020         mutex_init(&iter->mutex);
6021         filp->private_data = iter;
6022
6023         if (iter->trace->pipe_open)
6024                 iter->trace->pipe_open(iter);
6025
6026         nonseekable_open(inode, filp);
6027
6028         tr->current_trace->ref++;
6029 out:
6030         mutex_unlock(&trace_types_lock);
6031         return ret;
6032
6033 fail:
6034         kfree(iter);
6035         __trace_array_put(tr);
6036         mutex_unlock(&trace_types_lock);
6037         return ret;
6038 }
6039
6040 static int tracing_release_pipe(struct inode *inode, struct file *file)
6041 {
6042         struct trace_iterator *iter = file->private_data;
6043         struct trace_array *tr = inode->i_private;
6044
6045         mutex_lock(&trace_types_lock);
6046
6047         tr->current_trace->ref--;
6048
6049         if (iter->trace->pipe_close)
6050                 iter->trace->pipe_close(iter);
6051
6052         mutex_unlock(&trace_types_lock);
6053
6054         free_cpumask_var(iter->started);
6055         mutex_destroy(&iter->mutex);
6056         kfree(iter);
6057
6058         trace_array_put(tr);
6059
6060         return 0;
6061 }
6062
6063 static __poll_t
6064 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6065 {
6066         struct trace_array *tr = iter->tr;
6067
6068         /* Iterators are static, they should be filled or empty */
6069         if (trace_buffer_iter(iter, iter->cpu_file))
6070                 return EPOLLIN | EPOLLRDNORM;
6071
6072         if (tr->trace_flags & TRACE_ITER_BLOCK)
6073                 /*
6074                  * Always select as readable when in blocking mode
6075                  */
6076                 return EPOLLIN | EPOLLRDNORM;
6077         else
6078                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6079                                              filp, poll_table);
6080 }
6081
6082 static __poll_t
6083 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6084 {
6085         struct trace_iterator *iter = filp->private_data;
6086
6087         return trace_poll(iter, filp, poll_table);
6088 }
6089
6090 /* Must be called with iter->mutex held. */
6091 static int tracing_wait_pipe(struct file *filp)
6092 {
6093         struct trace_iterator *iter = filp->private_data;
6094         int ret;
6095
6096         while (trace_empty(iter)) {
6097
6098                 if ((filp->f_flags & O_NONBLOCK)) {
6099                         return -EAGAIN;
6100                 }
6101
6102                 /*
6103                  * We block until we read something and tracing is disabled.
6104                  * We still block if tracing is disabled, but we have never
6105                  * read anything. This allows a user to cat this file, and
6106                  * then enable tracing. But after we have read something,
6107                  * we give an EOF when tracing is again disabled.
6108                  *
6109                  * iter->pos will be 0 if we haven't read anything.
6110                  */
6111                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6112                         break;
6113
6114                 mutex_unlock(&iter->mutex);
6115
6116                 ret = wait_on_pipe(iter, 0);
6117
6118                 mutex_lock(&iter->mutex);
6119
6120                 if (ret)
6121                         return ret;
6122         }
6123
6124         return 1;
6125 }
6126
6127 /*
6128  * Consumer reader.
6129  */
6130 static ssize_t
6131 tracing_read_pipe(struct file *filp, char __user *ubuf,
6132                   size_t cnt, loff_t *ppos)
6133 {
6134         struct trace_iterator *iter = filp->private_data;
6135         ssize_t sret;
6136
6137         /*
6138          * Avoid more than one consumer on a single file descriptor
6139          * This is just a matter of traces coherency, the ring buffer itself
6140          * is protected.
6141          */
6142         mutex_lock(&iter->mutex);
6143
6144         /* return any leftover data */
6145         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6146         if (sret != -EBUSY)
6147                 goto out;
6148
6149         trace_seq_init(&iter->seq);
6150
6151         if (iter->trace->read) {
6152                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6153                 if (sret)
6154                         goto out;
6155         }
6156
6157 waitagain:
6158         sret = tracing_wait_pipe(filp);
6159         if (sret <= 0)
6160                 goto out;
6161
6162         /* stop when tracing is finished */
6163         if (trace_empty(iter)) {
6164                 sret = 0;
6165                 goto out;
6166         }
6167
6168         if (cnt >= PAGE_SIZE)
6169                 cnt = PAGE_SIZE - 1;
6170
6171         /* reset all but tr, trace, and overruns */
6172         memset(&iter->seq, 0,
6173                sizeof(struct trace_iterator) -
6174                offsetof(struct trace_iterator, seq));
6175         cpumask_clear(iter->started);
6176         trace_seq_init(&iter->seq);
6177         iter->pos = -1;
6178
6179         trace_event_read_lock();
6180         trace_access_lock(iter->cpu_file);
6181         while (trace_find_next_entry_inc(iter) != NULL) {
6182                 enum print_line_t ret;
6183                 int save_len = iter->seq.seq.len;
6184
6185                 ret = print_trace_line(iter);
6186                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6187                         /* don't print partial lines */
6188                         iter->seq.seq.len = save_len;
6189                         break;
6190                 }
6191                 if (ret != TRACE_TYPE_NO_CONSUME)
6192                         trace_consume(iter);
6193
6194                 if (trace_seq_used(&iter->seq) >= cnt)
6195                         break;
6196
6197                 /*
6198                  * Setting the full flag means we reached the trace_seq buffer
6199                  * size and we should leave by partial output condition above.
6200                  * One of the trace_seq_* functions is not used properly.
6201                  */
6202                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6203                           iter->ent->type);
6204         }
6205         trace_access_unlock(iter->cpu_file);
6206         trace_event_read_unlock();
6207
6208         /* Now copy what we have to the user */
6209         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6210         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6211                 trace_seq_init(&iter->seq);
6212
6213         /*
6214          * If there was nothing to send to user, in spite of consuming trace
6215          * entries, go back to wait for more entries.
6216          */
6217         if (sret == -EBUSY)
6218                 goto waitagain;
6219
6220 out:
6221         mutex_unlock(&iter->mutex);
6222
6223         return sret;
6224 }
6225
6226 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6227                                      unsigned int idx)
6228 {
6229         __free_page(spd->pages[idx]);
6230 }
6231
6232 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6233         .confirm                = generic_pipe_buf_confirm,
6234         .release                = generic_pipe_buf_release,
6235         .steal                  = generic_pipe_buf_steal,
6236         .get                    = generic_pipe_buf_get,
6237 };
6238
6239 static size_t
6240 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6241 {
6242         size_t count;
6243         int save_len;
6244         int ret;
6245
6246         /* Seq buffer is page-sized, exactly what we need. */
6247         for (;;) {
6248                 save_len = iter->seq.seq.len;
6249                 ret = print_trace_line(iter);
6250
6251                 if (trace_seq_has_overflowed(&iter->seq)) {
6252                         iter->seq.seq.len = save_len;
6253                         break;
6254                 }
6255
6256                 /*
6257                  * This should not be hit, because it should only
6258                  * be set if the iter->seq overflowed. But check it
6259                  * anyway to be safe.
6260                  */
6261                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6262                         iter->seq.seq.len = save_len;
6263                         break;
6264                 }
6265
6266                 count = trace_seq_used(&iter->seq) - save_len;
6267                 if (rem < count) {
6268                         rem = 0;
6269                         iter->seq.seq.len = save_len;
6270                         break;
6271                 }
6272
6273                 if (ret != TRACE_TYPE_NO_CONSUME)
6274                         trace_consume(iter);
6275                 rem -= count;
6276                 if (!trace_find_next_entry_inc(iter))   {
6277                         rem = 0;
6278                         iter->ent = NULL;
6279                         break;
6280                 }
6281         }
6282
6283         return rem;
6284 }
6285
6286 static ssize_t tracing_splice_read_pipe(struct file *filp,
6287                                         loff_t *ppos,
6288                                         struct pipe_inode_info *pipe,
6289                                         size_t len,
6290                                         unsigned int flags)
6291 {
6292         struct page *pages_def[PIPE_DEF_BUFFERS];
6293         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6294         struct trace_iterator *iter = filp->private_data;
6295         struct splice_pipe_desc spd = {
6296                 .pages          = pages_def,
6297                 .partial        = partial_def,
6298                 .nr_pages       = 0, /* This gets updated below. */
6299                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6300                 .ops            = &tracing_pipe_buf_ops,
6301                 .spd_release    = tracing_spd_release_pipe,
6302         };
6303         ssize_t ret;
6304         size_t rem;
6305         unsigned int i;
6306
6307         if (splice_grow_spd(pipe, &spd))
6308                 return -ENOMEM;
6309
6310         mutex_lock(&iter->mutex);
6311
6312         if (iter->trace->splice_read) {
6313                 ret = iter->trace->splice_read(iter, filp,
6314                                                ppos, pipe, len, flags);
6315                 if (ret)
6316                         goto out_err;
6317         }
6318
6319         ret = tracing_wait_pipe(filp);
6320         if (ret <= 0)
6321                 goto out_err;
6322
6323         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6324                 ret = -EFAULT;
6325                 goto out_err;
6326         }
6327
6328         trace_event_read_lock();
6329         trace_access_lock(iter->cpu_file);
6330
6331         /* Fill as many pages as possible. */
6332         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6333                 spd.pages[i] = alloc_page(GFP_KERNEL);
6334                 if (!spd.pages[i])
6335                         break;
6336
6337                 rem = tracing_fill_pipe_page(rem, iter);
6338
6339                 /* Copy the data into the page, so we can start over. */
6340                 ret = trace_seq_to_buffer(&iter->seq,
6341                                           page_address(spd.pages[i]),
6342                                           trace_seq_used(&iter->seq));
6343                 if (ret < 0) {
6344                         __free_page(spd.pages[i]);
6345                         break;
6346                 }
6347                 spd.partial[i].offset = 0;
6348                 spd.partial[i].len = trace_seq_used(&iter->seq);
6349
6350                 trace_seq_init(&iter->seq);
6351         }
6352
6353         trace_access_unlock(iter->cpu_file);
6354         trace_event_read_unlock();
6355         mutex_unlock(&iter->mutex);
6356
6357         spd.nr_pages = i;
6358
6359         if (i)
6360                 ret = splice_to_pipe(pipe, &spd);
6361         else
6362                 ret = 0;
6363 out:
6364         splice_shrink_spd(&spd);
6365         return ret;
6366
6367 out_err:
6368         mutex_unlock(&iter->mutex);
6369         goto out;
6370 }
6371
6372 static ssize_t
6373 tracing_entries_read(struct file *filp, char __user *ubuf,
6374                      size_t cnt, loff_t *ppos)
6375 {
6376         struct inode *inode = file_inode(filp);
6377         struct trace_array *tr = inode->i_private;
6378         int cpu = tracing_get_cpu(inode);
6379         char buf[64];
6380         int r = 0;
6381         ssize_t ret;
6382
6383         mutex_lock(&trace_types_lock);
6384
6385         if (cpu == RING_BUFFER_ALL_CPUS) {
6386                 int cpu, buf_size_same;
6387                 unsigned long size;
6388
6389                 size = 0;
6390                 buf_size_same = 1;
6391                 /* check if all cpu sizes are same */
6392                 for_each_tracing_cpu(cpu) {
6393                         /* fill in the size from first enabled cpu */
6394                         if (size == 0)
6395                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6396                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6397                                 buf_size_same = 0;
6398                                 break;
6399                         }
6400                 }
6401
6402                 if (buf_size_same) {
6403                         if (!ring_buffer_expanded)
6404                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6405                                             size >> 10,
6406                                             trace_buf_size >> 10);
6407                         else
6408                                 r = sprintf(buf, "%lu\n", size >> 10);
6409                 } else
6410                         r = sprintf(buf, "X\n");
6411         } else
6412                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6413
6414         mutex_unlock(&trace_types_lock);
6415
6416         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6417         return ret;
6418 }
6419
6420 static ssize_t
6421 tracing_entries_write(struct file *filp, const char __user *ubuf,
6422                       size_t cnt, loff_t *ppos)
6423 {
6424         struct inode *inode = file_inode(filp);
6425         struct trace_array *tr = inode->i_private;
6426         unsigned long val;
6427         int ret;
6428
6429         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6430         if (ret)
6431                 return ret;
6432
6433         /* must have at least 1 entry */
6434         if (!val)
6435                 return -EINVAL;
6436
6437         /* value is in KB */
6438         val <<= 10;
6439         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6440         if (ret < 0)
6441                 return ret;
6442
6443         *ppos += cnt;
6444
6445         return cnt;
6446 }
6447
6448 static ssize_t
6449 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6450                                 size_t cnt, loff_t *ppos)
6451 {
6452         struct trace_array *tr = filp->private_data;
6453         char buf[64];
6454         int r, cpu;
6455         unsigned long size = 0, expanded_size = 0;
6456
6457         mutex_lock(&trace_types_lock);
6458         for_each_tracing_cpu(cpu) {
6459                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6460                 if (!ring_buffer_expanded)
6461                         expanded_size += trace_buf_size >> 10;
6462         }
6463         if (ring_buffer_expanded)
6464                 r = sprintf(buf, "%lu\n", size);
6465         else
6466                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6467         mutex_unlock(&trace_types_lock);
6468
6469         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6470 }
6471
6472 static ssize_t
6473 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6474                           size_t cnt, loff_t *ppos)
6475 {
6476         /*
6477          * There is no need to read what the user has written, this function
6478          * is just to make sure that there is no error when "echo" is used
6479          */
6480
6481         *ppos += cnt;
6482
6483         return cnt;
6484 }
6485
6486 static int
6487 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6488 {
6489         struct trace_array *tr = inode->i_private;
6490
6491         /* disable tracing ? */
6492         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6493                 tracer_tracing_off(tr);
6494         /* resize the ring buffer to 0 */
6495         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6496
6497         trace_array_put(tr);
6498
6499         return 0;
6500 }
6501
6502 static ssize_t
6503 tracing_mark_write(struct file *filp, const char __user *ubuf,
6504                                         size_t cnt, loff_t *fpos)
6505 {
6506         struct trace_array *tr = filp->private_data;
6507         struct ring_buffer_event *event;
6508         enum event_trigger_type tt = ETT_NONE;
6509         struct trace_buffer *buffer;
6510         struct print_entry *entry;
6511         unsigned long irq_flags;
6512         ssize_t written;
6513         int size;
6514         int len;
6515
6516 /* Used in tracing_mark_raw_write() as well */
6517 #define FAULTED_STR "<faulted>"
6518 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6519
6520         if (tracing_disabled)
6521                 return -EINVAL;
6522
6523         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6524                 return -EINVAL;
6525
6526         if (cnt > TRACE_BUF_SIZE)
6527                 cnt = TRACE_BUF_SIZE;
6528
6529         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6530
6531         local_save_flags(irq_flags);
6532         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6533
6534         /* If less than "<faulted>", then make sure we can still add that */
6535         if (cnt < FAULTED_SIZE)
6536                 size += FAULTED_SIZE - cnt;
6537
6538         buffer = tr->array_buffer.buffer;
6539         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6540                                             irq_flags, preempt_count());
6541         if (unlikely(!event))
6542                 /* Ring buffer disabled, return as if not open for write */
6543                 return -EBADF;
6544
6545         entry = ring_buffer_event_data(event);
6546         entry->ip = _THIS_IP_;
6547
6548         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6549         if (len) {
6550                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6551                 cnt = FAULTED_SIZE;
6552                 written = -EFAULT;
6553         } else
6554                 written = cnt;
6555         len = cnt;
6556
6557         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6558                 /* do not add \n before testing triggers, but add \0 */
6559                 entry->buf[cnt] = '\0';
6560                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6561         }
6562
6563         if (entry->buf[cnt - 1] != '\n') {
6564                 entry->buf[cnt] = '\n';
6565                 entry->buf[cnt + 1] = '\0';
6566         } else
6567                 entry->buf[cnt] = '\0';
6568
6569         __buffer_unlock_commit(buffer, event);
6570
6571         if (tt)
6572                 event_triggers_post_call(tr->trace_marker_file, tt);
6573
6574         if (written > 0)
6575                 *fpos += written;
6576
6577         return written;
6578 }
6579
6580 /* Limit it for now to 3K (including tag) */
6581 #define RAW_DATA_MAX_SIZE (1024*3)
6582
6583 static ssize_t
6584 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6585                                         size_t cnt, loff_t *fpos)
6586 {
6587         struct trace_array *tr = filp->private_data;
6588         struct ring_buffer_event *event;
6589         struct trace_buffer *buffer;
6590         struct raw_data_entry *entry;
6591         unsigned long irq_flags;
6592         ssize_t written;
6593         int size;
6594         int len;
6595
6596 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6597
6598         if (tracing_disabled)
6599                 return -EINVAL;
6600
6601         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6602                 return -EINVAL;
6603
6604         /* The marker must at least have a tag id */
6605         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6606                 return -EINVAL;
6607
6608         if (cnt > TRACE_BUF_SIZE)
6609                 cnt = TRACE_BUF_SIZE;
6610
6611         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6612
6613         local_save_flags(irq_flags);
6614         size = sizeof(*entry) + cnt;
6615         if (cnt < FAULT_SIZE_ID)
6616                 size += FAULT_SIZE_ID - cnt;
6617
6618         buffer = tr->array_buffer.buffer;
6619         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6620                                             irq_flags, preempt_count());
6621         if (!event)
6622                 /* Ring buffer disabled, return as if not open for write */
6623                 return -EBADF;
6624
6625         entry = ring_buffer_event_data(event);
6626
6627         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6628         if (len) {
6629                 entry->id = -1;
6630                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6631                 written = -EFAULT;
6632         } else
6633                 written = cnt;
6634
6635         __buffer_unlock_commit(buffer, event);
6636
6637         if (written > 0)
6638                 *fpos += written;
6639
6640         return written;
6641 }
6642
6643 static int tracing_clock_show(struct seq_file *m, void *v)
6644 {
6645         struct trace_array *tr = m->private;
6646         int i;
6647
6648         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6649                 seq_printf(m,
6650                         "%s%s%s%s", i ? " " : "",
6651                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6652                         i == tr->clock_id ? "]" : "");
6653         seq_putc(m, '\n');
6654
6655         return 0;
6656 }
6657
6658 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6659 {
6660         int i;
6661
6662         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6663                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6664                         break;
6665         }
6666         if (i == ARRAY_SIZE(trace_clocks))
6667                 return -EINVAL;
6668
6669         mutex_lock(&trace_types_lock);
6670
6671         tr->clock_id = i;
6672
6673         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6674
6675         /*
6676          * New clock may not be consistent with the previous clock.
6677          * Reset the buffer so that it doesn't have incomparable timestamps.
6678          */
6679         tracing_reset_online_cpus(&tr->array_buffer);
6680
6681 #ifdef CONFIG_TRACER_MAX_TRACE
6682         if (tr->max_buffer.buffer)
6683                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6684         tracing_reset_online_cpus(&tr->max_buffer);
6685 #endif
6686
6687         mutex_unlock(&trace_types_lock);
6688
6689         return 0;
6690 }
6691
6692 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6693                                    size_t cnt, loff_t *fpos)
6694 {
6695         struct seq_file *m = filp->private_data;
6696         struct trace_array *tr = m->private;
6697         char buf[64];
6698         const char *clockstr;
6699         int ret;
6700
6701         if (cnt >= sizeof(buf))
6702                 return -EINVAL;
6703
6704         if (copy_from_user(buf, ubuf, cnt))
6705                 return -EFAULT;
6706
6707         buf[cnt] = 0;
6708
6709         clockstr = strstrip(buf);
6710
6711         ret = tracing_set_clock(tr, clockstr);
6712         if (ret)
6713                 return ret;
6714
6715         *fpos += cnt;
6716
6717         return cnt;
6718 }
6719
6720 static int tracing_clock_open(struct inode *inode, struct file *file)
6721 {
6722         struct trace_array *tr = inode->i_private;
6723         int ret;
6724
6725         ret = tracing_check_open_get_tr(tr);
6726         if (ret)
6727                 return ret;
6728
6729         ret = single_open(file, tracing_clock_show, inode->i_private);
6730         if (ret < 0)
6731                 trace_array_put(tr);
6732
6733         return ret;
6734 }
6735
6736 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6737 {
6738         struct trace_array *tr = m->private;
6739
6740         mutex_lock(&trace_types_lock);
6741
6742         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6743                 seq_puts(m, "delta [absolute]\n");
6744         else
6745                 seq_puts(m, "[delta] absolute\n");
6746
6747         mutex_unlock(&trace_types_lock);
6748
6749         return 0;
6750 }
6751
6752 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6753 {
6754         struct trace_array *tr = inode->i_private;
6755         int ret;
6756
6757         ret = tracing_check_open_get_tr(tr);
6758         if (ret)
6759                 return ret;
6760
6761         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6762         if (ret < 0)
6763                 trace_array_put(tr);
6764
6765         return ret;
6766 }
6767
6768 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6769 {
6770         int ret = 0;
6771
6772         mutex_lock(&trace_types_lock);
6773
6774         if (abs && tr->time_stamp_abs_ref++)
6775                 goto out;
6776
6777         if (!abs) {
6778                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6779                         ret = -EINVAL;
6780                         goto out;
6781                 }
6782
6783                 if (--tr->time_stamp_abs_ref)
6784                         goto out;
6785         }
6786
6787         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6788
6789 #ifdef CONFIG_TRACER_MAX_TRACE
6790         if (tr->max_buffer.buffer)
6791                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6792 #endif
6793  out:
6794         mutex_unlock(&trace_types_lock);
6795
6796         return ret;
6797 }
6798
6799 struct ftrace_buffer_info {
6800         struct trace_iterator   iter;
6801         void                    *spare;
6802         unsigned int            spare_cpu;
6803         unsigned int            read;
6804 };
6805
6806 #ifdef CONFIG_TRACER_SNAPSHOT
6807 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6808 {
6809         struct trace_array *tr = inode->i_private;
6810         struct trace_iterator *iter;
6811         struct seq_file *m;
6812         int ret;
6813
6814         ret = tracing_check_open_get_tr(tr);
6815         if (ret)
6816                 return ret;
6817
6818         if (file->f_mode & FMODE_READ) {
6819                 iter = __tracing_open(inode, file, true);
6820                 if (IS_ERR(iter))
6821                         ret = PTR_ERR(iter);
6822         } else {
6823                 /* Writes still need the seq_file to hold the private data */
6824                 ret = -ENOMEM;
6825                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6826                 if (!m)
6827                         goto out;
6828                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6829                 if (!iter) {
6830                         kfree(m);
6831                         goto out;
6832                 }
6833                 ret = 0;
6834
6835                 iter->tr = tr;
6836                 iter->array_buffer = &tr->max_buffer;
6837                 iter->cpu_file = tracing_get_cpu(inode);
6838                 m->private = iter;
6839                 file->private_data = m;
6840         }
6841 out:
6842         if (ret < 0)
6843                 trace_array_put(tr);
6844
6845         return ret;
6846 }
6847
6848 static ssize_t
6849 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6850                        loff_t *ppos)
6851 {
6852         struct seq_file *m = filp->private_data;
6853         struct trace_iterator *iter = m->private;
6854         struct trace_array *tr = iter->tr;
6855         unsigned long val;
6856         int ret;
6857
6858         ret = tracing_update_buffers();
6859         if (ret < 0)
6860                 return ret;
6861
6862         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6863         if (ret)
6864                 return ret;
6865
6866         mutex_lock(&trace_types_lock);
6867
6868         if (tr->current_trace->use_max_tr) {
6869                 ret = -EBUSY;
6870                 goto out;
6871         }
6872
6873         arch_spin_lock(&tr->max_lock);
6874         if (tr->cond_snapshot)
6875                 ret = -EBUSY;
6876         arch_spin_unlock(&tr->max_lock);
6877         if (ret)
6878                 goto out;
6879
6880         switch (val) {
6881         case 0:
6882                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6883                         ret = -EINVAL;
6884                         break;
6885                 }
6886                 if (tr->allocated_snapshot)
6887                         free_snapshot(tr);
6888                 break;
6889         case 1:
6890 /* Only allow per-cpu swap if the ring buffer supports it */
6891 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6892                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6893                         ret = -EINVAL;
6894                         break;
6895                 }
6896 #endif
6897                 if (tr->allocated_snapshot)
6898                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6899                                         &tr->array_buffer, iter->cpu_file);
6900                 else
6901                         ret = tracing_alloc_snapshot_instance(tr);
6902                 if (ret < 0)
6903                         break;
6904                 local_irq_disable();
6905                 /* Now, we're going to swap */
6906                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6907                         update_max_tr(tr, current, smp_processor_id(), NULL);
6908                 else
6909                         update_max_tr_single(tr, current, iter->cpu_file);
6910                 local_irq_enable();
6911                 break;
6912         default:
6913                 if (tr->allocated_snapshot) {
6914                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6915                                 tracing_reset_online_cpus(&tr->max_buffer);
6916                         else
6917                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6918                 }
6919                 break;
6920         }
6921
6922         if (ret >= 0) {
6923                 *ppos += cnt;
6924                 ret = cnt;
6925         }
6926 out:
6927         mutex_unlock(&trace_types_lock);
6928         return ret;
6929 }
6930
6931 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6932 {
6933         struct seq_file *m = file->private_data;
6934         int ret;
6935
6936         ret = tracing_release(inode, file);
6937
6938         if (file->f_mode & FMODE_READ)
6939                 return ret;
6940
6941         /* If write only, the seq_file is just a stub */
6942         if (m)
6943                 kfree(m->private);
6944         kfree(m);
6945
6946         return 0;
6947 }
6948
6949 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6950 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6951                                     size_t count, loff_t *ppos);
6952 static int tracing_buffers_release(struct inode *inode, struct file *file);
6953 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6954                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6955
6956 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6957 {
6958         struct ftrace_buffer_info *info;
6959         int ret;
6960
6961         /* The following checks for tracefs lockdown */
6962         ret = tracing_buffers_open(inode, filp);
6963         if (ret < 0)
6964                 return ret;
6965
6966         info = filp->private_data;
6967
6968         if (info->iter.trace->use_max_tr) {
6969                 tracing_buffers_release(inode, filp);
6970                 return -EBUSY;
6971         }
6972
6973         info->iter.snapshot = true;
6974         info->iter.array_buffer = &info->iter.tr->max_buffer;
6975
6976         return ret;
6977 }
6978
6979 #endif /* CONFIG_TRACER_SNAPSHOT */
6980
6981
6982 static const struct file_operations tracing_thresh_fops = {
6983         .open           = tracing_open_generic,
6984         .read           = tracing_thresh_read,
6985         .write          = tracing_thresh_write,
6986         .llseek         = generic_file_llseek,
6987 };
6988
6989 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6990 static const struct file_operations tracing_max_lat_fops = {
6991         .open           = tracing_open_generic,
6992         .read           = tracing_max_lat_read,
6993         .write          = tracing_max_lat_write,
6994         .llseek         = generic_file_llseek,
6995 };
6996 #endif
6997
6998 static const struct file_operations set_tracer_fops = {
6999         .open           = tracing_open_generic,
7000         .read           = tracing_set_trace_read,
7001         .write          = tracing_set_trace_write,
7002         .llseek         = generic_file_llseek,
7003 };
7004
7005 static const struct file_operations tracing_pipe_fops = {
7006         .open           = tracing_open_pipe,
7007         .poll           = tracing_poll_pipe,
7008         .read           = tracing_read_pipe,
7009         .splice_read    = tracing_splice_read_pipe,
7010         .release        = tracing_release_pipe,
7011         .llseek         = no_llseek,
7012 };
7013
7014 static const struct file_operations tracing_entries_fops = {
7015         .open           = tracing_open_generic_tr,
7016         .read           = tracing_entries_read,
7017         .write          = tracing_entries_write,
7018         .llseek         = generic_file_llseek,
7019         .release        = tracing_release_generic_tr,
7020 };
7021
7022 static const struct file_operations tracing_total_entries_fops = {
7023         .open           = tracing_open_generic_tr,
7024         .read           = tracing_total_entries_read,
7025         .llseek         = generic_file_llseek,
7026         .release        = tracing_release_generic_tr,
7027 };
7028
7029 static const struct file_operations tracing_free_buffer_fops = {
7030         .open           = tracing_open_generic_tr,
7031         .write          = tracing_free_buffer_write,
7032         .release        = tracing_free_buffer_release,
7033 };
7034
7035 static const struct file_operations tracing_mark_fops = {
7036         .open           = tracing_open_generic_tr,
7037         .write          = tracing_mark_write,
7038         .llseek         = generic_file_llseek,
7039         .release        = tracing_release_generic_tr,
7040 };
7041
7042 static const struct file_operations tracing_mark_raw_fops = {
7043         .open           = tracing_open_generic_tr,
7044         .write          = tracing_mark_raw_write,
7045         .llseek         = generic_file_llseek,
7046         .release        = tracing_release_generic_tr,
7047 };
7048
7049 static const struct file_operations trace_clock_fops = {
7050         .open           = tracing_clock_open,
7051         .read           = seq_read,
7052         .llseek         = seq_lseek,
7053         .release        = tracing_single_release_tr,
7054         .write          = tracing_clock_write,
7055 };
7056
7057 static const struct file_operations trace_time_stamp_mode_fops = {
7058         .open           = tracing_time_stamp_mode_open,
7059         .read           = seq_read,
7060         .llseek         = seq_lseek,
7061         .release        = tracing_single_release_tr,
7062 };
7063
7064 #ifdef CONFIG_TRACER_SNAPSHOT
7065 static const struct file_operations snapshot_fops = {
7066         .open           = tracing_snapshot_open,
7067         .read           = seq_read,
7068         .write          = tracing_snapshot_write,
7069         .llseek         = tracing_lseek,
7070         .release        = tracing_snapshot_release,
7071 };
7072
7073 static const struct file_operations snapshot_raw_fops = {
7074         .open           = snapshot_raw_open,
7075         .read           = tracing_buffers_read,
7076         .release        = tracing_buffers_release,
7077         .splice_read    = tracing_buffers_splice_read,
7078         .llseek         = no_llseek,
7079 };
7080
7081 #endif /* CONFIG_TRACER_SNAPSHOT */
7082
7083 #define TRACING_LOG_ERRS_MAX    8
7084 #define TRACING_LOG_LOC_MAX     128
7085
7086 #define CMD_PREFIX "  Command: "
7087
7088 struct err_info {
7089         const char      **errs; /* ptr to loc-specific array of err strings */
7090         u8              type;   /* index into errs -> specific err string */
7091         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7092         u64             ts;
7093 };
7094
7095 struct tracing_log_err {
7096         struct list_head        list;
7097         struct err_info         info;
7098         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7099         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7100 };
7101
7102 static DEFINE_MUTEX(tracing_err_log_lock);
7103
7104 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7105 {
7106         struct tracing_log_err *err;
7107
7108         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7109                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7110                 if (!err)
7111                         err = ERR_PTR(-ENOMEM);
7112                 tr->n_err_log_entries++;
7113
7114                 return err;
7115         }
7116
7117         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7118         list_del(&err->list);
7119
7120         return err;
7121 }
7122
7123 /**
7124  * err_pos - find the position of a string within a command for error careting
7125  * @cmd: The tracing command that caused the error
7126  * @str: The string to position the caret at within @cmd
7127  *
7128  * Finds the position of the first occurence of @str within @cmd.  The
7129  * return value can be passed to tracing_log_err() for caret placement
7130  * within @cmd.
7131  *
7132  * Returns the index within @cmd of the first occurence of @str or 0
7133  * if @str was not found.
7134  */
7135 unsigned int err_pos(char *cmd, const char *str)
7136 {
7137         char *found;
7138
7139         if (WARN_ON(!strlen(cmd)))
7140                 return 0;
7141
7142         found = strstr(cmd, str);
7143         if (found)
7144                 return found - cmd;
7145
7146         return 0;
7147 }
7148
7149 /**
7150  * tracing_log_err - write an error to the tracing error log
7151  * @tr: The associated trace array for the error (NULL for top level array)
7152  * @loc: A string describing where the error occurred
7153  * @cmd: The tracing command that caused the error
7154  * @errs: The array of loc-specific static error strings
7155  * @type: The index into errs[], which produces the specific static err string
7156  * @pos: The position the caret should be placed in the cmd
7157  *
7158  * Writes an error into tracing/error_log of the form:
7159  *
7160  * <loc>: error: <text>
7161  *   Command: <cmd>
7162  *              ^
7163  *
7164  * tracing/error_log is a small log file containing the last
7165  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7166  * unless there has been a tracing error, and the error log can be
7167  * cleared and have its memory freed by writing the empty string in
7168  * truncation mode to it i.e. echo > tracing/error_log.
7169  *
7170  * NOTE: the @errs array along with the @type param are used to
7171  * produce a static error string - this string is not copied and saved
7172  * when the error is logged - only a pointer to it is saved.  See
7173  * existing callers for examples of how static strings are typically
7174  * defined for use with tracing_log_err().
7175  */
7176 void tracing_log_err(struct trace_array *tr,
7177                      const char *loc, const char *cmd,
7178                      const char **errs, u8 type, u8 pos)
7179 {
7180         struct tracing_log_err *err;
7181
7182         if (!tr)
7183                 tr = &global_trace;
7184
7185         mutex_lock(&tracing_err_log_lock);
7186         err = get_tracing_log_err(tr);
7187         if (PTR_ERR(err) == -ENOMEM) {
7188                 mutex_unlock(&tracing_err_log_lock);
7189                 return;
7190         }
7191
7192         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7193         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7194
7195         err->info.errs = errs;
7196         err->info.type = type;
7197         err->info.pos = pos;
7198         err->info.ts = local_clock();
7199
7200         list_add_tail(&err->list, &tr->err_log);
7201         mutex_unlock(&tracing_err_log_lock);
7202 }
7203
7204 static void clear_tracing_err_log(struct trace_array *tr)
7205 {
7206         struct tracing_log_err *err, *next;
7207
7208         mutex_lock(&tracing_err_log_lock);
7209         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7210                 list_del(&err->list);
7211                 kfree(err);
7212         }
7213
7214         tr->n_err_log_entries = 0;
7215         mutex_unlock(&tracing_err_log_lock);
7216 }
7217
7218 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7219 {
7220         struct trace_array *tr = m->private;
7221
7222         mutex_lock(&tracing_err_log_lock);
7223
7224         return seq_list_start(&tr->err_log, *pos);
7225 }
7226
7227 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7228 {
7229         struct trace_array *tr = m->private;
7230
7231         return seq_list_next(v, &tr->err_log, pos);
7232 }
7233
7234 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7235 {
7236         mutex_unlock(&tracing_err_log_lock);
7237 }
7238
7239 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7240 {
7241         u8 i;
7242
7243         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7244                 seq_putc(m, ' ');
7245         for (i = 0; i < pos; i++)
7246                 seq_putc(m, ' ');
7247         seq_puts(m, "^\n");
7248 }
7249
7250 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7251 {
7252         struct tracing_log_err *err = v;
7253
7254         if (err) {
7255                 const char *err_text = err->info.errs[err->info.type];
7256                 u64 sec = err->info.ts;
7257                 u32 nsec;
7258
7259                 nsec = do_div(sec, NSEC_PER_SEC);
7260                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7261                            err->loc, err_text);
7262                 seq_printf(m, "%s", err->cmd);
7263                 tracing_err_log_show_pos(m, err->info.pos);
7264         }
7265
7266         return 0;
7267 }
7268
7269 static const struct seq_operations tracing_err_log_seq_ops = {
7270         .start  = tracing_err_log_seq_start,
7271         .next   = tracing_err_log_seq_next,
7272         .stop   = tracing_err_log_seq_stop,
7273         .show   = tracing_err_log_seq_show
7274 };
7275
7276 static int tracing_err_log_open(struct inode *inode, struct file *file)
7277 {
7278         struct trace_array *tr = inode->i_private;
7279         int ret = 0;
7280
7281         ret = tracing_check_open_get_tr(tr);
7282         if (ret)
7283                 return ret;
7284
7285         /* If this file was opened for write, then erase contents */
7286         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7287                 clear_tracing_err_log(tr);
7288
7289         if (file->f_mode & FMODE_READ) {
7290                 ret = seq_open(file, &tracing_err_log_seq_ops);
7291                 if (!ret) {
7292                         struct seq_file *m = file->private_data;
7293                         m->private = tr;
7294                 } else {
7295                         trace_array_put(tr);
7296                 }
7297         }
7298         return ret;
7299 }
7300
7301 static ssize_t tracing_err_log_write(struct file *file,
7302                                      const char __user *buffer,
7303                                      size_t count, loff_t *ppos)
7304 {
7305         return count;
7306 }
7307
7308 static int tracing_err_log_release(struct inode *inode, struct file *file)
7309 {
7310         struct trace_array *tr = inode->i_private;
7311
7312         trace_array_put(tr);
7313
7314         if (file->f_mode & FMODE_READ)
7315                 seq_release(inode, file);
7316
7317         return 0;
7318 }
7319
7320 static const struct file_operations tracing_err_log_fops = {
7321         .open           = tracing_err_log_open,
7322         .write          = tracing_err_log_write,
7323         .read           = seq_read,
7324         .llseek         = seq_lseek,
7325         .release        = tracing_err_log_release,
7326 };
7327
7328 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7329 {
7330         struct trace_array *tr = inode->i_private;
7331         struct ftrace_buffer_info *info;
7332         int ret;
7333
7334         ret = tracing_check_open_get_tr(tr);
7335         if (ret)
7336                 return ret;
7337
7338         info = kzalloc(sizeof(*info), GFP_KERNEL);
7339         if (!info) {
7340                 trace_array_put(tr);
7341                 return -ENOMEM;
7342         }
7343
7344         mutex_lock(&trace_types_lock);
7345
7346         info->iter.tr           = tr;
7347         info->iter.cpu_file     = tracing_get_cpu(inode);
7348         info->iter.trace        = tr->current_trace;
7349         info->iter.array_buffer = &tr->array_buffer;
7350         info->spare             = NULL;
7351         /* Force reading ring buffer for first read */
7352         info->read              = (unsigned int)-1;
7353
7354         filp->private_data = info;
7355
7356         tr->current_trace->ref++;
7357
7358         mutex_unlock(&trace_types_lock);
7359
7360         ret = nonseekable_open(inode, filp);
7361         if (ret < 0)
7362                 trace_array_put(tr);
7363
7364         return ret;
7365 }
7366
7367 static __poll_t
7368 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7369 {
7370         struct ftrace_buffer_info *info = filp->private_data;
7371         struct trace_iterator *iter = &info->iter;
7372
7373         return trace_poll(iter, filp, poll_table);
7374 }
7375
7376 static ssize_t
7377 tracing_buffers_read(struct file *filp, char __user *ubuf,
7378                      size_t count, loff_t *ppos)
7379 {
7380         struct ftrace_buffer_info *info = filp->private_data;
7381         struct trace_iterator *iter = &info->iter;
7382         ssize_t ret = 0;
7383         ssize_t size;
7384
7385         if (!count)
7386                 return 0;
7387
7388 #ifdef CONFIG_TRACER_MAX_TRACE
7389         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7390                 return -EBUSY;
7391 #endif
7392
7393         if (!info->spare) {
7394                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7395                                                           iter->cpu_file);
7396                 if (IS_ERR(info->spare)) {
7397                         ret = PTR_ERR(info->spare);
7398                         info->spare = NULL;
7399                 } else {
7400                         info->spare_cpu = iter->cpu_file;
7401                 }
7402         }
7403         if (!info->spare)
7404                 return ret;
7405
7406         /* Do we have previous read data to read? */
7407         if (info->read < PAGE_SIZE)
7408                 goto read;
7409
7410  again:
7411         trace_access_lock(iter->cpu_file);
7412         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7413                                     &info->spare,
7414                                     count,
7415                                     iter->cpu_file, 0);
7416         trace_access_unlock(iter->cpu_file);
7417
7418         if (ret < 0) {
7419                 if (trace_empty(iter)) {
7420                         if ((filp->f_flags & O_NONBLOCK))
7421                                 return -EAGAIN;
7422
7423                         ret = wait_on_pipe(iter, 0);
7424                         if (ret)
7425                                 return ret;
7426
7427                         goto again;
7428                 }
7429                 return 0;
7430         }
7431
7432         info->read = 0;
7433  read:
7434         size = PAGE_SIZE - info->read;
7435         if (size > count)
7436                 size = count;
7437
7438         ret = copy_to_user(ubuf, info->spare + info->read, size);
7439         if (ret == size)
7440                 return -EFAULT;
7441
7442         size -= ret;
7443
7444         *ppos += size;
7445         info->read += size;
7446
7447         return size;
7448 }
7449
7450 static int tracing_buffers_release(struct inode *inode, struct file *file)
7451 {
7452         struct ftrace_buffer_info *info = file->private_data;
7453         struct trace_iterator *iter = &info->iter;
7454
7455         mutex_lock(&trace_types_lock);
7456
7457         iter->tr->current_trace->ref--;
7458
7459         __trace_array_put(iter->tr);
7460
7461         if (info->spare)
7462                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7463                                            info->spare_cpu, info->spare);
7464         kfree(info);
7465
7466         mutex_unlock(&trace_types_lock);
7467
7468         return 0;
7469 }
7470
7471 struct buffer_ref {
7472         struct trace_buffer     *buffer;
7473         void                    *page;
7474         int                     cpu;
7475         refcount_t              refcount;
7476 };
7477
7478 static void buffer_ref_release(struct buffer_ref *ref)
7479 {
7480         if (!refcount_dec_and_test(&ref->refcount))
7481                 return;
7482         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7483         kfree(ref);
7484 }
7485
7486 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7487                                     struct pipe_buffer *buf)
7488 {
7489         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7490
7491         buffer_ref_release(ref);
7492         buf->private = 0;
7493 }
7494
7495 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7496                                 struct pipe_buffer *buf)
7497 {
7498         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7499
7500         if (refcount_read(&ref->refcount) > INT_MAX/2)
7501                 return false;
7502
7503         refcount_inc(&ref->refcount);
7504         return true;
7505 }
7506
7507 /* Pipe buffer operations for a buffer. */
7508 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7509         .confirm                = generic_pipe_buf_confirm,
7510         .release                = buffer_pipe_buf_release,
7511         .steal                  = generic_pipe_buf_nosteal,
7512         .get                    = buffer_pipe_buf_get,
7513 };
7514
7515 /*
7516  * Callback from splice_to_pipe(), if we need to release some pages
7517  * at the end of the spd in case we error'ed out in filling the pipe.
7518  */
7519 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7520 {
7521         struct buffer_ref *ref =
7522                 (struct buffer_ref *)spd->partial[i].private;
7523
7524         buffer_ref_release(ref);
7525         spd->partial[i].private = 0;
7526 }
7527
7528 static ssize_t
7529 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7530                             struct pipe_inode_info *pipe, size_t len,
7531                             unsigned int flags)
7532 {
7533         struct ftrace_buffer_info *info = file->private_data;
7534         struct trace_iterator *iter = &info->iter;
7535         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7536         struct page *pages_def[PIPE_DEF_BUFFERS];
7537         struct splice_pipe_desc spd = {
7538                 .pages          = pages_def,
7539                 .partial        = partial_def,
7540                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7541                 .ops            = &buffer_pipe_buf_ops,
7542                 .spd_release    = buffer_spd_release,
7543         };
7544         struct buffer_ref *ref;
7545         int entries, i;
7546         ssize_t ret = 0;
7547
7548 #ifdef CONFIG_TRACER_MAX_TRACE
7549         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7550                 return -EBUSY;
7551 #endif
7552
7553         if (*ppos & (PAGE_SIZE - 1))
7554                 return -EINVAL;
7555
7556         if (len & (PAGE_SIZE - 1)) {
7557                 if (len < PAGE_SIZE)
7558                         return -EINVAL;
7559                 len &= PAGE_MASK;
7560         }
7561
7562         if (splice_grow_spd(pipe, &spd))
7563                 return -ENOMEM;
7564
7565  again:
7566         trace_access_lock(iter->cpu_file);
7567         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7568
7569         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7570                 struct page *page;
7571                 int r;
7572
7573                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7574                 if (!ref) {
7575                         ret = -ENOMEM;
7576                         break;
7577                 }
7578
7579                 refcount_set(&ref->refcount, 1);
7580                 ref->buffer = iter->array_buffer->buffer;
7581                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7582                 if (IS_ERR(ref->page)) {
7583                         ret = PTR_ERR(ref->page);
7584                         ref->page = NULL;
7585                         kfree(ref);
7586                         break;
7587                 }
7588                 ref->cpu = iter->cpu_file;
7589
7590                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7591                                           len, iter->cpu_file, 1);
7592                 if (r < 0) {
7593                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7594                                                    ref->page);
7595                         kfree(ref);
7596                         break;
7597                 }
7598
7599                 page = virt_to_page(ref->page);
7600
7601                 spd.pages[i] = page;
7602                 spd.partial[i].len = PAGE_SIZE;
7603                 spd.partial[i].offset = 0;
7604                 spd.partial[i].private = (unsigned long)ref;
7605                 spd.nr_pages++;
7606                 *ppos += PAGE_SIZE;
7607
7608                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7609         }
7610
7611         trace_access_unlock(iter->cpu_file);
7612         spd.nr_pages = i;
7613
7614         /* did we read anything? */
7615         if (!spd.nr_pages) {
7616                 if (ret)
7617                         goto out;
7618
7619                 ret = -EAGAIN;
7620                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7621                         goto out;
7622
7623                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7624                 if (ret)
7625                         goto out;
7626
7627                 goto again;
7628         }
7629
7630         ret = splice_to_pipe(pipe, &spd);
7631 out:
7632         splice_shrink_spd(&spd);
7633
7634         return ret;
7635 }
7636
7637 static const struct file_operations tracing_buffers_fops = {
7638         .open           = tracing_buffers_open,
7639         .read           = tracing_buffers_read,
7640         .poll           = tracing_buffers_poll,
7641         .release        = tracing_buffers_release,
7642         .splice_read    = tracing_buffers_splice_read,
7643         .llseek         = no_llseek,
7644 };
7645
7646 static ssize_t
7647 tracing_stats_read(struct file *filp, char __user *ubuf,
7648                    size_t count, loff_t *ppos)
7649 {
7650         struct inode *inode = file_inode(filp);
7651         struct trace_array *tr = inode->i_private;
7652         struct array_buffer *trace_buf = &tr->array_buffer;
7653         int cpu = tracing_get_cpu(inode);
7654         struct trace_seq *s;
7655         unsigned long cnt;
7656         unsigned long long t;
7657         unsigned long usec_rem;
7658
7659         s = kmalloc(sizeof(*s), GFP_KERNEL);
7660         if (!s)
7661                 return -ENOMEM;
7662
7663         trace_seq_init(s);
7664
7665         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7666         trace_seq_printf(s, "entries: %ld\n", cnt);
7667
7668         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7669         trace_seq_printf(s, "overrun: %ld\n", cnt);
7670
7671         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7672         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7673
7674         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7675         trace_seq_printf(s, "bytes: %ld\n", cnt);
7676
7677         if (trace_clocks[tr->clock_id].in_ns) {
7678                 /* local or global for trace_clock */
7679                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7680                 usec_rem = do_div(t, USEC_PER_SEC);
7681                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7682                                                                 t, usec_rem);
7683
7684                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7685                 usec_rem = do_div(t, USEC_PER_SEC);
7686                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7687         } else {
7688                 /* counter or tsc mode for trace_clock */
7689                 trace_seq_printf(s, "oldest event ts: %llu\n",
7690                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7691
7692                 trace_seq_printf(s, "now ts: %llu\n",
7693                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7694         }
7695
7696         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7697         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7698
7699         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7700         trace_seq_printf(s, "read events: %ld\n", cnt);
7701
7702         count = simple_read_from_buffer(ubuf, count, ppos,
7703                                         s->buffer, trace_seq_used(s));
7704
7705         kfree(s);
7706
7707         return count;
7708 }
7709
7710 static const struct file_operations tracing_stats_fops = {
7711         .open           = tracing_open_generic_tr,
7712         .read           = tracing_stats_read,
7713         .llseek         = generic_file_llseek,
7714         .release        = tracing_release_generic_tr,
7715 };
7716
7717 #ifdef CONFIG_DYNAMIC_FTRACE
7718
7719 static ssize_t
7720 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7721                   size_t cnt, loff_t *ppos)
7722 {
7723         ssize_t ret;
7724         char *buf;
7725         int r;
7726
7727         /* 256 should be plenty to hold the amount needed */
7728         buf = kmalloc(256, GFP_KERNEL);
7729         if (!buf)
7730                 return -ENOMEM;
7731
7732         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7733                       ftrace_update_tot_cnt,
7734                       ftrace_number_of_pages,
7735                       ftrace_number_of_groups);
7736
7737         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7738         kfree(buf);
7739         return ret;
7740 }
7741
7742 static const struct file_operations tracing_dyn_info_fops = {
7743         .open           = tracing_open_generic,
7744         .read           = tracing_read_dyn_info,
7745         .llseek         = generic_file_llseek,
7746 };
7747 #endif /* CONFIG_DYNAMIC_FTRACE */
7748
7749 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7750 static void
7751 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7752                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7753                 void *data)
7754 {
7755         tracing_snapshot_instance(tr);
7756 }
7757
7758 static void
7759 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7760                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7761                       void *data)
7762 {
7763         struct ftrace_func_mapper *mapper = data;
7764         long *count = NULL;
7765
7766         if (mapper)
7767                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7768
7769         if (count) {
7770
7771                 if (*count <= 0)
7772                         return;
7773
7774                 (*count)--;
7775         }
7776
7777         tracing_snapshot_instance(tr);
7778 }
7779
7780 static int
7781 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7782                       struct ftrace_probe_ops *ops, void *data)
7783 {
7784         struct ftrace_func_mapper *mapper = data;
7785         long *count = NULL;
7786
7787         seq_printf(m, "%ps:", (void *)ip);
7788
7789         seq_puts(m, "snapshot");
7790
7791         if (mapper)
7792                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7793
7794         if (count)
7795                 seq_printf(m, ":count=%ld\n", *count);
7796         else
7797                 seq_puts(m, ":unlimited\n");
7798
7799         return 0;
7800 }
7801
7802 static int
7803 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7804                      unsigned long ip, void *init_data, void **data)
7805 {
7806         struct ftrace_func_mapper *mapper = *data;
7807
7808         if (!mapper) {
7809                 mapper = allocate_ftrace_func_mapper();
7810                 if (!mapper)
7811                         return -ENOMEM;
7812                 *data = mapper;
7813         }
7814
7815         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7816 }
7817
7818 static void
7819 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7820                      unsigned long ip, void *data)
7821 {
7822         struct ftrace_func_mapper *mapper = data;
7823
7824         if (!ip) {
7825                 if (!mapper)
7826                         return;
7827                 free_ftrace_func_mapper(mapper, NULL);
7828                 return;
7829         }
7830
7831         ftrace_func_mapper_remove_ip(mapper, ip);
7832 }
7833
7834 static struct ftrace_probe_ops snapshot_probe_ops = {
7835         .func                   = ftrace_snapshot,
7836         .print                  = ftrace_snapshot_print,
7837 };
7838
7839 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7840         .func                   = ftrace_count_snapshot,
7841         .print                  = ftrace_snapshot_print,
7842         .init                   = ftrace_snapshot_init,
7843         .free                   = ftrace_snapshot_free,
7844 };
7845
7846 static int
7847 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7848                                char *glob, char *cmd, char *param, int enable)
7849 {
7850         struct ftrace_probe_ops *ops;
7851         void *count = (void *)-1;
7852         char *number;
7853         int ret;
7854
7855         if (!tr)
7856                 return -ENODEV;
7857
7858         /* hash funcs only work with set_ftrace_filter */
7859         if (!enable)
7860                 return -EINVAL;
7861
7862         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7863
7864         if (glob[0] == '!')
7865                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7866
7867         if (!param)
7868                 goto out_reg;
7869
7870         number = strsep(&param, ":");
7871
7872         if (!strlen(number))
7873                 goto out_reg;
7874
7875         /*
7876          * We use the callback data field (which is a pointer)
7877          * as our counter.
7878          */
7879         ret = kstrtoul(number, 0, (unsigned long *)&count);
7880         if (ret)
7881                 return ret;
7882
7883  out_reg:
7884         ret = tracing_alloc_snapshot_instance(tr);
7885         if (ret < 0)
7886                 goto out;
7887
7888         ret = register_ftrace_function_probe(glob, tr, ops, count);
7889
7890  out:
7891         return ret < 0 ? ret : 0;
7892 }
7893
7894 static struct ftrace_func_command ftrace_snapshot_cmd = {
7895         .name                   = "snapshot",
7896         .func                   = ftrace_trace_snapshot_callback,
7897 };
7898
7899 static __init int register_snapshot_cmd(void)
7900 {
7901         return register_ftrace_command(&ftrace_snapshot_cmd);
7902 }
7903 #else
7904 static inline __init int register_snapshot_cmd(void) { return 0; }
7905 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7906
7907 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7908 {
7909         if (WARN_ON(!tr->dir))
7910                 return ERR_PTR(-ENODEV);
7911
7912         /* Top directory uses NULL as the parent */
7913         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7914                 return NULL;
7915
7916         /* All sub buffers have a descriptor */
7917         return tr->dir;
7918 }
7919
7920 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7921 {
7922         struct dentry *d_tracer;
7923
7924         if (tr->percpu_dir)
7925                 return tr->percpu_dir;
7926
7927         d_tracer = tracing_get_dentry(tr);
7928         if (IS_ERR(d_tracer))
7929                 return NULL;
7930
7931         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7932
7933         MEM_FAIL(!tr->percpu_dir,
7934                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7935
7936         return tr->percpu_dir;
7937 }
7938
7939 static struct dentry *
7940 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7941                       void *data, long cpu, const struct file_operations *fops)
7942 {
7943         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7944
7945         if (ret) /* See tracing_get_cpu() */
7946                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7947         return ret;
7948 }
7949
7950 static void
7951 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7952 {
7953         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7954         struct dentry *d_cpu;
7955         char cpu_dir[30]; /* 30 characters should be more than enough */
7956
7957         if (!d_percpu)
7958                 return;
7959
7960         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7961         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7962         if (!d_cpu) {
7963                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7964                 return;
7965         }
7966
7967         /* per cpu trace_pipe */
7968         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7969                                 tr, cpu, &tracing_pipe_fops);
7970
7971         /* per cpu trace */
7972         trace_create_cpu_file("trace", 0644, d_cpu,
7973                                 tr, cpu, &tracing_fops);
7974
7975         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7976                                 tr, cpu, &tracing_buffers_fops);
7977
7978         trace_create_cpu_file("stats", 0444, d_cpu,
7979                                 tr, cpu, &tracing_stats_fops);
7980
7981         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7982                                 tr, cpu, &tracing_entries_fops);
7983
7984 #ifdef CONFIG_TRACER_SNAPSHOT
7985         trace_create_cpu_file("snapshot", 0644, d_cpu,
7986                                 tr, cpu, &snapshot_fops);
7987
7988         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7989                                 tr, cpu, &snapshot_raw_fops);
7990 #endif
7991 }
7992
7993 #ifdef CONFIG_FTRACE_SELFTEST
7994 /* Let selftest have access to static functions in this file */
7995 #include "trace_selftest.c"
7996 #endif
7997
7998 static ssize_t
7999 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8000                         loff_t *ppos)
8001 {
8002         struct trace_option_dentry *topt = filp->private_data;
8003         char *buf;
8004
8005         if (topt->flags->val & topt->opt->bit)
8006                 buf = "1\n";
8007         else
8008                 buf = "0\n";
8009
8010         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8011 }
8012
8013 static ssize_t
8014 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8015                          loff_t *ppos)
8016 {
8017         struct trace_option_dentry *topt = filp->private_data;
8018         unsigned long val;
8019         int ret;
8020
8021         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8022         if (ret)
8023                 return ret;
8024
8025         if (val != 0 && val != 1)
8026                 return -EINVAL;
8027
8028         if (!!(topt->flags->val & topt->opt->bit) != val) {
8029                 mutex_lock(&trace_types_lock);
8030                 ret = __set_tracer_option(topt->tr, topt->flags,
8031                                           topt->opt, !val);
8032                 mutex_unlock(&trace_types_lock);
8033                 if (ret)
8034                         return ret;
8035         }
8036
8037         *ppos += cnt;
8038
8039         return cnt;
8040 }
8041
8042
8043 static const struct file_operations trace_options_fops = {
8044         .open = tracing_open_generic,
8045         .read = trace_options_read,
8046         .write = trace_options_write,
8047         .llseek = generic_file_llseek,
8048 };
8049
8050 /*
8051  * In order to pass in both the trace_array descriptor as well as the index
8052  * to the flag that the trace option file represents, the trace_array
8053  * has a character array of trace_flags_index[], which holds the index
8054  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8055  * The address of this character array is passed to the flag option file
8056  * read/write callbacks.
8057  *
8058  * In order to extract both the index and the trace_array descriptor,
8059  * get_tr_index() uses the following algorithm.
8060  *
8061  *   idx = *ptr;
8062  *
8063  * As the pointer itself contains the address of the index (remember
8064  * index[1] == 1).
8065  *
8066  * Then to get the trace_array descriptor, by subtracting that index
8067  * from the ptr, we get to the start of the index itself.
8068  *
8069  *   ptr - idx == &index[0]
8070  *
8071  * Then a simple container_of() from that pointer gets us to the
8072  * trace_array descriptor.
8073  */
8074 static void get_tr_index(void *data, struct trace_array **ptr,
8075                          unsigned int *pindex)
8076 {
8077         *pindex = *(unsigned char *)data;
8078
8079         *ptr = container_of(data - *pindex, struct trace_array,
8080                             trace_flags_index);
8081 }
8082
8083 static ssize_t
8084 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8085                         loff_t *ppos)
8086 {
8087         void *tr_index = filp->private_data;
8088         struct trace_array *tr;
8089         unsigned int index;
8090         char *buf;
8091
8092         get_tr_index(tr_index, &tr, &index);
8093
8094         if (tr->trace_flags & (1 << index))
8095                 buf = "1\n";
8096         else
8097                 buf = "0\n";
8098
8099         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8100 }
8101
8102 static ssize_t
8103 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8104                          loff_t *ppos)
8105 {
8106         void *tr_index = filp->private_data;
8107         struct trace_array *tr;
8108         unsigned int index;
8109         unsigned long val;
8110         int ret;
8111
8112         get_tr_index(tr_index, &tr, &index);
8113
8114         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8115         if (ret)
8116                 return ret;
8117
8118         if (val != 0 && val != 1)
8119                 return -EINVAL;
8120
8121         mutex_lock(&event_mutex);
8122         mutex_lock(&trace_types_lock);
8123         ret = set_tracer_flag(tr, 1 << index, val);
8124         mutex_unlock(&trace_types_lock);
8125         mutex_unlock(&event_mutex);
8126
8127         if (ret < 0)
8128                 return ret;
8129
8130         *ppos += cnt;
8131
8132         return cnt;
8133 }
8134
8135 static const struct file_operations trace_options_core_fops = {
8136         .open = tracing_open_generic,
8137         .read = trace_options_core_read,
8138         .write = trace_options_core_write,
8139         .llseek = generic_file_llseek,
8140 };
8141
8142 struct dentry *trace_create_file(const char *name,
8143                                  umode_t mode,
8144                                  struct dentry *parent,
8145                                  void *data,
8146                                  const struct file_operations *fops)
8147 {
8148         struct dentry *ret;
8149
8150         ret = tracefs_create_file(name, mode, parent, data, fops);
8151         if (!ret)
8152                 pr_warn("Could not create tracefs '%s' entry\n", name);
8153
8154         return ret;
8155 }
8156
8157
8158 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8159 {
8160         struct dentry *d_tracer;
8161
8162         if (tr->options)
8163                 return tr->options;
8164
8165         d_tracer = tracing_get_dentry(tr);
8166         if (IS_ERR(d_tracer))
8167                 return NULL;
8168
8169         tr->options = tracefs_create_dir("options", d_tracer);
8170         if (!tr->options) {
8171                 pr_warn("Could not create tracefs directory 'options'\n");
8172                 return NULL;
8173         }
8174
8175         return tr->options;
8176 }
8177
8178 static void
8179 create_trace_option_file(struct trace_array *tr,
8180                          struct trace_option_dentry *topt,
8181                          struct tracer_flags *flags,
8182                          struct tracer_opt *opt)
8183 {
8184         struct dentry *t_options;
8185
8186         t_options = trace_options_init_dentry(tr);
8187         if (!t_options)
8188                 return;
8189
8190         topt->flags = flags;
8191         topt->opt = opt;
8192         topt->tr = tr;
8193
8194         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8195                                     &trace_options_fops);
8196
8197 }
8198
8199 static void
8200 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8201 {
8202         struct trace_option_dentry *topts;
8203         struct trace_options *tr_topts;
8204         struct tracer_flags *flags;
8205         struct tracer_opt *opts;
8206         int cnt;
8207         int i;
8208
8209         if (!tracer)
8210                 return;
8211
8212         flags = tracer->flags;
8213
8214         if (!flags || !flags->opts)
8215                 return;
8216
8217         /*
8218          * If this is an instance, only create flags for tracers
8219          * the instance may have.
8220          */
8221         if (!trace_ok_for_array(tracer, tr))
8222                 return;
8223
8224         for (i = 0; i < tr->nr_topts; i++) {
8225                 /* Make sure there's no duplicate flags. */
8226                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8227                         return;
8228         }
8229
8230         opts = flags->opts;
8231
8232         for (cnt = 0; opts[cnt].name; cnt++)
8233                 ;
8234
8235         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8236         if (!topts)
8237                 return;
8238
8239         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8240                             GFP_KERNEL);
8241         if (!tr_topts) {
8242                 kfree(topts);
8243                 return;
8244         }
8245
8246         tr->topts = tr_topts;
8247         tr->topts[tr->nr_topts].tracer = tracer;
8248         tr->topts[tr->nr_topts].topts = topts;
8249         tr->nr_topts++;
8250
8251         for (cnt = 0; opts[cnt].name; cnt++) {
8252                 create_trace_option_file(tr, &topts[cnt], flags,
8253                                          &opts[cnt]);
8254                 MEM_FAIL(topts[cnt].entry == NULL,
8255                           "Failed to create trace option: %s",
8256                           opts[cnt].name);
8257         }
8258 }
8259
8260 static struct dentry *
8261 create_trace_option_core_file(struct trace_array *tr,
8262                               const char *option, long index)
8263 {
8264         struct dentry *t_options;
8265
8266         t_options = trace_options_init_dentry(tr);
8267         if (!t_options)
8268                 return NULL;
8269
8270         return trace_create_file(option, 0644, t_options,
8271                                  (void *)&tr->trace_flags_index[index],
8272                                  &trace_options_core_fops);
8273 }
8274
8275 static void create_trace_options_dir(struct trace_array *tr)
8276 {
8277         struct dentry *t_options;
8278         bool top_level = tr == &global_trace;
8279         int i;
8280
8281         t_options = trace_options_init_dentry(tr);
8282         if (!t_options)
8283                 return;
8284
8285         for (i = 0; trace_options[i]; i++) {
8286                 if (top_level ||
8287                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8288                         create_trace_option_core_file(tr, trace_options[i], i);
8289         }
8290 }
8291
8292 static ssize_t
8293 rb_simple_read(struct file *filp, char __user *ubuf,
8294                size_t cnt, loff_t *ppos)
8295 {
8296         struct trace_array *tr = filp->private_data;
8297         char buf[64];
8298         int r;
8299
8300         r = tracer_tracing_is_on(tr);
8301         r = sprintf(buf, "%d\n", r);
8302
8303         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8304 }
8305
8306 static ssize_t
8307 rb_simple_write(struct file *filp, const char __user *ubuf,
8308                 size_t cnt, loff_t *ppos)
8309 {
8310         struct trace_array *tr = filp->private_data;
8311         struct trace_buffer *buffer = tr->array_buffer.buffer;
8312         unsigned long val;
8313         int ret;
8314
8315         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8316         if (ret)
8317                 return ret;
8318
8319         if (buffer) {
8320                 mutex_lock(&trace_types_lock);
8321                 if (!!val == tracer_tracing_is_on(tr)) {
8322                         val = 0; /* do nothing */
8323                 } else if (val) {
8324                         tracer_tracing_on(tr);
8325                         if (tr->current_trace->start)
8326                                 tr->current_trace->start(tr);
8327                 } else {
8328                         tracer_tracing_off(tr);
8329                         if (tr->current_trace->stop)
8330                                 tr->current_trace->stop(tr);
8331                 }
8332                 mutex_unlock(&trace_types_lock);
8333         }
8334
8335         (*ppos)++;
8336
8337         return cnt;
8338 }
8339
8340 static const struct file_operations rb_simple_fops = {
8341         .open           = tracing_open_generic_tr,
8342         .read           = rb_simple_read,
8343         .write          = rb_simple_write,
8344         .release        = tracing_release_generic_tr,
8345         .llseek         = default_llseek,
8346 };
8347
8348 static ssize_t
8349 buffer_percent_read(struct file *filp, char __user *ubuf,
8350                     size_t cnt, loff_t *ppos)
8351 {
8352         struct trace_array *tr = filp->private_data;
8353         char buf[64];
8354         int r;
8355
8356         r = tr->buffer_percent;
8357         r = sprintf(buf, "%d\n", r);
8358
8359         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8360 }
8361
8362 static ssize_t
8363 buffer_percent_write(struct file *filp, const char __user *ubuf,
8364                      size_t cnt, loff_t *ppos)
8365 {
8366         struct trace_array *tr = filp->private_data;
8367         unsigned long val;
8368         int ret;
8369
8370         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8371         if (ret)
8372                 return ret;
8373
8374         if (val > 100)
8375                 return -EINVAL;
8376
8377         if (!val)
8378                 val = 1;
8379
8380         tr->buffer_percent = val;
8381
8382         (*ppos)++;
8383
8384         return cnt;
8385 }
8386
8387 static const struct file_operations buffer_percent_fops = {
8388         .open           = tracing_open_generic_tr,
8389         .read           = buffer_percent_read,
8390         .write          = buffer_percent_write,
8391         .release        = tracing_release_generic_tr,
8392         .llseek         = default_llseek,
8393 };
8394
8395 static struct dentry *trace_instance_dir;
8396
8397 static void
8398 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8399
8400 static int
8401 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8402 {
8403         enum ring_buffer_flags rb_flags;
8404
8405         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8406
8407         buf->tr = tr;
8408
8409         buf->buffer = ring_buffer_alloc(size, rb_flags);
8410         if (!buf->buffer)
8411                 return -ENOMEM;
8412
8413         buf->data = alloc_percpu(struct trace_array_cpu);
8414         if (!buf->data) {
8415                 ring_buffer_free(buf->buffer);
8416                 buf->buffer = NULL;
8417                 return -ENOMEM;
8418         }
8419
8420         /* Allocate the first page for all buffers */
8421         set_buffer_entries(&tr->array_buffer,
8422                            ring_buffer_size(tr->array_buffer.buffer, 0));
8423
8424         return 0;
8425 }
8426
8427 static int allocate_trace_buffers(struct trace_array *tr, int size)
8428 {
8429         int ret;
8430
8431         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8432         if (ret)
8433                 return ret;
8434
8435 #ifdef CONFIG_TRACER_MAX_TRACE
8436         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8437                                     allocate_snapshot ? size : 1);
8438         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8439                 ring_buffer_free(tr->array_buffer.buffer);
8440                 tr->array_buffer.buffer = NULL;
8441                 free_percpu(tr->array_buffer.data);
8442                 tr->array_buffer.data = NULL;
8443                 return -ENOMEM;
8444         }
8445         tr->allocated_snapshot = allocate_snapshot;
8446
8447         /*
8448          * Only the top level trace array gets its snapshot allocated
8449          * from the kernel command line.
8450          */
8451         allocate_snapshot = false;
8452 #endif
8453         return 0;
8454 }
8455
8456 static void free_trace_buffer(struct array_buffer *buf)
8457 {
8458         if (buf->buffer) {
8459                 ring_buffer_free(buf->buffer);
8460                 buf->buffer = NULL;
8461                 free_percpu(buf->data);
8462                 buf->data = NULL;
8463         }
8464 }
8465
8466 static void free_trace_buffers(struct trace_array *tr)
8467 {
8468         if (!tr)
8469                 return;
8470
8471         free_trace_buffer(&tr->array_buffer);
8472
8473 #ifdef CONFIG_TRACER_MAX_TRACE
8474         free_trace_buffer(&tr->max_buffer);
8475 #endif
8476 }
8477
8478 static void init_trace_flags_index(struct trace_array *tr)
8479 {
8480         int i;
8481
8482         /* Used by the trace options files */
8483         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8484                 tr->trace_flags_index[i] = i;
8485 }
8486
8487 static void __update_tracer_options(struct trace_array *tr)
8488 {
8489         struct tracer *t;
8490
8491         for (t = trace_types; t; t = t->next)
8492                 add_tracer_options(tr, t);
8493 }
8494
8495 static void update_tracer_options(struct trace_array *tr)
8496 {
8497         mutex_lock(&trace_types_lock);
8498         __update_tracer_options(tr);
8499         mutex_unlock(&trace_types_lock);
8500 }
8501
8502 /* Must have trace_types_lock held */
8503 struct trace_array *trace_array_find(const char *instance)
8504 {
8505         struct trace_array *tr, *found = NULL;
8506
8507         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8508                 if (tr->name && strcmp(tr->name, instance) == 0) {
8509                         found = tr;
8510                         break;
8511                 }
8512         }
8513
8514         return found;
8515 }
8516
8517 struct trace_array *trace_array_find_get(const char *instance)
8518 {
8519         struct trace_array *tr;
8520
8521         mutex_lock(&trace_types_lock);
8522         tr = trace_array_find(instance);
8523         if (tr)
8524                 tr->ref++;
8525         mutex_unlock(&trace_types_lock);
8526
8527         return tr;
8528 }
8529
8530 static struct trace_array *trace_array_create(const char *name)
8531 {
8532         struct trace_array *tr;
8533         int ret;
8534
8535         ret = -ENOMEM;
8536         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8537         if (!tr)
8538                 return ERR_PTR(ret);
8539
8540         tr->name = kstrdup(name, GFP_KERNEL);
8541         if (!tr->name)
8542                 goto out_free_tr;
8543
8544         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8545                 goto out_free_tr;
8546
8547         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8548
8549         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8550
8551         raw_spin_lock_init(&tr->start_lock);
8552
8553         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8554
8555         tr->current_trace = &nop_trace;
8556
8557         INIT_LIST_HEAD(&tr->systems);
8558         INIT_LIST_HEAD(&tr->events);
8559         INIT_LIST_HEAD(&tr->hist_vars);
8560         INIT_LIST_HEAD(&tr->err_log);
8561
8562         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8563                 goto out_free_tr;
8564
8565         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8566         if (!tr->dir)
8567                 goto out_free_tr;
8568
8569         ret = event_trace_add_tracer(tr->dir, tr);
8570         if (ret) {
8571                 tracefs_remove(tr->dir);
8572                 goto out_free_tr;
8573         }
8574
8575         ftrace_init_trace_array(tr);
8576
8577         init_tracer_tracefs(tr, tr->dir);
8578         init_trace_flags_index(tr);
8579         __update_tracer_options(tr);
8580
8581         list_add(&tr->list, &ftrace_trace_arrays);
8582
8583         tr->ref++;
8584
8585
8586         return tr;
8587
8588  out_free_tr:
8589         free_trace_buffers(tr);
8590         free_cpumask_var(tr->tracing_cpumask);
8591         kfree(tr->name);
8592         kfree(tr);
8593
8594         return ERR_PTR(ret);
8595 }
8596
8597 static int instance_mkdir(const char *name)
8598 {
8599         struct trace_array *tr;
8600         int ret;
8601
8602         mutex_lock(&event_mutex);
8603         mutex_lock(&trace_types_lock);
8604
8605         ret = -EEXIST;
8606         if (trace_array_find(name))
8607                 goto out_unlock;
8608
8609         tr = trace_array_create(name);
8610
8611         ret = PTR_ERR_OR_ZERO(tr);
8612
8613 out_unlock:
8614         mutex_unlock(&trace_types_lock);
8615         mutex_unlock(&event_mutex);
8616         return ret;
8617 }
8618
8619 /**
8620  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8621  * @name: The name of the trace array to be looked up/created.
8622  *
8623  * Returns pointer to trace array with given name.
8624  * NULL, if it cannot be created.
8625  *
8626  * NOTE: This function increments the reference counter associated with the
8627  * trace array returned. This makes sure it cannot be freed while in use.
8628  * Use trace_array_put() once the trace array is no longer needed.
8629  * If the trace_array is to be freed, trace_array_destroy() needs to
8630  * be called after the trace_array_put(), or simply let user space delete
8631  * it from the tracefs instances directory. But until the
8632  * trace_array_put() is called, user space can not delete it.
8633  *
8634  */
8635 struct trace_array *trace_array_get_by_name(const char *name)
8636 {
8637         struct trace_array *tr;
8638
8639         mutex_lock(&event_mutex);
8640         mutex_lock(&trace_types_lock);
8641
8642         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8643                 if (tr->name && strcmp(tr->name, name) == 0)
8644                         goto out_unlock;
8645         }
8646
8647         tr = trace_array_create(name);
8648
8649         if (IS_ERR(tr))
8650                 tr = NULL;
8651 out_unlock:
8652         if (tr)
8653                 tr->ref++;
8654
8655         mutex_unlock(&trace_types_lock);
8656         mutex_unlock(&event_mutex);
8657         return tr;
8658 }
8659 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8660
8661 static int __remove_instance(struct trace_array *tr)
8662 {
8663         int i;
8664
8665         /* Reference counter for a newly created trace array = 1. */
8666         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8667                 return -EBUSY;
8668
8669         list_del(&tr->list);
8670
8671         /* Disable all the flags that were enabled coming in */
8672         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8673                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8674                         set_tracer_flag(tr, 1 << i, 0);
8675         }
8676
8677         tracing_set_nop(tr);
8678         clear_ftrace_function_probes(tr);
8679         event_trace_del_tracer(tr);
8680         ftrace_clear_pids(tr);
8681         ftrace_destroy_function_files(tr);
8682         tracefs_remove(tr->dir);
8683         free_trace_buffers(tr);
8684
8685         for (i = 0; i < tr->nr_topts; i++) {
8686                 kfree(tr->topts[i].topts);
8687         }
8688         kfree(tr->topts);
8689
8690         free_cpumask_var(tr->tracing_cpumask);
8691         kfree(tr->name);
8692         kfree(tr);
8693         tr = NULL;
8694
8695         return 0;
8696 }
8697
8698 int trace_array_destroy(struct trace_array *this_tr)
8699 {
8700         struct trace_array *tr;
8701         int ret;
8702
8703         if (!this_tr)
8704                 return -EINVAL;
8705
8706         mutex_lock(&event_mutex);
8707         mutex_lock(&trace_types_lock);
8708
8709         ret = -ENODEV;
8710
8711         /* Making sure trace array exists before destroying it. */
8712         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8713                 if (tr == this_tr) {
8714                         ret = __remove_instance(tr);
8715                         break;
8716                 }
8717         }
8718
8719         mutex_unlock(&trace_types_lock);
8720         mutex_unlock(&event_mutex);
8721
8722         return ret;
8723 }
8724 EXPORT_SYMBOL_GPL(trace_array_destroy);
8725
8726 static int instance_rmdir(const char *name)
8727 {
8728         struct trace_array *tr;
8729         int ret;
8730
8731         mutex_lock(&event_mutex);
8732         mutex_lock(&trace_types_lock);
8733
8734         ret = -ENODEV;
8735         tr = trace_array_find(name);
8736         if (tr)
8737                 ret = __remove_instance(tr);
8738
8739         mutex_unlock(&trace_types_lock);
8740         mutex_unlock(&event_mutex);
8741
8742         return ret;
8743 }
8744
8745 static __init void create_trace_instances(struct dentry *d_tracer)
8746 {
8747         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8748                                                          instance_mkdir,
8749                                                          instance_rmdir);
8750         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8751                 return;
8752 }
8753
8754 static void
8755 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8756 {
8757         struct trace_event_file *file;
8758         int cpu;
8759
8760         trace_create_file("available_tracers", 0444, d_tracer,
8761                         tr, &show_traces_fops);
8762
8763         trace_create_file("current_tracer", 0644, d_tracer,
8764                         tr, &set_tracer_fops);
8765
8766         trace_create_file("tracing_cpumask", 0644, d_tracer,
8767                           tr, &tracing_cpumask_fops);
8768
8769         trace_create_file("trace_options", 0644, d_tracer,
8770                           tr, &tracing_iter_fops);
8771
8772         trace_create_file("trace", 0644, d_tracer,
8773                           tr, &tracing_fops);
8774
8775         trace_create_file("trace_pipe", 0444, d_tracer,
8776                           tr, &tracing_pipe_fops);
8777
8778         trace_create_file("buffer_size_kb", 0644, d_tracer,
8779                           tr, &tracing_entries_fops);
8780
8781         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8782                           tr, &tracing_total_entries_fops);
8783
8784         trace_create_file("free_buffer", 0200, d_tracer,
8785                           tr, &tracing_free_buffer_fops);
8786
8787         trace_create_file("trace_marker", 0220, d_tracer,
8788                           tr, &tracing_mark_fops);
8789
8790         file = __find_event_file(tr, "ftrace", "print");
8791         if (file && file->dir)
8792                 trace_create_file("trigger", 0644, file->dir, file,
8793                                   &event_trigger_fops);
8794         tr->trace_marker_file = file;
8795
8796         trace_create_file("trace_marker_raw", 0220, d_tracer,
8797                           tr, &tracing_mark_raw_fops);
8798
8799         trace_create_file("trace_clock", 0644, d_tracer, tr,
8800                           &trace_clock_fops);
8801
8802         trace_create_file("tracing_on", 0644, d_tracer,
8803                           tr, &rb_simple_fops);
8804
8805         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8806                           &trace_time_stamp_mode_fops);
8807
8808         tr->buffer_percent = 50;
8809
8810         trace_create_file("buffer_percent", 0444, d_tracer,
8811                         tr, &buffer_percent_fops);
8812
8813         create_trace_options_dir(tr);
8814
8815 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8816         trace_create_maxlat_file(tr, d_tracer);
8817 #endif
8818
8819         if (ftrace_create_function_files(tr, d_tracer))
8820                 MEM_FAIL(1, "Could not allocate function filter files");
8821
8822 #ifdef CONFIG_TRACER_SNAPSHOT
8823         trace_create_file("snapshot", 0644, d_tracer,
8824                           tr, &snapshot_fops);
8825 #endif
8826
8827         trace_create_file("error_log", 0644, d_tracer,
8828                           tr, &tracing_err_log_fops);
8829
8830         for_each_tracing_cpu(cpu)
8831                 tracing_init_tracefs_percpu(tr, cpu);
8832
8833         ftrace_init_tracefs(tr, d_tracer);
8834 }
8835
8836 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8837 {
8838         struct vfsmount *mnt;
8839         struct file_system_type *type;
8840
8841         /*
8842          * To maintain backward compatibility for tools that mount
8843          * debugfs to get to the tracing facility, tracefs is automatically
8844          * mounted to the debugfs/tracing directory.
8845          */
8846         type = get_fs_type("tracefs");
8847         if (!type)
8848                 return NULL;
8849         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8850         put_filesystem(type);
8851         if (IS_ERR(mnt))
8852                 return NULL;
8853         mntget(mnt);
8854
8855         return mnt;
8856 }
8857
8858 /**
8859  * tracing_init_dentry - initialize top level trace array
8860  *
8861  * This is called when creating files or directories in the tracing
8862  * directory. It is called via fs_initcall() by any of the boot up code
8863  * and expects to return the dentry of the top level tracing directory.
8864  */
8865 struct dentry *tracing_init_dentry(void)
8866 {
8867         struct trace_array *tr = &global_trace;
8868
8869         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8870                 pr_warn("Tracing disabled due to lockdown\n");
8871                 return ERR_PTR(-EPERM);
8872         }
8873
8874         /* The top level trace array uses  NULL as parent */
8875         if (tr->dir)
8876                 return NULL;
8877
8878         if (WARN_ON(!tracefs_initialized()) ||
8879                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8880                  WARN_ON(!debugfs_initialized())))
8881                 return ERR_PTR(-ENODEV);
8882
8883         /*
8884          * As there may still be users that expect the tracing
8885          * files to exist in debugfs/tracing, we must automount
8886          * the tracefs file system there, so older tools still
8887          * work with the newer kerenl.
8888          */
8889         tr->dir = debugfs_create_automount("tracing", NULL,
8890                                            trace_automount, NULL);
8891
8892         return NULL;
8893 }
8894
8895 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8896 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8897
8898 static void __init trace_eval_init(void)
8899 {
8900         int len;
8901
8902         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8903         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8904 }
8905
8906 #ifdef CONFIG_MODULES
8907 static void trace_module_add_evals(struct module *mod)
8908 {
8909         if (!mod->num_trace_evals)
8910                 return;
8911
8912         /*
8913          * Modules with bad taint do not have events created, do
8914          * not bother with enums either.
8915          */
8916         if (trace_module_has_bad_taint(mod))
8917                 return;
8918
8919         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8920 }
8921
8922 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8923 static void trace_module_remove_evals(struct module *mod)
8924 {
8925         union trace_eval_map_item *map;
8926         union trace_eval_map_item **last = &trace_eval_maps;
8927
8928         if (!mod->num_trace_evals)
8929                 return;
8930
8931         mutex_lock(&trace_eval_mutex);
8932
8933         map = trace_eval_maps;
8934
8935         while (map) {
8936                 if (map->head.mod == mod)
8937                         break;
8938                 map = trace_eval_jmp_to_tail(map);
8939                 last = &map->tail.next;
8940                 map = map->tail.next;
8941         }
8942         if (!map)
8943                 goto out;
8944
8945         *last = trace_eval_jmp_to_tail(map)->tail.next;
8946         kfree(map);
8947  out:
8948         mutex_unlock(&trace_eval_mutex);
8949 }
8950 #else
8951 static inline void trace_module_remove_evals(struct module *mod) { }
8952 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8953
8954 static int trace_module_notify(struct notifier_block *self,
8955                                unsigned long val, void *data)
8956 {
8957         struct module *mod = data;
8958
8959         switch (val) {
8960         case MODULE_STATE_COMING:
8961                 trace_module_add_evals(mod);
8962                 break;
8963         case MODULE_STATE_GOING:
8964                 trace_module_remove_evals(mod);
8965                 break;
8966         }
8967
8968         return 0;
8969 }
8970
8971 static struct notifier_block trace_module_nb = {
8972         .notifier_call = trace_module_notify,
8973         .priority = 0,
8974 };
8975 #endif /* CONFIG_MODULES */
8976
8977 static __init int tracer_init_tracefs(void)
8978 {
8979         struct dentry *d_tracer;
8980
8981         trace_access_lock_init();
8982
8983         d_tracer = tracing_init_dentry();
8984         if (IS_ERR(d_tracer))
8985                 return 0;
8986
8987         event_trace_init();
8988
8989         init_tracer_tracefs(&global_trace, d_tracer);
8990         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8991
8992         trace_create_file("tracing_thresh", 0644, d_tracer,
8993                         &global_trace, &tracing_thresh_fops);
8994
8995         trace_create_file("README", 0444, d_tracer,
8996                         NULL, &tracing_readme_fops);
8997
8998         trace_create_file("saved_cmdlines", 0444, d_tracer,
8999                         NULL, &tracing_saved_cmdlines_fops);
9000
9001         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9002                           NULL, &tracing_saved_cmdlines_size_fops);
9003
9004         trace_create_file("saved_tgids", 0444, d_tracer,
9005                         NULL, &tracing_saved_tgids_fops);
9006
9007         trace_eval_init();
9008
9009         trace_create_eval_file(d_tracer);
9010
9011 #ifdef CONFIG_MODULES
9012         register_module_notifier(&trace_module_nb);
9013 #endif
9014
9015 #ifdef CONFIG_DYNAMIC_FTRACE
9016         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9017                         NULL, &tracing_dyn_info_fops);
9018 #endif
9019
9020         create_trace_instances(d_tracer);
9021
9022         update_tracer_options(&global_trace);
9023
9024         return 0;
9025 }
9026
9027 static int trace_panic_handler(struct notifier_block *this,
9028                                unsigned long event, void *unused)
9029 {
9030         if (ftrace_dump_on_oops)
9031                 ftrace_dump(ftrace_dump_on_oops);
9032         return NOTIFY_OK;
9033 }
9034
9035 static struct notifier_block trace_panic_notifier = {
9036         .notifier_call  = trace_panic_handler,
9037         .next           = NULL,
9038         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9039 };
9040
9041 static int trace_die_handler(struct notifier_block *self,
9042                              unsigned long val,
9043                              void *data)
9044 {
9045         switch (val) {
9046         case DIE_OOPS:
9047                 if (ftrace_dump_on_oops)
9048                         ftrace_dump(ftrace_dump_on_oops);
9049                 break;
9050         default:
9051                 break;
9052         }
9053         return NOTIFY_OK;
9054 }
9055
9056 static struct notifier_block trace_die_notifier = {
9057         .notifier_call = trace_die_handler,
9058         .priority = 200
9059 };
9060
9061 /*
9062  * printk is set to max of 1024, we really don't need it that big.
9063  * Nothing should be printing 1000 characters anyway.
9064  */
9065 #define TRACE_MAX_PRINT         1000
9066
9067 /*
9068  * Define here KERN_TRACE so that we have one place to modify
9069  * it if we decide to change what log level the ftrace dump
9070  * should be at.
9071  */
9072 #define KERN_TRACE              KERN_EMERG
9073
9074 void
9075 trace_printk_seq(struct trace_seq *s)
9076 {
9077         /* Probably should print a warning here. */
9078         if (s->seq.len >= TRACE_MAX_PRINT)
9079                 s->seq.len = TRACE_MAX_PRINT;
9080
9081         /*
9082          * More paranoid code. Although the buffer size is set to
9083          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9084          * an extra layer of protection.
9085          */
9086         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9087                 s->seq.len = s->seq.size - 1;
9088
9089         /* should be zero ended, but we are paranoid. */
9090         s->buffer[s->seq.len] = 0;
9091
9092         printk(KERN_TRACE "%s", s->buffer);
9093
9094         trace_seq_init(s);
9095 }
9096
9097 void trace_init_global_iter(struct trace_iterator *iter)
9098 {
9099         iter->tr = &global_trace;
9100         iter->trace = iter->tr->current_trace;
9101         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9102         iter->array_buffer = &global_trace.array_buffer;
9103
9104         if (iter->trace && iter->trace->open)
9105                 iter->trace->open(iter);
9106
9107         /* Annotate start of buffers if we had overruns */
9108         if (ring_buffer_overruns(iter->array_buffer->buffer))
9109                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9110
9111         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9112         if (trace_clocks[iter->tr->clock_id].in_ns)
9113                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9114 }
9115
9116 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9117 {
9118         /* use static because iter can be a bit big for the stack */
9119         static struct trace_iterator iter;
9120         static atomic_t dump_running;
9121         struct trace_array *tr = &global_trace;
9122         unsigned int old_userobj;
9123         unsigned long flags;
9124         int cnt = 0, cpu;
9125
9126         /* Only allow one dump user at a time. */
9127         if (atomic_inc_return(&dump_running) != 1) {
9128                 atomic_dec(&dump_running);
9129                 return;
9130         }
9131
9132         /*
9133          * Always turn off tracing when we dump.
9134          * We don't need to show trace output of what happens
9135          * between multiple crashes.
9136          *
9137          * If the user does a sysrq-z, then they can re-enable
9138          * tracing with echo 1 > tracing_on.
9139          */
9140         tracing_off();
9141
9142         local_irq_save(flags);
9143         printk_nmi_direct_enter();
9144
9145         /* Simulate the iterator */
9146         trace_init_global_iter(&iter);
9147
9148         for_each_tracing_cpu(cpu) {
9149                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9150         }
9151
9152         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9153
9154         /* don't look at user memory in panic mode */
9155         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9156
9157         switch (oops_dump_mode) {
9158         case DUMP_ALL:
9159                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9160                 break;
9161         case DUMP_ORIG:
9162                 iter.cpu_file = raw_smp_processor_id();
9163                 break;
9164         case DUMP_NONE:
9165                 goto out_enable;
9166         default:
9167                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9168                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9169         }
9170
9171         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9172
9173         /* Did function tracer already get disabled? */
9174         if (ftrace_is_dead()) {
9175                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9176                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9177         }
9178
9179         /*
9180          * We need to stop all tracing on all CPUS to read the
9181          * the next buffer. This is a bit expensive, but is
9182          * not done often. We fill all what we can read,
9183          * and then release the locks again.
9184          */
9185
9186         while (!trace_empty(&iter)) {
9187
9188                 if (!cnt)
9189                         printk(KERN_TRACE "---------------------------------\n");
9190
9191                 cnt++;
9192
9193                 trace_iterator_reset(&iter);
9194                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9195
9196                 if (trace_find_next_entry_inc(&iter) != NULL) {
9197                         int ret;
9198
9199                         ret = print_trace_line(&iter);
9200                         if (ret != TRACE_TYPE_NO_CONSUME)
9201                                 trace_consume(&iter);
9202                 }
9203                 touch_nmi_watchdog();
9204
9205                 trace_printk_seq(&iter.seq);
9206         }
9207
9208         if (!cnt)
9209                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9210         else
9211                 printk(KERN_TRACE "---------------------------------\n");
9212
9213  out_enable:
9214         tr->trace_flags |= old_userobj;
9215
9216         for_each_tracing_cpu(cpu) {
9217                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9218         }
9219         atomic_dec(&dump_running);
9220         printk_nmi_direct_exit();
9221         local_irq_restore(flags);
9222 }
9223 EXPORT_SYMBOL_GPL(ftrace_dump);
9224
9225 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9226 {
9227         char **argv;
9228         int argc, ret;
9229
9230         argc = 0;
9231         ret = 0;
9232         argv = argv_split(GFP_KERNEL, buf, &argc);
9233         if (!argv)
9234                 return -ENOMEM;
9235
9236         if (argc)
9237                 ret = createfn(argc, argv);
9238
9239         argv_free(argv);
9240
9241         return ret;
9242 }
9243
9244 #define WRITE_BUFSIZE  4096
9245
9246 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9247                                 size_t count, loff_t *ppos,
9248                                 int (*createfn)(int, char **))
9249 {
9250         char *kbuf, *buf, *tmp;
9251         int ret = 0;
9252         size_t done = 0;
9253         size_t size;
9254
9255         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9256         if (!kbuf)
9257                 return -ENOMEM;
9258
9259         while (done < count) {
9260                 size = count - done;
9261
9262                 if (size >= WRITE_BUFSIZE)
9263                         size = WRITE_BUFSIZE - 1;
9264
9265                 if (copy_from_user(kbuf, buffer + done, size)) {
9266                         ret = -EFAULT;
9267                         goto out;
9268                 }
9269                 kbuf[size] = '\0';
9270                 buf = kbuf;
9271                 do {
9272                         tmp = strchr(buf, '\n');
9273                         if (tmp) {
9274                                 *tmp = '\0';
9275                                 size = tmp - buf + 1;
9276                         } else {
9277                                 size = strlen(buf);
9278                                 if (done + size < count) {
9279                                         if (buf != kbuf)
9280                                                 break;
9281                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9282                                         pr_warn("Line length is too long: Should be less than %d\n",
9283                                                 WRITE_BUFSIZE - 2);
9284                                         ret = -EINVAL;
9285                                         goto out;
9286                                 }
9287                         }
9288                         done += size;
9289
9290                         /* Remove comments */
9291                         tmp = strchr(buf, '#');
9292
9293                         if (tmp)
9294                                 *tmp = '\0';
9295
9296                         ret = trace_run_command(buf, createfn);
9297                         if (ret)
9298                                 goto out;
9299                         buf += size;
9300
9301                 } while (done < count);
9302         }
9303         ret = done;
9304
9305 out:
9306         kfree(kbuf);
9307
9308         return ret;
9309 }
9310
9311 __init static int tracer_alloc_buffers(void)
9312 {
9313         int ring_buf_size;
9314         int ret = -ENOMEM;
9315
9316
9317         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9318                 pr_warn("Tracing disabled due to lockdown\n");
9319                 return -EPERM;
9320         }
9321
9322         /*
9323          * Make sure we don't accidently add more trace options
9324          * than we have bits for.
9325          */
9326         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9327
9328         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9329                 goto out;
9330
9331         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9332                 goto out_free_buffer_mask;
9333
9334         /* Only allocate trace_printk buffers if a trace_printk exists */
9335         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9336                 /* Must be called before global_trace.buffer is allocated */
9337                 trace_printk_init_buffers();
9338
9339         /* To save memory, keep the ring buffer size to its minimum */
9340         if (ring_buffer_expanded)
9341                 ring_buf_size = trace_buf_size;
9342         else
9343                 ring_buf_size = 1;
9344
9345         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9346         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9347
9348         raw_spin_lock_init(&global_trace.start_lock);
9349
9350         /*
9351          * The prepare callbacks allocates some memory for the ring buffer. We
9352          * don't free the buffer if the if the CPU goes down. If we were to free
9353          * the buffer, then the user would lose any trace that was in the
9354          * buffer. The memory will be removed once the "instance" is removed.
9355          */
9356         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9357                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9358                                       NULL);
9359         if (ret < 0)
9360                 goto out_free_cpumask;
9361         /* Used for event triggers */
9362         ret = -ENOMEM;
9363         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9364         if (!temp_buffer)
9365                 goto out_rm_hp_state;
9366
9367         if (trace_create_savedcmd() < 0)
9368                 goto out_free_temp_buffer;
9369
9370         /* TODO: make the number of buffers hot pluggable with CPUS */
9371         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9372                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9373                 goto out_free_savedcmd;
9374         }
9375
9376         if (global_trace.buffer_disabled)
9377                 tracing_off();
9378
9379         if (trace_boot_clock) {
9380                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9381                 if (ret < 0)
9382                         pr_warn("Trace clock %s not defined, going back to default\n",
9383                                 trace_boot_clock);
9384         }
9385
9386         /*
9387          * register_tracer() might reference current_trace, so it
9388          * needs to be set before we register anything. This is
9389          * just a bootstrap of current_trace anyway.
9390          */
9391         global_trace.current_trace = &nop_trace;
9392
9393         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9394
9395         ftrace_init_global_array_ops(&global_trace);
9396
9397         init_trace_flags_index(&global_trace);
9398
9399         register_tracer(&nop_trace);
9400
9401         /* Function tracing may start here (via kernel command line) */
9402         init_function_trace();
9403
9404         /* All seems OK, enable tracing */
9405         tracing_disabled = 0;
9406
9407         atomic_notifier_chain_register(&panic_notifier_list,
9408                                        &trace_panic_notifier);
9409
9410         register_die_notifier(&trace_die_notifier);
9411
9412         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9413
9414         INIT_LIST_HEAD(&global_trace.systems);
9415         INIT_LIST_HEAD(&global_trace.events);
9416         INIT_LIST_HEAD(&global_trace.hist_vars);
9417         INIT_LIST_HEAD(&global_trace.err_log);
9418         list_add(&global_trace.list, &ftrace_trace_arrays);
9419
9420         apply_trace_boot_options();
9421
9422         register_snapshot_cmd();
9423
9424         return 0;
9425
9426 out_free_savedcmd:
9427         free_saved_cmdlines_buffer(savedcmd);
9428 out_free_temp_buffer:
9429         ring_buffer_free(temp_buffer);
9430 out_rm_hp_state:
9431         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9432 out_free_cpumask:
9433         free_cpumask_var(global_trace.tracing_cpumask);
9434 out_free_buffer_mask:
9435         free_cpumask_var(tracing_buffer_mask);
9436 out:
9437         return ret;
9438 }
9439
9440 void __init early_trace_init(void)
9441 {
9442         if (tracepoint_printk) {
9443                 tracepoint_print_iter =
9444                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9445                 if (MEM_FAIL(!tracepoint_print_iter,
9446                              "Failed to allocate trace iterator\n"))
9447                         tracepoint_printk = 0;
9448                 else
9449                         static_key_enable(&tracepoint_printk_key.key);
9450         }
9451         tracer_alloc_buffers();
9452 }
9453
9454 void __init trace_init(void)
9455 {
9456         trace_event_init();
9457 }
9458
9459 __init static int clear_boot_tracer(void)
9460 {
9461         /*
9462          * The default tracer at boot buffer is an init section.
9463          * This function is called in lateinit. If we did not
9464          * find the boot tracer, then clear it out, to prevent
9465          * later registration from accessing the buffer that is
9466          * about to be freed.
9467          */
9468         if (!default_bootup_tracer)
9469                 return 0;
9470
9471         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9472                default_bootup_tracer);
9473         default_bootup_tracer = NULL;
9474
9475         return 0;
9476 }
9477
9478 fs_initcall(tracer_init_tracefs);
9479 late_initcall_sync(clear_boot_tracer);
9480
9481 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9482 __init static int tracing_set_default_clock(void)
9483 {
9484         /* sched_clock_stable() is determined in late_initcall */
9485         if (!trace_boot_clock && !sched_clock_stable()) {
9486                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9487                         pr_warn("Can not set tracing clock due to lockdown\n");
9488                         return -EPERM;
9489                 }
9490
9491                 printk(KERN_WARNING
9492                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9493                        "If you want to keep using the local clock, then add:\n"
9494                        "  \"trace_clock=local\"\n"
9495                        "on the kernel command line\n");
9496                 tracing_set_clock(&global_trace, "global");
9497         }
9498
9499         return 0;
9500 }
9501 late_initcall_sync(tracing_set_default_clock);
9502 #endif