kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest looks into the ring buffer to count the entries
65  * inserted during the selftest, although concurrent insertions
66  * into the ring buffer, such as trace_printk(), could occur at
67  * the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing (including tracers/events set via the kernel
73  * command line) is running, we do not want to run the selftest.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurs.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is set back to zero only if the
114  * initialization of the tracer is successful; that is the only
115  * place that clears it.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
125  * is set, then ftrace_dump() is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         /* Ignore the "tp_printk_stop_on_boot" param */
256         if (*str == '_')
257                 return 0;
258
259         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260                 tracepoint_printk = 1;
261         return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267         tracepoint_printk_stop_on_boot = true;
268         return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
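
/*
 * For reference, some illustrative kernel command-line combinations that
 * exercise the boot parameters handled above (assuming the named tracers,
 * options and clocks are built into this kernel):
 *
 *      ftrace=function_graph trace_options=sym-addr,stacktrace
 *      trace_clock=global trace_buf_size=1M
 *      alloc_snapshot ftrace_dump_on_oops=orig_cpu
 *      tp_printk traceoff_on_warning
 */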
271
272 unsigned long long ns2usecs(u64 nsec)
273 {
274         nsec += 500;
275         do_div(nsec, 1000);
276         return nsec;
277 }
278
279 static void
280 trace_process_export(struct trace_export *export,
281                struct ring_buffer_event *event, int flag)
282 {
283         struct trace_entry *entry;
284         unsigned int size = 0;
285
286         if (export->flags & flag) {
287                 entry = ring_buffer_event_data(event);
288                 size = ring_buffer_event_length(event);
289                 export->write(export, entry, size);
290         }
291 }
292
293 static DEFINE_MUTEX(ftrace_export_lock);
294
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303         if (export->flags & TRACE_EXPORT_FUNCTION)
304                 static_branch_inc(&trace_function_exports_enabled);
305
306         if (export->flags & TRACE_EXPORT_EVENT)
307                 static_branch_inc(&trace_event_exports_enabled);
308
309         if (export->flags & TRACE_EXPORT_MARKER)
310                 static_branch_inc(&trace_marker_exports_enabled);
311 }
312
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315         if (export->flags & TRACE_EXPORT_FUNCTION)
316                 static_branch_dec(&trace_function_exports_enabled);
317
318         if (export->flags & TRACE_EXPORT_EVENT)
319                 static_branch_dec(&trace_event_exports_enabled);
320
321         if (export->flags & TRACE_EXPORT_MARKER)
322                 static_branch_dec(&trace_marker_exports_enabled);
323 }
324
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327         struct trace_export *export;
328
329         preempt_disable_notrace();
330
331         export = rcu_dereference_raw_check(ftrace_exports_list);
332         while (export) {
333                 trace_process_export(export, event, flag);
334                 export = rcu_dereference_raw_check(export->next);
335         }
336
337         preempt_enable_notrace();
338 }
339
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         rcu_assign_pointer(export->next, *list);
344         /*
345          * We are inserting export into the list but another
346          * CPU might be walking that list. We need to make sure
347          * the export->next pointer is valid before another CPU sees
348          * the export pointer inserted into the list.
349          */
350         rcu_assign_pointer(*list, export);
351 }
352
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356         struct trace_export **p;
357
358         for (p = list; *p != NULL; p = &(*p)->next)
359                 if (*p == export)
360                         break;
361
362         if (*p != export)
363                 return -1;
364
365         rcu_assign_pointer(*p, (*p)->next);
366
367         return 0;
368 }
369
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373         ftrace_exports_enable(export);
374
375         add_trace_export(list, export);
376 }
377
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381         int ret;
382
383         ret = rm_trace_export(list, export);
384         ftrace_exports_disable(export);
385
386         return ret;
387 }
388
389 int register_ftrace_export(struct trace_export *export)
390 {
391         if (WARN_ON_ONCE(!export->write))
392                 return -1;
393
394         mutex_lock(&ftrace_export_lock);
395
396         add_ftrace_export(&ftrace_exports_list, export);
397
398         mutex_unlock(&ftrace_export_lock);
399
400         return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406         int ret;
407
408         mutex_lock(&ftrace_export_lock);
409
410         ret = rm_ftrace_export(&ftrace_exports_list, export);
411
412         mutex_unlock(&ftrace_export_lock);
413
414         return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
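
/*
 * A minimal sketch of a trace_export user (not part of this file; the
 * callback and structure names are hypothetical). The ->write() callback
 * receives the raw trace entry and its length for every event class
 * selected by ->flags:
 *
 *      static void my_export_write(struct trace_export *export,
 *                                  const void *entry, unsigned int len)
 *      {
 *              pr_debug("exporting a %u byte trace entry\n", len);
 *      }
 *
 *      static struct trace_export my_export = {
 *              .write  = my_export_write,
 *              .flags  = TRACE_EXPORT_EVENT,
 *      };
 *
 *      register_ftrace_export(&my_export);
 *      ...
 *      unregister_ftrace_export(&my_export);
 */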
417
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS                                             \
420         (FUNCTION_DEFAULT_FLAGS |                                       \
421          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
422          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
423          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
424          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
425          TRACE_ITER_HASH_PTR)
426
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
429                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434
435 /*
436  * The global_trace is the descriptor that holds the top-level tracing
437  * buffers for the live tracing.
438  */
439 static struct trace_array global_trace = {
440         .trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442
443 LIST_HEAD(ftrace_trace_arrays);
444
445 int trace_array_get(struct trace_array *this_tr)
446 {
447         struct trace_array *tr;
448         int ret = -ENODEV;
449
450         mutex_lock(&trace_types_lock);
451         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452                 if (tr == this_tr) {
453                         tr->ref++;
454                         ret = 0;
455                         break;
456                 }
457         }
458         mutex_unlock(&trace_types_lock);
459
460         return ret;
461 }
462
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465         WARN_ON(!this_tr->ref);
466         this_tr->ref--;
467 }
468
469 /**
470  * trace_array_put - Decrement the reference counter for this trace array.
471  * @this_tr : pointer to the trace array
472  *
473  * NOTE: Use this when we no longer need the trace array returned by
474  * trace_array_get_by_name(). This ensures the trace array can be later
475  * destroyed.
476  *
477  */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480         if (!this_tr)
481                 return;
482
483         mutex_lock(&trace_types_lock);
484         __trace_array_put(this_tr);
485         mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491         int ret;
492
493         ret = security_locked_down(LOCKDOWN_TRACEFS);
494         if (ret)
495                 return ret;
496
497         if (tracing_disabled)
498                 return -ENODEV;
499
500         if (tr && trace_array_get(tr) < 0)
501                 return -ENODEV;
502
503         return 0;
504 }
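
/*
 * Sketch of the usual open/release pairing built on the helper above
 * (handler names are hypothetical; the file_operations later in this file
 * follow the same pattern):
 *
 *      static int example_open(struct inode *inode, struct file *filp)
 *      {
 *              struct trace_array *tr = inode->i_private;
 *              int ret;
 *
 *              ret = tracing_check_open_get_tr(tr);
 *              if (ret)
 *                      return ret;
 *
 *              filp->private_data = tr;
 *              return 0;
 *      }
 *
 *      static int example_release(struct inode *inode, struct file *filp)
 *      {
 *              struct trace_array *tr = inode->i_private;
 *
 *              trace_array_put(tr);
 *              return 0;
 *      }
 */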
505
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507                               struct trace_buffer *buffer,
508                               struct ring_buffer_event *event)
509 {
510         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511             !filter_match_preds(call->filter, rec)) {
512                 __trace_event_discard_commit(buffer, event);
513                 return 1;
514         }
515
516         return 0;
517 }
518
519 /**
520  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
521  * @filtered_pids: The list of pids to check
522  * @search_pid: The PID to find in @filtered_pids
523  *
524  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
525  */
526 bool
527 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
528 {
529         return trace_pid_list_is_set(filtered_pids, search_pid);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* "self" is set for forks, and NULL for exits */
587         if (self)
588                 trace_pid_list_set(pid_list, task->pid);
589         else
590                 trace_pid_list_clear(pid_list, task->pid);
591 }
592
593 /**
594  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
595  * @pid_list: The pid list to show
596  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
597  * @pos: The position of the file
598  *
599  * This is used by the seq_file "next" operation to iterate the pids
600  * listed in a trace_pid_list structure.
601  *
602  * Returns the pid+1 as we want to display pid of zero, but NULL would
603  * stop the iteration.
604  */
605 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
606 {
607         long pid = (unsigned long)v;
608         unsigned int next;
609
610         (*pos)++;
611
612         /* pid already is +1 of the actual previous bit */
613         if (trace_pid_list_next(pid_list, pid, &next) < 0)
614                 return NULL;
615
616         pid = next;
617
618         /* Return pid + 1 to allow zero to be represented */
619         return (void *)(pid + 1);
620 }
621
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635         unsigned long pid;
636         unsigned int first;
637         loff_t l = 0;
638
639         if (trace_pid_list_first(pid_list, &first) < 0)
640                 return NULL;
641
642         pid = first;
643
644         /* Return pid + 1 so that zero can be the exit value */
645         for (pid++; pid && l < *pos;
646              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
647                 ;
648         return (void *)pid;
649 }
650
651 /**
652  * trace_pid_show - show the current pid in seq_file processing
653  * @m: The seq_file structure to write into
654  * @v: A void pointer of the pid (+1) value to display
655  *
656  * Can be directly used by seq_file operations to display the current
657  * pid value.
658  */
659 int trace_pid_show(struct seq_file *m, void *v)
660 {
661         unsigned long pid = (unsigned long)v - 1;
662
663         seq_printf(m, "%lu\n", pid);
664         return 0;
665 }
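
/*
 * The three helpers above are meant to be wired into a seq_file that lists
 * the pids of a trace_pid_list. A sketch (wrapper names are hypothetical;
 * real users also take whatever lock protects their pid list in the
 * start/stop callbacks):
 *
 *      static void *example_pid_start(struct seq_file *m, loff_t *pos)
 *      {
 *              struct trace_pid_list *pid_list = m->private;
 *
 *              return trace_pid_start(pid_list, pos);
 *      }
 *
 *      static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *      {
 *              struct trace_pid_list *pid_list = m->private;
 *
 *              return trace_pid_next(pid_list, v, pos);
 *      }
 *
 *      static void example_pid_stop(struct seq_file *m, void *v)
 *      {
 *      }
 *
 *      static const struct seq_operations example_pid_sops = {
 *              .start  = example_pid_start,
 *              .next   = example_pid_next,
 *              .stop   = example_pid_stop,
 *              .show   = trace_pid_show,
 *      };
 */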
666
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE            127
669
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671                     struct trace_pid_list **new_pid_list,
672                     const char __user *ubuf, size_t cnt)
673 {
674         struct trace_pid_list *pid_list;
675         struct trace_parser parser;
676         unsigned long val;
677         int nr_pids = 0;
678         ssize_t read = 0;
679         ssize_t ret;
680         loff_t pos;
681         pid_t pid;
682
683         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
684                 return -ENOMEM;
685
686         /*
687          * Always create a new list. The write is an all-or-nothing
688          * operation: a new list is built when the user adds pids, and
689          * if the operation fails, the current list is left
690          * unmodified.
691          */
692         pid_list = trace_pid_list_alloc();
693         if (!pid_list) {
694                 trace_parser_put(&parser);
695                 return -ENOMEM;
696         }
697
698         if (filtered_pids) {
699                 /* copy the current bits to the new max */
700                 ret = trace_pid_list_first(filtered_pids, &pid);
701                 while (!ret) {
702                         trace_pid_list_set(pid_list, pid);
703                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
704                         nr_pids++;
705                 }
706         }
707
708         ret = 0;
709         while (cnt > 0) {
710
711                 pos = 0;
712
713                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
714                 if (ret < 0 || !trace_parser_loaded(&parser))
715                         break;
716
717                 read += ret;
718                 ubuf += ret;
719                 cnt -= ret;
720
721                 ret = -EINVAL;
722                 if (kstrtoul(parser.buffer, 0, &val))
723                         break;
724
725                 pid = (pid_t)val;
726
727                 if (trace_pid_list_set(pid_list, pid) < 0) {
728                         ret = -1;
729                         break;
730                 }
731                 nr_pids++;
732
733                 trace_parser_clear(&parser);
734                 ret = 0;
735         }
736         trace_parser_put(&parser);
737
738         if (ret < 0) {
739                 trace_pid_list_free(pid_list);
740                 return ret;
741         }
742
743         if (!nr_pids) {
744                 /* Cleared the list of pids */
745                 trace_pid_list_free(pid_list);
746                 read = ret;
747                 pid_list = NULL;
748         }
749
750         *new_pid_list = pid_list;
751
752         return read;
753 }
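
/*
 * A hedged sketch of how callers typically install the list returned in
 * @new_pid_list: publish it with RCU and free the old one ("filtered_pids"
 * here stands in for whichever __rcu pointer the caller owns):
 *
 *      ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *      if (ret < 0)
 *              return ret;
 *
 *      rcu_assign_pointer(tr->filtered_pids, pid_list);
 *      if (filtered_pids) {
 *              synchronize_rcu();
 *              trace_pid_list_free(filtered_pids);
 *      }
 */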
754
755 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
756 {
757         u64 ts;
758
759         /* Early boot up does not have a buffer yet */
760         if (!buf->buffer)
761                 return trace_clock_local();
762
763         ts = ring_buffer_time_stamp(buf->buffer);
764         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
765
766         return ts;
767 }
768
769 u64 ftrace_now(int cpu)
770 {
771         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
772 }
773
774 /**
775  * tracing_is_enabled - Show if global_trace has been enabled
776  *
777  * Shows if the global trace has been enabled or not. It uses the
778  * mirror flag "buffer_disabled" so it can be used in fast paths such
779  * as the irqsoff tracer. But it may be inaccurate due to races. If you
780  * need to know the accurate state, use tracing_is_on(), which is a little
781  * slower, but accurate.
782  */
783 int tracing_is_enabled(void)
784 {
785         /*
786          * For quick access (irqsoff uses this in fast path), just
787          * return the mirror variable of the state of the ring buffer.
788          * It's a little racy, but we don't really care.
789          */
790         smp_rmb();
791         return !global_trace.buffer_disabled;
792 }
793
794 /*
795  * trace_buf_size is the size in bytes that is allocated
796  * for a buffer. Note, the number of bytes is always rounded
797  * to page size.
798  *
799  * This number is purposely set to a low number of 16384.
800  * If a dump on oops happens, it is much appreciated not to have
801  * to wait for all that output. This is configurable at boot time
802  * and at run time anyway.
803  */
804 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
805
806 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
807
808 /* trace_types holds a link list of available tracers. */
809 static struct tracer            *trace_types __read_mostly;
810
811 /*
812  * trace_types_lock is used to protect the trace_types list.
813  */
814 DEFINE_MUTEX(trace_types_lock);
815
816 /*
817  * serialize the access of the ring buffer
818  *
819  * The ring buffer serializes readers, but that is only low-level
820  * protection. The validity of the events (returned by ring_buffer_peek()
821  * etc.) is not protected by the ring buffer itself.
822  *
823  * The content of events may become garbage if we allow another process to
824  * consume these events concurrently:
825  *   A) the page of the consumed events may become a normal page
826  *      (not a reader page) in the ring buffer, and this page will be
827  *      rewritten by the events producer.
828  *   B) the page of the consumed events may become a page for splice_read,
829  *      and this page will be returned to the system.
830  *
831  * These primitives allow multiple processes to access different per-CPU
832  *      ring buffers concurrently.
833  *
834  * These primitives don't distinguish read-only from read-consume access.
835  * Multiple read-only accesses are also serialized.
836  */
837
838 #ifdef CONFIG_SMP
839 static DECLARE_RWSEM(all_cpu_access_lock);
840 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
841
842 static inline void trace_access_lock(int cpu)
843 {
844         if (cpu == RING_BUFFER_ALL_CPUS) {
845                 /* gain it for accessing the whole ring buffer. */
846                 down_write(&all_cpu_access_lock);
847         } else {
848                 /* gain it for accessing a cpu ring buffer. */
849
850                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
851                 down_read(&all_cpu_access_lock);
852
853                 /* Secondly block other access to this @cpu ring buffer. */
854                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
855         }
856 }
857
858 static inline void trace_access_unlock(int cpu)
859 {
860         if (cpu == RING_BUFFER_ALL_CPUS) {
861                 up_write(&all_cpu_access_lock);
862         } else {
863                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
864                 up_read(&all_cpu_access_lock);
865         }
866 }
867
868 static inline void trace_access_lock_init(void)
869 {
870         int cpu;
871
872         for_each_possible_cpu(cpu)
873                 mutex_init(&per_cpu(cpu_access_lock, cpu));
874 }
875
876 #else
877
878 static DEFINE_MUTEX(access_lock);
879
880 static inline void trace_access_lock(int cpu)
881 {
882         (void)cpu;
883         mutex_lock(&access_lock);
884 }
885
886 static inline void trace_access_unlock(int cpu)
887 {
888         (void)cpu;
889         mutex_unlock(&access_lock);
890 }
891
892 static inline void trace_access_lock_init(void)
893 {
894 }
895
896 #endif
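
/*
 * Typical shape of a consumer using the locking above to read one CPU's
 * buffer (a sketch; process_event() is a placeholder, and the readers in
 * this file follow the same pattern):
 *
 *      trace_access_lock(cpu);
 *      while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
 *              process_event(event);
 *      trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock for the whole
 * ring buffer and excludes all per-CPU readers.
 */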
897
898 #ifdef CONFIG_STACKTRACE
899 static void __ftrace_trace_stack(struct trace_buffer *buffer,
900                                  unsigned int trace_ctx,
901                                  int skip, struct pt_regs *regs);
902 static inline void ftrace_trace_stack(struct trace_array *tr,
903                                       struct trace_buffer *buffer,
904                                       unsigned int trace_ctx,
905                                       int skip, struct pt_regs *regs);
906
907 #else
908 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
909                                         unsigned int trace_ctx,
910                                         int skip, struct pt_regs *regs)
911 {
912 }
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned long trace_ctx,
916                                       int skip, struct pt_regs *regs)
917 {
918 }
919
920 #endif
921
922 static __always_inline void
923 trace_event_setup(struct ring_buffer_event *event,
924                   int type, unsigned int trace_ctx)
925 {
926         struct trace_entry *ent = ring_buffer_event_data(event);
927
928         tracing_generic_entry_update(ent, type, trace_ctx);
929 }
930
931 static __always_inline struct ring_buffer_event *
932 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
933                           int type,
934                           unsigned long len,
935                           unsigned int trace_ctx)
936 {
937         struct ring_buffer_event *event;
938
939         event = ring_buffer_lock_reserve(buffer, len);
940         if (event != NULL)
941                 trace_event_setup(event, type, trace_ctx);
942
943         return event;
944 }
945
946 void tracer_tracing_on(struct trace_array *tr)
947 {
948         if (tr->array_buffer.buffer)
949                 ring_buffer_record_on(tr->array_buffer.buffer);
950         /*
951          * This flag is looked at when buffers haven't been allocated
952          * yet, or by some tracers (like irqsoff) that just want to
953          * know if the ring buffer has been disabled, but can handle
954          * races where it gets disabled while we still do a record.
955          * As the check is in the fast path of the tracers, it is more
956          * important to be fast than accurate.
957          */
958         tr->buffer_disabled = 0;
959         /* Make the flag seen by readers */
960         smp_wmb();
961 }
962
963 /**
964  * tracing_on - enable tracing buffers
965  *
966  * This function enables tracing buffers that may have been
967  * disabled with tracing_off.
968  */
969 void tracing_on(void)
970 {
971         tracer_tracing_on(&global_trace);
972 }
973 EXPORT_SYMBOL_GPL(tracing_on);
974
975
976 static __always_inline void
977 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
978 {
979         __this_cpu_write(trace_taskinfo_save, true);
980
981         /* If this is the temp buffer, we need to commit fully */
982         if (this_cpu_read(trace_buffered_event) == event) {
983                 /* Length is in event->array[0] */
984                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
985                 /* Release the temp buffer */
986                 this_cpu_dec(trace_buffered_event_cnt);
987                 /* ring_buffer_unlock_commit() enables preemption */
988                 preempt_enable_notrace();
989         } else
990                 ring_buffer_unlock_commit(buffer, event);
991 }
992
993 /**
994  * __trace_puts - write a constant string into the trace buffer.
995  * @ip:    The address of the caller
996  * @str:   The constant string to write
997  * @size:  The size of the string.
998  */
999 int __trace_puts(unsigned long ip, const char *str, int size)
1000 {
1001         struct ring_buffer_event *event;
1002         struct trace_buffer *buffer;
1003         struct print_entry *entry;
1004         unsigned int trace_ctx;
1005         int alloc;
1006
1007         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1008                 return 0;
1009
1010         if (unlikely(tracing_selftest_running || tracing_disabled))
1011                 return 0;
1012
1013         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1014
1015         trace_ctx = tracing_gen_ctx();
1016         buffer = global_trace.array_buffer.buffer;
1017         ring_buffer_nest_start(buffer);
1018         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1019                                             trace_ctx);
1020         if (!event) {
1021                 size = 0;
1022                 goto out;
1023         }
1024
1025         entry = ring_buffer_event_data(event);
1026         entry->ip = ip;
1027
1028         memcpy(&entry->buf, str, size);
1029
1030         /* Add a newline if necessary */
1031         if (entry->buf[size - 1] != '\n') {
1032                 entry->buf[size] = '\n';
1033                 entry->buf[size + 1] = '\0';
1034         } else
1035                 entry->buf[size] = '\0';
1036
1037         __buffer_unlock_commit(buffer, event);
1038         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1039  out:
1040         ring_buffer_nest_end(buffer);
1041         return size;
1042 }
1043 EXPORT_SYMBOL_GPL(__trace_puts);
1044
1045 /**
1046  * __trace_bputs - write the pointer to a constant string into trace buffer
1047  * @ip:    The address of the caller
1048  * @str:   The constant string to write to the buffer to
1049  */
1050 int __trace_bputs(unsigned long ip, const char *str)
1051 {
1052         struct ring_buffer_event *event;
1053         struct trace_buffer *buffer;
1054         struct bputs_entry *entry;
1055         unsigned int trace_ctx;
1056         int size = sizeof(struct bputs_entry);
1057         int ret = 0;
1058
1059         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1060                 return 0;
1061
1062         if (unlikely(tracing_selftest_running || tracing_disabled))
1063                 return 0;
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = global_trace.array_buffer.buffer;
1067
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070                                             trace_ctx);
1071         if (!event)
1072                 goto out;
1073
1074         entry = ring_buffer_event_data(event);
1075         entry->ip                       = ip;
1076         entry->str                      = str;
1077
1078         __buffer_unlock_commit(buffer, event);
1079         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1080
1081         ret = 1;
1082  out:
1083         ring_buffer_nest_end(buffer);
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
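
/*
 * Kernel code normally does not call __trace_puts()/__trace_bputs()
 * directly, but uses the trace_puts() macro, which picks __trace_bputs()
 * for compile-time constant strings and __trace_puts() otherwise, e.g.:
 *
 *      trace_puts("reached the slow path\n");
 */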
1087
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090                                            void *cond_data)
1091 {
1092         struct tracer *tracer = tr->current_trace;
1093         unsigned long flags;
1094
1095         if (in_nmi()) {
1096                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1098                 return;
1099         }
1100
1101         if (!tr->allocated_snapshot) {
1102                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103                 internal_trace_puts("*** stopping trace here!   ***\n");
1104                 tracing_off();
1105                 return;
1106         }
1107
1108         /* Note, the snapshot cannot be used when the tracer uses it */
1109         if (tracer->use_max_tr) {
1110                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112                 return;
1113         }
1114
1115         local_irq_save(flags);
1116         update_max_tr(tr, current, smp_processor_id(), cond_data);
1117         local_irq_restore(flags);
1118 }
1119
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122         tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot either with
1133  * tracing_snapshot_alloc(), or manually with:
1134  * echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, this will stop tracing,
1137  * basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141         struct trace_array *tr = &global_trace;
1142
1143         tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:         The tracing instance to snapshot
1150  * @cond_data:  The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162         tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:         The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already done.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182         void *cond_data = NULL;
1183
1184         arch_spin_lock(&tr->max_lock);
1185
1186         if (tr->cond_snapshot)
1187                 cond_data = tr->cond_snapshot->cond_data;
1188
1189         arch_spin_unlock(&tr->max_lock);
1190
1191         return cond_data;
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1194
1195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1196                                         struct array_buffer *size_buf, int cpu_id);
1197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1198
1199 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1200 {
1201         int ret;
1202
1203         if (!tr->allocated_snapshot) {
1204
1205                 /* allocate spare buffer */
1206                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1207                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1208                 if (ret < 0)
1209                         return ret;
1210
1211                 tr->allocated_snapshot = true;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static void free_snapshot(struct trace_array *tr)
1218 {
1219         /*
1220          * We don't free the ring buffer; instead, we resize it because
1221          * the max_tr ring buffer has some state (e.g. ring->clock) and
1222          * we want to preserve it.
1223          */
1224         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1225         set_buffer_entries(&tr->max_buffer, 1);
1226         tracing_reset_online_cpus(&tr->max_buffer);
1227         tr->allocated_snapshot = false;
1228 }
1229
1230 /**
1231  * tracing_alloc_snapshot - allocate snapshot buffer.
1232  *
1233  * This only allocates the snapshot buffer if it isn't already
1234  * allocated - it doesn't also take a snapshot.
1235  *
1236  * This is meant to be used in cases where the snapshot buffer needs
1237  * to be set up for events that can't sleep but need to be able to
1238  * trigger a snapshot.
1239  */
1240 int tracing_alloc_snapshot(void)
1241 {
1242         struct trace_array *tr = &global_trace;
1243         int ret;
1244
1245         ret = tracing_alloc_snapshot_instance(tr);
1246         WARN_ON(ret < 0);
1247
1248         return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1251
1252 /**
1253  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1254  *
1255  * This is similar to tracing_snapshot(), but it will allocate the
1256  * snapshot buffer if it isn't already allocated. Use this only
1257  * where it is safe to sleep, as the allocation may sleep.
1258  *
1259  * This causes a swap between the snapshot buffer and the current live
1260  * tracing buffer. You can use this to take snapshots of the live
1261  * trace when some condition is triggered, but continue to trace.
1262  */
1263 void tracing_snapshot_alloc(void)
1264 {
1265         int ret;
1266
1267         ret = tracing_alloc_snapshot();
1268         if (ret < 0)
1269                 return;
1270
1271         tracing_snapshot();
1272 }
1273 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
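
/*
 * A hedged sketch of using the snapshot API from a debugging patch:
 * allocate the spare buffer once from a context that may sleep, then take
 * the snapshot from the (possibly atomic) code path being investigated.
 * "suspicious_condition" is a placeholder:
 *
 *      tracing_alloc_snapshot();
 *
 *      if (suspicious_condition)
 *              tracing_snapshot();
 */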
1274
1275 /**
1276  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1277  * @tr:         The tracing instance
1278  * @cond_data:  User data to associate with the snapshot
1279  * @update:     Implementation of the cond_snapshot update function
1280  *
1281  * Check whether the conditional snapshot for the given instance has
1282  * already been enabled, or if the current tracer is already using a
1283  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1284  * save the cond_data and update function inside.
1285  *
1286  * Returns 0 if successful, error otherwise.
1287  */
1288 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1289                                  cond_update_fn_t update)
1290 {
1291         struct cond_snapshot *cond_snapshot;
1292         int ret = 0;
1293
1294         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1295         if (!cond_snapshot)
1296                 return -ENOMEM;
1297
1298         cond_snapshot->cond_data = cond_data;
1299         cond_snapshot->update = update;
1300
1301         mutex_lock(&trace_types_lock);
1302
1303         ret = tracing_alloc_snapshot_instance(tr);
1304         if (ret)
1305                 goto fail_unlock;
1306
1307         if (tr->current_trace->use_max_tr) {
1308                 ret = -EBUSY;
1309                 goto fail_unlock;
1310         }
1311
1312         /*
1313          * The cond_snapshot can only change to NULL without the
1314          * trace_types_lock. We don't care if we race with it going
1315          * to NULL, but we want to make sure that it's not set to
1316          * something other than NULL when we get here, which we can
1317          * do safely with only holding the trace_types_lock and not
1318          * having to take the max_lock.
1319          */
1320         if (tr->cond_snapshot) {
1321                 ret = -EBUSY;
1322                 goto fail_unlock;
1323         }
1324
1325         arch_spin_lock(&tr->max_lock);
1326         tr->cond_snapshot = cond_snapshot;
1327         arch_spin_unlock(&tr->max_lock);
1328
1329         mutex_unlock(&trace_types_lock);
1330
1331         return ret;
1332
1333  fail_unlock:
1334         mutex_unlock(&trace_types_lock);
1335         kfree(cond_snapshot);
1336         return ret;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
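
/*
 * A minimal sketch of a conditional-snapshot user (callback and variable
 * names are hypothetical). The update() callback is invoked from the
 * snapshot path, with tr->max_lock already held, and decides whether the
 * buffer swap actually happens:
 *
 *      static bool my_update(struct trace_array *tr, void *cond_data)
 *      {
 *              unsigned long *threshold = cond_data;
 *
 *              return my_measured_value > *threshold;
 *      }
 *
 *      tracing_snapshot_cond_enable(tr, &my_threshold, my_update);
 *      ...
 *      tracing_snapshot_cond(tr, &my_threshold);
 *      ...
 *      tracing_snapshot_cond_disable(tr);
 */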
1339
1340 /**
1341  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  *
1344  * Check whether the conditional snapshot for the given instance is
1345  * enabled; if so, free the cond_snapshot associated with it,
1346  * otherwise return -EINVAL.
1347  *
1348  * Returns 0 if successful, error otherwise.
1349  */
1350 int tracing_snapshot_cond_disable(struct trace_array *tr)
1351 {
1352         int ret = 0;
1353
1354         arch_spin_lock(&tr->max_lock);
1355
1356         if (!tr->cond_snapshot)
1357                 ret = -EINVAL;
1358         else {
1359                 kfree(tr->cond_snapshot);
1360                 tr->cond_snapshot = NULL;
1361         }
1362
1363         arch_spin_unlock(&tr->max_lock);
1364
1365         return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1368 #else
1369 void tracing_snapshot(void)
1370 {
1371         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot);
1374 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1375 {
1376         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1379 int tracing_alloc_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1382         return -ENODEV;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1385 void tracing_snapshot_alloc(void)
1386 {
1387         /* Give warning */
1388         tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 void *tracing_cond_snapshot_data(struct trace_array *tr)
1392 {
1393         return NULL;
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1396 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1397 {
1398         return -ENODEV;
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1401 int tracing_snapshot_cond_disable(struct trace_array *tr)
1402 {
1403         return false;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1406 #endif /* CONFIG_TRACER_SNAPSHOT */
1407
1408 void tracer_tracing_off(struct trace_array *tr)
1409 {
1410         if (tr->array_buffer.buffer)
1411                 ring_buffer_record_off(tr->array_buffer.buffer);
1412         /*
1413          * This flag is looked at when buffers haven't been allocated
1414          * yet, or by some tracers (like irqsoff) that just want to
1415          * know if the ring buffer has been disabled, but can handle
1416          * races where it gets disabled while we still do a record.
1417          * As the check is in the fast path of the tracers, it is more
1418          * important to be fast than accurate.
1419          */
1420         tr->buffer_disabled = 1;
1421         /* Make the flag seen by readers */
1422         smp_wmb();
1423 }
1424
1425 /**
1426  * tracing_off - turn off tracing buffers
1427  *
1428  * This function stops the tracing buffers from recording data.
1429  * It does not disable any overhead the tracers themselves may
1430  * be causing. This function simply causes all recording to
1431  * the ring buffers to fail.
1432  */
1433 void tracing_off(void)
1434 {
1435         tracer_tracing_off(&global_trace);
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_off);
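
/*
 * tracing_off() pairs with tracing_on() for "flight recorder" style
 * debugging: let the trace run and freeze the buffers as soon as the
 * interesting event happens, so the data is not overwritten (a sketch;
 * "detected_the_bug" is a placeholder):
 *
 *      if (detected_the_bug) {
 *              trace_printk("bug hit, freezing trace\n");
 *              tracing_off();
 *      }
 */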
1438
1439 void disable_trace_on_warning(void)
1440 {
1441         if (__disable_trace_on_warning) {
1442                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1443                         "Disabling tracing due to warning\n");
1444                 tracing_off();
1445         }
1446 }
1447
1448 /**
1449  * tracer_tracing_is_on - show the real state of the ring buffer
1450  * @tr : the trace array to check
1451  *
1452  * Shows the real state of @tr's ring buffer: whether it is enabled or not.
1453  */
1454 bool tracer_tracing_is_on(struct trace_array *tr)
1455 {
1456         if (tr->array_buffer.buffer)
1457                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1458         return !tr->buffer_disabled;
1459 }
1460
1461 /**
1462  * tracing_is_on - show state of ring buffers enabled
1463  */
1464 int tracing_is_on(void)
1465 {
1466         return tracer_tracing_is_on(&global_trace);
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_is_on);
1469
1470 static int __init set_buf_size(char *str)
1471 {
1472         unsigned long buf_size;
1473
1474         if (!str)
1475                 return 0;
1476         buf_size = memparse(str, &str);
1477         /* nr_entries can not be zero */
1478         if (buf_size == 0)
1479                 return 0;
1480         trace_buf_size = buf_size;
1481         return 1;
1482 }
1483 __setup("trace_buf_size=", set_buf_size);
1484
1485 static int __init set_tracing_thresh(char *str)
1486 {
1487         unsigned long threshold;
1488         int ret;
1489
1490         if (!str)
1491                 return 0;
1492         ret = kstrtoul(str, 0, &threshold);
1493         if (ret < 0)
1494                 return 0;
1495         tracing_thresh = threshold * 1000;
1496         return 1;
1497 }
1498 __setup("tracing_thresh=", set_tracing_thresh);
1499
1500 unsigned long nsecs_to_usecs(unsigned long nsecs)
1501 {
1502         return nsecs / 1000;
1503 }
1504
1505 /*
1506  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1507  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1508  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1509  * of strings in the order that the evals (enum) were defined.
1510  */
1511 #undef C
1512 #define C(a, b) b
1513
1514 /* These must match the bit positions in trace_iterator_flags */
1515 static const char *trace_options[] = {
1516         TRACE_FLAGS
1517         NULL
1518 };
1519
1520 static struct {
1521         u64 (*func)(void);
1522         const char *name;
1523         int in_ns;              /* is this clock in nanoseconds? */
1524 } trace_clocks[] = {
1525         { trace_clock_local,            "local",        1 },
1526         { trace_clock_global,           "global",       1 },
1527         { trace_clock_counter,          "counter",      0 },
1528         { trace_clock_jiffies,          "uptime",       0 },
1529         { trace_clock,                  "perf",         1 },
1530         { ktime_get_mono_fast_ns,       "mono",         1 },
1531         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1532         { ktime_get_boot_fast_ns,       "boot",         1 },
1533         ARCH_TRACE_CLOCKS
1534 };
1535
1536 bool trace_clock_in_ns(struct trace_array *tr)
1537 {
1538         if (trace_clocks[tr->clock_id].in_ns)
1539                 return true;
1540
1541         return false;
1542 }
1543
1544 /*
1545  * trace_parser_get_init - gets the buffer for trace parser
1546  */
1547 int trace_parser_get_init(struct trace_parser *parser, int size)
1548 {
1549         memset(parser, 0, sizeof(*parser));
1550
1551         parser->buffer = kmalloc(size, GFP_KERNEL);
1552         if (!parser->buffer)
1553                 return 1;
1554
1555         parser->size = size;
1556         return 0;
1557 }
1558
1559 /*
1560  * trace_parser_put - frees the buffer for trace parser
1561  */
1562 void trace_parser_put(struct trace_parser *parser)
1563 {
1564         kfree(parser->buffer);
1565         parser->buffer = NULL;
1566 }
1567
1568 /*
1569  * trace_get_user - reads the user input string separated by  space
1570  * (matched by isspace(ch))
1571  *
1572  * For each string found the 'struct trace_parser' is updated,
1573  * and the function returns.
1574  *
1575  * Returns number of bytes read.
1576  *
1577  * See kernel/trace/trace.h for 'struct trace_parser' details.
1578  */
1579 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1580         size_t cnt, loff_t *ppos)
1581 {
1582         char ch;
1583         size_t read = 0;
1584         ssize_t ret;
1585
1586         if (!*ppos)
1587                 trace_parser_clear(parser);
1588
1589         ret = get_user(ch, ubuf++);
1590         if (ret)
1591                 goto out;
1592
1593         read++;
1594         cnt--;
1595
1596         /*
1597          * If the parser did not finish with the last write,
1598          * continue reading the user input without skipping spaces.
1599          */
1600         if (!parser->cont) {
1601                 /* skip white space */
1602                 while (cnt && isspace(ch)) {
1603                         ret = get_user(ch, ubuf++);
1604                         if (ret)
1605                                 goto out;
1606                         read++;
1607                         cnt--;
1608                 }
1609
1610                 parser->idx = 0;
1611
1612                 /* only spaces were written */
1613                 if (isspace(ch) || !ch) {
1614                         *ppos += read;
1615                         ret = read;
1616                         goto out;
1617                 }
1618         }
1619
1620         /* read the non-space input */
1621         while (cnt && !isspace(ch) && ch) {
1622                 if (parser->idx < parser->size - 1)
1623                         parser->buffer[parser->idx++] = ch;
1624                 else {
1625                         ret = -EINVAL;
1626                         goto out;
1627                 }
1628                 ret = get_user(ch, ubuf++);
1629                 if (ret)
1630                         goto out;
1631                 read++;
1632                 cnt--;
1633         }
1634
1635         /* We either got finished input or we have to wait for another call. */
1636         if (isspace(ch) || !ch) {
1637                 parser->buffer[parser->idx] = 0;
1638                 parser->cont = false;
1639         } else if (parser->idx < parser->size - 1) {
1640                 parser->cont = true;
1641                 parser->buffer[parser->idx++] = ch;
1642                 /* Make sure the parsed string always terminates with '\0'. */
1643                 parser->buffer[parser->idx] = 0;
1644         } else {
1645                 ret = -EINVAL;
1646                 goto out;
1647         }
1648
1649         *ppos += read;
1650         ret = read;
1651
1652 out:
1653         return ret;
1654 }
1655
1656 /* TODO add a seq_buf_to_buffer() */
1657 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1658 {
1659         int len;
1660
1661         if (trace_seq_used(s) <= s->seq.readpos)
1662                 return -EBUSY;
1663
1664         len = trace_seq_used(s) - s->seq.readpos;
1665         if (cnt > len)
1666                 cnt = len;
1667         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1668
1669         s->seq.readpos += cnt;
1670         return cnt;
1671 }
1672
1673 unsigned long __read_mostly     tracing_thresh;
1674 static const struct file_operations tracing_max_lat_fops;
1675
1676 #ifdef LATENCY_FS_NOTIFY
1677
1678 static struct workqueue_struct *fsnotify_wq;
1679
1680 static void latency_fsnotify_workfn(struct work_struct *work)
1681 {
1682         struct trace_array *tr = container_of(work, struct trace_array,
1683                                               fsnotify_work);
1684         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1685 }
1686
1687 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1688 {
1689         struct trace_array *tr = container_of(iwork, struct trace_array,
1690                                               fsnotify_irqwork);
1691         queue_work(fsnotify_wq, &tr->fsnotify_work);
1692 }
1693
1694 static void trace_create_maxlat_file(struct trace_array *tr,
1695                                      struct dentry *d_tracer)
1696 {
1697         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1698         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1699         tr->d_max_latency = trace_create_file("tracing_max_latency",
1700                                               TRACE_MODE_WRITE,
1701                                               d_tracer, &tr->max_latency,
1702                                               &tracing_max_lat_fops);
1703 }
1704
1705 __init static int latency_fsnotify_init(void)
1706 {
1707         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1708                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1709         if (!fsnotify_wq) {
1710                 pr_err("Unable to allocate tr_max_lat_wq\n");
1711                 return -ENOMEM;
1712         }
1713         return 0;
1714 }
1715
1716 late_initcall_sync(latency_fsnotify_init);
1717
1718 void latency_fsnotify(struct trace_array *tr)
1719 {
1720         if (!fsnotify_wq)
1721                 return;
1722         /*
1723          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1724          * possible that we are called from __schedule() or do_idle(), which
1725          * could cause a deadlock.
1726          */
1727         irq_work_queue(&tr->fsnotify_irqwork);
1728 }
1729
1730 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1731         || defined(CONFIG_OSNOISE_TRACER)
1732
1733 #define trace_create_maxlat_file(tr, d_tracer)                          \
1734         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1735                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1736
1737 #else
1738 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1739 #endif
1740
1741 #ifdef CONFIG_TRACER_MAX_TRACE
1742 /*
1743  * Copy the new maximum trace into the separate maximum-trace
1744  * structure. (This way the maximum trace is permanently saved
1745  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1746  */
1747 static void
1748 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1749 {
1750         struct array_buffer *trace_buf = &tr->array_buffer;
1751         struct array_buffer *max_buf = &tr->max_buffer;
1752         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1753         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1754
1755         max_buf->cpu = cpu;
1756         max_buf->time_start = data->preempt_timestamp;
1757
1758         max_data->saved_latency = tr->max_latency;
1759         max_data->critical_start = data->critical_start;
1760         max_data->critical_end = data->critical_end;
1761
1762         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1763         max_data->pid = tsk->pid;
1764         /*
1765          * If tsk == current, then use current_uid(), as that does not use
1766          * RCU. The irq tracer can be called out of RCU scope.
1767          */
1768         if (tsk == current)
1769                 max_data->uid = current_uid();
1770         else
1771                 max_data->uid = task_uid(tsk);
1772
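	/* Recover the nice value that the task's static_prio encodes */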
1773         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1774         max_data->policy = tsk->policy;
1775         max_data->rt_priority = tsk->rt_priority;
1776
1777         /* record this task's comm */
1778         tracing_record_cmdline(tsk);
1779         latency_fsnotify(tr);
1780 }
1781
1782 /**
1783  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1784  * @tr: trace array to snapshot
1785  * @tsk: the task with the latency
1786  * @cpu: The cpu that initiated the trace.
1787  * @cond_data: User data associated with a conditional snapshot
1788  *
1789  * Flip the buffers between the @tr and the max_tr and record information
1790  * about which task was the cause of this latency.
1791  */
1792 void
1793 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1794               void *cond_data)
1795 {
1796         if (tr->stop_count)
1797                 return;
1798
1799         WARN_ON_ONCE(!irqs_disabled());
1800
1801         if (!tr->allocated_snapshot) {
1802                 /* Only the nop tracer should hit this when disabling */
1803                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1804                 return;
1805         }
1806
1807         arch_spin_lock(&tr->max_lock);
1808
1809         /* Inherit the recordable setting from array_buffer */
1810         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1811                 ring_buffer_record_on(tr->max_buffer.buffer);
1812         else
1813                 ring_buffer_record_off(tr->max_buffer.buffer);
1814
1815 #ifdef CONFIG_TRACER_SNAPSHOT
1816         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1817                 goto out_unlock;
1818 #endif
1819         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1820
1821         __update_max_tr(tr, tsk, cpu);
1822
1823  out_unlock:
1824         arch_spin_unlock(&tr->max_lock);
1825 }
1826
1827 /**
1828  * update_max_tr_single - only copy one trace over, and reset the rest
1829  * @tr: trace array
1830  * @tsk: task with the latency
1831  * @cpu: the cpu of the buffer to copy.
1832  *
1833  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1834  */
1835 void
1836 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1837 {
1838         int ret;
1839
1840         if (tr->stop_count)
1841                 return;
1842
1843         WARN_ON_ONCE(!irqs_disabled());
1844         if (!tr->allocated_snapshot) {
1845                 /* Only the nop tracer should hit this when disabling */
1846                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1847                 return;
1848         }
1849
1850         arch_spin_lock(&tr->max_lock);
1851
1852         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1853
1854         if (ret == -EBUSY) {
1855                 /*
1856                  * We failed to swap the buffer due to a commit taking
1857                  * place on this CPU. We fail to record, but we reset
1858                  * the max trace buffer (no one writes directly to it)
1859                  * and flag that it failed.
1860                  */
1861                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1862                         "Failed to swap buffers due to commit in progress\n");
1863         }
1864
1865         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1866
1867         __update_max_tr(tr, tsk, cpu);
1868         arch_spin_unlock(&tr->max_lock);
1869 }
1870 #endif /* CONFIG_TRACER_MAX_TRACE */
1871
1872 static int wait_on_pipe(struct trace_iterator *iter, int full)
1873 {
1874         /* Iterators are static; they should be either filled or empty */
1875         if (trace_buffer_iter(iter, iter->cpu_file))
1876                 return 0;
1877
1878         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1879                                 full);
1880 }
1881
1882 #ifdef CONFIG_FTRACE_STARTUP_TEST
1883 static bool selftests_can_run;
1884
1885 struct trace_selftests {
1886         struct list_head                list;
1887         struct tracer                   *type;
1888 };
1889
1890 static LIST_HEAD(postponed_selftests);
1891
1892 static int save_selftest(struct tracer *type)
1893 {
1894         struct trace_selftests *selftest;
1895
1896         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1897         if (!selftest)
1898                 return -ENOMEM;
1899
1900         selftest->type = type;
1901         list_add(&selftest->list, &postponed_selftests);
1902         return 0;
1903 }
1904
1905 static int run_tracer_selftest(struct tracer *type)
1906 {
1907         struct trace_array *tr = &global_trace;
1908         struct tracer *saved_tracer = tr->current_trace;
1909         int ret;
1910
1911         if (!type->selftest || tracing_selftest_disabled)
1912                 return 0;
1913
1914         /*
1915          * If a tracer registers early in boot up (before scheduling is
1916          * initialized and such), then do not run its selftests yet.
1917          * Instead, run it a little later in the boot process.
1918          */
1919         if (!selftests_can_run)
1920                 return save_selftest(type);
1921
1922         if (!tracing_is_on()) {
1923                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1924                         type->name);
1925                 return 0;
1926         }
1927
1928         /*
1929          * Run a selftest on this tracer.
1930          * Here we reset the trace buffer, and set the current
1931          * tracer to be this tracer. The tracer can then run some
1932          * internal tracing to verify that everything is in order.
1933          * If we fail, we do not register this tracer.
1934          */
1935         tracing_reset_online_cpus(&tr->array_buffer);
1936
1937         tr->current_trace = type;
1938
1939 #ifdef CONFIG_TRACER_MAX_TRACE
1940         if (type->use_max_tr) {
1941                 /* If we expanded the buffers, make sure the max is expanded too */
1942                 if (ring_buffer_expanded)
1943                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1944                                            RING_BUFFER_ALL_CPUS);
1945                 tr->allocated_snapshot = true;
1946         }
1947 #endif
1948
1949         /* the test is responsible for initializing and enabling */
1950         pr_info("Testing tracer %s: ", type->name);
1951         ret = type->selftest(type, tr);
1952         /* the test is responsible for resetting too */
1953         tr->current_trace = saved_tracer;
1954         if (ret) {
1955                 printk(KERN_CONT "FAILED!\n");
1956                 /* Add the warning after printing 'FAILED' */
1957                 WARN_ON(1);
1958                 return -1;
1959         }
1960         /* Only reset on passing, to avoid touching corrupted buffers */
1961         tracing_reset_online_cpus(&tr->array_buffer);
1962
1963 #ifdef CONFIG_TRACER_MAX_TRACE
1964         if (type->use_max_tr) {
1965                 tr->allocated_snapshot = false;
1966
1967                 /* Shrink the max buffer again */
1968                 if (ring_buffer_expanded)
1969                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1970                                            RING_BUFFER_ALL_CPUS);
1971         }
1972 #endif
1973
1974         printk(KERN_CONT "PASSED\n");
1975         return 0;
1976 }
1977
1978 static __init int init_trace_selftests(void)
1979 {
1980         struct trace_selftests *p, *n;
1981         struct tracer *t, **last;
1982         int ret;
1983
1984         selftests_can_run = true;
1985
1986         mutex_lock(&trace_types_lock);
1987
1988         if (list_empty(&postponed_selftests))
1989                 goto out;
1990
1991         pr_info("Running postponed tracer tests:\n");
1992
1993         tracing_selftest_running = true;
1994         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1995                 /* This loop can take minutes when sanitizers are enabled, so
1996                  * let's make sure we allow RCU processing.
1997                  */
1998                 cond_resched();
1999                 ret = run_tracer_selftest(p->type);
2000                 /* If the test fails, then warn and remove from available_tracers */
2001                 if (ret < 0) {
2002                         WARN(1, "tracer: %s failed selftest, disabling\n",
2003                              p->type->name);
2004                         last = &trace_types;
2005                         for (t = trace_types; t; t = t->next) {
2006                                 if (t == p->type) {
2007                                         *last = t->next;
2008                                         break;
2009                                 }
2010                                 last = &t->next;
2011                         }
2012                 }
2013                 list_del(&p->list);
2014                 kfree(p);
2015         }
2016         tracing_selftest_running = false;
2017
2018  out:
2019         mutex_unlock(&trace_types_lock);
2020
2021         return 0;
2022 }
2023 core_initcall(init_trace_selftests);
2024 #else
2025 static inline int run_tracer_selftest(struct tracer *type)
2026 {
2027         return 0;
2028 }
2029 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2030
2031 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2032
2033 static void __init apply_trace_boot_options(void);
2034
2035 /**
2036  * register_tracer - register a tracer with the ftrace system.
2037  * @type: the plugin for the tracer
2038  *
2039  * Register a new plugin tracer.
2040  */
2041 int __init register_tracer(struct tracer *type)
2042 {
2043         struct tracer *t;
2044         int ret = 0;
2045
2046         if (!type->name) {
2047                 pr_info("Tracer must have a name\n");
2048                 return -1;
2049         }
2050
2051         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2052                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2053                 return -1;
2054         }
2055
2056         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2057                 pr_warn("Can not register tracer %s due to lockdown\n",
2058                            type->name);
2059                 return -EPERM;
2060         }
2061
2062         mutex_lock(&trace_types_lock);
2063
2064         tracing_selftest_running = true;
2065
2066         for (t = trace_types; t; t = t->next) {
2067                 if (strcmp(type->name, t->name) == 0) {
2068                         /* already found */
2069                         pr_info("Tracer %s already registered\n",
2070                                 type->name);
2071                         ret = -1;
2072                         goto out;
2073                 }
2074         }
2075
2076         if (!type->set_flag)
2077                 type->set_flag = &dummy_set_flag;
2078         if (!type->flags) {
2079                 /* allocate a dummy tracer_flags */
2080                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2081                 if (!type->flags) {
2082                         ret = -ENOMEM;
2083                         goto out;
2084                 }
2085                 type->flags->val = 0;
2086                 type->flags->opts = dummy_tracer_opt;
2087         } else
2088                 if (!type->flags->opts)
2089                         type->flags->opts = dummy_tracer_opt;
2090
2091         /* store the tracer for __set_tracer_option */
2092         type->flags->trace = type;
2093
2094         ret = run_tracer_selftest(type);
2095         if (ret < 0)
2096                 goto out;
2097
2098         type->next = trace_types;
2099         trace_types = type;
2100         add_tracer_options(&global_trace, type);
2101
2102  out:
2103         tracing_selftest_running = false;
2104         mutex_unlock(&trace_types_lock);
2105
2106         if (ret || !default_bootup_tracer)
2107                 goto out_unlock;
2108
2109         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2110                 goto out_unlock;
2111
2112         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2113         /* Do we want this tracer to start on bootup? */
2114         tracing_set_tracer(&global_trace, type->name);
2115         default_bootup_tracer = NULL;
2116
2117         apply_trace_boot_options();
2118
2119         /* disable other selftests, since running this tracer will break them. */
2120         disable_tracing_selftest("running a tracer");
2121
2122  out_unlock:
2123         return ret;
2124 }
2125
2126 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2127 {
2128         struct trace_buffer *buffer = buf->buffer;
2129
2130         if (!buffer)
2131                 return;
2132
2133         ring_buffer_record_disable(buffer);
2134
2135         /* Make sure all commits have finished */
2136         synchronize_rcu();
2137         ring_buffer_reset_cpu(buffer, cpu);
2138
2139         ring_buffer_record_enable(buffer);
2140 }
2141
2142 void tracing_reset_online_cpus(struct array_buffer *buf)
2143 {
2144         struct trace_buffer *buffer = buf->buffer;
2145
2146         if (!buffer)
2147                 return;
2148
2149         ring_buffer_record_disable(buffer);
2150
2151         /* Make sure all commits have finished */
2152         synchronize_rcu();
2153
2154         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2155
2156         ring_buffer_reset_online_cpus(buffer);
2157
2158         ring_buffer_record_enable(buffer);
2159 }
2160
2161 /* Must have trace_types_lock held */
2162 void tracing_reset_all_online_cpus(void)
2163 {
2164         struct trace_array *tr;
2165
2166         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2167                 if (!tr->clear_trace)
2168                         continue;
2169                 tr->clear_trace = false;
2170                 tracing_reset_online_cpus(&tr->array_buffer);
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172                 tracing_reset_online_cpus(&tr->max_buffer);
2173 #endif
2174         }
2175 }
2176
2177 /*
2178  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2179  * is the tgid last observed corresponding to pid=i.
2180  */
2181 static int *tgid_map;
2182
2183 /* The maximum valid index into tgid_map. */
2184 static size_t tgid_map_max;
2185
2186 #define SAVED_CMDLINES_DEFAULT 128
2187 #define NO_CMDLINE_MAP UINT_MAX
2188 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2189 struct saved_cmdlines_buffer {
2190         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2191         unsigned *map_cmdline_to_pid;
2192         unsigned cmdline_num;
2193         int cmdline_idx;
2194         char *saved_cmdlines;
2195 };
2196 static struct saved_cmdlines_buffer *savedcmd;
2197
2198 static inline char *get_saved_cmdlines(int idx)
2199 {
2200         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2201 }
2202
2203 static inline void set_cmdline(int idx, const char *cmdline)
2204 {
2205         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2206 }
2207
2208 static int allocate_cmdlines_buffer(unsigned int val,
2209                                     struct saved_cmdlines_buffer *s)
2210 {
2211         s->map_cmdline_to_pid = kmalloc_array(val,
2212                                               sizeof(*s->map_cmdline_to_pid),
2213                                               GFP_KERNEL);
2214         if (!s->map_cmdline_to_pid)
2215                 return -ENOMEM;
2216
2217         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2218         if (!s->saved_cmdlines) {
2219                 kfree(s->map_cmdline_to_pid);
2220                 return -ENOMEM;
2221         }
2222
2223         s->cmdline_idx = 0;
2224         s->cmdline_num = val;
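	/*
	 * NO_CMDLINE_MAP is UINT_MAX, so the byte-wise memset()s of 0xff
	 * below fill every slot of both maps with NO_CMDLINE_MAP at once.
	 */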
2225         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2226                sizeof(s->map_pid_to_cmdline));
2227         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2228                val * sizeof(*s->map_cmdline_to_pid));
2229
2230         return 0;
2231 }
2232
2233 static int trace_create_savedcmd(void)
2234 {
2235         int ret;
2236
2237         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2238         if (!savedcmd)
2239                 return -ENOMEM;
2240
2241         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2242         if (ret < 0) {
2243                 kfree(savedcmd);
2244                 savedcmd = NULL;
2245                 return -ENOMEM;
2246         }
2247
2248         return 0;
2249 }
2250
2251 int is_tracing_stopped(void)
2252 {
2253         return global_trace.stop_count;
2254 }
2255
2256 /**
2257  * tracing_start - quick start of the tracer
2258  *
2259  * If tracing is enabled but was stopped by tracing_stop,
2260  * this will start the tracer back up.
2261  */
2262 void tracing_start(void)
2263 {
2264         struct trace_buffer *buffer;
2265         unsigned long flags;
2266
2267         if (tracing_disabled)
2268                 return;
2269
2270         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2271         if (--global_trace.stop_count) {
2272                 if (global_trace.stop_count < 0) {
2273                         /* Someone screwed up their debugging */
2274                         WARN_ON_ONCE(1);
2275                         global_trace.stop_count = 0;
2276                 }
2277                 goto out;
2278         }
2279
2280         /* Prevent the buffers from switching */
2281         arch_spin_lock(&global_trace.max_lock);
2282
2283         buffer = global_trace.array_buffer.buffer;
2284         if (buffer)
2285                 ring_buffer_record_enable(buffer);
2286
2287 #ifdef CONFIG_TRACER_MAX_TRACE
2288         buffer = global_trace.max_buffer.buffer;
2289         if (buffer)
2290                 ring_buffer_record_enable(buffer);
2291 #endif
2292
2293         arch_spin_unlock(&global_trace.max_lock);
2294
2295  out:
2296         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2297 }
2298
2299 static void tracing_start_tr(struct trace_array *tr)
2300 {
2301         struct trace_buffer *buffer;
2302         unsigned long flags;
2303
2304         if (tracing_disabled)
2305                 return;
2306
2307         /* If global, we need to also start the max tracer */
2308         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2309                 return tracing_start();
2310
2311         raw_spin_lock_irqsave(&tr->start_lock, flags);
2312
2313         if (--tr->stop_count) {
2314                 if (tr->stop_count < 0) {
2315                         /* Someone screwed up their debugging */
2316                         WARN_ON_ONCE(1);
2317                         tr->stop_count = 0;
2318                 }
2319                 goto out;
2320         }
2321
2322         buffer = tr->array_buffer.buffer;
2323         if (buffer)
2324                 ring_buffer_record_enable(buffer);
2325
2326  out:
2327         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2328 }
2329
2330 /**
2331  * tracing_stop - quick stop of the tracer
2332  *
2333  * Lightweight way to stop tracing. Use in conjunction with
2334  * tracing_start.
2335  */
2336 void tracing_stop(void)
2337 {
2338         struct trace_buffer *buffer;
2339         unsigned long flags;
2340
2341         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2342         if (global_trace.stop_count++)
2343                 goto out;
2344
2345         /* Prevent the buffers from switching */
2346         arch_spin_lock(&global_trace.max_lock);
2347
2348         buffer = global_trace.array_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_disable(buffer);
2351
2352 #ifdef CONFIG_TRACER_MAX_TRACE
2353         buffer = global_trace.max_buffer.buffer;
2354         if (buffer)
2355                 ring_buffer_record_disable(buffer);
2356 #endif
2357
2358         arch_spin_unlock(&global_trace.max_lock);
2359
2360  out:
2361         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2362 }
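
/*
 * tracing_stop()/tracing_start() calls nest through stop_count, so
 * recording resumes only when the final tracing_start() balances the
 * last outstanding tracing_stop().
 */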
2363
2364 static void tracing_stop_tr(struct trace_array *tr)
2365 {
2366         struct trace_buffer *buffer;
2367         unsigned long flags;
2368
2369         /* If global, we need to also stop the max tracer */
2370         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2371                 return tracing_stop();
2372
2373         raw_spin_lock_irqsave(&tr->start_lock, flags);
2374         if (tr->stop_count++)
2375                 goto out;
2376
2377         buffer = tr->array_buffer.buffer;
2378         if (buffer)
2379                 ring_buffer_record_disable(buffer);
2380
2381  out:
2382         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2383 }
2384
2385 static int trace_save_cmdline(struct task_struct *tsk)
2386 {
2387         unsigned tpid, idx;
2388
2389         /* treat recording of idle task as a success */
2390         if (!tsk->pid)
2391                 return 1;
2392
2393         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2394
2395         /*
2396          * It's not the end of the world if we don't get
2397          * the lock, but we also don't want to spin
2398          * nor do we want to disable interrupts,
2399          * so if we miss here, then better luck next time.
2400          */
2401         if (!arch_spin_trylock(&trace_cmdline_lock))
2402                 return 0;
2403
2404         idx = savedcmd->map_pid_to_cmdline[tpid];
2405         if (idx == NO_CMDLINE_MAP) {
2406                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2407
2408                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2409                 savedcmd->cmdline_idx = idx;
2410         }
2411
2412         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2413         set_cmdline(idx, tsk->comm);
2414
2415         arch_spin_unlock(&trace_cmdline_lock);
2416
2417         return 1;
2418 }
2419
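/*
 * Look up the comm saved for @pid. Pids are hashed into the table by
 * masking with PID_MAX_DEFAULT - 1, so map_cmdline_to_pid is checked to
 * detect a slot that has since been reused by a different pid; in that
 * case "<...>" is reported instead of a stale comm.
 */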
2420 static void __trace_find_cmdline(int pid, char comm[])
2421 {
2422         unsigned map;
2423         int tpid;
2424
2425         if (!pid) {
2426                 strcpy(comm, "<idle>");
2427                 return;
2428         }
2429
2430         if (WARN_ON_ONCE(pid < 0)) {
2431                 strcpy(comm, "<XXX>");
2432                 return;
2433         }
2434
2435         tpid = pid & (PID_MAX_DEFAULT - 1);
2436         map = savedcmd->map_pid_to_cmdline[tpid];
2437         if (map != NO_CMDLINE_MAP) {
2438                 tpid = savedcmd->map_cmdline_to_pid[map];
2439                 if (tpid == pid) {
2440                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2441                         return;
2442                 }
2443         }
2444         strcpy(comm, "<...>");
2445 }
2446
2447 void trace_find_cmdline(int pid, char comm[])
2448 {
2449         preempt_disable();
2450         arch_spin_lock(&trace_cmdline_lock);
2451
2452         __trace_find_cmdline(pid, comm);
2453
2454         arch_spin_unlock(&trace_cmdline_lock);
2455         preempt_enable();
2456 }
2457
2458 static int *trace_find_tgid_ptr(int pid)
2459 {
2460         /*
2461          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2462          * if we observe a non-NULL tgid_map then we also observe the correct
2463          * tgid_map_max.
2464          */
2465         int *map = smp_load_acquire(&tgid_map);
2466
2467         if (unlikely(!map || pid > tgid_map_max))
2468                 return NULL;
2469
2470         return &map[pid];
2471 }
2472
2473 int trace_find_tgid(int pid)
2474 {
2475         int *ptr = trace_find_tgid_ptr(pid);
2476
2477         return ptr ? *ptr : 0;
2478 }
2479
2480 static int trace_save_tgid(struct task_struct *tsk)
2481 {
2482         int *ptr;
2483
2484         /* treat recording of idle task as a success */
2485         if (!tsk->pid)
2486                 return 1;
2487
2488         ptr = trace_find_tgid_ptr(tsk->pid);
2489         if (!ptr)
2490                 return 0;
2491
2492         *ptr = tsk->tgid;
2493         return 1;
2494 }
2495
2496 static bool tracing_record_taskinfo_skip(int flags)
2497 {
2498         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2499                 return true;
2500         if (!__this_cpu_read(trace_taskinfo_save))
2501                 return true;
2502         return false;
2503 }
2504
2505 /**
2506  * tracing_record_taskinfo - record the task info of a task
2507  *
2508  * @task:  task to record
2509  * @flags: TRACE_RECORD_CMDLINE for recording comm
2510  *         TRACE_RECORD_TGID for recording tgid
2511  */
2512 void tracing_record_taskinfo(struct task_struct *task, int flags)
2513 {
2514         bool done;
2515
2516         if (tracing_record_taskinfo_skip(flags))
2517                 return;
2518
2519         /*
2520          * Record as much task information as possible. If some fail, continue
2521          * to try to record the others.
2522          */
2523         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2524         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2525
2526         /* If recording any information failed, try again soon. */
2527         if (!done)
2528                 return;
2529
2530         __this_cpu_write(trace_taskinfo_save, false);
2531 }
2532
2533 /**
2534  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2535  *
2536  * @prev: previous task during sched_switch
2537  * @next: next task during sched_switch
2538  * @flags: TRACE_RECORD_CMDLINE for recording comm
2539  *         TRACE_RECORD_TGID for recording tgid
2540  */
2541 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2542                                           struct task_struct *next, int flags)
2543 {
2544         bool done;
2545
2546         if (tracing_record_taskinfo_skip(flags))
2547                 return;
2548
2549         /*
2550          * Record as much task information as possible. If some fail, continue
2551          * to try to record the others.
2552          */
2553         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2554         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2555         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2556         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2557
2558         /* If recording any information failed, try again soon. */
2559         if (!done)
2560                 return;
2561
2562         __this_cpu_write(trace_taskinfo_save, false);
2563 }
2564
2565 /* Helpers to record a specific task information */
2566 void tracing_record_cmdline(struct task_struct *task)
2567 {
2568         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2569 }
2570
2571 void tracing_record_tgid(struct task_struct *task)
2572 {
2573         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2574 }
2575
2576 /*
2577  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579  * simplifies those functions and keeps them in sync.
2580  */
2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583         return trace_seq_has_overflowed(s) ?
2584                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
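
/*
 * A minimal sketch of how trace_handle_return() is used (for illustration
 * only; my_event_print() is a hypothetical output callback, not one that
 * exists in this file):
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my_event: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */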
2587
2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591         return current->migration_disabled;
2592 #else
2593         return 0;
2594 #endif
2595 }
2596
2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599         unsigned int trace_flags = irqs_status;
2600         unsigned int pc;
2601
2602         pc = preempt_count();
2603
2604         if (pc & NMI_MASK)
2605                 trace_flags |= TRACE_FLAG_NMI;
2606         if (pc & HARDIRQ_MASK)
2607                 trace_flags |= TRACE_FLAG_HARDIRQ;
2608         if (in_serving_softirq())
2609                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2610         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611                 trace_flags |= TRACE_FLAG_BH_OFF;
2612
2613         if (tif_need_resched())
2614                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615         if (test_preempt_need_resched())
2616                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
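	/*
	 * Pack everything into one word: bits 16-31 hold the trace flags,
	 * bits 0-3 the clamped preemption count and bits 4-7 the clamped
	 * migration-disable depth.
	 */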
2617         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2618                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2619 }
2620
2621 struct ring_buffer_event *
2622 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2623                           int type,
2624                           unsigned long len,
2625                           unsigned int trace_ctx)
2626 {
2627         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2628 }
2629
2630 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2631 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2632 static int trace_buffered_event_ref;
2633
2634 /**
2635  * trace_buffered_event_enable - enable buffering events
2636  *
2637  * When events are being filtered, it is quicker to use a temporary
2638  * buffer to write the event data into if there's a likely chance
2639  * that it will not be committed. The discard of the ring buffer
2640  * is not as fast as committing, and is much slower than copying
2641  * a commit.
2642  *
2643  * When an event is to be filtered, allocate per cpu buffers to
2644  * write the event data into, and if the event is filtered and discarded
2645  * it is simply dropped, otherwise, the entire data is to be committed
2646  * in one shot.
2647  */
2648 void trace_buffered_event_enable(void)
2649 {
2650         struct ring_buffer_event *event;
2651         struct page *page;
2652         int cpu;
2653
2654         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2655
2656         if (trace_buffered_event_ref++)
2657                 return;
2658
2659         for_each_tracing_cpu(cpu) {
2660                 page = alloc_pages_node(cpu_to_node(cpu),
2661                                         GFP_KERNEL | __GFP_NORETRY, 0);
2662                 if (!page)
2663                         goto failed;
2664
2665                 event = page_address(page);
2666                 memset(event, 0, sizeof(*event));
2667
2668                 per_cpu(trace_buffered_event, cpu) = event;
2669
2670                 preempt_disable();
2671                 if (cpu == smp_processor_id() &&
2672                     __this_cpu_read(trace_buffered_event) !=
2673                     per_cpu(trace_buffered_event, cpu))
2674                         WARN_ON_ONCE(1);
2675                 preempt_enable();
2676         }
2677
2678         return;
2679  failed:
2680         trace_buffered_event_disable();
2681 }
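
/*
 * Enable/disable calls nest through trace_buffered_event_ref and must be
 * made under event_mutex; the per-cpu buffers exist only while at least
 * one user (e.g. an installed event filter) still needs them.
 */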
2682
2683 static void enable_trace_buffered_event(void *data)
2684 {
2685         /* Probably not needed, but do it anyway */
2686         smp_rmb();
2687         this_cpu_dec(trace_buffered_event_cnt);
2688 }
2689
2690 static void disable_trace_buffered_event(void *data)
2691 {
2692         this_cpu_inc(trace_buffered_event_cnt);
2693 }
2694
2695 /**
2696  * trace_buffered_event_disable - disable buffering events
2697  *
2698  * When a filter is removed, it is faster to not use the buffered
2699  * events, and to commit directly into the ring buffer. Free up
2700  * the temp buffers when there are no more users. This requires
2701  * special synchronization with current events.
2702  */
2703 void trace_buffered_event_disable(void)
2704 {
2705         int cpu;
2706
2707         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2708
2709         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2710                 return;
2711
2712         if (--trace_buffered_event_ref)
2713                 return;
2714
2715         preempt_disable();
2716         /* For each CPU, set the buffer as used. */
2717         smp_call_function_many(tracing_buffer_mask,
2718                                disable_trace_buffered_event, NULL, 1);
2719         preempt_enable();
2720
2721         /* Wait for all current users to finish */
2722         synchronize_rcu();
2723
2724         for_each_tracing_cpu(cpu) {
2725                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2726                 per_cpu(trace_buffered_event, cpu) = NULL;
2727         }
2728         /*
2729          * Make sure trace_buffered_event is NULL before clearing
2730          * trace_buffered_event_cnt.
2731          */
2732         smp_wmb();
2733
2734         preempt_disable();
2735         /* Do the work on each cpu */
2736         smp_call_function_many(tracing_buffer_mask,
2737                                enable_trace_buffered_event, NULL, 1);
2738         preempt_enable();
2739 }
2740
2741 static struct trace_buffer *temp_buffer;
2742
2743 struct ring_buffer_event *
2744 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2745                           struct trace_event_file *trace_file,
2746                           int type, unsigned long len,
2747                           unsigned int trace_ctx)
2748 {
2749         struct ring_buffer_event *entry;
2750         struct trace_array *tr = trace_file->tr;
2751         int val;
2752
2753         *current_rb = tr->array_buffer.buffer;
2754
2755         if (!tr->no_filter_buffering_ref &&
2756             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2757                 preempt_disable_notrace();
2758                 /*
2759                  * Filtering is on, so try to use the per cpu buffer first.
2760                  * This buffer will simulate a ring_buffer_event,
2761                  * where the type_len is zero and the array[0] will
2762                  * hold the full length.
2763                  * (see include/linux/ring_buffer.h for details on
2764                  *  how the ring_buffer_event is structured).
2765                  *
2766                  * Using a temp buffer during filtering and copying it
2767                  * on a matched filter is quicker than writing directly
2768                  * into the ring buffer and then discarding it when
2769                  * it doesn't match. That is because the discard
2770                  * requires several atomic operations to get right.
2771                  * Copying on a match and doing nothing on a failed match
2772                  * is still quicker than writing directly with no copy but
2773                  * having to discard out of the ring buffer on a failed match.
2774                  */
2775                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2776                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2777
2778                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2779
2780                         /*
2781                          * Preemption is disabled, but interrupts and NMIs
2782                          * can still come in now. If that happens after
2783                          * the above increment, then it will have to go
2784                          * back to the old method of allocating the event
2785                          * on the ring buffer, and if the filter fails, it
2786                          * will have to call ring_buffer_discard_commit()
2787                          * to remove it.
2788                          *
2789                          * Need to also check the unlikely case that the
2790                          * length is bigger than the temp buffer size.
2791                          * If that happens, then the reserve is pretty much
2792                          * guaranteed to fail, as the ring buffer currently
2793                          * only allows events less than a page. But that may
2794                          * change in the future, so let the ring buffer reserve
2795                          * handle the failure in that case.
2796                          */
2797                         if (val == 1 && likely(len <= max_len)) {
2798                                 trace_event_setup(entry, type, trace_ctx);
2799                                 entry->array[0] = len;
2800                                 /* Return with preemption disabled */
2801                                 return entry;
2802                         }
2803                         this_cpu_dec(trace_buffered_event_cnt);
2804                 }
2805                 /* __trace_buffer_lock_reserve() disables preemption */
2806                 preempt_enable_notrace();
2807         }
2808
2809         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810                                             trace_ctx);
2811         /*
2812          * If tracing is off, but we have triggers enabled,
2813          * we still need to look at the event data. Use the temp_buffer
2814          * to store the trace event for the trigger to use. It's recursion
2815          * safe and will not be recorded anywhere.
2816          */
2817         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2818                 *current_rb = temp_buffer;
2819                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820                                                     trace_ctx);
2821         }
2822         return entry;
2823 }
2824 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2825
2826 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2827 static DEFINE_MUTEX(tracepoint_printk_mutex);
2828
2829 static void output_printk(struct trace_event_buffer *fbuffer)
2830 {
2831         struct trace_event_call *event_call;
2832         struct trace_event_file *file;
2833         struct trace_event *event;
2834         unsigned long flags;
2835         struct trace_iterator *iter = tracepoint_print_iter;
2836
2837         /* We should never get here if iter is NULL */
2838         if (WARN_ON_ONCE(!iter))
2839                 return;
2840
2841         event_call = fbuffer->trace_file->event_call;
2842         if (!event_call || !event_call->event.funcs ||
2843             !event_call->event.funcs->trace)
2844                 return;
2845
2846         file = fbuffer->trace_file;
2847         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2848             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2849              !filter_match_preds(file->filter, fbuffer->entry)))
2850                 return;
2851
2852         event = &fbuffer->trace_file->event_call->event;
2853
2854         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2855         trace_seq_init(&iter->seq);
2856         iter->ent = fbuffer->entry;
2857         event_call->event.funcs->trace(iter, 0, event);
2858         trace_seq_putc(&iter->seq, 0);
2859         printk("%s", iter->seq.buffer);
2860
2861         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2862 }
2863
2864 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2865                              void *buffer, size_t *lenp,
2866                              loff_t *ppos)
2867 {
2868         int save_tracepoint_printk;
2869         int ret;
2870
2871         mutex_lock(&tracepoint_printk_mutex);
2872         save_tracepoint_printk = tracepoint_printk;
2873
2874         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2875
2876         /*
2877          * This will force exiting early, as tracepoint_printk
2878          * is always zero when tracepoint_print_iter is not allocated.
2879          */
2880         if (!tracepoint_print_iter)
2881                 tracepoint_printk = 0;
2882
2883         if (save_tracepoint_printk == tracepoint_printk)
2884                 goto out;
2885
2886         if (tracepoint_printk)
2887                 static_key_enable(&tracepoint_printk_key.key);
2888         else
2889                 static_key_disable(&tracepoint_printk_key.key);
2890
2891  out:
2892         mutex_unlock(&tracepoint_printk_mutex);
2893
2894         return ret;
2895 }
2896
2897 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2898 {
2899         enum event_trigger_type tt = ETT_NONE;
2900         struct trace_event_file *file = fbuffer->trace_file;
2901
2902         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2903                         fbuffer->entry, &tt))
2904                 goto discard;
2905
2906         if (static_key_false(&tracepoint_printk_key.key))
2907                 output_printk(fbuffer);
2908
2909         if (static_branch_unlikely(&trace_event_exports_enabled))
2910                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2911
2912         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2913                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2914
2915 discard:
2916         if (tt)
2917                 event_triggers_post_call(file, tt);
2918
2919 }
2920 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2921
2922 /*
2923  * Skip 3:
2924  *
2925  *   trace_buffer_unlock_commit_regs()
2926  *   trace_event_buffer_commit()
2927  *   trace_event_raw_event_xxx()
2928  */
2929 # define STACK_SKIP 3
2930
2931 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2932                                      struct trace_buffer *buffer,
2933                                      struct ring_buffer_event *event,
2934                                      unsigned int trace_ctx,
2935                                      struct pt_regs *regs)
2936 {
2937         __buffer_unlock_commit(buffer, event);
2938
2939         /*
2940          * If regs is not set, then skip the necessary functions.
2941          * Note, we can still get here via blktrace, wakeup tracer
2942          * and mmiotrace, but that's ok if they lose a function or
2943          * two. They are not that meaningful.
2944          */
2945         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2946         ftrace_trace_userstack(tr, buffer, trace_ctx);
2947 }
2948
2949 /*
2950  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2951  */
2952 void
2953 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2954                                    struct ring_buffer_event *event)
2955 {
2956         __buffer_unlock_commit(buffer, event);
2957 }
2958
2959 void
2960 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2961                parent_ip, unsigned int trace_ctx)
2962 {
2963         struct trace_event_call *call = &event_function;
2964         struct trace_buffer *buffer = tr->array_buffer.buffer;
2965         struct ring_buffer_event *event;
2966         struct ftrace_entry *entry;
2967
2968         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2969                                             trace_ctx);
2970         if (!event)
2971                 return;
2972         entry   = ring_buffer_event_data(event);
2973         entry->ip                       = ip;
2974         entry->parent_ip                = parent_ip;
2975
2976         if (!call_filter_check_discard(call, entry, buffer, event)) {
2977                 if (static_branch_unlikely(&trace_function_exports_enabled))
2978                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2979                 __buffer_unlock_commit(buffer, event);
2980         }
2981 }
2982
2983 #ifdef CONFIG_STACKTRACE
2984
2985 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2986 #define FTRACE_KSTACK_NESTING   4
2987
2988 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2989
2990 struct ftrace_stack {
2991         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2992 };
2993
2994
2995 struct ftrace_stacks {
2996         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2997 };
2998
2999 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3000 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3001
3002 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3003                                  unsigned int trace_ctx,
3004                                  int skip, struct pt_regs *regs)
3005 {
3006         struct trace_event_call *call = &event_kernel_stack;
3007         struct ring_buffer_event *event;
3008         unsigned int size, nr_entries;
3009         struct ftrace_stack *fstack;
3010         struct stack_entry *entry;
3011         int stackidx;
3012
3013         /*
3014          * Add one for this function and the call to save_stack_trace().
3015          * If regs is set, then these functions will not be in the way.
3016          */
3017 #ifndef CONFIG_UNWINDER_ORC
3018         if (!regs)
3019                 skip++;
3020 #endif
3021
3022         preempt_disable_notrace();
3023
3024         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3025
3026         /* This should never happen. If it does, yell once and skip */
3027         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3028                 goto out;
3029
3030         /*
3031          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3032          * interrupt will see the value either pre- or post-increment.
3033          * If the interrupt happens pre-increment, it will have restored
3034          * the counter when it returns. We just need a barrier to keep
3035          * gcc from moving things around.
3036          */
3037         barrier();
3038
3039         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3040         size = ARRAY_SIZE(fstack->calls);
3041
3042         if (regs) {
3043                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3044                                                    size, skip);
3045         } else {
3046                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3047         }
3048
3049         size = nr_entries * sizeof(unsigned long);
3050         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3051                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3052                                     trace_ctx);
3053         if (!event)
3054                 goto out;
3055         entry = ring_buffer_event_data(event);
3056
3057         memcpy(&entry->caller, fstack->calls, size);
3058         entry->size = nr_entries;
3059
3060         if (!call_filter_check_discard(call, entry, buffer, event))
3061                 __buffer_unlock_commit(buffer, event);
3062
3063  out:
3064         /* Again, don't let gcc optimize things here */
3065         barrier();
3066         __this_cpu_dec(ftrace_stack_reserve);
3067         preempt_enable_notrace();
3068
3069 }
3070
3071 static inline void ftrace_trace_stack(struct trace_array *tr,
3072                                       struct trace_buffer *buffer,
3073                                       unsigned int trace_ctx,
3074                                       int skip, struct pt_regs *regs)
3075 {
3076         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3077                 return;
3078
3079         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3080 }
3081
3082 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3083                    int skip)
3084 {
3085         struct trace_buffer *buffer = tr->array_buffer.buffer;
3086
3087         if (rcu_is_watching()) {
3088                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3089                 return;
3090         }
3091
3092         /*
3093          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3094          * but if the above rcu_is_watching() failed, then the NMI
3095          * triggered someplace critical, and rcu_irq_enter() should
3096          * not be called from NMI.
3097          */
3098         if (unlikely(in_nmi()))
3099                 return;
3100
3101         rcu_irq_enter_irqson();
3102         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3103         rcu_irq_exit_irqson();
3104 }
3105
3106 /**
3107  * trace_dump_stack - record a stack back trace in the trace buffer
3108  * @skip: Number of functions to skip (helper handlers)
3109  */
3110 void trace_dump_stack(int skip)
3111 {
3112         if (tracing_disabled || tracing_selftest_running)
3113                 return;
3114
3115 #ifndef CONFIG_UNWINDER_ORC
3116         /* Skip 1 to skip this function. */
3117         skip++;
3118 #endif
3119         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3120                              tracing_gen_ctx(), skip, NULL);
3121 }
3122 EXPORT_SYMBOL_GPL(trace_dump_stack);
3123
3124 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3125 static DEFINE_PER_CPU(int, user_stack_count);
3126
3127 static void
3128 ftrace_trace_userstack(struct trace_array *tr,
3129                        struct trace_buffer *buffer, unsigned int trace_ctx)
3130 {
3131         struct trace_event_call *call = &event_user_stack;
3132         struct ring_buffer_event *event;
3133         struct userstack_entry *entry;
3134
3135         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3136                 return;
3137
3138         /*
3139          * NMIs cannot handle page faults, even with fixups.
3140          * Saving the user stack can (and often does) fault.
3141          */
3142         if (unlikely(in_nmi()))
3143                 return;
3144
3145         /*
3146          * Prevent recursion, since the user stack tracing may
3147          * trigger other kernel events.
3148          */
3149         preempt_disable();
3150         if (__this_cpu_read(user_stack_count))
3151                 goto out;
3152
3153         __this_cpu_inc(user_stack_count);
3154
3155         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3156                                             sizeof(*entry), trace_ctx);
3157         if (!event)
3158                 goto out_drop_count;
3159         entry   = ring_buffer_event_data(event);
3160
3161         entry->tgid             = current->tgid;
3162         memset(&entry->caller, 0, sizeof(entry->caller));
3163
3164         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3165         if (!call_filter_check_discard(call, entry, buffer, event))
3166                 __buffer_unlock_commit(buffer, event);
3167
3168  out_drop_count:
3169         __this_cpu_dec(user_stack_count);
3170  out:
3171         preempt_enable();
3172 }
3173 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3174 static void ftrace_trace_userstack(struct trace_array *tr,
3175                                    struct trace_buffer *buffer,
3176                                    unsigned int trace_ctx)
3177 {
3178 }
3179 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3180
3181 #endif /* CONFIG_STACKTRACE */
3182
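/*
 * A func_repeats_entry stores the 64-bit timestamp delta as two 32-bit
 * halves; readers reconstruct it as
 * ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */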
3183 static inline void
3184 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3185                           unsigned long long delta)
3186 {
3187         entry->bottom_delta_ts = delta & U32_MAX;
3188         entry->top_delta_ts = (delta >> 32);
3189 }
3190
3191 void trace_last_func_repeats(struct trace_array *tr,
3192                              struct trace_func_repeats *last_info,
3193                              unsigned int trace_ctx)
3194 {
3195         struct trace_buffer *buffer = tr->array_buffer.buffer;
3196         struct func_repeats_entry *entry;
3197         struct ring_buffer_event *event;
3198         u64 delta;
3199
3200         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3201                                             sizeof(*entry), trace_ctx);
3202         if (!event)
3203                 return;
3204
3205         delta = ring_buffer_event_time_stamp(buffer, event) -
3206                 last_info->ts_last_call;
3207
3208         entry = ring_buffer_event_data(event);
3209         entry->ip = last_info->ip;
3210         entry->parent_ip = last_info->parent_ip;
3211         entry->count = last_info->count;
3212         func_repeats_set_delta_ts(entry, delta);
3213
3214         __buffer_unlock_commit(buffer, event);
3215 }
3216
3217 /* created for use with alloc_percpu */
3218 struct trace_buffer_struct {
3219         int nesting;
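	/* One buffer per nesting level: normal, softirq, irq and NMI */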
3220         char buffer[4][TRACE_BUF_SIZE];
3221 };
3222
3223 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3224
3225 /*
3226  * This allows for lockless recording.  If we're nested too deeply, then
3227  * this returns NULL.
3228  */
3229 static char *get_trace_buf(void)
3230 {
3231         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3232
3233         if (!trace_percpu_buffer || buffer->nesting >= 4)
3234                 return NULL;
3235
3236         buffer->nesting++;
3237
3238         /* Interrupts must see nesting incremented before we use the buffer */
3239         barrier();
3240         return &buffer->buffer[buffer->nesting - 1][0];
3241 }
3242
3243 static void put_trace_buf(void)
3244 {
3245         /* Don't let the decrement of nesting leak before this */
3246         barrier();
3247         this_cpu_dec(trace_percpu_buffer->nesting);
3248 }
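
/*
 * Callers must pair the two helpers above and keep preemption disabled
 * while the buffer is in use, e.g. (sketch of the pattern used by
 * trace_vbprintk() below):
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format up to TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */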
3249
3250 static int alloc_percpu_trace_buffer(void)
3251 {
3252         struct trace_buffer_struct __percpu *buffers;
3253
3254         if (trace_percpu_buffer)
3255                 return 0;
3256
3257         buffers = alloc_percpu(struct trace_buffer_struct);
3258         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3259                 return -ENOMEM;
3260
3261         trace_percpu_buffer = buffers;
3262         return 0;
3263 }
3264
3265 static int buffers_allocated;
3266
3267 void trace_printk_init_buffers(void)
3268 {
3269         if (buffers_allocated)
3270                 return;
3271
3272         if (alloc_percpu_trace_buffer())
3273                 return;
3274
3275         /* trace_printk() is for debug use only. Don't use it in production. */
3276
3277         pr_warn("\n");
3278         pr_warn("**********************************************************\n");
3279         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3280         pr_warn("**                                                      **\n");
3281         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3282         pr_warn("**                                                      **\n");
3283         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3284         pr_warn("** unsafe for production use.                           **\n");
3285         pr_warn("**                                                      **\n");
3286         pr_warn("** If you see this message and you are not debugging    **\n");
3287         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3288         pr_warn("**                                                      **\n");
3289         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290         pr_warn("**********************************************************\n");
3291
3292         /* Expand the buffers to set size */
3293         tracing_update_buffers();
3294
3295         buffers_allocated = 1;
3296
3297         /*
3298          * trace_printk_init_buffers() can be called by modules.
3299          * If that happens, then we need to start cmdline recording
3300          * directly here. If global_trace.array_buffer.buffer is already
3301          * allocated at this point, then this was called by module code.
3302          */
3303         if (global_trace.array_buffer.buffer)
3304                 tracing_start_cmdline_record();
3305 }
3306 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3307
3308 void trace_printk_start_comm(void)
3309 {
3310         /* Start tracing comms if trace printk is set */
3311         if (!buffers_allocated)
3312                 return;
3313         tracing_start_cmdline_record();
3314 }
3315
3316 static void trace_printk_start_stop_comm(int enabled)
3317 {
3318         if (!buffers_allocated)
3319                 return;
3320
3321         if (enabled)
3322                 tracing_start_cmdline_record();
3323         else
3324                 tracing_stop_cmdline_record();
3325 }
3326
3327 /**
3328  * trace_vbprintk - write binary msg to tracing buffer
3329  * @ip:    The address of the caller
3330  * @fmt:   The string format to write to the buffer
3331  * @args:  Arguments for @fmt
3332  */
3333 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3334 {
3335         struct trace_event_call *call = &event_bprint;
3336         struct ring_buffer_event *event;
3337         struct trace_buffer *buffer;
3338         struct trace_array *tr = &global_trace;
3339         struct bprint_entry *entry;
3340         unsigned int trace_ctx;
3341         char *tbuffer;
3342         int len = 0, size;
3343
3344         if (unlikely(tracing_selftest_running || tracing_disabled))
3345                 return 0;
3346
3347         /* Don't pollute graph traces with trace_vprintk internals */
3348         pause_graph_tracing();
3349
3350         trace_ctx = tracing_gen_ctx();
3351         preempt_disable_notrace();
3352
3353         tbuffer = get_trace_buf();
3354         if (!tbuffer) {
3355                 len = 0;
3356                 goto out_nobuffer;
3357         }
3358
3359         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3360
3361         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3362                 goto out_put;
3363
3364         size = sizeof(*entry) + sizeof(u32) * len;
3365         buffer = tr->array_buffer.buffer;
3366         ring_buffer_nest_start(buffer);
3367         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3368                                             trace_ctx);
3369         if (!event)
3370                 goto out;
3371         entry = ring_buffer_event_data(event);
3372         entry->ip                       = ip;
3373         entry->fmt                      = fmt;
3374
3375         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3376         if (!call_filter_check_discard(call, entry, buffer, event)) {
3377                 __buffer_unlock_commit(buffer, event);
3378                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3379         }
3380
3381 out:
3382         ring_buffer_nest_end(buffer);
3383 out_put:
3384         put_trace_buf();
3385
3386 out_nobuffer:
3387         preempt_enable_notrace();
3388         unpause_graph_tracing();
3389
3390         return len;
3391 }
3392 EXPORT_SYMBOL_GPL(trace_vbprintk);
3393
3394 __printf(3, 0)
3395 static int
3396 __trace_array_vprintk(struct trace_buffer *buffer,
3397                       unsigned long ip, const char *fmt, va_list args)
3398 {
3399         struct trace_event_call *call = &event_print;
3400         struct ring_buffer_event *event;
3401         int len = 0, size;
3402         struct print_entry *entry;
3403         unsigned int trace_ctx;
3404         char *tbuffer;
3405
3406         if (tracing_disabled || tracing_selftest_running)
3407                 return 0;
3408
3409         /* Don't pollute graph traces with trace_vprintk internals */
3410         pause_graph_tracing();
3411
3412         trace_ctx = tracing_gen_ctx();
3413         preempt_disable_notrace();
3414
3415
3416         tbuffer = get_trace_buf();
3417         if (!tbuffer) {
3418                 len = 0;
3419                 goto out_nobuffer;
3420         }
3421
3422         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3423
3424         size = sizeof(*entry) + len + 1;
3425         ring_buffer_nest_start(buffer);
3426         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3427                                             trace_ctx);
3428         if (!event)
3429                 goto out;
3430         entry = ring_buffer_event_data(event);
3431         entry->ip = ip;
3432
3433         memcpy(&entry->buf, tbuffer, len + 1);
3434         if (!call_filter_check_discard(call, entry, buffer, event)) {
3435                 __buffer_unlock_commit(buffer, event);
3436                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3437         }
3438
3439 out:
3440         ring_buffer_nest_end(buffer);
3441         put_trace_buf();
3442
3443 out_nobuffer:
3444         preempt_enable_notrace();
3445         unpause_graph_tracing();
3446
3447         return len;
3448 }
3449
3450 __printf(3, 0)
3451 int trace_array_vprintk(struct trace_array *tr,
3452                         unsigned long ip, const char *fmt, va_list args)
3453 {
3454         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3455 }
3456
3457 /**
3458  * trace_array_printk - Print a message to a specific instance
3459  * @tr: The instance trace_array descriptor
3460  * @ip: The instruction pointer that this is called from.
3461  * @fmt: The format to print (printf format)
3462  *
3463  * If a subsystem sets up its own instance, it may printk strings
3464  * into its tracing instance buffer using this function. Note, this
3465  * function will not write into the top level buffer (use
3466  * trace_printk() for that), as the top level buffer should only
3467  * contain events that can be individually disabled.
3468  * trace_printk() is only meant for debugging a kernel, and should
3469  * never be used in production code.
3470  *
3471  * trace_array_printk() can be used instead, as it will not add noise
3472  * to the top level tracing buffer.
3473  *
3474  * Note, trace_array_init_printk() must be called on @tr before this
3475  * can be used (see the example sketch below trace_array_init_printk()).
3476  */
3477 __printf(3, 0)
3478 int trace_array_printk(struct trace_array *tr,
3479                        unsigned long ip, const char *fmt, ...)
3480 {
3481         int ret;
3482         va_list ap;
3483
3484         if (!tr)
3485                 return -ENOENT;
3486
3487         /* This is only allowed for created instances */
3488         if (tr == &global_trace)
3489                 return 0;
3490
3491         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3492                 return 0;
3493
3494         va_start(ap, fmt);
3495         ret = trace_array_vprintk(tr, ip, fmt, ap);
3496         va_end(ap);
3497         return ret;
3498 }
3499 EXPORT_SYMBOL_GPL(trace_array_printk);
3500
3501 /**
3502  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3503  * @tr: The trace array to initialize the buffers for
3504  *
3505  * As trace_array_printk() only writes into instances, calls to it are
3506  * OK to leave in the kernel (unlike trace_printk()). This needs to be
3507  * called before trace_array_printk() can be used on a trace_array.
3508  */
3509 int trace_array_init_printk(struct trace_array *tr)
3510 {
3511         if (!tr)
3512                 return -ENOENT;
3513
3514         /* This is only allowed for created instances */
3515         if (tr == &global_trace)
3516                 return -EINVAL;
3517
3518         return alloc_percpu_trace_buffer();
3519 }
3520 EXPORT_SYMBOL_GPL(trace_array_init_printk);
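
/*
 * A minimal usage sketch of the two exports above, assuming a caller
 * that created (or looked up) an instance named "my_subsys" via
 * trace_array_get_by_name() -- the instance name is purely illustrative:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "value: %d\n", 42);
 *	if (tr)
 *		trace_array_put(tr);
 */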
3521
3522 __printf(3, 4)
3523 int trace_array_printk_buf(struct trace_buffer *buffer,
3524                            unsigned long ip, const char *fmt, ...)
3525 {
3526         int ret;
3527         va_list ap;
3528
3529         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3530                 return 0;
3531
3532         va_start(ap, fmt);
3533         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3534         va_end(ap);
3535         return ret;
3536 }
3537
3538 __printf(2, 0)
3539 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3540 {
3541         return trace_array_vprintk(&global_trace, ip, fmt, args);
3542 }
3543 EXPORT_SYMBOL_GPL(trace_vprintk);
3544
3545 static void trace_iterator_increment(struct trace_iterator *iter)
3546 {
3547         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3548
3549         iter->idx++;
3550         if (buf_iter)
3551                 ring_buffer_iter_advance(buf_iter);
3552 }
3553
3554 static struct trace_entry *
3555 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3556                 unsigned long *lost_events)
3557 {
3558         struct ring_buffer_event *event;
3559         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3560
3561         if (buf_iter) {
3562                 event = ring_buffer_iter_peek(buf_iter, ts);
3563                 if (lost_events)
3564                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3565                                 (unsigned long)-1 : 0;
3566         } else {
3567                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3568                                          lost_events);
3569         }
3570
3571         if (event) {
3572                 iter->ent_size = ring_buffer_event_length(event);
3573                 return ring_buffer_event_data(event);
3574         }
3575         iter->ent_size = 0;
3576         return NULL;
3577 }
3578
3579 static struct trace_entry *
3580 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3581                   unsigned long *missing_events, u64 *ent_ts)
3582 {
3583         struct trace_buffer *buffer = iter->array_buffer->buffer;
3584         struct trace_entry *ent, *next = NULL;
3585         unsigned long lost_events = 0, next_lost = 0;
3586         int cpu_file = iter->cpu_file;
3587         u64 next_ts = 0, ts;
3588         int next_cpu = -1;
3589         int next_size = 0;
3590         int cpu;
3591
3592         /*
3593          * If we are in a per_cpu trace file, don't bother iterating over
3594          * all CPUs; peek at that CPU directly.
3595          */
3596         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3597                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3598                         return NULL;
3599                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3600                 if (ent_cpu)
3601                         *ent_cpu = cpu_file;
3602
3603                 return ent;
3604         }
3605
3606         for_each_tracing_cpu(cpu) {
3607
3608                 if (ring_buffer_empty_cpu(buffer, cpu))
3609                         continue;
3610
3611                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3612
3613                 /*
3614                  * Pick the entry with the smallest timestamp:
3615                  */
3616                 if (ent && (!next || ts < next_ts)) {
3617                         next = ent;
3618                         next_cpu = cpu;
3619                         next_ts = ts;
3620                         next_lost = lost_events;
3621                         next_size = iter->ent_size;
3622                 }
3623         }
3624
3625         iter->ent_size = next_size;
3626
3627         if (ent_cpu)
3628                 *ent_cpu = next_cpu;
3629
3630         if (ent_ts)
3631                 *ent_ts = next_ts;
3632
3633         if (missing_events)
3634                 *missing_events = next_lost;
3635
3636         return next;
3637 }
3638
3639 #define STATIC_FMT_BUF_SIZE     128
3640 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3641
3642 static char *trace_iter_expand_format(struct trace_iterator *iter)
3643 {
3644         char *tmp;
3645
3646         /*
3647          * iter->tr is NULL when used with tp_printk, which means
3648          * this can get called where it is not safe to call krealloc().
3649          */
3650         if (!iter->tr || iter->fmt == static_fmt_buf)
3651                 return NULL;
3652
3653         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3654                        GFP_KERNEL);
3655         if (tmp) {
3656                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3657                 iter->fmt = tmp;
3658         }
3659
3660         return tmp;
3661 }
3662
3663 /* Returns true if the string is safe to dereference from an event */
3664 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3665 {
3666         unsigned long addr = (unsigned long)str;
3667         struct trace_event *trace_event;
3668         struct trace_event_call *event;
3669
3670         /* OK if part of the event data */
3671         if ((addr >= (unsigned long)iter->ent) &&
3672             (addr < (unsigned long)iter->ent + iter->ent_size))
3673                 return true;
3674
3675         /* OK if part of the temp seq buffer */
3676         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3677             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3678                 return true;
3679
3680         /* Core rodata can not be freed */
3681         if (is_kernel_rodata(addr))
3682                 return true;
3683
3684         if (trace_is_tracepoint_string(str))
3685                 return true;
3686
3687         /*
3688          * Now this could be a module event, referencing core module
3689          * data, which is OK.
3690          */
3691         if (!iter->ent)
3692                 return false;
3693
3694         trace_event = ftrace_find_event(iter->ent->type);
3695         if (!trace_event)
3696                 return false;
3697
3698         event = container_of(trace_event, struct trace_event_call, event);
3699         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3700                 return false;
3701
3702         /* Would rather have rodata, but this will suffice */
3703         if (within_module_core(addr, event->module))
3704                 return true;
3705
3706         return false;
3707 }
3708
3709 static const char *show_buffer(struct trace_seq *s)
3710 {
3711         struct seq_buf *seq = &s->seq;
3712
3713         seq_buf_terminate(seq);
3714
3715         return seq->buffer;
3716 }
3717
3718 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3719
3720 static int test_can_verify_check(const char *fmt, ...)
3721 {
3722         char buf[16];
3723         va_list ap;
3724         int ret;
3725
3726         /*
3727          * The verifier depends on vsnprintf() modifying the va_list
3728          * passed to it, i.e. on the va_list being passed by reference.
3729          * Some architectures (like x86_32) pass it by value, which means
3730          * that vsnprintf() does not modify the caller's va_list, and the
3731          * verifier would then need to understand all the values that
3732          * vsnprintf() can consume. If it is passed by value, then the
3733          * verifier is disabled.
3734          */
3735         va_start(ap, fmt);
3736         vsnprintf(buf, 16, "%d", ap);
3737         ret = va_arg(ap, int);
3738         va_end(ap);
3739
3740         return ret;
3741 }
3742
3743 static void test_can_verify(void)
3744 {
3745         if (!test_can_verify_check("%d %d", 0, 1)) {
3746                 pr_info("trace event string verifier disabled\n");
3747                 static_branch_inc(&trace_no_verify);
3748         }
3749 }
3750
3751 /**
3752  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3753  * @iter: The iterator that holds the seq buffer and the event being printed
3754  * @fmt: The format used to print the event
3755  * @ap: The va_list holding the data to print from @fmt.
3756  *
3757  * This writes the data into the @iter->seq buffer using the data from
3758  * @fmt and @ap. If the format has a %s, then the source of the string
3759  * is examined to make sure it is safe to print, otherwise it will
3760  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3761  * pointer.
3762  */
3763 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3764                          va_list ap)
3765 {
3766         const char *p = fmt;
3767         const char *str;
3768         int i, j;
3769
3770         if (WARN_ON_ONCE(!fmt))
3771                 return;
3772
3773         if (static_branch_unlikely(&trace_no_verify))
3774                 goto print;
3775
3776         /* Don't bother checking when doing a ftrace_dump() */
3777         if (iter->fmt == static_fmt_buf)
3778                 goto print;
3779
3780         while (*p) {
3781                 bool star = false;
3782                 int len = 0;
3783
3784                 j = 0;
3785
3786                 /* We only care about %s and variants */
3787                 for (i = 0; p[i]; i++) {
3788                         if (i + 1 >= iter->fmt_size) {
3789                                 /*
3790                                  * If we can't expand the copy buffer,
3791                                  * just print it.
3792                                  */
3793                                 if (!trace_iter_expand_format(iter))
3794                                         goto print;
3795                         }
3796
3797                         if (p[i] == '\\' && p[i+1]) {
3798                                 i++;
3799                                 continue;
3800                         }
3801                         if (p[i] == '%') {
3802                                 /* Need to test cases like %08.*s */
3803                                 for (j = 1; p[i+j]; j++) {
3804                                         if (isdigit(p[i+j]) ||
3805                                             p[i+j] == '.')
3806                                                 continue;
3807                                         if (p[i+j] == '*') {
3808                                                 star = true;
3809                                                 continue;
3810                                         }
3811                                         break;
3812                                 }
3813                                 if (p[i+j] == 's')
3814                                         break;
3815                                 star = false;
3816                         }
3817                         j = 0;
3818                 }
3819                 /* If no %s found then just print normally */
3820                 if (!p[i])
3821                         break;
3822
3823                 /* Copy up to the %s, and print that */
3824                 strncpy(iter->fmt, p, i);
3825                 iter->fmt[i] = '\0';
3826                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3827
3828                 /*
3829                  * If iter->seq is full, the above call no longer guarantees
3830                  * that ap is in sync with fmt processing, and further calls
3831                  * to va_arg() can return wrong positional arguments.
3832                  *
3833                  * Ensure that ap is no longer used in this case.
3834                  */
3835                 if (iter->seq.full) {
3836                         p = "";
3837                         break;
3838                 }
3839
3840                 if (star)
3841                         len = va_arg(ap, int);
3842
3843                 /* The ap now points to the string data of the %s */
3844                 str = va_arg(ap, const char *);
3845
3846                 /*
3847                  * If you hit this warning, it is likely that the
3848                  * trace event in question used %s on a string that
3849                  * was saved at the time of the event, but may not be
3850                  * around when the trace is read. Use __string(),
3851                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3852                  * instead. See samples/trace_events/trace-events-sample.h
3853                  * for reference.
3854                  */
3855                 if (WARN_ONCE(!trace_safe_str(iter, str),
3856                               "fmt: '%s' current_buffer: '%s'",
3857                               fmt, show_buffer(&iter->seq))) {
3858                         int ret;
3859
3860                         /* Try to safely read the string */
3861                         if (star) {
3862                                 if (len + 1 > iter->fmt_size)
3863                                         len = iter->fmt_size - 1;
3864                                 if (len < 0)
3865                                         len = 0;
3866                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3867                                 iter->fmt[len] = 0;
3868                                 star = false;
3869                         } else {
3870                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3871                                                                   iter->fmt_size);
3872                         }
3873                         if (ret < 0)
3874                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3875                         else
3876                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3877                                                  str, iter->fmt);
3878                         str = "[UNSAFE-MEMORY]";
3879                         strcpy(iter->fmt, "%s");
3880                 } else {
3881                         strncpy(iter->fmt, p + i, j + 1);
3882                         iter->fmt[j+1] = '\0';
3883                 }
3884                 if (star)
3885                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3886                 else
3887                         trace_seq_printf(&iter->seq, iter->fmt, str);
3888
3889                 p += i + j + 1;
3890         }
3891  print:
3892         if (*p)
3893                 trace_seq_vprintf(&iter->seq, p, ap);
3894 }
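
/*
 * The safe pattern referenced in the warning above copies the string
 * into the event itself at the time it is recorded. A minimal
 * TRACE_EVENT() sketch (hypothetical event name; see
 * samples/trace_events/trace-events-sample.h for a full example):
 *
 *	TRACE_EVENT(foo_string,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("%s", __get_str(name))
 *	);
 */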
3895
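/*
 * trace_event_format() below rewrites an event's format string so that
 * pointers are printed unhashed when TRACE_ITER_HASH_PTR is cleared.
 * Only a bare %p is touched, e.g. (illustrative):
 *
 *	"ptr=%p"  becomes  "ptr=%px"
 *	"%pS" and "%%p" are left untouched
 */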
3896 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3897 {
3898         const char *p, *new_fmt;
3899         char *q;
3900
3901         if (WARN_ON_ONCE(!fmt))
3902                 return fmt;
3903
3904         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3905                 return fmt;
3906
3907         p = fmt;
3908         new_fmt = q = iter->fmt;
3909         while (*p) {
3910                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3911                         if (!trace_iter_expand_format(iter))
3912                                 return fmt;
3913
3914                         q += iter->fmt - new_fmt;
3915                         new_fmt = iter->fmt;
3916                 }
3917
3918                 *q++ = *p++;
3919
3920                 /* Replace %p with %px */
3921                 if (p[-1] == '%') {
3922                         if (p[0] == '%') {
3923                                 *q++ = *p++;
3924                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3925                                 *q++ = *p++;
3926                                 *q++ = 'x';
3927                         }
3928                 }
3929         }
3930         *q = '\0';
3931
3932         return new_fmt;
3933 }
3934
3935 #define STATIC_TEMP_BUF_SIZE    128
3936 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3937
3938 /* Find the next real entry, without updating the iterator itself */
3939 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3940                                           int *ent_cpu, u64 *ent_ts)
3941 {
3942         /* __find_next_entry will reset ent_size */
3943         int ent_size = iter->ent_size;
3944         struct trace_entry *entry;
3945
3946         /*
3947          * If called from ftrace_dump(), then the iter->temp buffer
3948          * will be the static_temp_buf and not created from kmalloc.
3949          * If the entry size is greater than the buffer, we cannot
3950          * save it. Just return NULL in that case. This is only
3951          * used to add markers when two consecutive events' time
3952          * stamps have a large delta. See trace_print_lat_context().
3953          */
3954         if (iter->temp == static_temp_buf &&
3955             STATIC_TEMP_BUF_SIZE < ent_size)
3956                 return NULL;
3957
3958         /*
3959          * The __find_next_entry() may call peek_next_entry(), which may
3960          * call ring_buffer_peek() that may make the contents of iter->ent
3961          * undefined. Need to copy iter->ent now.
3962          */
3963         if (iter->ent && iter->ent != iter->temp) {
3964                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3965                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3966                         void *temp;
3967                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3968                         if (!temp)
3969                                 return NULL;
3970                         kfree(iter->temp);
3971                         iter->temp = temp;
3972                         iter->temp_size = iter->ent_size;
3973                 }
3974                 memcpy(iter->temp, iter->ent, iter->ent_size);
3975                 iter->ent = iter->temp;
3976         }
3977         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3978         /* Put back the original ent_size */
3979         iter->ent_size = ent_size;
3980
3981         return entry;
3982 }
3983
3984 /* Find the next real entry, and increment the iterator to the next entry */
3985 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3986 {
3987         iter->ent = __find_next_entry(iter, &iter->cpu,
3988                                       &iter->lost_events, &iter->ts);
3989
3990         if (iter->ent)
3991                 trace_iterator_increment(iter);
3992
3993         return iter->ent ? iter : NULL;
3994 }
3995
3996 static void trace_consume(struct trace_iterator *iter)
3997 {
3998         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3999                             &iter->lost_events);
4000 }
4001
4002 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4003 {
4004         struct trace_iterator *iter = m->private;
4005         int i = (int)*pos;
4006         void *ent;
4007
4008         WARN_ON_ONCE(iter->leftover);
4009
4010         (*pos)++;
4011
4012         /* can't go backwards */
4013         if (iter->idx > i)
4014                 return NULL;
4015
4016         if (iter->idx < 0)
4017                 ent = trace_find_next_entry_inc(iter);
4018         else
4019                 ent = iter;
4020
4021         while (ent && iter->idx < i)
4022                 ent = trace_find_next_entry_inc(iter);
4023
4024         iter->pos = *pos;
4025
4026         return ent;
4027 }
4028
4029 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4030 {
4031         struct ring_buffer_iter *buf_iter;
4032         unsigned long entries = 0;
4033         u64 ts;
4034
4035         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4036
4037         buf_iter = trace_buffer_iter(iter, cpu);
4038         if (!buf_iter)
4039                 return;
4040
4041         ring_buffer_iter_reset(buf_iter);
4042
4043         /*
4044          * With the max latency tracers, it is possible that a reset
4045          * never took place on a cpu. This is evident from the
4046          * timestamp being before the start of the buffer.
4047          */
4048         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4049                 if (ts >= iter->array_buffer->time_start)
4050                         break;
4051                 entries++;
4052                 ring_buffer_iter_advance(buf_iter);
4053         }
4054
4055         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4056 }
4057
4058 /*
4059  * The current tracer is copied to avoid holding a global lock
4060  * all around.
4061  */
4062 static void *s_start(struct seq_file *m, loff_t *pos)
4063 {
4064         struct trace_iterator *iter = m->private;
4065         struct trace_array *tr = iter->tr;
4066         int cpu_file = iter->cpu_file;
4067         void *p = NULL;
4068         loff_t l = 0;
4069         int cpu;
4070
4071         /*
4072          * Copy the tracer to avoid using a global lock all around.
4073          * iter->trace is a copy of current_trace, so the name pointer
4074          * may be compared instead of using a strcmp(), as iter->trace->name
4075          * will point to the same string as current_trace->name.
4076          */
4077         mutex_lock(&trace_types_lock);
4078         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4079                 *iter->trace = *tr->current_trace;
4080         mutex_unlock(&trace_types_lock);
4081
4082 #ifdef CONFIG_TRACER_MAX_TRACE
4083         if (iter->snapshot && iter->trace->use_max_tr)
4084                 return ERR_PTR(-EBUSY);
4085 #endif
4086
4087         if (*pos != iter->pos) {
4088                 iter->ent = NULL;
4089                 iter->cpu = 0;
4090                 iter->idx = -1;
4091
4092                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4093                         for_each_tracing_cpu(cpu)
4094                                 tracing_iter_reset(iter, cpu);
4095                 } else
4096                         tracing_iter_reset(iter, cpu_file);
4097
4098                 iter->leftover = 0;
4099                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4100                         ;
4101
4102         } else {
4103                 /*
4104                  * If we overflowed the seq_file before, then we want
4105                  * to just reuse the trace_seq buffer again.
4106                  */
4107                 if (iter->leftover)
4108                         p = iter;
4109                 else {
4110                         l = *pos - 1;
4111                         p = s_next(m, p, &l);
4112                 }
4113         }
4114
4115         trace_event_read_lock();
4116         trace_access_lock(cpu_file);
4117         return p;
4118 }
4119
4120 static void s_stop(struct seq_file *m, void *p)
4121 {
4122         struct trace_iterator *iter = m->private;
4123
4124 #ifdef CONFIG_TRACER_MAX_TRACE
4125         if (iter->snapshot && iter->trace->use_max_tr)
4126                 return;
4127 #endif
4128
4129         trace_access_unlock(iter->cpu_file);
4130         trace_event_read_unlock();
4131 }
4132
4133 static void
4134 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4135                       unsigned long *entries, int cpu)
4136 {
4137         unsigned long count;
4138
4139         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4140         /*
4141          * If this buffer has skipped entries, then we hold all
4142          * entries for the trace and we need to ignore the
4143          * ones before the time stamp.
4144          */
4145         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4146                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4147                 /* total is the same as the entries */
4148                 *total = count;
4149         } else
4150                 *total = count +
4151                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4152         *entries = count;
4153 }
4154
4155 static void
4156 get_total_entries(struct array_buffer *buf,
4157                   unsigned long *total, unsigned long *entries)
4158 {
4159         unsigned long t, e;
4160         int cpu;
4161
4162         *total = 0;
4163         *entries = 0;
4164
4165         for_each_tracing_cpu(cpu) {
4166                 get_total_entries_cpu(buf, &t, &e, cpu);
4167                 *total += t;
4168                 *entries += e;
4169         }
4170 }
4171
4172 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4173 {
4174         unsigned long total, entries;
4175
4176         if (!tr)
4177                 tr = &global_trace;
4178
4179         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4180
4181         return entries;
4182 }
4183
4184 unsigned long trace_total_entries(struct trace_array *tr)
4185 {
4186         unsigned long total, entries;
4187
4188         if (!tr)
4189                 tr = &global_trace;
4190
4191         get_total_entries(&tr->array_buffer, &total, &entries);
4192
4193         return entries;
4194 }
4195
4196 static void print_lat_help_header(struct seq_file *m)
4197 {
4198         seq_puts(m, "#                    _------=> CPU#            \n"
4199                     "#                   / _-----=> irqs-off/BH-disabled\n"
4200                     "#                  | / _----=> need-resched    \n"
4201                     "#                  || / _---=> hardirq/softirq \n"
4202                     "#                  ||| / _--=> preempt-depth   \n"
4203                     "#                  |||| / _-=> migrate-disable \n"
4204                     "#                  ||||| /     delay           \n"
4205                     "#  cmd     pid     |||||| time  |   caller     \n"
4206                     "#     \\   /        ||||||  \\    |    /       \n");
4207 }
4208
4209 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4210 {
4211         unsigned long total;
4212         unsigned long entries;
4213
4214         get_total_entries(buf, &total, &entries);
4215         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4216                    entries, total, num_online_cpus());
4217         seq_puts(m, "#\n");
4218 }
4219
4220 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4221                                    unsigned int flags)
4222 {
4223         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4224
4225         print_event_info(buf, m);
4226
4227         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4228         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4229 }
4230
4231 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4232                                        unsigned int flags)
4233 {
4234         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4235         const char *space = "            ";
4236         int prec = tgid ? 12 : 2;
4237
4238         print_event_info(buf, m);
4239
4240         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4241         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4242         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4243         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4244         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4245         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4246         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4247         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4248 }
4249
4250 void
4251 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4252 {
4253         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4254         struct array_buffer *buf = iter->array_buffer;
4255         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4256         struct tracer *type = iter->trace;
4257         unsigned long entries;
4258         unsigned long total;
4259         const char *name = "preemption";
4260
4261         name = type->name;
4262
4263         get_total_entries(buf, &total, &entries);
4264
4265         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4266                    name, UTS_RELEASE);
4267         seq_puts(m, "# -----------------------------------"
4268                  "---------------------------------\n");
4269         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4270                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4271                    nsecs_to_usecs(data->saved_latency),
4272                    entries,
4273                    total,
4274                    buf->cpu,
4275 #if defined(CONFIG_PREEMPT_NONE)
4276                    "server",
4277 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4278                    "desktop",
4279 #elif defined(CONFIG_PREEMPT)
4280                    "preempt",
4281 #elif defined(CONFIG_PREEMPT_RT)
4282                    "preempt_rt",
4283 #else
4284                    "unknown",
4285 #endif
4286                    /* These are reserved for later use */
4287                    0, 0, 0, 0);
4288 #ifdef CONFIG_SMP
4289         seq_printf(m, " #P:%d)\n", num_online_cpus());
4290 #else
4291         seq_puts(m, ")\n");
4292 #endif
4293         seq_puts(m, "#    -----------------\n");
4294         seq_printf(m, "#    | task: %.16s-%d "
4295                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4296                    data->comm, data->pid,
4297                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4298                    data->policy, data->rt_priority);
4299         seq_puts(m, "#    -----------------\n");
4300
4301         if (data->critical_start) {
4302                 seq_puts(m, "#  => started at: ");
4303                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4304                 trace_print_seq(m, &iter->seq);
4305                 seq_puts(m, "\n#  => ended at:   ");
4306                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4307                 trace_print_seq(m, &iter->seq);
4308                 seq_puts(m, "\n#\n");
4309         }
4310
4311         seq_puts(m, "#\n");
4312 }
4313
4314 static void test_cpu_buff_start(struct trace_iterator *iter)
4315 {
4316         struct trace_seq *s = &iter->seq;
4317         struct trace_array *tr = iter->tr;
4318
4319         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4320                 return;
4321
4322         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4323                 return;
4324
4325         if (cpumask_available(iter->started) &&
4326             cpumask_test_cpu(iter->cpu, iter->started))
4327                 return;
4328
4329         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4330                 return;
4331
4332         if (cpumask_available(iter->started))
4333                 cpumask_set_cpu(iter->cpu, iter->started);
4334
4335         /* Don't print started cpu buffer for the first entry of the trace */
4336         if (iter->idx > 1)
4337                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4338                                 iter->cpu);
4339 }
4340
4341 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4342 {
4343         struct trace_array *tr = iter->tr;
4344         struct trace_seq *s = &iter->seq;
4345         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4346         struct trace_entry *entry;
4347         struct trace_event *event;
4348
4349         entry = iter->ent;
4350
4351         test_cpu_buff_start(iter);
4352
4353         event = ftrace_find_event(entry->type);
4354
4355         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4356                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4357                         trace_print_lat_context(iter);
4358                 else
4359                         trace_print_context(iter);
4360         }
4361
4362         if (trace_seq_has_overflowed(s))
4363                 return TRACE_TYPE_PARTIAL_LINE;
4364
4365         if (event)
4366                 return event->funcs->trace(iter, sym_flags, event);
4367
4368         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4369
4370         return trace_handle_return(s);
4371 }
4372
4373 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4374 {
4375         struct trace_array *tr = iter->tr;
4376         struct trace_seq *s = &iter->seq;
4377         struct trace_entry *entry;
4378         struct trace_event *event;
4379
4380         entry = iter->ent;
4381
4382         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4383                 trace_seq_printf(s, "%d %d %llu ",
4384                                  entry->pid, iter->cpu, iter->ts);
4385
4386         if (trace_seq_has_overflowed(s))
4387                 return TRACE_TYPE_PARTIAL_LINE;
4388
4389         event = ftrace_find_event(entry->type);
4390         if (event)
4391                 return event->funcs->raw(iter, 0, event);
4392
4393         trace_seq_printf(s, "%d ?\n", entry->type);
4394
4395         return trace_handle_return(s);
4396 }
4397
4398 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4399 {
4400         struct trace_array *tr = iter->tr;
4401         struct trace_seq *s = &iter->seq;
4402         unsigned char newline = '\n';
4403         struct trace_entry *entry;
4404         struct trace_event *event;
4405
4406         entry = iter->ent;
4407
4408         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4409                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4410                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4411                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4412                 if (trace_seq_has_overflowed(s))
4413                         return TRACE_TYPE_PARTIAL_LINE;
4414         }
4415
4416         event = ftrace_find_event(entry->type);
4417         if (event) {
4418                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4419                 if (ret != TRACE_TYPE_HANDLED)
4420                         return ret;
4421         }
4422
4423         SEQ_PUT_FIELD(s, newline);
4424
4425         return trace_handle_return(s);
4426 }
4427
4428 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4429 {
4430         struct trace_array *tr = iter->tr;
4431         struct trace_seq *s = &iter->seq;
4432         struct trace_entry *entry;
4433         struct trace_event *event;
4434
4435         entry = iter->ent;
4436
4437         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4438                 SEQ_PUT_FIELD(s, entry->pid);
4439                 SEQ_PUT_FIELD(s, iter->cpu);
4440                 SEQ_PUT_FIELD(s, iter->ts);
4441                 if (trace_seq_has_overflowed(s))
4442                         return TRACE_TYPE_PARTIAL_LINE;
4443         }
4444
4445         event = ftrace_find_event(entry->type);
4446         return event ? event->funcs->binary(iter, 0, event) :
4447                 TRACE_TYPE_HANDLED;
4448 }
4449
4450 int trace_empty(struct trace_iterator *iter)
4451 {
4452         struct ring_buffer_iter *buf_iter;
4453         int cpu;
4454
4455         /* If we are looking at one CPU buffer, only check that one */
4456         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4457                 cpu = iter->cpu_file;
4458                 buf_iter = trace_buffer_iter(iter, cpu);
4459                 if (buf_iter) {
4460                         if (!ring_buffer_iter_empty(buf_iter))
4461                                 return 0;
4462                 } else {
4463                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4464                                 return 0;
4465                 }
4466                 return 1;
4467         }
4468
4469         for_each_tracing_cpu(cpu) {
4470                 buf_iter = trace_buffer_iter(iter, cpu);
4471                 if (buf_iter) {
4472                         if (!ring_buffer_iter_empty(buf_iter))
4473                                 return 0;
4474                 } else {
4475                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476                                 return 0;
4477                 }
4478         }
4479
4480         return 1;
4481 }
4482
4483 /*  Called with trace_event_read_lock() held. */
4484 enum print_line_t print_trace_line(struct trace_iterator *iter)
4485 {
4486         struct trace_array *tr = iter->tr;
4487         unsigned long trace_flags = tr->trace_flags;
4488         enum print_line_t ret;
4489
4490         if (iter->lost_events) {
4491                 if (iter->lost_events == (unsigned long)-1)
4492                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4493                                          iter->cpu);
4494                 else
4495                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4496                                          iter->cpu, iter->lost_events);
4497                 if (trace_seq_has_overflowed(&iter->seq))
4498                         return TRACE_TYPE_PARTIAL_LINE;
4499         }
4500
4501         if (iter->trace && iter->trace->print_line) {
4502                 ret = iter->trace->print_line(iter);
4503                 if (ret != TRACE_TYPE_UNHANDLED)
4504                         return ret;
4505         }
4506
4507         if (iter->ent->type == TRACE_BPUTS &&
4508                         trace_flags & TRACE_ITER_PRINTK &&
4509                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4510                 return trace_print_bputs_msg_only(iter);
4511
4512         if (iter->ent->type == TRACE_BPRINT &&
4513                         trace_flags & TRACE_ITER_PRINTK &&
4514                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4515                 return trace_print_bprintk_msg_only(iter);
4516
4517         if (iter->ent->type == TRACE_PRINT &&
4518                         trace_flags & TRACE_ITER_PRINTK &&
4519                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4520                 return trace_print_printk_msg_only(iter);
4521
4522         if (trace_flags & TRACE_ITER_BIN)
4523                 return print_bin_fmt(iter);
4524
4525         if (trace_flags & TRACE_ITER_HEX)
4526                 return print_hex_fmt(iter);
4527
4528         if (trace_flags & TRACE_ITER_RAW)
4529                 return print_raw_fmt(iter);
4530
4531         return print_trace_fmt(iter);
4532 }
4533
4534 void trace_latency_header(struct seq_file *m)
4535 {
4536         struct trace_iterator *iter = m->private;
4537         struct trace_array *tr = iter->tr;
4538
4539         /* print nothing if the buffers are empty */
4540         if (trace_empty(iter))
4541                 return;
4542
4543         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4544                 print_trace_header(m, iter);
4545
4546         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4547                 print_lat_help_header(m);
4548 }
4549
4550 void trace_default_header(struct seq_file *m)
4551 {
4552         struct trace_iterator *iter = m->private;
4553         struct trace_array *tr = iter->tr;
4554         unsigned long trace_flags = tr->trace_flags;
4555
4556         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4557                 return;
4558
4559         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4560                 /* print nothing if the buffers are empty */
4561                 if (trace_empty(iter))
4562                         return;
4563                 print_trace_header(m, iter);
4564                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4565                         print_lat_help_header(m);
4566         } else {
4567                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4568                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4569                                 print_func_help_header_irq(iter->array_buffer,
4570                                                            m, trace_flags);
4571                         else
4572                                 print_func_help_header(iter->array_buffer, m,
4573                                                        trace_flags);
4574                 }
4575         }
4576 }
4577
4578 static void test_ftrace_alive(struct seq_file *m)
4579 {
4580         if (!ftrace_is_dead())
4581                 return;
4582         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4583                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4584 }
4585
4586 #ifdef CONFIG_TRACER_MAX_TRACE
4587 static void show_snapshot_main_help(struct seq_file *m)
4588 {
4589         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4590                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4591                     "#                      Takes a snapshot of the main buffer.\n"
4592                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4593                     "#                      (Doesn't have to be '2'; works with any number that\n"
4594                     "#                       is not a '0' or '1')\n");
4595 }
4596
4597 static void show_snapshot_percpu_help(struct seq_file *m)
4598 {
4599         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4600 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4601         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4602                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4603 #else
4604         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4605                     "#                     Must use main snapshot file to allocate.\n");
4606 #endif
4607         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4608                     "#                      (Doesn't have to be '2'; works with any number that\n"
4609                     "#                       is not a '0' or '1')\n");
4610 }
4611
4612 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4613 {
4614         if (iter->tr->allocated_snapshot)
4615                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4616         else
4617                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4618
4619         seq_puts(m, "# Snapshot commands:\n");
4620         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4621                 show_snapshot_main_help(m);
4622         else
4623                 show_snapshot_percpu_help(m);
4624 }
4625 #else
4626 /* Should never be called */
4627 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4628 #endif
4629
4630 static int s_show(struct seq_file *m, void *v)
4631 {
4632         struct trace_iterator *iter = v;
4633         int ret;
4634
4635         if (iter->ent == NULL) {
4636                 if (iter->tr) {
4637                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4638                         seq_puts(m, "#\n");
4639                         test_ftrace_alive(m);
4640                 }
4641                 if (iter->snapshot && trace_empty(iter))
4642                         print_snapshot_help(m, iter);
4643                 else if (iter->trace && iter->trace->print_header)
4644                         iter->trace->print_header(m);
4645                 else
4646                         trace_default_header(m);
4647
4648         } else if (iter->leftover) {
4649                 /*
4650                  * If we filled the seq_file buffer earlier, we
4651                  * want to just show it now.
4652                  */
4653                 ret = trace_print_seq(m, &iter->seq);
4654
4655                 /* ret should this time be zero, but you never know */
4656                 iter->leftover = ret;
4657
4658         } else {
4659                 print_trace_line(iter);
4660                 ret = trace_print_seq(m, &iter->seq);
4661                 /*
4662                  * If we overflow the seq_file buffer, then it will
4663                  * ask us for this data again at start up.
4664                  * Use that instead.
4665                  *  ret is 0 if seq_file write succeeded.
4666                  *        -1 otherwise.
4667                  */
4668                 iter->leftover = ret;
4669         }
4670
4671         return 0;
4672 }
4673
4674 /*
4675  * Should be used after trace_array_get(), trace_types_lock
4676  * ensures that i_cdev was already initialized.
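 * The per-CPU files store (cpu + 1) in i_cdev (see
 * trace_create_cpu_file()), so a NULL i_cdev means "all CPUs".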
4677  */
4678 static inline int tracing_get_cpu(struct inode *inode)
4679 {
4680         if (inode->i_cdev) /* See trace_create_cpu_file() */
4681                 return (long)inode->i_cdev - 1;
4682         return RING_BUFFER_ALL_CPUS;
4683 }
4684
4685 static const struct seq_operations tracer_seq_ops = {
4686         .start          = s_start,
4687         .next           = s_next,
4688         .stop           = s_stop,
4689         .show           = s_show,
4690 };
4691
4692 static struct trace_iterator *
4693 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4694 {
4695         struct trace_array *tr = inode->i_private;
4696         struct trace_iterator *iter;
4697         int cpu;
4698
4699         if (tracing_disabled)
4700                 return ERR_PTR(-ENODEV);
4701
4702         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4703         if (!iter)
4704                 return ERR_PTR(-ENOMEM);
4705
4706         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4707                                     GFP_KERNEL);
4708         if (!iter->buffer_iter)
4709                 goto release;
4710
4711         /*
4712          * trace_find_next_entry() may need to save off iter->ent.
4713          * It will place it into the iter->temp buffer. As most
4714          * events are less than 128 bytes, allocate a buffer of that size.
4715          * If one is greater, then trace_find_next_entry() will
4716          * allocate a new buffer to adjust for the bigger iter->ent.
4717          * It's not critical if it fails to get allocated here.
4718          */
4719         iter->temp = kmalloc(128, GFP_KERNEL);
4720         if (iter->temp)
4721                 iter->temp_size = 128;
4722
4723         /*
4724          * trace_event_printf() may need to modify the given format
4725          * string to replace %p with %px so that it shows the real address
4726          * instead of a hash value. However, that is only needed for event
4727          * tracing; other tracers may not need it. Defer the allocation
4728          * until it is needed.
4729          */
4730         iter->fmt = NULL;
4731         iter->fmt_size = 0;
4732
4733         /*
4734          * We make a copy of the current tracer to avoid concurrent
4735          * changes on it while we are reading.
4736          */
4737         mutex_lock(&trace_types_lock);
4738         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4739         if (!iter->trace)
4740                 goto fail;
4741
4742         *iter->trace = *tr->current_trace;
4743
4744         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4745                 goto fail;
4746
4747         iter->tr = tr;
4748
4749 #ifdef CONFIG_TRACER_MAX_TRACE
4750         /* Currently only the top directory has a snapshot */
4751         if (tr->current_trace->print_max || snapshot)
4752                 iter->array_buffer = &tr->max_buffer;
4753         else
4754 #endif
4755                 iter->array_buffer = &tr->array_buffer;
4756         iter->snapshot = snapshot;
4757         iter->pos = -1;
4758         iter->cpu_file = tracing_get_cpu(inode);
4759         mutex_init(&iter->mutex);
4760
4761         /* Notify the tracer early; before we stop tracing. */
4762         if (iter->trace->open)
4763                 iter->trace->open(iter);
4764
4765         /* Annotate start of buffers if we had overruns */
4766         if (ring_buffer_overruns(iter->array_buffer->buffer))
4767                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4768
4769         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4770         if (trace_clocks[tr->clock_id].in_ns)
4771                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4772
4773         /*
4774          * If pause-on-trace is enabled, then stop the trace while
4775          * dumping, unless this is the "snapshot" file
4776          */
4777         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4778                 tracing_stop_tr(tr);
4779
4780         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4781                 for_each_tracing_cpu(cpu) {
4782                         iter->buffer_iter[cpu] =
4783                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4784                                                          cpu, GFP_KERNEL);
4785                 }
4786                 ring_buffer_read_prepare_sync();
4787                 for_each_tracing_cpu(cpu) {
4788                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4789                         tracing_iter_reset(iter, cpu);
4790                 }
4791         } else {
4792                 cpu = iter->cpu_file;
4793                 iter->buffer_iter[cpu] =
4794                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4795                                                  cpu, GFP_KERNEL);
4796                 ring_buffer_read_prepare_sync();
4797                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4798                 tracing_iter_reset(iter, cpu);
4799         }
4800
4801         mutex_unlock(&trace_types_lock);
4802
4803         return iter;
4804
4805  fail:
4806         mutex_unlock(&trace_types_lock);
4807         kfree(iter->trace);
4808         kfree(iter->temp);
4809         kfree(iter->buffer_iter);
4810 release:
4811         seq_release_private(inode, file);
4812         return ERR_PTR(-ENOMEM);
4813 }
4814
4815 int tracing_open_generic(struct inode *inode, struct file *filp)
4816 {
4817         int ret;
4818
4819         ret = tracing_check_open_get_tr(NULL);
4820         if (ret)
4821                 return ret;
4822
4823         filp->private_data = inode->i_private;
4824         return 0;
4825 }
4826
4827 bool tracing_is_disabled(void)
4828 {
4829         return tracing_disabled;
4830 }
4831
4832 /*
4833  * Open and update trace_array ref count.
4834  * Must have the current trace_array passed to it.
4835  */
4836 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4837 {
4838         struct trace_array *tr = inode->i_private;
4839         int ret;
4840
4841         ret = tracing_check_open_get_tr(tr);
4842         if (ret)
4843                 return ret;
4844
4845         filp->private_data = inode->i_private;
4846
4847         return 0;
4848 }
4849
4850 static int tracing_mark_open(struct inode *inode, struct file *filp)
4851 {
4852         stream_open(inode, filp);
4853         return tracing_open_generic_tr(inode, filp);
4854 }
4855
4856 static int tracing_release(struct inode *inode, struct file *file)
4857 {
4858         struct trace_array *tr = inode->i_private;
4859         struct seq_file *m = file->private_data;
4860         struct trace_iterator *iter;
4861         int cpu;
4862
4863         if (!(file->f_mode & FMODE_READ)) {
4864                 trace_array_put(tr);
4865                 return 0;
4866         }
4867
4868         /* Writes do not use seq_file */
4869         iter = m->private;
4870         mutex_lock(&trace_types_lock);
4871
4872         for_each_tracing_cpu(cpu) {
4873                 if (iter->buffer_iter[cpu])
4874                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4875         }
4876
4877         if (iter->trace && iter->trace->close)
4878                 iter->trace->close(iter);
4879
4880         if (!iter->snapshot && tr->stop_count)
4881                 /* reenable tracing if it was previously enabled */
4882                 tracing_start_tr(tr);
4883
4884         __trace_array_put(tr);
4885
4886         mutex_unlock(&trace_types_lock);
4887
4888         mutex_destroy(&iter->mutex);
4889         free_cpumask_var(iter->started);
4890         kfree(iter->fmt);
4891         kfree(iter->temp);
4892         kfree(iter->trace);
4893         kfree(iter->buffer_iter);
4894         seq_release_private(inode, file);
4895
4896         return 0;
4897 }
4898
4899 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4900 {
4901         struct trace_array *tr = inode->i_private;
4902
4903         trace_array_put(tr);
4904         return 0;
4905 }
4906
4907 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4908 {
4909         struct trace_array *tr = inode->i_private;
4910
4911         trace_array_put(tr);
4912
4913         return single_release(inode, file);
4914 }
4915
4916 static int tracing_open(struct inode *inode, struct file *file)
4917 {
4918         struct trace_array *tr = inode->i_private;
4919         struct trace_iterator *iter;
4920         int ret;
4921
4922         ret = tracing_check_open_get_tr(tr);
4923         if (ret)
4924                 return ret;
4925
4926         /* If this file was open for write, then erase contents */
4927         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4928                 int cpu = tracing_get_cpu(inode);
4929                 struct array_buffer *trace_buf = &tr->array_buffer;
4930
4931 #ifdef CONFIG_TRACER_MAX_TRACE
4932                 if (tr->current_trace->print_max)
4933                         trace_buf = &tr->max_buffer;
4934 #endif
4935
4936                 if (cpu == RING_BUFFER_ALL_CPUS)
4937                         tracing_reset_online_cpus(trace_buf);
4938                 else
4939                         tracing_reset_cpu(trace_buf, cpu);
4940         }
4941
4942         if (file->f_mode & FMODE_READ) {
4943                 iter = __tracing_open(inode, file, false);
4944                 if (IS_ERR(iter))
4945                         ret = PTR_ERR(iter);
4946                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4947                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4948         }
4949
4950         if (ret < 0)
4951                 trace_array_put(tr);
4952
4953         return ret;
4954 }
4955
4956 /*
4957  * Some tracers are not suitable for instance buffers.
4958  * A tracer is always available for the global array (toplevel),
4959  * and is available to an instance only if it explicitly allows instances.
4960  */
4961 static bool
4962 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4963 {
4964         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4965 }
4966
4967 /* Find the next tracer that this trace array may use */
4968 static struct tracer *
4969 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4970 {
4971         while (t && !trace_ok_for_array(t, tr))
4972                 t = t->next;
4973
4974         return t;
4975 }
4976
4977 static void *
4978 t_next(struct seq_file *m, void *v, loff_t *pos)
4979 {
4980         struct trace_array *tr = m->private;
4981         struct tracer *t = v;
4982
4983         (*pos)++;
4984
4985         if (t)
4986                 t = get_tracer_for_array(tr, t->next);
4987
4988         return t;
4989 }
4990
4991 static void *t_start(struct seq_file *m, loff_t *pos)
4992 {
4993         struct trace_array *tr = m->private;
4994         struct tracer *t;
4995         loff_t l = 0;
4996
4997         mutex_lock(&trace_types_lock);
4998
4999         t = get_tracer_for_array(tr, trace_types);
5000         for (; t && l < *pos; t = t_next(m, t, &l))
5001                 ;
5002
5003         return t;
5004 }
5005
5006 static void t_stop(struct seq_file *m, void *p)
5007 {
5008         mutex_unlock(&trace_types_lock);
5009 }
5010
5011 static int t_show(struct seq_file *m, void *v)
5012 {
5013         struct tracer *t = v;
5014
5015         if (!t)
5016                 return 0;
5017
5018         seq_puts(m, t->name);
5019         if (t->next)
5020                 seq_putc(m, ' ');
5021         else
5022                 seq_putc(m, '\n');
5023
5024         return 0;
5025 }
5026
5027 static const struct seq_operations show_traces_seq_ops = {
5028         .start          = t_start,
5029         .next           = t_next,
5030         .stop           = t_stop,
5031         .show           = t_show,
5032 };
5033
5034 static int show_traces_open(struct inode *inode, struct file *file)
5035 {
5036         struct trace_array *tr = inode->i_private;
5037         struct seq_file *m;
5038         int ret;
5039
5040         ret = tracing_check_open_get_tr(tr);
5041         if (ret)
5042                 return ret;
5043
5044         ret = seq_open(file, &show_traces_seq_ops);
5045         if (ret) {
5046                 trace_array_put(tr);
5047                 return ret;
5048         }
5049
5050         m = file->private_data;
5051         m->private = tr;
5052
5053         return 0;
5054 }
5055
5056 static int show_traces_release(struct inode *inode, struct file *file)
5057 {
5058         struct trace_array *tr = inode->i_private;
5059
5060         trace_array_put(tr);
5061         return seq_release(inode, file);
5062 }
5063
5064 static ssize_t
5065 tracing_write_stub(struct file *filp, const char __user *ubuf,
5066                    size_t count, loff_t *ppos)
5067 {
5068         return count;
5069 }
5070
5071 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5072 {
5073         int ret;
5074
5075         if (file->f_mode & FMODE_READ)
5076                 ret = seq_lseek(file, offset, whence);
5077         else
5078                 file->f_pos = ret = 0;
5079
5080         return ret;
5081 }
5082
5083 static const struct file_operations tracing_fops = {
5084         .open           = tracing_open,
5085         .read           = seq_read,
5086         .write          = tracing_write_stub,
5087         .llseek         = tracing_lseek,
5088         .release        = tracing_release,
5089 };
5090
5091 static const struct file_operations show_traces_fops = {
5092         .open           = show_traces_open,
5093         .read           = seq_read,
5094         .llseek         = seq_lseek,
5095         .release        = show_traces_release,
5096 };
5097
5098 static ssize_t
5099 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5100                      size_t count, loff_t *ppos)
5101 {
5102         struct trace_array *tr = file_inode(filp)->i_private;
5103         char *mask_str;
5104         int len;
5105
5106         len = snprintf(NULL, 0, "%*pb\n",
5107                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5108         mask_str = kmalloc(len, GFP_KERNEL);
5109         if (!mask_str)
5110                 return -ENOMEM;
5111
5112         len = snprintf(mask_str, len, "%*pb\n",
5113                        cpumask_pr_args(tr->tracing_cpumask));
5114         if (len >= count) {
5115                 count = -EINVAL;
5116                 goto out_err;
5117         }
5118         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5119
5120 out_err:
5121         kfree(mask_str);
5122
5123         return count;
5124 }
5125
5126 int tracing_set_cpumask(struct trace_array *tr,
5127                         cpumask_var_t tracing_cpumask_new)
5128 {
5129         int cpu;
5130
5131         if (!tr)
5132                 return -EINVAL;
5133
5134         local_irq_disable();
5135         arch_spin_lock(&tr->max_lock);
5136         for_each_tracing_cpu(cpu) {
5137                 /*
5138                  * Increase/decrease the disabled counter if we are
5139                  * about to flip a bit in the cpumask:
5140                  */
5141                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5142                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5143                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5144                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5145                 }
5146                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5147                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5148                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5149                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5150                 }
5151         }
5152         arch_spin_unlock(&tr->max_lock);
5153         local_irq_enable();
5154
5155         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5156
5157         return 0;
5158 }
5159
5160 static ssize_t
5161 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5162                       size_t count, loff_t *ppos)
5163 {
5164         struct trace_array *tr = file_inode(filp)->i_private;
5165         cpumask_var_t tracing_cpumask_new;
5166         int err;
5167
5168         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5169                 return -ENOMEM;
5170
5171         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5172         if (err)
5173                 goto err_free;
5174
5175         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5176         if (err)
5177                 goto err_free;
5178
5179         free_cpumask_var(tracing_cpumask_new);
5180
5181         return count;
5182
5183 err_free:
5184         free_cpumask_var(tracing_cpumask_new);
5185
5186         return err;
5187 }
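/*
 * Illustrative usage (a sketch; the file takes a hex CPU mask as parsed by
 * cpumask_parse_user(), values below are hypothetical):
 *
 *	# echo 3 > tracing_cpumask	trace only CPUs 0 and 1
 *	# cat tracing_cpumask		show the current mask, e.g. "f"
 */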
5188
5189 static const struct file_operations tracing_cpumask_fops = {
5190         .open           = tracing_open_generic_tr,
5191         .read           = tracing_cpumask_read,
5192         .write          = tracing_cpumask_write,
5193         .release        = tracing_release_generic_tr,
5194         .llseek         = generic_file_llseek,
5195 };
5196
5197 static int tracing_trace_options_show(struct seq_file *m, void *v)
5198 {
5199         struct tracer_opt *trace_opts;
5200         struct trace_array *tr = m->private;
5201         u32 tracer_flags;
5202         int i;
5203
5204         mutex_lock(&trace_types_lock);
5205         tracer_flags = tr->current_trace->flags->val;
5206         trace_opts = tr->current_trace->flags->opts;
5207
5208         for (i = 0; trace_options[i]; i++) {
5209                 if (tr->trace_flags & (1 << i))
5210                         seq_printf(m, "%s\n", trace_options[i]);
5211                 else
5212                         seq_printf(m, "no%s\n", trace_options[i]);
5213         }
5214
5215         for (i = 0; trace_opts[i].name; i++) {
5216                 if (tracer_flags & trace_opts[i].bit)
5217                         seq_printf(m, "%s\n", trace_opts[i].name);
5218                 else
5219                         seq_printf(m, "no%s\n", trace_opts[i].name);
5220         }
5221         mutex_unlock(&trace_types_lock);
5222
5223         return 0;
5224 }
5225
5226 static int __set_tracer_option(struct trace_array *tr,
5227                                struct tracer_flags *tracer_flags,
5228                                struct tracer_opt *opts, int neg)
5229 {
5230         struct tracer *trace = tracer_flags->trace;
5231         int ret;
5232
5233         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5234         if (ret)
5235                 return ret;
5236
5237         if (neg)
5238                 tracer_flags->val &= ~opts->bit;
5239         else
5240                 tracer_flags->val |= opts->bit;
5241         return 0;
5242 }
5243
5244 /* Try to assign a tracer specific option */
5245 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5246 {
5247         struct tracer *trace = tr->current_trace;
5248         struct tracer_flags *tracer_flags = trace->flags;
5249         struct tracer_opt *opts = NULL;
5250         int i;
5251
5252         for (i = 0; tracer_flags->opts[i].name; i++) {
5253                 opts = &tracer_flags->opts[i];
5254
5255                 if (strcmp(cmp, opts->name) == 0)
5256                         return __set_tracer_option(tr, trace->flags, opts, neg);
5257         }
5258
5259         return -EINVAL;
5260 }
5261
5262 /* Some tracers require overwrite to stay enabled */
5263 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5264 {
5265         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5266                 return -1;
5267
5268         return 0;
5269 }
5270
5271 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5272 {
5273         int *map;
5274
5275         if ((mask == TRACE_ITER_RECORD_TGID) ||
5276             (mask == TRACE_ITER_RECORD_CMD))
5277                 lockdep_assert_held(&event_mutex);
5278
5279         /* do nothing if the flag already matches the requested state */
5280         if (!!(tr->trace_flags & mask) == !!enabled)
5281                 return 0;
5282
5283         /* Give the tracer a chance to approve the change */
5284         if (tr->current_trace->flag_changed)
5285                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5286                         return -EINVAL;
5287
5288         if (enabled)
5289                 tr->trace_flags |= mask;
5290         else
5291                 tr->trace_flags &= ~mask;
5292
5293         if (mask == TRACE_ITER_RECORD_CMD)
5294                 trace_event_enable_cmd_record(enabled);
5295
5296         if (mask == TRACE_ITER_RECORD_TGID) {
5297                 if (!tgid_map) {
5298                         tgid_map_max = pid_max;
5299                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5300                                        GFP_KERNEL);
5301
5302                         /*
5303                          * Pairs with smp_load_acquire() in
5304                          * trace_find_tgid_ptr() to ensure that if it observes
5305                          * the tgid_map we just allocated then it also observes
5306                          * the corresponding tgid_map_max value.
5307                          */
5308                         smp_store_release(&tgid_map, map);
5309                 }
5310                 if (!tgid_map) {
5311                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5312                         return -ENOMEM;
5313                 }
5314
5315                 trace_event_enable_tgid_record(enabled);
5316         }
5317
5318         if (mask == TRACE_ITER_EVENT_FORK)
5319                 trace_event_follow_fork(tr, enabled);
5320
5321         if (mask == TRACE_ITER_FUNC_FORK)
5322                 ftrace_pid_follow_fork(tr, enabled);
5323
5324         if (mask == TRACE_ITER_OVERWRITE) {
5325                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5328 #endif
5329         }
5330
5331         if (mask == TRACE_ITER_PRINTK) {
5332                 trace_printk_start_stop_comm(enabled);
5333                 trace_printk_control(enabled);
5334         }
5335
5336         return 0;
5337 }
5338
5339 int trace_set_options(struct trace_array *tr, char *option)
5340 {
5341         char *cmp;
5342         int neg = 0;
5343         int ret;
5344         size_t orig_len = strlen(option);
5345         int len;
5346
5347         cmp = strstrip(option);
5348
5349         len = str_has_prefix(cmp, "no");
5350         if (len)
5351                 neg = 1;
5352
5353         cmp += len;
5354
5355         mutex_lock(&event_mutex);
5356         mutex_lock(&trace_types_lock);
5357
5358         ret = match_string(trace_options, -1, cmp);
5359         /* If no option could be set, test the specific tracer options */
5360         if (ret < 0)
5361                 ret = set_tracer_option(tr, cmp, neg);
5362         else
5363                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5364
5365         mutex_unlock(&trace_types_lock);
5366         mutex_unlock(&event_mutex);
5367
5368         /*
5369          * If the first trailing whitespace is replaced with '\0' by strstrip,
5370          * turn it back into a space.
5371          */
5372         if (orig_len > strlen(option))
5373                 option[strlen(option)] = ' ';
5374
5375         return ret;
5376 }
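/*
 * Illustrative usage (a sketch; follows the "no" prefix convention described
 * in the readme text below):
 *
 *	# echo stacktrace > trace_options	enable the stacktrace option
 *	# echo nostacktrace > trace_options	disable it again
 */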
5377
5378 static void __init apply_trace_boot_options(void)
5379 {
5380         char *buf = trace_boot_options_buf;
5381         char *option;
5382
5383         while (true) {
5384                 option = strsep(&buf, ",");
5385
5386                 if (!option)
5387                         break;
5388
5389                 if (*option)
5390                         trace_set_options(&global_trace, option);
5391
5392                 /* Put back the comma to allow this to be called again */
5393                 if (buf)
5394                         *(buf - 1) = ',';
5395         }
5396 }
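/*
 * Illustrative boot-time usage (an assumption about the command line
 * parameter that fills trace_boot_options_buf):
 *
 *	trace_options=sym-offset,stacktrace
 */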
5397
5398 static ssize_t
5399 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5400                         size_t cnt, loff_t *ppos)
5401 {
5402         struct seq_file *m = filp->private_data;
5403         struct trace_array *tr = m->private;
5404         char buf[64];
5405         int ret;
5406
5407         if (cnt >= sizeof(buf))
5408                 return -EINVAL;
5409
5410         if (copy_from_user(buf, ubuf, cnt))
5411                 return -EFAULT;
5412
5413         buf[cnt] = 0;
5414
5415         ret = trace_set_options(tr, buf);
5416         if (ret < 0)
5417                 return ret;
5418
5419         *ppos += cnt;
5420
5421         return cnt;
5422 }
5423
5424 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5425 {
5426         struct trace_array *tr = inode->i_private;
5427         int ret;
5428
5429         ret = tracing_check_open_get_tr(tr);
5430         if (ret)
5431                 return ret;
5432
5433         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5434         if (ret < 0)
5435                 trace_array_put(tr);
5436
5437         return ret;
5438 }
5439
5440 static const struct file_operations tracing_iter_fops = {
5441         .open           = tracing_trace_options_open,
5442         .read           = seq_read,
5443         .llseek         = seq_lseek,
5444         .release        = tracing_single_release_tr,
5445         .write          = tracing_trace_options_write,
5446 };
5447
5448 static const char readme_msg[] =
5449         "tracing mini-HOWTO:\n\n"
5450         "# echo 0 > tracing_on : quick way to disable tracing\n"
5451         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5452         " Important files:\n"
5453         "  trace\t\t\t- The static contents of the buffer\n"
5454         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5455         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5456         "  current_tracer\t- function and latency tracers\n"
5457         "  available_tracers\t- list of configured tracers for current_tracer\n"
5458         "  error_log\t- error log for failed commands (that support it)\n"
5459         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5460         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5461         "  trace_clock\t\t- change the clock used to order events\n"
5462         "       local:   Per cpu clock but may not be synced across CPUs\n"
5463         "      global:   Synced across CPUs but slows tracing down.\n"
5464         "     counter:   Not a clock, but just an increment\n"
5465         "      uptime:   Jiffy counter from time of boot\n"
5466         "        perf:   Same clock that perf events use\n"
5467 #ifdef CONFIG_X86_64
5468         "     x86-tsc:   TSC cycle counter\n"
5469 #endif
5470         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5471         "       delta:   Delta difference against a buffer-wide timestamp\n"
5472         "    absolute:   Absolute (standalone) timestamp\n"
5473         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5474         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5475         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5476         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5477         "\t\t\t  Remove sub-buffer with rmdir\n"
5478         "  trace_options\t\t- Set format or modify how tracing happens\n"
5479         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5480         "\t\t\t  option name\n"
5481         "  saved_cmdlines_size\t- echo the number of comm-pid entries to keep in here\n"
5482 #ifdef CONFIG_DYNAMIC_FTRACE
5483         "\n  available_filter_functions - list of functions that can be filtered on\n"
5484         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5485         "\t\t\t  functions\n"
5486         "\t     accepts: func_full_name or glob-matching-pattern\n"
5487         "\t     modules: Can select a group via module\n"
5488         "\t      Format: :mod:<module-name>\n"
5489         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5490         "\t    triggers: a command to perform when function is hit\n"
5491         "\t      Format: <function>:<trigger>[:count]\n"
5492         "\t     trigger: traceon, traceoff\n"
5493         "\t\t      enable_event:<system>:<event>\n"
5494         "\t\t      disable_event:<system>:<event>\n"
5495 #ifdef CONFIG_STACKTRACE
5496         "\t\t      stacktrace\n"
5497 #endif
5498 #ifdef CONFIG_TRACER_SNAPSHOT
5499         "\t\t      snapshot\n"
5500 #endif
5501         "\t\t      dump\n"
5502         "\t\t      cpudump\n"
5503         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5504         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5505         "\t     The first one will disable tracing every time do_fault is hit\n"
5506         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5507         "\t       The first time do_trap is hit and it disables tracing, the\n"
5508         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5509         "\t       the counter will not decrement. It only decrements when the\n"
5510         "\t       trigger did work\n"
5511         "\t     To remove trigger without count:\n"
5512         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5513         "\t     To remove trigger with a count:\n"
5514         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5515         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5516         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5517         "\t    modules: Can select a group via module command :mod:\n"
5518         "\t    Does not accept triggers\n"
5519 #endif /* CONFIG_DYNAMIC_FTRACE */
5520 #ifdef CONFIG_FUNCTION_TRACER
5521         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5522         "\t\t    (function)\n"
5523         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5524         "\t\t    (function)\n"
5525 #endif
5526 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5527         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5528         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5529         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5530 #endif
5531 #ifdef CONFIG_TRACER_SNAPSHOT
5532         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5533         "\t\t\t  snapshot buffer. Read the contents for more\n"
5534         "\t\t\t  information\n"
5535 #endif
5536 #ifdef CONFIG_STACK_TRACER
5537         "  stack_trace\t\t- Shows the max stack trace when active\n"
5538         "  stack_max_size\t- Shows current max stack size that was traced\n"
5539         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5540         "\t\t\t  new trace)\n"
5541 #ifdef CONFIG_DYNAMIC_FTRACE
5542         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5543         "\t\t\t  traces\n"
5544 #endif
5545 #endif /* CONFIG_STACK_TRACER */
5546 #ifdef CONFIG_DYNAMIC_EVENTS
5547         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5548         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5549 #endif
5550 #ifdef CONFIG_KPROBE_EVENTS
5551         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5552         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5553 #endif
5554 #ifdef CONFIG_UPROBE_EVENTS
5555         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5556         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5557 #endif
5558 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5559         "\t  accepts: event-definitions (one definition per line)\n"
5560         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5561         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5562 #ifdef CONFIG_HIST_TRIGGERS
5563         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5564 #endif
5565         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5566         "\t           -:[<group>/]<event>\n"
5567 #ifdef CONFIG_KPROBE_EVENTS
5568         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5569   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5570 #endif
5571 #ifdef CONFIG_UPROBE_EVENTS
5572   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5573 #endif
5574         "\t     args: <name>=fetcharg[:type]\n"
5575         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5576 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5577         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5578 #else
5579         "\t           $stack<index>, $stack, $retval, $comm,\n"
5580 #endif
5581         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5582         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5583         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5584         "\t           <type>\\[<array-size>\\]\n"
5585 #ifdef CONFIG_HIST_TRIGGERS
5586         "\t    field: <stype> <name>;\n"
5587         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5588         "\t           [unsigned] char/int/long\n"
5589 #endif
5590         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5591         "\t            of the <attached-group>/<attached-event>.\n"
5592 #endif
5593         "  events/\t\t- Directory containing all trace event subsystems:\n"
5594         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5595         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5596         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5597         "\t\t\t  events\n"
5598         "      filter\t\t- If set, only events passing filter are traced\n"
5599         "  events/<system>/<event>/\t- Directory containing control files for\n"
5600         "\t\t\t  <event>:\n"
5601         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5602         "      filter\t\t- If set, only events passing filter are traced\n"
5603         "      trigger\t\t- If set, a command to perform when event is hit\n"
5604         "\t    Format: <trigger>[:count][if <filter>]\n"
5605         "\t   trigger: traceon, traceoff\n"
5606         "\t            enable_event:<system>:<event>\n"
5607         "\t            disable_event:<system>:<event>\n"
5608 #ifdef CONFIG_HIST_TRIGGERS
5609         "\t            enable_hist:<system>:<event>\n"
5610         "\t            disable_hist:<system>:<event>\n"
5611 #endif
5612 #ifdef CONFIG_STACKTRACE
5613         "\t\t    stacktrace\n"
5614 #endif
5615 #ifdef CONFIG_TRACER_SNAPSHOT
5616         "\t\t    snapshot\n"
5617 #endif
5618 #ifdef CONFIG_HIST_TRIGGERS
5619         "\t\t    hist (see below)\n"
5620 #endif
5621         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5622         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5623         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5624         "\t                  events/block/block_unplug/trigger\n"
5625         "\t   The first disables tracing every time block_unplug is hit.\n"
5626         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5627         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5628         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5629         "\t   Like function triggers, the counter is only decremented if it\n"
5630         "\t    enabled or disabled tracing.\n"
5631         "\t   To remove a trigger without a count:\n"
5632         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5633         "\t   To remove a trigger with a count:\n"
5634         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5635         "\t   Filters can be ignored when removing a trigger.\n"
5636 #ifdef CONFIG_HIST_TRIGGERS
5637         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5638         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5639         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5640         "\t            [:values=<field1[,field2,...]>]\n"
5641         "\t            [:sort=<field1[,field2,...]>]\n"
5642         "\t            [:size=#entries]\n"
5643         "\t            [:pause][:continue][:clear]\n"
5644         "\t            [:name=histname1]\n"
5645         "\t            [:<handler>.<action>]\n"
5646         "\t            [if <filter>]\n\n"
5647         "\t    Note, special fields can be used as well:\n"
5648         "\t            common_timestamp - to record current timestamp\n"
5649         "\t            common_cpu - to record the CPU the event happened on\n"
5650         "\n"
5651         "\t    A hist trigger variable can be:\n"
5652         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5653         "\t        - a reference to another variable e.g. y=$x,\n"
5654         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5655         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5656         "\n"
5657         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5658         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5659         "\t    variable reference, field or numeric literal.\n"
5660         "\n"
5661         "\t    When a matching event is hit, an entry is added to a hash\n"
5662         "\t    table using the key(s) and value(s) named, and the value of a\n"
5663         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5664         "\t    correspond to fields in the event's format description.  Keys\n"
5665         "\t    can be any field, or the special string 'stacktrace'.\n"
5666         "\t    Compound keys consisting of up to two fields can be specified\n"
5667         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5668         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5669         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5670         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5671         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5672         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5673         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5674         "\t    its histogram data will be shared with other triggers of the\n"
5675         "\t    same name, and trigger hits will update this common data.\n\n"
5676         "\t    Reading the 'hist' file for the event will dump the hash\n"
5677         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5678         "\t    triggers attached to an event, there will be a table for each\n"
5679         "\t    trigger in the output.  The table displayed for a named\n"
5680         "\t    trigger will be the same as any other instance having the\n"
5681         "\t    same name.  The default format used to display a given field\n"
5682         "\t    can be modified by appending any of the following modifiers\n"
5683         "\t    to the field name, as applicable:\n\n"
5684         "\t            .hex        display a number as a hex value\n"
5685         "\t            .sym        display an address as a symbol\n"
5686         "\t            .sym-offset display an address as a symbol and offset\n"
5687         "\t            .execname   display a common_pid as a program name\n"
5688         "\t            .syscall    display a syscall id as a syscall name\n"
5689         "\t            .log2       display log2 value rather than raw number\n"
5690         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5691         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5692         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5693         "\t    trigger or to start a hist trigger but not log any events\n"
5694         "\t    until told to do so.  'continue' can be used to start or\n"
5695         "\t    restart a paused hist trigger.\n\n"
5696         "\t    The 'clear' parameter will clear the contents of a running\n"
5697         "\t    hist trigger and leave its current paused/active state\n"
5698         "\t    unchanged.\n\n"
5699         "\t    The enable_hist and disable_hist triggers can be used to\n"
5700         "\t    have one event conditionally start and stop another event's\n"
5701         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5702         "\t    the enable_event and disable_event triggers.\n\n"
5703         "\t    Hist trigger handlers and actions are executed whenever a\n"
5704         "\t    histogram entry is added or updated.  They take the form:\n\n"
5705         "\t        <handler>.<action>\n\n"
5706         "\t    The available handlers are:\n\n"
5707         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5708         "\t        onmax(var)               - invoke if var exceeds current max\n"
5709         "\t        onchange(var)            - invoke action if var changes\n\n"
5710         "\t    The available actions are:\n\n"
5711         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5712         "\t        save(field,...)                      - save current event fields\n"
5713 #ifdef CONFIG_TRACER_SNAPSHOT
5714         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5715 #endif
5716 #ifdef CONFIG_SYNTH_EVENTS
5717         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5718         "\t  Write into this file to define/undefine new synthetic events.\n"
5719         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5720 #endif
5721 #endif
5722 ;
5723
5724 static ssize_t
5725 tracing_readme_read(struct file *filp, char __user *ubuf,
5726                        size_t cnt, loff_t *ppos)
5727 {
5728         return simple_read_from_buffer(ubuf, cnt, ppos,
5729                                         readme_msg, strlen(readme_msg));
5730 }
5731
5732 static const struct file_operations tracing_readme_fops = {
5733         .open           = tracing_open_generic,
5734         .read           = tracing_readme_read,
5735         .llseek         = generic_file_llseek,
5736 };
5737
5738 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5739 {
5740         int pid = ++(*pos);
5741
5742         return trace_find_tgid_ptr(pid);
5743 }
5744
5745 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5746 {
5747         int pid = *pos;
5748
5749         return trace_find_tgid_ptr(pid);
5750 }
5751
5752 static void saved_tgids_stop(struct seq_file *m, void *v)
5753 {
5754 }
5755
5756 static int saved_tgids_show(struct seq_file *m, void *v)
5757 {
5758         int *entry = (int *)v;
5759         int pid = entry - tgid_map;
5760         int tgid = *entry;
5761
5762         if (tgid == 0)
5763                 return SEQ_SKIP;
5764
5765         seq_printf(m, "%d %d\n", pid, tgid);
5766         return 0;
5767 }
5768
5769 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5770         .start          = saved_tgids_start,
5771         .stop           = saved_tgids_stop,
5772         .next           = saved_tgids_next,
5773         .show           = saved_tgids_show,
5774 };
5775
5776 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5777 {
5778         int ret;
5779
5780         ret = tracing_check_open_get_tr(NULL);
5781         if (ret)
5782                 return ret;
5783
5784         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5785 }
5786
5787
5788 static const struct file_operations tracing_saved_tgids_fops = {
5789         .open           = tracing_saved_tgids_open,
5790         .read           = seq_read,
5791         .llseek         = seq_lseek,
5792         .release        = seq_release,
5793 };
5794
5795 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5796 {
5797         unsigned int *ptr = v;
5798
5799         if (*pos || m->count)
5800                 ptr++;
5801
5802         (*pos)++;
5803
5804         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5805              ptr++) {
5806                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5807                         continue;
5808
5809                 return ptr;
5810         }
5811
5812         return NULL;
5813 }
5814
5815 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5816 {
5817         void *v;
5818         loff_t l = 0;
5819
5820         preempt_disable();
5821         arch_spin_lock(&trace_cmdline_lock);
5822
5823         v = &savedcmd->map_cmdline_to_pid[0];
5824         while (l <= *pos) {
5825                 v = saved_cmdlines_next(m, v, &l);
5826                 if (!v)
5827                         return NULL;
5828         }
5829
5830         return v;
5831 }
5832
5833 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5834 {
5835         arch_spin_unlock(&trace_cmdline_lock);
5836         preempt_enable();
5837 }
5838
5839 static int saved_cmdlines_show(struct seq_file *m, void *v)
5840 {
5841         char buf[TASK_COMM_LEN];
5842         unsigned int *pid = v;
5843
5844         __trace_find_cmdline(*pid, buf);
5845         seq_printf(m, "%d %s\n", *pid, buf);
5846         return 0;
5847 }
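/*
 * Output format is "<pid> <comm>", one entry per line, e.g. (hypothetical
 * values):
 *
 *	1337 bash
 *	1338 kworker/0:1
 */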
5848
5849 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5850         .start          = saved_cmdlines_start,
5851         .next           = saved_cmdlines_next,
5852         .stop           = saved_cmdlines_stop,
5853         .show           = saved_cmdlines_show,
5854 };
5855
5856 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5857 {
5858         int ret;
5859
5860         ret = tracing_check_open_get_tr(NULL);
5861         if (ret)
5862                 return ret;
5863
5864         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5865 }
5866
5867 static const struct file_operations tracing_saved_cmdlines_fops = {
5868         .open           = tracing_saved_cmdlines_open,
5869         .read           = seq_read,
5870         .llseek         = seq_lseek,
5871         .release        = seq_release,
5872 };
5873
5874 static ssize_t
5875 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5876                                  size_t cnt, loff_t *ppos)
5877 {
5878         char buf[64];
5879         int r;
5880
5881         arch_spin_lock(&trace_cmdline_lock);
5882         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5883         arch_spin_unlock(&trace_cmdline_lock);
5884
5885         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5886 }
5887
5888 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5889 {
5890         kfree(s->saved_cmdlines);
5891         kfree(s->map_cmdline_to_pid);
5892         kfree(s);
5893 }
5894
5895 static int tracing_resize_saved_cmdlines(unsigned int val)
5896 {
5897         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5898
5899         s = kmalloc(sizeof(*s), GFP_KERNEL);
5900         if (!s)
5901                 return -ENOMEM;
5902
5903         if (allocate_cmdlines_buffer(val, s) < 0) {
5904                 kfree(s);
5905                 return -ENOMEM;
5906         }
5907
5908         arch_spin_lock(&trace_cmdline_lock);
5909         savedcmd_temp = savedcmd;
5910         savedcmd = s;
5911         arch_spin_unlock(&trace_cmdline_lock);
5912         free_saved_cmdlines_buffer(savedcmd_temp);
5913
5914         return 0;
5915 }
5916
5917 static ssize_t
5918 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5919                                   size_t cnt, loff_t *ppos)
5920 {
5921         unsigned long val;
5922         int ret;
5923
5924         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5925         if (ret)
5926                 return ret;
5927
5928         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5929         if (!val || val > PID_MAX_DEFAULT)
5930                 return -EINVAL;
5931
5932         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5933         if (ret < 0)
5934                 return ret;
5935
5936         *ppos += cnt;
5937
5938         return cnt;
5939 }
5940
5941 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5942         .open           = tracing_open_generic,
5943         .read           = tracing_saved_cmdlines_size_read,
5944         .write          = tracing_saved_cmdlines_size_write,
5945 };
5946
5947 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5948 static union trace_eval_map_item *
5949 update_eval_map(union trace_eval_map_item *ptr)
5950 {
5951         if (!ptr->map.eval_string) {
5952                 if (ptr->tail.next) {
5953                         ptr = ptr->tail.next;
5954                         /* Set ptr to the next real item (skip head) */
5955                         ptr++;
5956                 } else
5957                         return NULL;
5958         }
5959         return ptr;
5960 }
5961
5962 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5963 {
5964         union trace_eval_map_item *ptr = v;
5965
5966         /*
5967          * Paranoid! If ptr points to end, we don't want to increment past it.
5968          * This really should never happen.
5969          */
5970         (*pos)++;
5971         ptr = update_eval_map(ptr);
5972         if (WARN_ON_ONCE(!ptr))
5973                 return NULL;
5974
5975         ptr++;
5976         ptr = update_eval_map(ptr);
5977
5978         return ptr;
5979 }
5980
5981 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5982 {
5983         union trace_eval_map_item *v;
5984         loff_t l = 0;
5985
5986         mutex_lock(&trace_eval_mutex);
5987
5988         v = trace_eval_maps;
5989         if (v)
5990                 v++;
5991
5992         while (v && l < *pos) {
5993                 v = eval_map_next(m, v, &l);
5994         }
5995
5996         return v;
5997 }
5998
5999 static void eval_map_stop(struct seq_file *m, void *v)
6000 {
6001         mutex_unlock(&trace_eval_mutex);
6002 }
6003
6004 static int eval_map_show(struct seq_file *m, void *v)
6005 {
6006         union trace_eval_map_item *ptr = v;
6007
6008         seq_printf(m, "%s %ld (%s)\n",
6009                    ptr->map.eval_string, ptr->map.eval_value,
6010                    ptr->map.system);
6011
6012         return 0;
6013 }
6014
6015 static const struct seq_operations tracing_eval_map_seq_ops = {
6016         .start          = eval_map_start,
6017         .next           = eval_map_next,
6018         .stop           = eval_map_stop,
6019         .show           = eval_map_show,
6020 };
6021
6022 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6023 {
6024         int ret;
6025
6026         ret = tracing_check_open_get_tr(NULL);
6027         if (ret)
6028                 return ret;
6029
6030         return seq_open(filp, &tracing_eval_map_seq_ops);
6031 }
6032
6033 static const struct file_operations tracing_eval_map_fops = {
6034         .open           = tracing_eval_map_open,
6035         .read           = seq_read,
6036         .llseek         = seq_lseek,
6037         .release        = seq_release,
6038 };
6039
6040 static inline union trace_eval_map_item *
6041 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6042 {
6043         /* Return tail of array given the head */
6044         return ptr + ptr->head.length + 1;
6045 }
6046
6047 static void
6048 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6049                            int len)
6050 {
6051         struct trace_eval_map **stop;
6052         struct trace_eval_map **map;
6053         union trace_eval_map_item *map_array;
6054         union trace_eval_map_item *ptr;
6055
6056         stop = start + len;
6057
6058         /*
6059          * The trace_eval_maps contains the map plus a head and tail item,
6060          * where the head holds the module and length of array, and the
6061          * tail holds a pointer to the next list.
6062          */
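        /*
         * Illustrative layout of one allocated chunk (derived from the
         * description above):
         *
         *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
         */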
6063         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6064         if (!map_array) {
6065                 pr_warn("Unable to allocate trace eval mapping\n");
6066                 return;
6067         }
6068
6069         mutex_lock(&trace_eval_mutex);
6070
6071         if (!trace_eval_maps)
6072                 trace_eval_maps = map_array;
6073         else {
6074                 ptr = trace_eval_maps;
6075                 for (;;) {
6076                         ptr = trace_eval_jmp_to_tail(ptr);
6077                         if (!ptr->tail.next)
6078                                 break;
6079                         ptr = ptr->tail.next;
6080
6081                 }
6082                 ptr->tail.next = map_array;
6083         }
6084         map_array->head.mod = mod;
6085         map_array->head.length = len;
6086         map_array++;
6087
6088         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6089                 map_array->map = **map;
6090                 map_array++;
6091         }
6092         memset(map_array, 0, sizeof(*map_array));
6093
6094         mutex_unlock(&trace_eval_mutex);
6095 }
6096
6097 static void trace_create_eval_file(struct dentry *d_tracer)
6098 {
6099         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6100                           NULL, &tracing_eval_map_fops);
6101 }
6102
6103 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6104 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6105 static inline void trace_insert_eval_map_file(struct module *mod,
6106                               struct trace_eval_map **start, int len) { }
6107 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6108
6109 static void trace_insert_eval_map(struct module *mod,
6110                                   struct trace_eval_map **start, int len)
6111 {
6112         struct trace_eval_map **map;
6113
6114         if (len <= 0)
6115                 return;
6116
6117         map = start;
6118
6119         trace_event_eval_update(map, len);
6120
6121         trace_insert_eval_map_file(mod, start, len);
6122 }
6123
6124 static ssize_t
6125 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6126                        size_t cnt, loff_t *ppos)
6127 {
6128         struct trace_array *tr = filp->private_data;
6129         char buf[MAX_TRACER_SIZE+2];
6130         int r;
6131
6132         mutex_lock(&trace_types_lock);
6133         r = sprintf(buf, "%s\n", tr->current_trace->name);
6134         mutex_unlock(&trace_types_lock);
6135
6136         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6137 }
6138
6139 int tracer_init(struct tracer *t, struct trace_array *tr)
6140 {
6141         tracing_reset_online_cpus(&tr->array_buffer);
6142         return t->init(tr);
6143 }
6144
6145 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6146 {
6147         int cpu;
6148
6149         for_each_tracing_cpu(cpu)
6150                 per_cpu_ptr(buf->data, cpu)->entries = val;
6151 }
6152
6153 #ifdef CONFIG_TRACER_MAX_TRACE
6154 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6155 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6156                                         struct array_buffer *size_buf, int cpu_id)
6157 {
6158         int cpu, ret = 0;
6159
6160         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6161                 for_each_tracing_cpu(cpu) {
6162                         ret = ring_buffer_resize(trace_buf->buffer,
6163                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6164                         if (ret < 0)
6165                                 break;
6166                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6167                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6168                 }
6169         } else {
6170                 ret = ring_buffer_resize(trace_buf->buffer,
6171                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6172                 if (ret == 0)
6173                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6174                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6175         }
6176
6177         return ret;
6178 }
6179 #endif /* CONFIG_TRACER_MAX_TRACE */
6180
6181 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6182                                         unsigned long size, int cpu)
6183 {
6184         int ret;
6185
6186         /*
6187          * If kernel or user changes the size of the ring buffer
6188          * we use the size that was given, and we can forget about
6189          * expanding it later.
6190          */
6191         ring_buffer_expanded = true;
6192
6193         /* May be called before buffers are initialized */
6194         if (!tr->array_buffer.buffer)
6195                 return 0;
6196
6197         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6198         if (ret < 0)
6199                 return ret;
6200
6201 #ifdef CONFIG_TRACER_MAX_TRACE
6202         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6203             !tr->current_trace->use_max_tr)
6204                 goto out;
6205
6206         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6207         if (ret < 0) {
6208                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6209                                                      &tr->array_buffer, cpu);
6210                 if (r < 0) {
6211                         /*
6212                          * We are left with a max buffer of a different
6213                          * size than the main buffer!
6214                          * The max buffer is our "snapshot" buffer:
6215                          * when a tracer needs a snapshot (one of the
6216                          * latency tracers), it swaps the max buffer
6217                          * with the saved snapshot. We succeeded in
6218                          * updating the size of the main buffer, but failed
6219                          * to update the size of the max buffer. Then, when
6220                          * we tried to reset the main buffer to its original
6221                          * size, we failed there too. This is very unlikely
6222                          * to happen, but if it does, warn and kill all
6223                          * tracing.
6224                          */
6225                         WARN_ON(1);
6226                         tracing_disabled = 1;
6227                 }
6228                 return ret;
6229         }
6230
6231         if (cpu == RING_BUFFER_ALL_CPUS)
6232                 set_buffer_entries(&tr->max_buffer, size);
6233         else
6234                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6235
6236  out:
6237 #endif /* CONFIG_TRACER_MAX_TRACE */
6238
6239         if (cpu == RING_BUFFER_ALL_CPUS)
6240                 set_buffer_entries(&tr->array_buffer, size);
6241         else
6242                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6243
6244         return ret;
6245 }
6246
6247 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6248                                   unsigned long size, int cpu_id)
6249 {
6250         int ret;
6251
6252         mutex_lock(&trace_types_lock);
6253
6254         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6255                 /* make sure this cpu is enabled in the mask */
6256                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6257                         ret = -EINVAL;
6258                         goto out;
6259                 }
6260         }
6261
6262         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6263         if (ret < 0)
6264                 ret = -ENOMEM;
6265
6266 out:
6267         mutex_unlock(&trace_types_lock);
6268
6269         return ret;
6270 }
6271
6272
6273 /**
6274  * tracing_update_buffers - used by tracing facility to expand ring buffers
6275  *
6276  * To save memory on systems where tracing is configured in but never
6277  * used, the ring buffers are initially set to a minimum size. Once a
6278  * user starts to use the tracing facility, they need to grow to
6279  * their default size.
6280  *
6281  * This function is to be called when a tracer is about to be used.
6282  */
6283 int tracing_update_buffers(void)
6284 {
6285         int ret = 0;
6286
6287         mutex_lock(&trace_types_lock);
6288         if (!ring_buffer_expanded)
6289                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6290                                                 RING_BUFFER_ALL_CPUS);
6291         mutex_unlock(&trace_types_lock);
6292
6293         return ret;
6294 }
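
/*
 * Example (illustrative sketch only; my_feature_start() is a
 * hypothetical caller, not something defined in this file): any
 * facility that is about to start writing into the ring buffer is
 * expected to call tracing_update_buffers() first, so that the
 * minimally sized boot-time buffers are grown to their default size.
 *
 *        static int my_feature_start(void)
 *        {
 *                int ret = tracing_update_buffers();
 *
 *                if (ret < 0)
 *                        return ret;
 *
 *                // Buffers are now at full size; safe to emit events.
 *                return 0;
 *        }
 */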
6295
6296 struct trace_option_dentry;
6297
6298 static void
6299 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6300
6301 /*
6302  * Used to clear out the tracer before deletion of an instance.
6303  * Must have trace_types_lock held.
6304  */
6305 static void tracing_set_nop(struct trace_array *tr)
6306 {
6307         if (tr->current_trace == &nop_trace)
6308                 return;
6309
6310         tr->current_trace->enabled--;
6311
6312         if (tr->current_trace->reset)
6313                 tr->current_trace->reset(tr);
6314
6315         tr->current_trace = &nop_trace;
6316 }
6317
6318 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6319 {
6320         /* Only enable if the directory has been created already. */
6321         if (!tr->dir)
6322                 return;
6323
6324         create_trace_option_files(tr, t);
6325 }
6326
6327 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6328 {
6329         struct tracer *t;
6330 #ifdef CONFIG_TRACER_MAX_TRACE
6331         bool had_max_tr;
6332 #endif
6333         int ret = 0;
6334
6335         mutex_lock(&trace_types_lock);
6336
6337         if (!ring_buffer_expanded) {
6338                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6339                                                 RING_BUFFER_ALL_CPUS);
6340                 if (ret < 0)
6341                         goto out;
6342                 ret = 0;
6343         }
6344
6345         for (t = trace_types; t; t = t->next) {
6346                 if (strcmp(t->name, buf) == 0)
6347                         break;
6348         }
6349         if (!t) {
6350                 ret = -EINVAL;
6351                 goto out;
6352         }
6353         if (t == tr->current_trace)
6354                 goto out;
6355
6356 #ifdef CONFIG_TRACER_SNAPSHOT
6357         if (t->use_max_tr) {
6358                 arch_spin_lock(&tr->max_lock);
6359                 if (tr->cond_snapshot)
6360                         ret = -EBUSY;
6361                 arch_spin_unlock(&tr->max_lock);
6362                 if (ret)
6363                         goto out;
6364         }
6365 #endif
6366         /* Some tracers won't work from the kernel command line */
6367         if (system_state < SYSTEM_RUNNING && t->noboot) {
6368                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6369                         t->name);
6370                 goto out;
6371         }
6372
6373         /* Some tracers are only allowed for the top level buffer */
6374         if (!trace_ok_for_array(t, tr)) {
6375                 ret = -EINVAL;
6376                 goto out;
6377         }
6378
6379         /* If trace pipe files are being read, we can't change the tracer */
6380         if (tr->trace_ref) {
6381                 ret = -EBUSY;
6382                 goto out;
6383         }
6384
6385         trace_branch_disable();
6386
6387         tr->current_trace->enabled--;
6388
6389         if (tr->current_trace->reset)
6390                 tr->current_trace->reset(tr);
6391
6392         /* Current trace needs to be nop_trace before synchronize_rcu */
6393         tr->current_trace = &nop_trace;
6394
6395 #ifdef CONFIG_TRACER_MAX_TRACE
6396         had_max_tr = tr->allocated_snapshot;
6397
6398         if (had_max_tr && !t->use_max_tr) {
6399                 /*
6400                  * We need to make sure that update_max_tr() sees that
6401                  * current_trace changed to nop_trace, to keep it from
6402                  * swapping the buffers while we free the snapshot below.
6403                  * update_max_tr() is called with interrupts disabled,
6404                  * so a synchronize_rcu() is sufficient.
6405                  */
6406                 synchronize_rcu();
6407                 free_snapshot(tr);
6408         }
6409 #endif
6410
6411 #ifdef CONFIG_TRACER_MAX_TRACE
6412         if (t->use_max_tr && !had_max_tr) {
6413                 ret = tracing_alloc_snapshot_instance(tr);
6414                 if (ret < 0)
6415                         goto out;
6416         }
6417 #endif
6418
6419         if (t->init) {
6420                 ret = tracer_init(t, tr);
6421                 if (ret)
6422                         goto out;
6423         }
6424
6425         tr->current_trace = t;
6426         tr->current_trace->enabled++;
6427         trace_branch_enable(tr);
6428  out:
6429         mutex_unlock(&trace_types_lock);
6430
6431         return ret;
6432 }
6433
6434 static ssize_t
6435 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6436                         size_t cnt, loff_t *ppos)
6437 {
6438         struct trace_array *tr = filp->private_data;
6439         char buf[MAX_TRACER_SIZE+1];
6440         int i;
6441         size_t ret;
6442         int err;
6443
6444         ret = cnt;
6445
6446         if (cnt > MAX_TRACER_SIZE)
6447                 cnt = MAX_TRACER_SIZE;
6448
6449         if (copy_from_user(buf, ubuf, cnt))
6450                 return -EFAULT;
6451
6452         buf[cnt] = 0;
6453
6454         /* strip trailing whitespace. */
6455         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6456                 buf[i] = 0;
6457
6458         err = tracing_set_tracer(tr, buf);
6459         if (err)
6460                 return err;
6461
6462         *ppos += ret;
6463
6464         return ret;
6465 }
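
/*
 * Example (user space sketch; assumes tracefs is mounted at
 * /sys/kernel/tracing and that the "nop" tracer is available): a
 * trailing newline, as produced by "echo", is stripped above before
 * the tracer name is looked up.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                // Same effect as: echo nop > current_tracer
 *                if (write(fd, "nop\n", 4) != 4) {
 *                        close(fd);
 *                        return 1;
 *                }
 *                close(fd);
 *                return 0;
 *        }
 */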
6466
6467 static ssize_t
6468 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6469                    size_t cnt, loff_t *ppos)
6470 {
6471         char buf[64];
6472         int r;
6473
6474         r = snprintf(buf, sizeof(buf), "%ld\n",
6475                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6476         if (r > sizeof(buf))
6477                 r = sizeof(buf);
6478         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6479 }
6480
6481 static ssize_t
6482 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6483                     size_t cnt, loff_t *ppos)
6484 {
6485         unsigned long val;
6486         int ret;
6487
6488         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6489         if (ret)
6490                 return ret;
6491
6492         *ptr = val * 1000;
6493
6494         return cnt;
6495 }
6496
6497 static ssize_t
6498 tracing_thresh_read(struct file *filp, char __user *ubuf,
6499                     size_t cnt, loff_t *ppos)
6500 {
6501         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6502 }
6503
6504 static ssize_t
6505 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6506                      size_t cnt, loff_t *ppos)
6507 {
6508         struct trace_array *tr = filp->private_data;
6509         int ret;
6510
6511         mutex_lock(&trace_types_lock);
6512         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6513         if (ret < 0)
6514                 goto out;
6515
6516         if (tr->current_trace->update_thresh) {
6517                 ret = tr->current_trace->update_thresh(tr);
6518                 if (ret < 0)
6519                         goto out;
6520         }
6521
6522         ret = cnt;
6523 out:
6524         mutex_unlock(&trace_types_lock);
6525
6526         return ret;
6527 }
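
/*
 * Example (worked conversion): the value exchanged through this file
 * is in microseconds, while tracing_thresh itself is kept in
 * nanoseconds. Writing the string "250" therefore ends up as
 *
 *        tracing_thresh = 250 * 1000;        // 250 us stored as 250000 ns
 *
 * and tracing_nsecs_read() converts it back to "250" on the way out.
 */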
6528
6529 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6530
6531 static ssize_t
6532 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6533                      size_t cnt, loff_t *ppos)
6534 {
6535         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6536 }
6537
6538 static ssize_t
6539 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6540                       size_t cnt, loff_t *ppos)
6541 {
6542         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6543 }
6544
6545 #endif
6546
6547 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6548 {
6549         struct trace_array *tr = inode->i_private;
6550         struct trace_iterator *iter;
6551         int ret;
6552
6553         ret = tracing_check_open_get_tr(tr);
6554         if (ret)
6555                 return ret;
6556
6557         mutex_lock(&trace_types_lock);
6558
6559         /* create a buffer to store the information to pass to userspace */
6560         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6561         if (!iter) {
6562                 ret = -ENOMEM;
6563                 __trace_array_put(tr);
6564                 goto out;
6565         }
6566
6567         trace_seq_init(&iter->seq);
6568         iter->trace = tr->current_trace;
6569
6570         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6571                 ret = -ENOMEM;
6572                 goto fail;
6573         }
6574
6575         /* trace pipe does not show start of buffer */
6576         cpumask_setall(iter->started);
6577
6578         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6579                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6580
6581         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6582         if (trace_clocks[tr->clock_id].in_ns)
6583                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6584
6585         iter->tr = tr;
6586         iter->array_buffer = &tr->array_buffer;
6587         iter->cpu_file = tracing_get_cpu(inode);
6588         mutex_init(&iter->mutex);
6589         filp->private_data = iter;
6590
6591         if (iter->trace->pipe_open)
6592                 iter->trace->pipe_open(iter);
6593
6594         nonseekable_open(inode, filp);
6595
6596         tr->trace_ref++;
6597 out:
6598         mutex_unlock(&trace_types_lock);
6599         return ret;
6600
6601 fail:
6602         kfree(iter);
6603         __trace_array_put(tr);
6604         mutex_unlock(&trace_types_lock);
6605         return ret;
6606 }
6607
6608 static int tracing_release_pipe(struct inode *inode, struct file *file)
6609 {
6610         struct trace_iterator *iter = file->private_data;
6611         struct trace_array *tr = inode->i_private;
6612
6613         mutex_lock(&trace_types_lock);
6614
6615         tr->trace_ref--;
6616
6617         if (iter->trace->pipe_close)
6618                 iter->trace->pipe_close(iter);
6619
6620         mutex_unlock(&trace_types_lock);
6621
6622         free_cpumask_var(iter->started);
6623         mutex_destroy(&iter->mutex);
6624         kfree(iter);
6625
6626         trace_array_put(tr);
6627
6628         return 0;
6629 }
6630
6631 static __poll_t
6632 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6633 {
6634         struct trace_array *tr = iter->tr;
6635
6636         /* Iterators are static, they should be filled or empty */
6637         if (trace_buffer_iter(iter, iter->cpu_file))
6638                 return EPOLLIN | EPOLLRDNORM;
6639
6640         if (tr->trace_flags & TRACE_ITER_BLOCK)
6641                 /*
6642                  * Always select as readable when in blocking mode
6643                  */
6644                 return EPOLLIN | EPOLLRDNORM;
6645         else
6646                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6647                                              filp, poll_table);
6648 }
6649
6650 static __poll_t
6651 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6652 {
6653         struct trace_iterator *iter = filp->private_data;
6654
6655         return trace_poll(iter, filp, poll_table);
6656 }
6657
6658 /* Must be called with iter->mutex held. */
6659 static int tracing_wait_pipe(struct file *filp)
6660 {
6661         struct trace_iterator *iter = filp->private_data;
6662         int ret;
6663
6664         while (trace_empty(iter)) {
6665
6666                 if ((filp->f_flags & O_NONBLOCK)) {
6667                         return -EAGAIN;
6668                 }
6669
6670                 /*
6671                  * We stop blocking only once we have read something and
6672                  * tracing is disabled. We still block if tracing is disabled
6673                  * but we have never read anything: this allows a user to cat
6674                  * this file and then enable tracing. After we have read
6675                  * something, we give an EOF when tracing is disabled again.
6676                  *
6677                  * iter->pos will be 0 if we haven't read anything.
6678                  */
6679                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6680                         break;
6681
6682                 mutex_unlock(&iter->mutex);
6683
6684                 ret = wait_on_pipe(iter, 0);
6685
6686                 mutex_lock(&iter->mutex);
6687
6688                 if (ret)
6689                         return ret;
6690         }
6691
6692         return 1;
6693 }
6694
6695 /*
6696  * Consumer reader.
6697  */
6698 static ssize_t
6699 tracing_read_pipe(struct file *filp, char __user *ubuf,
6700                   size_t cnt, loff_t *ppos)
6701 {
6702         struct trace_iterator *iter = filp->private_data;
6703         ssize_t sret;
6704
6705         /*
6706          * Avoid more than one consumer on a single file descriptor.
6707          * This is just a matter of trace coherency: the ring buffer
6708          * itself is protected.
6709          */
6710         mutex_lock(&iter->mutex);
6711
6712         /* return any leftover data */
6713         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6714         if (sret != -EBUSY)
6715                 goto out;
6716
6717         trace_seq_init(&iter->seq);
6718
6719         if (iter->trace->read) {
6720                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6721                 if (sret)
6722                         goto out;
6723         }
6724
6725 waitagain:
6726         sret = tracing_wait_pipe(filp);
6727         if (sret <= 0)
6728                 goto out;
6729
6730         /* stop when tracing is finished */
6731         if (trace_empty(iter)) {
6732                 sret = 0;
6733                 goto out;
6734         }
6735
6736         if (cnt >= PAGE_SIZE)
6737                 cnt = PAGE_SIZE - 1;
6738
6739         /* reset all but tr, trace, and overruns */
6740         trace_iterator_reset(iter);
6741         cpumask_clear(iter->started);
6742         trace_seq_init(&iter->seq);
6743
6744         trace_event_read_lock();
6745         trace_access_lock(iter->cpu_file);
6746         while (trace_find_next_entry_inc(iter) != NULL) {
6747                 enum print_line_t ret;
6748                 int save_len = iter->seq.seq.len;
6749
6750                 ret = print_trace_line(iter);
6751                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6752                         /* don't print partial lines */
6753                         iter->seq.seq.len = save_len;
6754                         break;
6755                 }
6756                 if (ret != TRACE_TYPE_NO_CONSUME)
6757                         trace_consume(iter);
6758
6759                 if (trace_seq_used(&iter->seq) >= cnt)
6760                         break;
6761
6762                 /*
6763                  * Setting the full flag means we reached the trace_seq buffer
6764                  * size and we should have left via the partial output condition
6765                  * above. One of the trace_seq_* functions is not used properly.
6766                  */
6767                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6768                           iter->ent->type);
6769         }
6770         trace_access_unlock(iter->cpu_file);
6771         trace_event_read_unlock();
6772
6773         /* Now copy what we have to the user */
6774         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6775         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6776                 trace_seq_init(&iter->seq);
6777
6778         /*
6779          * If there was nothing to send to user, in spite of consuming trace
6780          * entries, go back to wait for more entries.
6781          */
6782         if (sret == -EBUSY)
6783                 goto waitagain;
6784
6785 out:
6786         mutex_unlock(&iter->mutex);
6787
6788         return sret;
6789 }
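
/*
 * Example (user space sketch; the path assumes tracefs is mounted at
 * /sys/kernel/tracing): trace_pipe is a consuming, blocking reader.
 * A read() sleeps until entries are available, unless the file was
 * opened with O_NONBLOCK, in which case tracing_wait_pipe() returns
 * -EAGAIN.
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                char buf[4096];
 *                ssize_t n;
 *                int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                        fwrite(buf, 1, n, stdout);  // entries are consumed
 *                close(fd);
 *                return 0;
 *        }
 */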
6790
6791 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6792                                      unsigned int idx)
6793 {
6794         __free_page(spd->pages[idx]);
6795 }
6796
6797 static size_t
6798 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6799 {
6800         size_t count;
6801         int save_len;
6802         int ret;
6803
6804         /* Seq buffer is page-sized, exactly what we need. */
6805         for (;;) {
6806                 save_len = iter->seq.seq.len;
6807                 ret = print_trace_line(iter);
6808
6809                 if (trace_seq_has_overflowed(&iter->seq)) {
6810                         iter->seq.seq.len = save_len;
6811                         break;
6812                 }
6813
6814                 /*
6815                  * This should not be hit, because it should only
6816                  * be set if the iter->seq overflowed. But check it
6817                  * anyway to be safe.
6818                  */
6819                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6820                         iter->seq.seq.len = save_len;
6821                         break;
6822                 }
6823
6824                 count = trace_seq_used(&iter->seq) - save_len;
6825                 if (rem < count) {
6826                         rem = 0;
6827                         iter->seq.seq.len = save_len;
6828                         break;
6829                 }
6830
6831                 if (ret != TRACE_TYPE_NO_CONSUME)
6832                         trace_consume(iter);
6833                 rem -= count;
6834                 if (!trace_find_next_entry_inc(iter)) {
6835                         rem = 0;
6836                         iter->ent = NULL;
6837                         break;
6838                 }
6839         }
6840
6841         return rem;
6842 }
6843
6844 static ssize_t tracing_splice_read_pipe(struct file *filp,
6845                                         loff_t *ppos,
6846                                         struct pipe_inode_info *pipe,
6847                                         size_t len,
6848                                         unsigned int flags)
6849 {
6850         struct page *pages_def[PIPE_DEF_BUFFERS];
6851         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6852         struct trace_iterator *iter = filp->private_data;
6853         struct splice_pipe_desc spd = {
6854                 .pages          = pages_def,
6855                 .partial        = partial_def,
6856                 .nr_pages       = 0, /* This gets updated below. */
6857                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6858                 .ops            = &default_pipe_buf_ops,
6859                 .spd_release    = tracing_spd_release_pipe,
6860         };
6861         ssize_t ret;
6862         size_t rem;
6863         unsigned int i;
6864
6865         if (splice_grow_spd(pipe, &spd))
6866                 return -ENOMEM;
6867
6868         mutex_lock(&iter->mutex);
6869
6870         if (iter->trace->splice_read) {
6871                 ret = iter->trace->splice_read(iter, filp,
6872                                                ppos, pipe, len, flags);
6873                 if (ret)
6874                         goto out_err;
6875         }
6876
6877         ret = tracing_wait_pipe(filp);
6878         if (ret <= 0)
6879                 goto out_err;
6880
6881         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6882                 ret = -EFAULT;
6883                 goto out_err;
6884         }
6885
6886         trace_event_read_lock();
6887         trace_access_lock(iter->cpu_file);
6888
6889         /* Fill as many pages as possible. */
6890         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6891                 spd.pages[i] = alloc_page(GFP_KERNEL);
6892                 if (!spd.pages[i])
6893                         break;
6894
6895                 rem = tracing_fill_pipe_page(rem, iter);
6896
6897                 /* Copy the data into the page, so we can start over. */
6898                 ret = trace_seq_to_buffer(&iter->seq,
6899                                           page_address(spd.pages[i]),
6900                                           trace_seq_used(&iter->seq));
6901                 if (ret < 0) {
6902                         __free_page(spd.pages[i]);
6903                         break;
6904                 }
6905                 spd.partial[i].offset = 0;
6906                 spd.partial[i].len = trace_seq_used(&iter->seq);
6907
6908                 trace_seq_init(&iter->seq);
6909         }
6910
6911         trace_access_unlock(iter->cpu_file);
6912         trace_event_read_unlock();
6913         mutex_unlock(&iter->mutex);
6914
6915         spd.nr_pages = i;
6916
6917         if (i)
6918                 ret = splice_to_pipe(pipe, &spd);
6919         else
6920                 ret = 0;
6921 out:
6922         splice_shrink_spd(&spd);
6923         return ret;
6924
6925 out_err:
6926         mutex_unlock(&iter->mutex);
6927         goto out;
6928 }
6929
6930 static ssize_t
6931 tracing_entries_read(struct file *filp, char __user *ubuf,
6932                      size_t cnt, loff_t *ppos)
6933 {
6934         struct inode *inode = file_inode(filp);
6935         struct trace_array *tr = inode->i_private;
6936         int cpu = tracing_get_cpu(inode);
6937         char buf[64];
6938         int r = 0;
6939         ssize_t ret;
6940
6941         mutex_lock(&trace_types_lock);
6942
6943         if (cpu == RING_BUFFER_ALL_CPUS) {
6944                 int cpu, buf_size_same;
6945                 unsigned long size;
6946
6947                 size = 0;
6948                 buf_size_same = 1;
6949                 /* check if all per-cpu sizes are the same */
6950                 for_each_tracing_cpu(cpu) {
6951                         /* fill in the size from the first enabled cpu */
6952                         if (size == 0)
6953                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6954                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6955                                 buf_size_same = 0;
6956                                 break;
6957                         }
6958                 }
6959
6960                 if (buf_size_same) {
6961                         if (!ring_buffer_expanded)
6962                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6963                                             size >> 10,
6964                                             trace_buf_size >> 10);
6965                         else
6966                                 r = sprintf(buf, "%lu\n", size >> 10);
6967                 } else
6968                         r = sprintf(buf, "X\n");
6969         } else
6970                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6971
6972         mutex_unlock(&trace_types_lock);
6973
6974         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6975         return ret;
6976 }
6977
6978 static ssize_t
6979 tracing_entries_write(struct file *filp, const char __user *ubuf,
6980                       size_t cnt, loff_t *ppos)
6981 {
6982         struct inode *inode = file_inode(filp);
6983         struct trace_array *tr = inode->i_private;
6984         unsigned long val;
6985         int ret;
6986
6987         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6988         if (ret)
6989                 return ret;
6990
6991         /* must have at least 1 entry */
6992         if (!val)
6993                 return -EINVAL;
6994
6995         /* value is in KB */
6996         val <<= 10;
6997         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6998         if (ret < 0)
6999                 return ret;
7000
7001         *ppos += cnt;
7002
7003         return cnt;
7004 }
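
/*
 * Example (worked conversion): the value written here is in kilobytes
 * and applies to each CPU's buffer, so writing the string "1408"
 * results in
 *
 *        tracing_resize_ring_buffer(tr, 1408 << 10, cpu);  // 1441792 bytes per CPU
 *
 * because of the "val <<= 10" conversion above.
 */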
7005
7006 static ssize_t
7007 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7008                                 size_t cnt, loff_t *ppos)
7009 {
7010         struct trace_array *tr = filp->private_data;
7011         char buf[64];
7012         int r, cpu;
7013         unsigned long size = 0, expanded_size = 0;
7014
7015         mutex_lock(&trace_types_lock);
7016         for_each_tracing_cpu(cpu) {
7017                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7018                 if (!ring_buffer_expanded)
7019                         expanded_size += trace_buf_size >> 10;
7020         }
7021         if (ring_buffer_expanded)
7022                 r = sprintf(buf, "%lu\n", size);
7023         else
7024                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7025         mutex_unlock(&trace_types_lock);
7026
7027         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7028 }
7029
7030 static ssize_t
7031 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7032                           size_t cnt, loff_t *ppos)
7033 {
7034         /*
7035          * There is no need to read what the user has written; this function
7036          * exists just to make sure that there is no error when "echo" is used.
7037          */
7038
7039         *ppos += cnt;
7040
7041         return cnt;
7042 }
7043
7044 static int
7045 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7046 {
7047         struct trace_array *tr = inode->i_private;
7048
7049         /* disable tracing? */
7050         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7051                 tracer_tracing_off(tr);
7052         /* resize the ring buffer to 0 */
7053         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7054
7055         trace_array_put(tr);
7056
7057         return 0;
7058 }
7059
7060 static ssize_t
7061 tracing_mark_write(struct file *filp, const char __user *ubuf,
7062                                         size_t cnt, loff_t *fpos)
7063 {
7064         struct trace_array *tr = filp->private_data;
7065         struct ring_buffer_event *event;
7066         enum event_trigger_type tt = ETT_NONE;
7067         struct trace_buffer *buffer;
7068         struct print_entry *entry;
7069         ssize_t written;
7070         int size;
7071         int len;
7072
7073 /* Used in tracing_mark_raw_write() as well */
7074 #define FAULTED_STR "<faulted>"
7075 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7076
7077         if (tracing_disabled)
7078                 return -EINVAL;
7079
7080         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7081                 return -EINVAL;
7082
7083         if (cnt > TRACE_BUF_SIZE)
7084                 cnt = TRACE_BUF_SIZE;
7085
7086         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7087
7088         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7089
7090         /* If less than "<faulted>", then make sure we can still add that */
7091         if (cnt < FAULTED_SIZE)
7092                 size += FAULTED_SIZE - cnt;
7093
7094         buffer = tr->array_buffer.buffer;
7095         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7096                                             tracing_gen_ctx());
7097         if (unlikely(!event))
7098                 /* Ring buffer disabled, return as if not open for write */
7099                 return -EBADF;
7100
7101         entry = ring_buffer_event_data(event);
7102         entry->ip = _THIS_IP_;
7103
7104         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7105         if (len) {
7106                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7107                 cnt = FAULTED_SIZE;
7108                 written = -EFAULT;
7109         } else
7110                 written = cnt;
7111
7112         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7113                 /* do not add \n before testing triggers, but add \0 */
7114                 entry->buf[cnt] = '\0';
7115                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7116         }
7117
7118         if (entry->buf[cnt - 1] != '\n') {
7119                 entry->buf[cnt] = '\n';
7120                 entry->buf[cnt + 1] = '\0';
7121         } else
7122                 entry->buf[cnt] = '\0';
7123
7124         if (static_branch_unlikely(&trace_marker_exports_enabled))
7125                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7126         __buffer_unlock_commit(buffer, event);
7127
7128         if (tt)
7129                 event_triggers_post_call(tr->trace_marker_file, tt);
7130
7131         return written;
7132 }
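
/*
 * Example (user space sketch; the path assumes tracefs is mounted at
 * /sys/kernel/tracing): strings written to trace_marker show up in the
 * trace as print entries; a missing trailing newline is added by the
 * code above.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *msg = "hello from user space";
 *                int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, msg, strlen(msg));
 *                close(fd);
 *                return 0;
 *        }
 */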
7133
7134 /* Limit it for now to 3K (including tag) */
7135 #define RAW_DATA_MAX_SIZE (1024*3)
7136
7137 static ssize_t
7138 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7139                                         size_t cnt, loff_t *fpos)
7140 {
7141         struct trace_array *tr = filp->private_data;
7142         struct ring_buffer_event *event;
7143         struct trace_buffer *buffer;
7144         struct raw_data_entry *entry;
7145         ssize_t written;
7146         int size;
7147         int len;
7148
7149 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7150
7151         if (tracing_disabled)
7152                 return -EINVAL;
7153
7154         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7155                 return -EINVAL;
7156
7157         /* The marker must at least have a tag id */
7158         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7159                 return -EINVAL;
7160
7161         if (cnt > TRACE_BUF_SIZE)
7162                 cnt = TRACE_BUF_SIZE;
7163
7164         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7165
7166         size = sizeof(*entry) + cnt;
7167         if (cnt < FAULT_SIZE_ID)
7168                 size += FAULT_SIZE_ID - cnt;
7169
7170         buffer = tr->array_buffer.buffer;
7171         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7172                                             tracing_gen_ctx());
7173         if (!event)
7174                 /* Ring buffer disabled, return as if not open for write */
7175                 return -EBADF;
7176
7177         entry = ring_buffer_event_data(event);
7178
7179         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7180         if (len) {
7181                 entry->id = -1;
7182                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7183                 written = -EFAULT;
7184         } else
7185                 written = cnt;
7186
7187         __buffer_unlock_commit(buffer, event);
7188
7189         return written;
7190 }
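
/*
 * Example (user space sketch; the path assumes tracefs is mounted at
 * /sys/kernel/tracing): a raw marker record must start with an
 * unsigned int tag id (copied into entry->id above); whatever follows
 * is opaque payload, up to RAW_DATA_MAX_SIZE bytes in total.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                unsigned char rec[sizeof(unsigned int) + 8];
 *                unsigned int id = 42;   // arbitrary tag id
 *                int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                memcpy(rec, &id, sizeof(id));
 *                memset(rec + sizeof(id), 0xab, 8);  // payload bytes
 *                write(fd, rec, sizeof(rec));
 *                close(fd);
 *                return 0;
 *        }
 */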
7191
7192 static int tracing_clock_show(struct seq_file *m, void *v)
7193 {
7194         struct trace_array *tr = m->private;
7195         int i;
7196
7197         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7198                 seq_printf(m,
7199                         "%s%s%s%s", i ? " " : "",
7200                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7201                         i == tr->clock_id ? "]" : "");
7202         seq_putc(m, '\n');
7203
7204         return 0;
7205 }
7206
7207 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7208 {
7209         int i;
7210
7211         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7212                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7213                         break;
7214         }
7215         if (i == ARRAY_SIZE(trace_clocks))
7216                 return -EINVAL;
7217
7218         mutex_lock(&trace_types_lock);
7219
7220         tr->clock_id = i;
7221
7222         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7223
7224         /*
7225          * New clock may not be consistent with the previous clock.
7226          * Reset the buffer so that it doesn't have incomparable timestamps.
7227          */
7228         tracing_reset_online_cpus(&tr->array_buffer);
7229
7230 #ifdef CONFIG_TRACER_MAX_TRACE
7231         if (tr->max_buffer.buffer)
7232                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7233         tracing_reset_online_cpus(&tr->max_buffer);
7234 #endif
7235
7236         mutex_unlock(&trace_types_lock);
7237
7238         return 0;
7239 }
7240
7241 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7242                                    size_t cnt, loff_t *fpos)
7243 {
7244         struct seq_file *m = filp->private_data;
7245         struct trace_array *tr = m->private;
7246         char buf[64];
7247         const char *clockstr;
7248         int ret;
7249
7250         if (cnt >= sizeof(buf))
7251                 return -EINVAL;
7252
7253         if (copy_from_user(buf, ubuf, cnt))
7254                 return -EFAULT;
7255
7256         buf[cnt] = 0;
7257
7258         clockstr = strstrip(buf);
7259
7260         ret = tracing_set_clock(tr, clockstr);
7261         if (ret)
7262                 return ret;
7263
7264         *fpos += cnt;
7265
7266         return cnt;
7267 }
7268
7269 static int tracing_clock_open(struct inode *inode, struct file *file)
7270 {
7271         struct trace_array *tr = inode->i_private;
7272         int ret;
7273
7274         ret = tracing_check_open_get_tr(tr);
7275         if (ret)
7276                 return ret;
7277
7278         ret = single_open(file, tracing_clock_show, inode->i_private);
7279         if (ret < 0)
7280                 trace_array_put(tr);
7281
7282         return ret;
7283 }
7284
7285 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7286 {
7287         struct trace_array *tr = m->private;
7288
7289         mutex_lock(&trace_types_lock);
7290
7291         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7292                 seq_puts(m, "delta [absolute]\n");
7293         else
7294                 seq_puts(m, "[delta] absolute\n");
7295
7296         mutex_unlock(&trace_types_lock);
7297
7298         return 0;
7299 }
7300
7301 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7302 {
7303         struct trace_array *tr = inode->i_private;
7304         int ret;
7305
7306         ret = tracing_check_open_get_tr(tr);
7307         if (ret)
7308                 return ret;
7309
7310         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7311         if (ret < 0)
7312                 trace_array_put(tr);
7313
7314         return ret;
7315 }
7316
7317 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7318 {
7319         if (rbe == this_cpu_read(trace_buffered_event))
7320                 return ring_buffer_time_stamp(buffer);
7321
7322         return ring_buffer_event_time_stamp(buffer, rbe);
7323 }
7324
7325 /*
7326  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7327  */
7328 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7329 {
7330         int ret = 0;
7331
7332         mutex_lock(&trace_types_lock);
7333
7334         if (set && tr->no_filter_buffering_ref++)
7335                 goto out;
7336
7337         if (!set) {
7338                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7339                         ret = -EINVAL;
7340                         goto out;
7341                 }
7342
7343                 --tr->no_filter_buffering_ref;
7344         }
7345  out:
7346         mutex_unlock(&trace_types_lock);
7347
7348         return ret;
7349 }
7350
7351 struct ftrace_buffer_info {
7352         struct trace_iterator   iter;
7353         void                    *spare;
7354         unsigned int            spare_cpu;
7355         unsigned int            read;
7356 };
7357
7358 #ifdef CONFIG_TRACER_SNAPSHOT
7359 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7360 {
7361         struct trace_array *tr = inode->i_private;
7362         struct trace_iterator *iter;
7363         struct seq_file *m;
7364         int ret;
7365
7366         ret = tracing_check_open_get_tr(tr);
7367         if (ret)
7368                 return ret;
7369
7370         if (file->f_mode & FMODE_READ) {
7371                 iter = __tracing_open(inode, file, true);
7372                 if (IS_ERR(iter))
7373                         ret = PTR_ERR(iter);
7374         } else {
7375                 /* Writes still need the seq_file to hold the private data */
7376                 ret = -ENOMEM;
7377                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7378                 if (!m)
7379                         goto out;
7380                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7381                 if (!iter) {
7382                         kfree(m);
7383                         goto out;
7384                 }
7385                 ret = 0;
7386
7387                 iter->tr = tr;
7388                 iter->array_buffer = &tr->max_buffer;
7389                 iter->cpu_file = tracing_get_cpu(inode);
7390                 m->private = iter;
7391                 file->private_data = m;
7392         }
7393 out:
7394         if (ret < 0)
7395                 trace_array_put(tr);
7396
7397         return ret;
7398 }
7399
7400 static ssize_t
7401 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7402                        loff_t *ppos)
7403 {
7404         struct seq_file *m = filp->private_data;
7405         struct trace_iterator *iter = m->private;
7406         struct trace_array *tr = iter->tr;
7407         unsigned long val;
7408         int ret;
7409
7410         ret = tracing_update_buffers();
7411         if (ret < 0)
7412                 return ret;
7413
7414         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7415         if (ret)
7416                 return ret;
7417
7418         mutex_lock(&trace_types_lock);
7419
7420         if (tr->current_trace->use_max_tr) {
7421                 ret = -EBUSY;
7422                 goto out;
7423         }
7424
7425         arch_spin_lock(&tr->max_lock);
7426         if (tr->cond_snapshot)
7427                 ret = -EBUSY;
7428         arch_spin_unlock(&tr->max_lock);
7429         if (ret)
7430                 goto out;
7431
7432         switch (val) {
7433         case 0:
7434                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7435                         ret = -EINVAL;
7436                         break;
7437                 }
7438                 if (tr->allocated_snapshot)
7439                         free_snapshot(tr);
7440                 break;
7441         case 1:
7442 /* Only allow per-cpu swap if the ring buffer supports it */
7443 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7444                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7445                         ret = -EINVAL;
7446                         break;
7447                 }
7448 #endif
7449                 if (tr->allocated_snapshot)
7450                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7451                                         &tr->array_buffer, iter->cpu_file);
7452                 else
7453                         ret = tracing_alloc_snapshot_instance(tr);
7454                 if (ret < 0)
7455                         break;
7456                 local_irq_disable();
7457                 /* Now, we're going to swap */
7458                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7459                         update_max_tr(tr, current, smp_processor_id(), NULL);
7460                 else
7461                         update_max_tr_single(tr, current, iter->cpu_file);
7462                 local_irq_enable();
7463                 break;
7464         default:
7465                 if (tr->allocated_snapshot) {
7466                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7467                                 tracing_reset_online_cpus(&tr->max_buffer);
7468                         else
7469                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7470                 }
7471                 break;
7472         }
7473
7474         if (ret >= 0) {
7475                 *ppos += cnt;
7476                 ret = cnt;
7477         }
7478 out:
7479         mutex_unlock(&trace_types_lock);
7480         return ret;
7481 }
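
/*
 * Example (user space sketch; assumes CONFIG_TRACER_SNAPSHOT and that
 * tracefs is mounted at /sys/kernel/tracing): per the switch above,
 * writing "0" frees the snapshot buffer, "1" allocates it if needed
 * and takes a snapshot, and any other value just clears the snapshot
 * contents.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        // Take a snapshot of the current trace (allocating if needed).
 *        static int take_snapshot(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return -1;
 *                if (write(fd, "1", 1) != 1) {
 *                        close(fd);
 *                        return -1;
 *                }
 *                return close(fd);
 *        }
 */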
7482
7483 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7484 {
7485         struct seq_file *m = file->private_data;
7486         int ret;
7487
7488         ret = tracing_release(inode, file);
7489
7490         if (file->f_mode & FMODE_READ)
7491                 return ret;
7492
7493         /* If write only, the seq_file is just a stub */
7494         if (m)
7495                 kfree(m->private);
7496         kfree(m);
7497
7498         return 0;
7499 }
7500
7501 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7502 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7503                                     size_t count, loff_t *ppos);
7504 static int tracing_buffers_release(struct inode *inode, struct file *file);
7505 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7506                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7507
7508 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7509 {
7510         struct ftrace_buffer_info *info;
7511         int ret;
7512
7513         /* The following checks for tracefs lockdown */
7514         ret = tracing_buffers_open(inode, filp);
7515         if (ret < 0)
7516                 return ret;
7517
7518         info = filp->private_data;
7519
7520         if (info->iter.trace->use_max_tr) {
7521                 tracing_buffers_release(inode, filp);
7522                 return -EBUSY;
7523         }
7524
7525         info->iter.snapshot = true;
7526         info->iter.array_buffer = &info->iter.tr->max_buffer;
7527
7528         return ret;
7529 }
7530
7531 #endif /* CONFIG_TRACER_SNAPSHOT */
7532
7533
7534 static const struct file_operations tracing_thresh_fops = {
7535         .open           = tracing_open_generic,
7536         .read           = tracing_thresh_read,
7537         .write          = tracing_thresh_write,
7538         .llseek         = generic_file_llseek,
7539 };
7540
7541 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7542 static const struct file_operations tracing_max_lat_fops = {
7543         .open           = tracing_open_generic,
7544         .read           = tracing_max_lat_read,
7545         .write          = tracing_max_lat_write,
7546         .llseek         = generic_file_llseek,
7547 };
7548 #endif
7549
7550 static const struct file_operations set_tracer_fops = {
7551         .open           = tracing_open_generic,
7552         .read           = tracing_set_trace_read,
7553         .write          = tracing_set_trace_write,
7554         .llseek         = generic_file_llseek,
7555 };
7556
7557 static const struct file_operations tracing_pipe_fops = {
7558         .open           = tracing_open_pipe,
7559         .poll           = tracing_poll_pipe,
7560         .read           = tracing_read_pipe,
7561         .splice_read    = tracing_splice_read_pipe,
7562         .release        = tracing_release_pipe,
7563         .llseek         = no_llseek,
7564 };
7565
7566 static const struct file_operations tracing_entries_fops = {
7567         .open           = tracing_open_generic_tr,
7568         .read           = tracing_entries_read,
7569         .write          = tracing_entries_write,
7570         .llseek         = generic_file_llseek,
7571         .release        = tracing_release_generic_tr,
7572 };
7573
7574 static const struct file_operations tracing_total_entries_fops = {
7575         .open           = tracing_open_generic_tr,
7576         .read           = tracing_total_entries_read,
7577         .llseek         = generic_file_llseek,
7578         .release        = tracing_release_generic_tr,
7579 };
7580
7581 static const struct file_operations tracing_free_buffer_fops = {
7582         .open           = tracing_open_generic_tr,
7583         .write          = tracing_free_buffer_write,
7584         .release        = tracing_free_buffer_release,
7585 };
7586
7587 static const struct file_operations tracing_mark_fops = {
7588         .open           = tracing_mark_open,
7589         .write          = tracing_mark_write,
7590         .release        = tracing_release_generic_tr,
7591 };
7592
7593 static const struct file_operations tracing_mark_raw_fops = {
7594         .open           = tracing_mark_open,
7595         .write          = tracing_mark_raw_write,
7596         .release        = tracing_release_generic_tr,
7597 };
7598
7599 static const struct file_operations trace_clock_fops = {
7600         .open           = tracing_clock_open,
7601         .read           = seq_read,
7602         .llseek         = seq_lseek,
7603         .release        = tracing_single_release_tr,
7604         .write          = tracing_clock_write,
7605 };
7606
7607 static const struct file_operations trace_time_stamp_mode_fops = {
7608         .open           = tracing_time_stamp_mode_open,
7609         .read           = seq_read,
7610         .llseek         = seq_lseek,
7611         .release        = tracing_single_release_tr,
7612 };
7613
7614 #ifdef CONFIG_TRACER_SNAPSHOT
7615 static const struct file_operations snapshot_fops = {
7616         .open           = tracing_snapshot_open,
7617         .read           = seq_read,
7618         .write          = tracing_snapshot_write,
7619         .llseek         = tracing_lseek,
7620         .release        = tracing_snapshot_release,
7621 };
7622
7623 static const struct file_operations snapshot_raw_fops = {
7624         .open           = snapshot_raw_open,
7625         .read           = tracing_buffers_read,
7626         .release        = tracing_buffers_release,
7627         .splice_read    = tracing_buffers_splice_read,
7628         .llseek         = no_llseek,
7629 };
7630
7631 #endif /* CONFIG_TRACER_SNAPSHOT */
7632
7633 /*
7634  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7635  * @filp: The active open file structure
7636  * @ubuf: The user space buffer holding the value to be written
7637  * @cnt: The number of bytes to write
7638  * @ppos: The current "file" position
7639  *
7640  * This function implements the write interface for a struct trace_min_max_param.
7641  * The filp->private_data must point to a trace_min_max_param structure that
7642  * defines where to write the value, the optional min and max acceptable
7643  * values, and an optional lock to protect the write.
7644  */
7645 static ssize_t
7646 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7647 {
7648         struct trace_min_max_param *param = filp->private_data;
7649         u64 val;
7650         int err;
7651
7652         if (!param)
7653                 return -EFAULT;
7654
7655         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7656         if (err)
7657                 return err;
7658
7659         if (param->lock)
7660                 mutex_lock(param->lock);
7661
7662         if (param->min && val < *param->min)
7663                 err = -EINVAL;
7664
7665         if (param->max && val > *param->max)
7666                 err = -EINVAL;
7667
7668         if (!err)
7669                 *param->val = val;
7670
7671         if (param->lock)
7672                 mutex_unlock(param->lock);
7673
7674         if (err)
7675                 return err;
7676
7677         return cnt;
7678 }
7679
7680 /*
7681  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7682  * @filp: The active open file structure
7683  * @ubuf: The userspace provided buffer to read value into
7684  * @cnt: The maximum number of bytes to read
7685  * @ppos: The current "file" position
7686  *
7687  * This function implements the read interface for a struct trace_min_max_param.
7688  * The filp->private_data must point to a trace_min_max_param struct with valid
7689  * data.
7690  */
7691 static ssize_t
7692 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7693 {
7694         struct trace_min_max_param *param = filp->private_data;
7695         char buf[U64_STR_SIZE];
7696         int len;
7697         u64 val;
7698
7699         if (!param)
7700                 return -EFAULT;
7701
7702         val = *param->val;
7703
7704         if (cnt > sizeof(buf))
7705                 cnt = sizeof(buf);
7706
7707         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7708
7709         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7710 }
7711
7712 const struct file_operations trace_min_max_fops = {
7713         .open           = tracing_open_generic,
7714         .read           = trace_min_max_read,
7715         .write          = trace_min_max_write,
7716 };
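
/*
 * Example (in-kernel sketch; "my_timeout_us", its bounds and the file
 * name are hypothetical, and TRACE_MODE_WRITE is assumed to be the
 * writable counterpart of TRACE_MODE_READ used elsewhere in this
 * file): a clamped u64 tunable can be exposed by pairing a
 * trace_min_max_param with trace_min_max_fops.
 *
 *        static u64 my_timeout_us = 100;
 *        static u64 my_timeout_min = 1;
 *        static u64 my_timeout_max = 1000000;
 *        static DEFINE_MUTEX(my_timeout_lock);
 *
 *        static struct trace_min_max_param my_timeout_param = {
 *                .lock = &my_timeout_lock,
 *                .val  = &my_timeout_us,
 *                .min  = &my_timeout_min,
 *                .max  = &my_timeout_max,
 *        };
 *
 *        // In an init path, with d_tracer being the tracefs directory:
 *        // trace_create_file("my_timeout_us", TRACE_MODE_WRITE, d_tracer,
 *        //                   &my_timeout_param, &trace_min_max_fops);
 */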
7717
7718 #define TRACING_LOG_ERRS_MAX    8
7719 #define TRACING_LOG_LOC_MAX     128
7720
7721 #define CMD_PREFIX "  Command: "
7722
7723 struct err_info {
7724         const char      **errs; /* ptr to loc-specific array of err strings */
7725         u8              type;   /* index into errs -> specific err string */
7726         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7727         u64             ts;
7728 };
7729
7730 struct tracing_log_err {
7731         struct list_head        list;
7732         struct err_info         info;
7733         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7734         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7735 };
7736
7737 static DEFINE_MUTEX(tracing_err_log_lock);
7738
7739 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7740 {
7741         struct tracing_log_err *err;
7742
7743         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7744                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7745                 if (!err)
7746                         err = ERR_PTR(-ENOMEM);
7747                 else
7748                         tr->n_err_log_entries++;
7749
7750                 return err;
7751         }
7752
7753         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7754         list_del(&err->list);
7755
7756         return err;
7757 }
7758
7759 /**
7760  * err_pos - find the position of a string within a command for error careting
7761  * @cmd: The tracing command that caused the error
7762  * @str: The string to position the caret at within @cmd
7763  *
7764  * Finds the position of the first occurrence of @str within @cmd.  The
7765  * return value can be passed to tracing_log_err() for caret placement
7766  * within @cmd.
7767  *
7768  * Returns the index within @cmd of the first occurrence of @str or 0
7769  * if @str was not found.
7770  */
7771 unsigned int err_pos(char *cmd, const char *str)
7772 {
7773         char *found;
7774
7775         if (WARN_ON(!strlen(cmd)))
7776                 return 0;
7777
7778         found = strstr(cmd, str);
7779         if (found)
7780                 return found - cmd;
7781
7782         return 0;
7783 }
7784
7785 /**
7786  * tracing_log_err - write an error to the tracing error log
7787  * @tr: The associated trace array for the error (NULL for top level array)
7788  * @loc: A string describing where the error occurred
7789  * @cmd: The tracing command that caused the error
7790  * @errs: The array of loc-specific static error strings
7791  * @type: The index into errs[], which produces the specific static err string
7792  * @pos: The position the caret should be placed in the cmd
7793  *
7794  * Writes an error into tracing/error_log of the form:
7795  *
7796  * <loc>: error: <text>
7797  *   Command: <cmd>
7798  *              ^
7799  *
7800  * tracing/error_log is a small log file containing the last
7801  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7802  * unless there has been a tracing error, and the error log can be
7803  * cleared and have its memory freed by writing the empty string in
7804  * truncation mode to it, i.e. echo > tracing/error_log.
7805  *
7806  * NOTE: the @errs array along with the @type param are used to
7807  * produce a static error string - this string is not copied and saved
7808  * when the error is logged - only a pointer to it is saved.  See
7809  * existing callers for examples of how static strings are typically
7810  * defined for use with tracing_log_err().
7811  */
7812 void tracing_log_err(struct trace_array *tr,
7813                      const char *loc, const char *cmd,
7814                      const char **errs, u8 type, u8 pos)
7815 {
7816         struct tracing_log_err *err;
7817
7818         if (!tr)
7819                 tr = &global_trace;
7820
7821         mutex_lock(&tracing_err_log_lock);
7822         err = get_tracing_log_err(tr);
7823         if (PTR_ERR(err) == -ENOMEM) {
7824                 mutex_unlock(&tracing_err_log_lock);
7825                 return;
7826         }
7827
7828         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7829         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7830
7831         err->info.errs = errs;
7832         err->info.type = type;
7833         err->info.pos = pos;
7834         err->info.ts = local_clock();
7835
7836         list_add_tail(&err->list, &tr->err_log);
7837         mutex_unlock(&tracing_err_log_lock);
7838 }
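/*
 * A minimal, hypothetical sketch of how a caller might use err_pos() and
 * tracing_log_err().  The names foo_cmd_errs, FOO_ERR_* and the parsing
 * helpers below are illustrative only, not taken from an existing caller:
 *
 *	static const char *foo_cmd_errs[] = {
 *		"Duplicate field name",
 *		"Missing field name",
 *	};
 *	enum { FOO_ERR_DUPLICATE, FOO_ERR_MISSING };
 *
 *	static int foo_parse(struct trace_array *tr, char *cmd, char *field)
 *	{
 *		if (foo_field_is_duplicate(cmd, field)) {
 *			tracing_log_err(tr, "foo: parse", cmd, foo_cmd_errs,
 *					FOO_ERR_DUPLICATE, err_pos(cmd, field));
 *			return -EINVAL;
 *		}
 *		return 0;
 *	}
 *
 * Reading tracing/error_log would then show the static error string with
 * a caret placed under @field within the echoed command.
 */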
7839
7840 static void clear_tracing_err_log(struct trace_array *tr)
7841 {
7842         struct tracing_log_err *err, *next;
7843
7844         mutex_lock(&tracing_err_log_lock);
7845         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7846                 list_del(&err->list);
7847                 kfree(err);
7848         }
7849
7850         tr->n_err_log_entries = 0;
7851         mutex_unlock(&tracing_err_log_lock);
7852 }
7853
7854 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7855 {
7856         struct trace_array *tr = m->private;
7857
7858         mutex_lock(&tracing_err_log_lock);
7859
7860         return seq_list_start(&tr->err_log, *pos);
7861 }
7862
7863 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7864 {
7865         struct trace_array *tr = m->private;
7866
7867         return seq_list_next(v, &tr->err_log, pos);
7868 }
7869
7870 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7871 {
7872         mutex_unlock(&tracing_err_log_lock);
7873 }
7874
7875 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7876 {
7877         u8 i;
7878
7879         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7880                 seq_putc(m, ' ');
7881         for (i = 0; i < pos; i++)
7882                 seq_putc(m, ' ');
7883         seq_puts(m, "^\n");
7884 }
7885
7886 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7887 {
7888         struct tracing_log_err *err = v;
7889
7890         if (err) {
7891                 const char *err_text = err->info.errs[err->info.type];
7892                 u64 sec = err->info.ts;
7893                 u32 nsec;
7894
7895                 nsec = do_div(sec, NSEC_PER_SEC);
7896                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7897                            err->loc, err_text);
7898                 seq_printf(m, "%s", err->cmd);
7899                 tracing_err_log_show_pos(m, err->info.pos);
7900         }
7901
7902         return 0;
7903 }
7904
7905 static const struct seq_operations tracing_err_log_seq_ops = {
7906         .start  = tracing_err_log_seq_start,
7907         .next   = tracing_err_log_seq_next,
7908         .stop   = tracing_err_log_seq_stop,
7909         .show   = tracing_err_log_seq_show
7910 };
7911
7912 static int tracing_err_log_open(struct inode *inode, struct file *file)
7913 {
7914         struct trace_array *tr = inode->i_private;
7915         int ret = 0;
7916
7917         ret = tracing_check_open_get_tr(tr);
7918         if (ret)
7919                 return ret;
7920
7921         /* If this file was opened for write, then erase contents */
7922         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7923                 clear_tracing_err_log(tr);
7924
7925         if (file->f_mode & FMODE_READ) {
7926                 ret = seq_open(file, &tracing_err_log_seq_ops);
7927                 if (!ret) {
7928                         struct seq_file *m = file->private_data;
7929                         m->private = tr;
7930                 } else {
7931                         trace_array_put(tr);
7932                 }
7933         }
7934         return ret;
7935 }
7936
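/*
 * Writes to tracing/error_log are accepted but ignored; the log is only
 * cleared via the O_TRUNC handling in tracing_err_log_open(), e.g.
 * "echo > tracing/error_log".
 */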
7937 static ssize_t tracing_err_log_write(struct file *file,
7938                                      const char __user *buffer,
7939                                      size_t count, loff_t *ppos)
7940 {
7941         return count;
7942 }
7943
7944 static int tracing_err_log_release(struct inode *inode, struct file *file)
7945 {
7946         struct trace_array *tr = inode->i_private;
7947
7948         trace_array_put(tr);
7949
7950         if (file->f_mode & FMODE_READ)
7951                 seq_release(inode, file);
7952
7953         return 0;
7954 }
7955
7956 static const struct file_operations tracing_err_log_fops = {
7957         .open           = tracing_err_log_open,
7958         .write          = tracing_err_log_write,
7959         .read           = seq_read,
7960         .llseek         = seq_lseek,
7961         .release        = tracing_err_log_release,
7962 };
7963
7964 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7965 {
7966         struct trace_array *tr = inode->i_private;
7967         struct ftrace_buffer_info *info;
7968         int ret;
7969
7970         ret = tracing_check_open_get_tr(tr);
7971         if (ret)
7972                 return ret;
7973
7974         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7975         if (!info) {
7976                 trace_array_put(tr);
7977                 return -ENOMEM;
7978         }
7979
7980         mutex_lock(&trace_types_lock);
7981
7982         info->iter.tr           = tr;
7983         info->iter.cpu_file     = tracing_get_cpu(inode);
7984         info->iter.trace        = tr->current_trace;
7985         info->iter.array_buffer = &tr->array_buffer;
7986         info->spare             = NULL;
7987         /* Force reading ring buffer for first read */
7988         info->read              = (unsigned int)-1;
7989
7990         filp->private_data = info;
7991
7992         tr->trace_ref++;
7993
7994         mutex_unlock(&trace_types_lock);
7995
7996         ret = nonseekable_open(inode, filp);
7997         if (ret < 0)
7998                 trace_array_put(tr);
7999
8000         return ret;
8001 }
8002
8003 static __poll_t
8004 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8005 {
8006         struct ftrace_buffer_info *info = filp->private_data;
8007         struct trace_iterator *iter = &info->iter;
8008
8009         return trace_poll(iter, filp, poll_table);
8010 }
8011
8012 static ssize_t
8013 tracing_buffers_read(struct file *filp, char __user *ubuf,
8014                      size_t count, loff_t *ppos)
8015 {
8016         struct ftrace_buffer_info *info = filp->private_data;
8017         struct trace_iterator *iter = &info->iter;
8018         ssize_t ret = 0;
8019         ssize_t size;
8020
8021         if (!count)
8022                 return 0;
8023
8024 #ifdef CONFIG_TRACER_MAX_TRACE
8025         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8026                 return -EBUSY;
8027 #endif
8028
8029         if (!info->spare) {
8030                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8031                                                           iter->cpu_file);
8032                 if (IS_ERR(info->spare)) {
8033                         ret = PTR_ERR(info->spare);
8034                         info->spare = NULL;
8035                 } else {
8036                         info->spare_cpu = iter->cpu_file;
8037                 }
8038         }
8039         if (!info->spare)
8040                 return ret;
8041
8042         /* Do we have previous read data to read? */
8043         if (info->read < PAGE_SIZE)
8044                 goto read;
8045
8046  again:
8047         trace_access_lock(iter->cpu_file);
8048         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8049                                     &info->spare,
8050                                     count,
8051                                     iter->cpu_file, 0);
8052         trace_access_unlock(iter->cpu_file);
8053
8054         if (ret < 0) {
8055                 if (trace_empty(iter)) {
8056                         if ((filp->f_flags & O_NONBLOCK))
8057                                 return -EAGAIN;
8058
8059                         ret = wait_on_pipe(iter, 0);
8060                         if (ret)
8061                                 return ret;
8062
8063                         goto again;
8064                 }
8065                 return 0;
8066         }
8067
8068         info->read = 0;
8069  read:
8070         size = PAGE_SIZE - info->read;
8071         if (size > count)
8072                 size = count;
8073
8074         ret = copy_to_user(ubuf, info->spare + info->read, size);
8075         if (ret == size)
8076                 return -EFAULT;
8077
8078         size -= ret;
8079
8080         *ppos += size;
8081         info->read += size;
8082
8083         return size;
8084 }
8085
8086 static int tracing_buffers_release(struct inode *inode, struct file *file)
8087 {
8088         struct ftrace_buffer_info *info = file->private_data;
8089         struct trace_iterator *iter = &info->iter;
8090
8091         mutex_lock(&trace_types_lock);
8092
8093         iter->tr->trace_ref--;
8094
8095         __trace_array_put(iter->tr);
8096
8097         if (info->spare)
8098                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8099                                            info->spare_cpu, info->spare);
8100         kvfree(info);
8101
8102         mutex_unlock(&trace_types_lock);
8103
8104         return 0;
8105 }
8106
8107 struct buffer_ref {
8108         struct trace_buffer     *buffer;
8109         void                    *page;
8110         int                     cpu;
8111         refcount_t              refcount;
8112 };
8113
8114 static void buffer_ref_release(struct buffer_ref *ref)
8115 {
8116         if (!refcount_dec_and_test(&ref->refcount))
8117                 return;
8118         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8119         kfree(ref);
8120 }
8121
8122 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8123                                     struct pipe_buffer *buf)
8124 {
8125         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8126
8127         buffer_ref_release(ref);
8128         buf->private = 0;
8129 }
8130
8131 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8132                                 struct pipe_buffer *buf)
8133 {
8134         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8135
8136         if (refcount_read(&ref->refcount) > INT_MAX/2)
8137                 return false;
8138
8139         refcount_inc(&ref->refcount);
8140         return true;
8141 }
8142
8143 /* Pipe buffer operations for a buffer. */
8144 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8145         .release                = buffer_pipe_buf_release,
8146         .get                    = buffer_pipe_buf_get,
8147 };
8148
8149 /*
8150  * Callback from splice_to_pipe(), if we need to release some pages
8151  * at the end of the spd in case we errored out while filling the pipe.
8152  */
8153 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8154 {
8155         struct buffer_ref *ref =
8156                 (struct buffer_ref *)spd->partial[i].private;
8157
8158         buffer_ref_release(ref);
8159         spd->partial[i].private = 0;
8160 }
8161
8162 static ssize_t
8163 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8164                             struct pipe_inode_info *pipe, size_t len,
8165                             unsigned int flags)
8166 {
8167         struct ftrace_buffer_info *info = file->private_data;
8168         struct trace_iterator *iter = &info->iter;
8169         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8170         struct page *pages_def[PIPE_DEF_BUFFERS];
8171         struct splice_pipe_desc spd = {
8172                 .pages          = pages_def,
8173                 .partial        = partial_def,
8174                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8175                 .ops            = &buffer_pipe_buf_ops,
8176                 .spd_release    = buffer_spd_release,
8177         };
8178         struct buffer_ref *ref;
8179         int entries, i;
8180         ssize_t ret = 0;
8181
8182 #ifdef CONFIG_TRACER_MAX_TRACE
8183         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8184                 return -EBUSY;
8185 #endif
8186
8187         if (*ppos & (PAGE_SIZE - 1))
8188                 return -EINVAL;
8189
8190         if (len & (PAGE_SIZE - 1)) {
8191                 if (len < PAGE_SIZE)
8192                         return -EINVAL;
8193                 len &= PAGE_MASK;
8194         }
8195
8196         if (splice_grow_spd(pipe, &spd))
8197                 return -ENOMEM;
8198
8199  again:
8200         trace_access_lock(iter->cpu_file);
8201         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8202
8203         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8204                 struct page *page;
8205                 int r;
8206
8207                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8208                 if (!ref) {
8209                         ret = -ENOMEM;
8210                         break;
8211                 }
8212
8213                 refcount_set(&ref->refcount, 1);
8214                 ref->buffer = iter->array_buffer->buffer;
8215                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8216                 if (IS_ERR(ref->page)) {
8217                         ret = PTR_ERR(ref->page);
8218                         ref->page = NULL;
8219                         kfree(ref);
8220                         break;
8221                 }
8222                 ref->cpu = iter->cpu_file;
8223
8224                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8225                                           len, iter->cpu_file, 1);
8226                 if (r < 0) {
8227                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8228                                                    ref->page);
8229                         kfree(ref);
8230                         break;
8231                 }
8232
8233                 page = virt_to_page(ref->page);
8234
8235                 spd.pages[i] = page;
8236                 spd.partial[i].len = PAGE_SIZE;
8237                 spd.partial[i].offset = 0;
8238                 spd.partial[i].private = (unsigned long)ref;
8239                 spd.nr_pages++;
8240                 *ppos += PAGE_SIZE;
8241
8242                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8243         }
8244
8245         trace_access_unlock(iter->cpu_file);
8246         spd.nr_pages = i;
8247
8248         /* did we read anything? */
8249         if (!spd.nr_pages) {
8250                 if (ret)
8251                         goto out;
8252
8253                 ret = -EAGAIN;
8254                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8255                         goto out;
8256
8257                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8258                 if (ret)
8259                         goto out;
8260
8261                 goto again;
8262         }
8263
8264         ret = splice_to_pipe(pipe, &spd);
8265 out:
8266         splice_shrink_spd(&spd);
8267
8268         return ret;
8269 }
8270
8271 static const struct file_operations tracing_buffers_fops = {
8272         .open           = tracing_buffers_open,
8273         .read           = tracing_buffers_read,
8274         .poll           = tracing_buffers_poll,
8275         .release        = tracing_buffers_release,
8276         .splice_read    = tracing_buffers_splice_read,
8277         .llseek         = no_llseek,
8278 };
8279
8280 static ssize_t
8281 tracing_stats_read(struct file *filp, char __user *ubuf,
8282                    size_t count, loff_t *ppos)
8283 {
8284         struct inode *inode = file_inode(filp);
8285         struct trace_array *tr = inode->i_private;
8286         struct array_buffer *trace_buf = &tr->array_buffer;
8287         int cpu = tracing_get_cpu(inode);
8288         struct trace_seq *s;
8289         unsigned long cnt;
8290         unsigned long long t;
8291         unsigned long usec_rem;
8292
8293         s = kmalloc(sizeof(*s), GFP_KERNEL);
8294         if (!s)
8295                 return -ENOMEM;
8296
8297         trace_seq_init(s);
8298
8299         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8300         trace_seq_printf(s, "entries: %ld\n", cnt);
8301
8302         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8303         trace_seq_printf(s, "overrun: %ld\n", cnt);
8304
8305         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8306         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8307
8308         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8309         trace_seq_printf(s, "bytes: %ld\n", cnt);
8310
8311         if (trace_clocks[tr->clock_id].in_ns) {
8312                 /* local or global for trace_clock */
8313                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8314                 usec_rem = do_div(t, USEC_PER_SEC);
8315                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8316                                                                 t, usec_rem);
8317
8318                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8319                 usec_rem = do_div(t, USEC_PER_SEC);
8320                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8321         } else {
8322                 /* counter or tsc mode for trace_clock */
8323                 trace_seq_printf(s, "oldest event ts: %llu\n",
8324                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8325
8326                 trace_seq_printf(s, "now ts: %llu\n",
8327                                 ring_buffer_time_stamp(trace_buf->buffer));
8328         }
8329
8330         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8331         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8332
8333         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8334         trace_seq_printf(s, "read events: %ld\n", cnt);
8335
8336         count = simple_read_from_buffer(ubuf, count, ppos,
8337                                         s->buffer, trace_seq_used(s));
8338
8339         kfree(s);
8340
8341         return count;
8342 }
8343
8344 static const struct file_operations tracing_stats_fops = {
8345         .open           = tracing_open_generic_tr,
8346         .read           = tracing_stats_read,
8347         .llseek         = generic_file_llseek,
8348         .release        = tracing_release_generic_tr,
8349 };
8350
8351 #ifdef CONFIG_DYNAMIC_FTRACE
8352
8353 static ssize_t
8354 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8355                   size_t cnt, loff_t *ppos)
8356 {
8357         ssize_t ret;
8358         char *buf;
8359         int r;
8360
8361         /* 256 should be plenty to hold the amount needed */
8362         buf = kmalloc(256, GFP_KERNEL);
8363         if (!buf)
8364                 return -ENOMEM;
8365
8366         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8367                       ftrace_update_tot_cnt,
8368                       ftrace_number_of_pages,
8369                       ftrace_number_of_groups);
8370
8371         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8372         kfree(buf);
8373         return ret;
8374 }
8375
8376 static const struct file_operations tracing_dyn_info_fops = {
8377         .open           = tracing_open_generic,
8378         .read           = tracing_read_dyn_info,
8379         .llseek         = generic_file_llseek,
8380 };
8381 #endif /* CONFIG_DYNAMIC_FTRACE */
8382
8383 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8384 static void
8385 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8386                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8387                 void *data)
8388 {
8389         tracing_snapshot_instance(tr);
8390 }
8391
8392 static void
8393 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8394                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8395                       void *data)
8396 {
8397         struct ftrace_func_mapper *mapper = data;
8398         long *count = NULL;
8399
8400         if (mapper)
8401                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8402
8403         if (count) {
8404
8405                 if (*count <= 0)
8406                         return;
8407
8408                 (*count)--;
8409         }
8410
8411         tracing_snapshot_instance(tr);
8412 }
8413
8414 static int
8415 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8416                       struct ftrace_probe_ops *ops, void *data)
8417 {
8418         struct ftrace_func_mapper *mapper = data;
8419         long *count = NULL;
8420
8421         seq_printf(m, "%ps:", (void *)ip);
8422
8423         seq_puts(m, "snapshot");
8424
8425         if (mapper)
8426                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8427
8428         if (count)
8429                 seq_printf(m, ":count=%ld\n", *count);
8430         else
8431                 seq_puts(m, ":unlimited\n");
8432
8433         return 0;
8434 }
8435
8436 static int
8437 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8438                      unsigned long ip, void *init_data, void **data)
8439 {
8440         struct ftrace_func_mapper *mapper = *data;
8441
8442         if (!mapper) {
8443                 mapper = allocate_ftrace_func_mapper();
8444                 if (!mapper)
8445                         return -ENOMEM;
8446                 *data = mapper;
8447         }
8448
8449         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8450 }
8451
8452 static void
8453 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8454                      unsigned long ip, void *data)
8455 {
8456         struct ftrace_func_mapper *mapper = data;
8457
8458         if (!ip) {
8459                 if (!mapper)
8460                         return;
8461                 free_ftrace_func_mapper(mapper, NULL);
8462                 return;
8463         }
8464
8465         ftrace_func_mapper_remove_ip(mapper, ip);
8466 }
8467
8468 static struct ftrace_probe_ops snapshot_probe_ops = {
8469         .func                   = ftrace_snapshot,
8470         .print                  = ftrace_snapshot_print,
8471 };
8472
8473 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8474         .func                   = ftrace_count_snapshot,
8475         .print                  = ftrace_snapshot_print,
8476         .init                   = ftrace_snapshot_init,
8477         .free                   = ftrace_snapshot_free,
8478 };
8479
8480 static int
8481 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8482                                char *glob, char *cmd, char *param, int enable)
8483 {
8484         struct ftrace_probe_ops *ops;
8485         void *count = (void *)-1;
8486         char *number;
8487         int ret;
8488
8489         if (!tr)
8490                 return -ENODEV;
8491
8492         /* hash funcs only work with set_ftrace_filter */
8493         if (!enable)
8494                 return -EINVAL;
8495
8496         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8497
8498         if (glob[0] == '!')
8499                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8500
8501         if (!param)
8502                 goto out_reg;
8503
8504         number = strsep(&param, ":");
8505
8506         if (!strlen(number))
8507                 goto out_reg;
8508
8509         /*
8510          * We use the callback data field (which is a pointer)
8511          * as our counter.
8512          */
8513         ret = kstrtoul(number, 0, (unsigned long *)&count);
8514         if (ret)
8515                 return ret;
8516
8517  out_reg:
8518         ret = tracing_alloc_snapshot_instance(tr);
8519         if (ret < 0)
8520                 goto out;
8521
8522         ret = register_ftrace_function_probe(glob, tr, ops, count);
8523
8524  out:
8525         return ret < 0 ? ret : 0;
8526 }
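/*
 * The "snapshot" function command (registered below by
 * register_snapshot_cmd()) is used through set_ftrace_filter, e.g.:
 *
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * takes a snapshot the first five times schedule() is hit; echoing the
 * same string with a leading '!' unregisters the probe again.
 */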
8527
8528 static struct ftrace_func_command ftrace_snapshot_cmd = {
8529         .name                   = "snapshot",
8530         .func                   = ftrace_trace_snapshot_callback,
8531 };
8532
8533 static __init int register_snapshot_cmd(void)
8534 {
8535         return register_ftrace_command(&ftrace_snapshot_cmd);
8536 }
8537 #else
8538 static inline __init int register_snapshot_cmd(void) { return 0; }
8539 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8540
8541 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8542 {
8543         if (WARN_ON(!tr->dir))
8544                 return ERR_PTR(-ENODEV);
8545
8546         /* Top directory uses NULL as the parent */
8547         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8548                 return NULL;
8549
8550         /* All sub buffers have a descriptor */
8551         return tr->dir;
8552 }
8553
8554 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8555 {
8556         struct dentry *d_tracer;
8557
8558         if (tr->percpu_dir)
8559                 return tr->percpu_dir;
8560
8561         d_tracer = tracing_get_dentry(tr);
8562         if (IS_ERR(d_tracer))
8563                 return NULL;
8564
8565         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8566
8567         MEM_FAIL(!tr->percpu_dir,
8568                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8569
8570         return tr->percpu_dir;
8571 }
8572
8573 static struct dentry *
8574 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8575                       void *data, long cpu, const struct file_operations *fops)
8576 {
8577         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8578
8579         if (ret) /* See tracing_get_cpu() */
8580                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8581         return ret;
8582 }
8583
8584 static void
8585 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8586 {
8587         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8588         struct dentry *d_cpu;
8589         char cpu_dir[30]; /* 30 characters should be more than enough */
8590
8591         if (!d_percpu)
8592                 return;
8593
8594         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8595         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8596         if (!d_cpu) {
8597                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8598                 return;
8599         }
8600
8601         /* per cpu trace_pipe */
8602         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8603                                 tr, cpu, &tracing_pipe_fops);
8604
8605         /* per cpu trace */
8606         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8607                                 tr, cpu, &tracing_fops);
8608
8609         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8610                                 tr, cpu, &tracing_buffers_fops);
8611
8612         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8613                                 tr, cpu, &tracing_stats_fops);
8614
8615         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8616                                 tr, cpu, &tracing_entries_fops);
8617
8618 #ifdef CONFIG_TRACER_SNAPSHOT
8619         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8620                                 tr, cpu, &snapshot_fops);
8621
8622         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8623                                 tr, cpu, &snapshot_raw_fops);
8624 #endif
8625 }
8626
8627 #ifdef CONFIG_FTRACE_SELFTEST
8628 /* Let selftest have access to static functions in this file */
8629 #include "trace_selftest.c"
8630 #endif
8631
8632 static ssize_t
8633 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8634                         loff_t *ppos)
8635 {
8636         struct trace_option_dentry *topt = filp->private_data;
8637         char *buf;
8638
8639         if (topt->flags->val & topt->opt->bit)
8640                 buf = "1\n";
8641         else
8642                 buf = "0\n";
8643
8644         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8645 }
8646
8647 static ssize_t
8648 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8649                          loff_t *ppos)
8650 {
8651         struct trace_option_dentry *topt = filp->private_data;
8652         unsigned long val;
8653         int ret;
8654
8655         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8656         if (ret)
8657                 return ret;
8658
8659         if (val != 0 && val != 1)
8660                 return -EINVAL;
8661
8662         if (!!(topt->flags->val & topt->opt->bit) != val) {
8663                 mutex_lock(&trace_types_lock);
8664                 ret = __set_tracer_option(topt->tr, topt->flags,
8665                                           topt->opt, !val);
8666                 mutex_unlock(&trace_types_lock);
8667                 if (ret)
8668                         return ret;
8669         }
8670
8671         *ppos += cnt;
8672
8673         return cnt;
8674 }
8675
8676
8677 static const struct file_operations trace_options_fops = {
8678         .open = tracing_open_generic,
8679         .read = trace_options_read,
8680         .write = trace_options_write,
8681         .llseek = generic_file_llseek,
8682 };
8683
8684 /*
8685  * In order to pass in both the trace_array descriptor as well as the index
8686  * to the flag that the trace option file represents, the trace_array
8687  * has a character array of trace_flags_index[], which holds the index
8688  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8689  * The address of this character array is passed to the flag option file
8690  * read/write callbacks.
8691  *
8692  * In order to extract both the index and the trace_array descriptor,
8693  * get_tr_index() uses the following algorithm.
8694  *
8695  *   idx = *ptr;
8696  *
8697  * Since the pointer holds the address of an index[] entry whose value
8698  * is its own index (remember index[1] == 1), *ptr yields that index.
8699  *
8700  * Then, to get the trace_array descriptor, subtracting that index from
8701  * the ptr gives the start of the index array itself.
8702  *
8703  *   ptr - idx == &index[0]
8704  *
8705  * Then a simple container_of() from that pointer gets us to the
8706  * trace_array descriptor.
8707  */
8708 static void get_tr_index(void *data, struct trace_array **ptr,
8709                          unsigned int *pindex)
8710 {
8711         *pindex = *(unsigned char *)data;
8712
8713         *ptr = container_of(data - *pindex, struct trace_array,
8714                             trace_flags_index);
8715 }
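/*
 * Worked example (illustrative): if data == &tr->trace_flags_index[3],
 * then *pindex == 3 (init_trace_flags_index() sets index[i] = i), so
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address over member trace_flags_index recovers tr itself.
 */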
8716
8717 static ssize_t
8718 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8719                         loff_t *ppos)
8720 {
8721         void *tr_index = filp->private_data;
8722         struct trace_array *tr;
8723         unsigned int index;
8724         char *buf;
8725
8726         get_tr_index(tr_index, &tr, &index);
8727
8728         if (tr->trace_flags & (1 << index))
8729                 buf = "1\n";
8730         else
8731                 buf = "0\n";
8732
8733         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8734 }
8735
8736 static ssize_t
8737 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8738                          loff_t *ppos)
8739 {
8740         void *tr_index = filp->private_data;
8741         struct trace_array *tr;
8742         unsigned int index;
8743         unsigned long val;
8744         int ret;
8745
8746         get_tr_index(tr_index, &tr, &index);
8747
8748         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8749         if (ret)
8750                 return ret;
8751
8752         if (val != 0 && val != 1)
8753                 return -EINVAL;
8754
8755         mutex_lock(&event_mutex);
8756         mutex_lock(&trace_types_lock);
8757         ret = set_tracer_flag(tr, 1 << index, val);
8758         mutex_unlock(&trace_types_lock);
8759         mutex_unlock(&event_mutex);
8760
8761         if (ret < 0)
8762                 return ret;
8763
8764         *ppos += cnt;
8765
8766         return cnt;
8767 }
8768
8769 static const struct file_operations trace_options_core_fops = {
8770         .open = tracing_open_generic,
8771         .read = trace_options_core_read,
8772         .write = trace_options_core_write,
8773         .llseek = generic_file_llseek,
8774 };
8775
8776 struct dentry *trace_create_file(const char *name,
8777                                  umode_t mode,
8778                                  struct dentry *parent,
8779                                  void *data,
8780                                  const struct file_operations *fops)
8781 {
8782         struct dentry *ret;
8783
8784         ret = tracefs_create_file(name, mode, parent, data, fops);
8785         if (!ret)
8786                 pr_warn("Could not create tracefs '%s' entry\n", name);
8787
8788         return ret;
8789 }
8790
8791
8792 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8793 {
8794         struct dentry *d_tracer;
8795
8796         if (tr->options)
8797                 return tr->options;
8798
8799         d_tracer = tracing_get_dentry(tr);
8800         if (IS_ERR(d_tracer))
8801                 return NULL;
8802
8803         tr->options = tracefs_create_dir("options", d_tracer);
8804         if (!tr->options) {
8805                 pr_warn("Could not create tracefs directory 'options'\n");
8806                 return NULL;
8807         }
8808
8809         return tr->options;
8810 }
8811
8812 static void
8813 create_trace_option_file(struct trace_array *tr,
8814                          struct trace_option_dentry *topt,
8815                          struct tracer_flags *flags,
8816                          struct tracer_opt *opt)
8817 {
8818         struct dentry *t_options;
8819
8820         t_options = trace_options_init_dentry(tr);
8821         if (!t_options)
8822                 return;
8823
8824         topt->flags = flags;
8825         topt->opt = opt;
8826         topt->tr = tr;
8827
8828         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8829                                         t_options, topt, &trace_options_fops);
8830
8831 }
8832
8833 static void
8834 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8835 {
8836         struct trace_option_dentry *topts;
8837         struct trace_options *tr_topts;
8838         struct tracer_flags *flags;
8839         struct tracer_opt *opts;
8840         int cnt;
8841         int i;
8842
8843         if (!tracer)
8844                 return;
8845
8846         flags = tracer->flags;
8847
8848         if (!flags || !flags->opts)
8849                 return;
8850
8851         /*
8852          * If this is an instance, only create flags for tracers
8853          * the instance may have.
8854          */
8855         if (!trace_ok_for_array(tracer, tr))
8856                 return;
8857
8858         for (i = 0; i < tr->nr_topts; i++) {
8859                 /* Make sure there are no duplicate flags. */
8860                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8861                         return;
8862         }
8863
8864         opts = flags->opts;
8865
8866         for (cnt = 0; opts[cnt].name; cnt++)
8867                 ;
8868
8869         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8870         if (!topts)
8871                 return;
8872
8873         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8874                             GFP_KERNEL);
8875         if (!tr_topts) {
8876                 kfree(topts);
8877                 return;
8878         }
8879
8880         tr->topts = tr_topts;
8881         tr->topts[tr->nr_topts].tracer = tracer;
8882         tr->topts[tr->nr_topts].topts = topts;
8883         tr->nr_topts++;
8884
8885         for (cnt = 0; opts[cnt].name; cnt++) {
8886                 create_trace_option_file(tr, &topts[cnt], flags,
8887                                          &opts[cnt]);
8888                 MEM_FAIL(topts[cnt].entry == NULL,
8889                           "Failed to create trace option: %s",
8890                           opts[cnt].name);
8891         }
8892 }
8893
8894 static struct dentry *
8895 create_trace_option_core_file(struct trace_array *tr,
8896                               const char *option, long index)
8897 {
8898         struct dentry *t_options;
8899
8900         t_options = trace_options_init_dentry(tr);
8901         if (!t_options)
8902                 return NULL;
8903
8904         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8905                                  (void *)&tr->trace_flags_index[index],
8906                                  &trace_options_core_fops);
8907 }
8908
8909 static void create_trace_options_dir(struct trace_array *tr)
8910 {
8911         struct dentry *t_options;
8912         bool top_level = tr == &global_trace;
8913         int i;
8914
8915         t_options = trace_options_init_dentry(tr);
8916         if (!t_options)
8917                 return;
8918
8919         for (i = 0; trace_options[i]; i++) {
8920                 if (top_level ||
8921                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8922                         create_trace_option_core_file(tr, trace_options[i], i);
8923         }
8924 }
8925
8926 static ssize_t
8927 rb_simple_read(struct file *filp, char __user *ubuf,
8928                size_t cnt, loff_t *ppos)
8929 {
8930         struct trace_array *tr = filp->private_data;
8931         char buf[64];
8932         int r;
8933
8934         r = tracer_tracing_is_on(tr);
8935         r = sprintf(buf, "%d\n", r);
8936
8937         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8938 }
8939
8940 static ssize_t
8941 rb_simple_write(struct file *filp, const char __user *ubuf,
8942                 size_t cnt, loff_t *ppos)
8943 {
8944         struct trace_array *tr = filp->private_data;
8945         struct trace_buffer *buffer = tr->array_buffer.buffer;
8946         unsigned long val;
8947         int ret;
8948
8949         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8950         if (ret)
8951                 return ret;
8952
8953         if (buffer) {
8954                 mutex_lock(&trace_types_lock);
8955                 if (!!val == tracer_tracing_is_on(tr)) {
8956                         val = 0; /* do nothing */
8957                 } else if (val) {
8958                         tracer_tracing_on(tr);
8959                         if (tr->current_trace->start)
8960                                 tr->current_trace->start(tr);
8961                 } else {
8962                         tracer_tracing_off(tr);
8963                         if (tr->current_trace->stop)
8964                                 tr->current_trace->stop(tr);
8965                 }
8966                 mutex_unlock(&trace_types_lock);
8967         }
8968
8969         (*ppos)++;
8970
8971         return cnt;
8972 }
8973
8974 static const struct file_operations rb_simple_fops = {
8975         .open           = tracing_open_generic_tr,
8976         .read           = rb_simple_read,
8977         .write          = rb_simple_write,
8978         .release        = tracing_release_generic_tr,
8979         .llseek         = default_llseek,
8980 };
8981
8982 static ssize_t
8983 buffer_percent_read(struct file *filp, char __user *ubuf,
8984                     size_t cnt, loff_t *ppos)
8985 {
8986         struct trace_array *tr = filp->private_data;
8987         char buf[64];
8988         int r;
8989
8990         r = tr->buffer_percent;
8991         r = sprintf(buf, "%d\n", r);
8992
8993         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8994 }
8995
8996 static ssize_t
8997 buffer_percent_write(struct file *filp, const char __user *ubuf,
8998                      size_t cnt, loff_t *ppos)
8999 {
9000         struct trace_array *tr = filp->private_data;
9001         unsigned long val;
9002         int ret;
9003
9004         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9005         if (ret)
9006                 return ret;
9007
9008         if (val > 100)
9009                 return -EINVAL;
9010
9011         if (!val)
9012                 val = 1;
9013
9014         tr->buffer_percent = val;
9015
9016         (*ppos)++;
9017
9018         return cnt;
9019 }
9020
9021 static const struct file_operations buffer_percent_fops = {
9022         .open           = tracing_open_generic_tr,
9023         .read           = buffer_percent_read,
9024         .write          = buffer_percent_write,
9025         .release        = tracing_release_generic_tr,
9026         .llseek         = default_llseek,
9027 };
9028
9029 static struct dentry *trace_instance_dir;
9030
9031 static void
9032 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9033
9034 static int
9035 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9036 {
9037         enum ring_buffer_flags rb_flags;
9038
9039         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9040
9041         buf->tr = tr;
9042
9043         buf->buffer = ring_buffer_alloc(size, rb_flags);
9044         if (!buf->buffer)
9045                 return -ENOMEM;
9046
9047         buf->data = alloc_percpu(struct trace_array_cpu);
9048         if (!buf->data) {
9049                 ring_buffer_free(buf->buffer);
9050                 buf->buffer = NULL;
9051                 return -ENOMEM;
9052         }
9053
9054         /* Allocate the first page for all buffers */
9055         set_buffer_entries(&tr->array_buffer,
9056                            ring_buffer_size(tr->array_buffer.buffer, 0));
9057
9058         return 0;
9059 }
9060
9061 static int allocate_trace_buffers(struct trace_array *tr, int size)
9062 {
9063         int ret;
9064
9065         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9066         if (ret)
9067                 return ret;
9068
9069 #ifdef CONFIG_TRACER_MAX_TRACE
9070         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9071                                     allocate_snapshot ? size : 1);
9072         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9073                 ring_buffer_free(tr->array_buffer.buffer);
9074                 tr->array_buffer.buffer = NULL;
9075                 free_percpu(tr->array_buffer.data);
9076                 tr->array_buffer.data = NULL;
9077                 return -ENOMEM;
9078         }
9079         tr->allocated_snapshot = allocate_snapshot;
9080
9081         /*
9082          * Only the top level trace array gets its snapshot allocated
9083          * from the kernel command line.
9084          */
9085         allocate_snapshot = false;
9086 #endif
9087
9088         return 0;
9089 }
9090
9091 static void free_trace_buffer(struct array_buffer *buf)
9092 {
9093         if (buf->buffer) {
9094                 ring_buffer_free(buf->buffer);
9095                 buf->buffer = NULL;
9096                 free_percpu(buf->data);
9097                 buf->data = NULL;
9098         }
9099 }
9100
9101 static void free_trace_buffers(struct trace_array *tr)
9102 {
9103         if (!tr)
9104                 return;
9105
9106         free_trace_buffer(&tr->array_buffer);
9107
9108 #ifdef CONFIG_TRACER_MAX_TRACE
9109         free_trace_buffer(&tr->max_buffer);
9110 #endif
9111 }
9112
9113 static void init_trace_flags_index(struct trace_array *tr)
9114 {
9115         int i;
9116
9117         /* Used by the trace options files */
9118         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9119                 tr->trace_flags_index[i] = i;
9120 }
9121
9122 static void __update_tracer_options(struct trace_array *tr)
9123 {
9124         struct tracer *t;
9125
9126         for (t = trace_types; t; t = t->next)
9127                 add_tracer_options(tr, t);
9128 }
9129
9130 static void update_tracer_options(struct trace_array *tr)
9131 {
9132         mutex_lock(&trace_types_lock);
9133         __update_tracer_options(tr);
9134         mutex_unlock(&trace_types_lock);
9135 }
9136
9137 /* Must have trace_types_lock held */
9138 struct trace_array *trace_array_find(const char *instance)
9139 {
9140         struct trace_array *tr, *found = NULL;
9141
9142         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9143                 if (tr->name && strcmp(tr->name, instance) == 0) {
9144                         found = tr;
9145                         break;
9146                 }
9147         }
9148
9149         return found;
9150 }
9151
9152 struct trace_array *trace_array_find_get(const char *instance)
9153 {
9154         struct trace_array *tr;
9155
9156         mutex_lock(&trace_types_lock);
9157         tr = trace_array_find(instance);
9158         if (tr)
9159                 tr->ref++;
9160         mutex_unlock(&trace_types_lock);
9161
9162         return tr;
9163 }
9164
9165 static int trace_array_create_dir(struct trace_array *tr)
9166 {
9167         int ret;
9168
9169         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9170         if (!tr->dir)
9171                 return -EINVAL;
9172
9173         ret = event_trace_add_tracer(tr->dir, tr);
9174         if (ret) {
9175                 tracefs_remove(tr->dir);
9176                 return ret;
9177         }
9178
9179         init_tracer_tracefs(tr, tr->dir);
9180         __update_tracer_options(tr);
9181
9182         return ret;
9183 }
9184
9185 static struct trace_array *trace_array_create(const char *name)
9186 {
9187         struct trace_array *tr;
9188         int ret;
9189
9190         ret = -ENOMEM;
9191         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9192         if (!tr)
9193                 return ERR_PTR(ret);
9194
9195         tr->name = kstrdup(name, GFP_KERNEL);
9196         if (!tr->name)
9197                 goto out_free_tr;
9198
9199         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9200                 goto out_free_tr;
9201
9202         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9203
9204         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9205
9206         raw_spin_lock_init(&tr->start_lock);
9207
9208         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9209
9210         tr->current_trace = &nop_trace;
9211
9212         INIT_LIST_HEAD(&tr->systems);
9213         INIT_LIST_HEAD(&tr->events);
9214         INIT_LIST_HEAD(&tr->hist_vars);
9215         INIT_LIST_HEAD(&tr->err_log);
9216
9217         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9218                 goto out_free_tr;
9219
9220         if (ftrace_allocate_ftrace_ops(tr) < 0)
9221                 goto out_free_tr;
9222
9223         ftrace_init_trace_array(tr);
9224
9225         init_trace_flags_index(tr);
9226
9227         if (trace_instance_dir) {
9228                 ret = trace_array_create_dir(tr);
9229                 if (ret)
9230                         goto out_free_tr;
9231         } else
9232                 __trace_early_add_events(tr);
9233
9234         list_add(&tr->list, &ftrace_trace_arrays);
9235
9236         tr->ref++;
9237
9238         return tr;
9239
9240  out_free_tr:
9241         ftrace_free_ftrace_ops(tr);
9242         free_trace_buffers(tr);
9243         free_cpumask_var(tr->tracing_cpumask);
9244         kfree(tr->name);
9245         kfree(tr);
9246
9247         return ERR_PTR(ret);
9248 }
9249
9250 static int instance_mkdir(const char *name)
9251 {
9252         struct trace_array *tr;
9253         int ret;
9254
9255         mutex_lock(&event_mutex);
9256         mutex_lock(&trace_types_lock);
9257
9258         ret = -EEXIST;
9259         if (trace_array_find(name))
9260                 goto out_unlock;
9261
9262         tr = trace_array_create(name);
9263
9264         ret = PTR_ERR_OR_ZERO(tr);
9265
9266 out_unlock:
9267         mutex_unlock(&trace_types_lock);
9268         mutex_unlock(&event_mutex);
9269         return ret;
9270 }
9271
9272 /**
9273  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9274  * @name: The name of the trace array to be looked up/created.
9275  *
9276  * Returns a pointer to the trace array with the given name, or
9277  * NULL if it cannot be found or created.
9278  *
9279  * NOTE: This function increments the reference counter associated with the
9280  * trace array returned. This makes sure it cannot be freed while in use.
9281  * Use trace_array_put() once the trace array is no longer needed.
9282  * If the trace_array is to be freed, trace_array_destroy() needs to
9283  * be called after the trace_array_put(), or simply let user space delete
9284  * it from the tracefs instances directory. But until the
9285  * trace_array_put() is called, user space cannot delete it.
9286  *
9287  */
9288 struct trace_array *trace_array_get_by_name(const char *name)
9289 {
9290         struct trace_array *tr;
9291
9292         mutex_lock(&event_mutex);
9293         mutex_lock(&trace_types_lock);
9294
9295         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9296                 if (tr->name && strcmp(tr->name, name) == 0)
9297                         goto out_unlock;
9298         }
9299
9300         tr = trace_array_create(name);
9301
9302         if (IS_ERR(tr))
9303                 tr = NULL;
9304 out_unlock:
9305         if (tr)
9306                 tr->ref++;
9307
9308         mutex_unlock(&trace_types_lock);
9309         mutex_unlock(&event_mutex);
9310         return tr;
9311 }
9312 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
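/*
 * A minimal sketch of how a kernel module might use this interface
 * (the instance name "my_instance" is just an example):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	(use the instance, e.g. enable events on it)
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should be removed)
 */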
9313
9314 static int __remove_instance(struct trace_array *tr)
9315 {
9316         int i;
9317
9318         /* Reference counter for a newly created trace array = 1. */
9319         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9320                 return -EBUSY;
9321
9322         list_del(&tr->list);
9323
9324         /* Disable all the flags that were enabled coming in */
9325         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9326                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9327                         set_tracer_flag(tr, 1 << i, 0);
9328         }
9329
9330         tracing_set_nop(tr);
9331         clear_ftrace_function_probes(tr);
9332         event_trace_del_tracer(tr);
9333         ftrace_clear_pids(tr);
9334         ftrace_destroy_function_files(tr);
9335         tracefs_remove(tr->dir);
9336         free_percpu(tr->last_func_repeats);
9337         free_trace_buffers(tr);
9338
9339         for (i = 0; i < tr->nr_topts; i++) {
9340                 kfree(tr->topts[i].topts);
9341         }
9342         kfree(tr->topts);
9343
9344         free_cpumask_var(tr->tracing_cpumask);
9345         kfree(tr->name);
9346         kfree(tr);
9347
9348         return 0;
9349 }
9350
9351 int trace_array_destroy(struct trace_array *this_tr)
9352 {
9353         struct trace_array *tr;
9354         int ret;
9355
9356         if (!this_tr)
9357                 return -EINVAL;
9358
9359         mutex_lock(&event_mutex);
9360         mutex_lock(&trace_types_lock);
9361
9362         ret = -ENODEV;
9363
9364         /* Make sure the trace array exists before destroying it. */
9365         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9366                 if (tr == this_tr) {
9367                         ret = __remove_instance(tr);
9368                         break;
9369                 }
9370         }
9371
9372         mutex_unlock(&trace_types_lock);
9373         mutex_unlock(&event_mutex);
9374
9375         return ret;
9376 }
9377 EXPORT_SYMBOL_GPL(trace_array_destroy);
9378
9379 static int instance_rmdir(const char *name)
9380 {
9381         struct trace_array *tr;
9382         int ret;
9383
9384         mutex_lock(&event_mutex);
9385         mutex_lock(&trace_types_lock);
9386
9387         ret = -ENODEV;
9388         tr = trace_array_find(name);
9389         if (tr)
9390                 ret = __remove_instance(tr);
9391
9392         mutex_unlock(&trace_types_lock);
9393         mutex_unlock(&event_mutex);
9394
9395         return ret;
9396 }
9397
9398 static __init void create_trace_instances(struct dentry *d_tracer)
9399 {
9400         struct trace_array *tr;
9401
9402         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9403                                                          instance_mkdir,
9404                                                          instance_rmdir);
9405         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9406                 return;
9407
9408         mutex_lock(&event_mutex);
9409         mutex_lock(&trace_types_lock);
9410
9411         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9412                 if (!tr->name)
9413                         continue;
9414                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9415                              "Failed to create instance directory\n"))
9416                         break;
9417         }
9418
9419         mutex_unlock(&trace_types_lock);
9420         mutex_unlock(&event_mutex);
9421 }
9422
9423 static void
9424 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9425 {
9426         struct trace_event_file *file;
9427         int cpu;
9428
9429         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9430                         tr, &show_traces_fops);
9431
9432         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9433                         tr, &set_tracer_fops);
9434
9435         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9436                           tr, &tracing_cpumask_fops);
9437
9438         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9439                           tr, &tracing_iter_fops);
9440
9441         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9442                           tr, &tracing_fops);
9443
9444         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9445                           tr, &tracing_pipe_fops);
9446
9447         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9448                           tr, &tracing_entries_fops);
9449
9450         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9451                           tr, &tracing_total_entries_fops);
9452
9453         trace_create_file("free_buffer", 0200, d_tracer,
9454                           tr, &tracing_free_buffer_fops);
9455
9456         trace_create_file("trace_marker", 0220, d_tracer,
9457                           tr, &tracing_mark_fops);
9458
9459         file = __find_event_file(tr, "ftrace", "print");
9460         if (file && file->dir)
9461                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9462                                   file, &event_trigger_fops);
9463         tr->trace_marker_file = file;
9464
9465         trace_create_file("trace_marker_raw", 0220, d_tracer,
9466                           tr, &tracing_mark_raw_fops);
9467
9468         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9469                           &trace_clock_fops);
9470
9471         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9472                           tr, &rb_simple_fops);
9473
9474         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9475                           &trace_time_stamp_mode_fops);
9476
9477         tr->buffer_percent = 50;
9478
9479         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9480                         tr, &buffer_percent_fops);
9481
9482         create_trace_options_dir(tr);
9483
9484         trace_create_maxlat_file(tr, d_tracer);
9485
9486         if (ftrace_create_function_files(tr, d_tracer))
9487                 MEM_FAIL(1, "Could not allocate function filter files");
9488
9489 #ifdef CONFIG_TRACER_SNAPSHOT
9490         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9491                           tr, &snapshot_fops);
9492 #endif
9493
9494         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9495                           tr, &tracing_err_log_fops);
9496
9497         for_each_tracing_cpu(cpu)
9498                 tracing_init_tracefs_percpu(tr, cpu);
9499
9500         ftrace_init_tracefs(tr, d_tracer);
9501 }
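
/*
 * For illustration, a minimal sketch of the pattern used by the calls above:
 * each control file pairs trace_create_file() with a file_operations whose
 * open/release handlers manage the trace_array reference. A hypothetical
 * read-only "example_stat" file (example_stat_read and example_stat_fops are
 * invented names for the example) would look like:
 *
 *	static ssize_t example_stat_read(struct file *filp, char __user *ubuf,
 *					 size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_array *tr = filp->private_data;
 *		char buf[32];
 *		int r;
 *
 *		r = scnprintf(buf, sizeof(buf), "%d\n", tr->buffer_percent);
 *		return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 *	}
 *
 *	static const struct file_operations example_stat_fops = {
 *		.open		= tracing_open_generic_tr,
 *		.read		= example_stat_read,
 *		.release	= tracing_release_generic_tr,
 *		.llseek		= default_llseek,
 *	};
 *
 *	trace_create_file("example_stat", TRACE_MODE_READ, d_tracer,
 *			  tr, &example_stat_fops);
 */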
9502
9503 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9504 {
9505         struct vfsmount *mnt;
9506         struct file_system_type *type;
9507
9508         /*
9509          * To maintain backward compatibility for tools that mount
9510          * debugfs to get to the tracing facility, tracefs is automatically
9511          * mounted to the debugfs/tracing directory.
9512          */
9513         type = get_fs_type("tracefs");
9514         if (!type)
9515                 return NULL;
9516         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9517         put_filesystem(type);
9518         if (IS_ERR(mnt))
9519                 return NULL;
9520         mntget(mnt);
9521
9522         return mnt;
9523 }
9524
9525 /**
9526  * tracing_init_dentry - initialize top level trace array
9527  *
9528  * This is called when creating files or directories in the tracing
9529  * directory. It is called via fs_initcall() by the boot up code, and
9530  * returns 0 on success or a negative error code on failure.
9531  */
9532 int tracing_init_dentry(void)
9533 {
9534         struct trace_array *tr = &global_trace;
9535
9536         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9537                 pr_warn("Tracing disabled due to lockdown\n");
9538                 return -EPERM;
9539         }
9540
9541         /* The top level trace array uses NULL as parent */
9542         if (tr->dir)
9543                 return 0;
9544
9545         if (WARN_ON(!tracefs_initialized()))
9546                 return -ENODEV;
9547
9548         /*
9549          * As there may still be users that expect the tracing
9550          * files to exist in debugfs/tracing, we must automount
9551          * the tracefs file system there, so older tools still
9552          * work with the newer kernel.
9553          */
9554         tr->dir = debugfs_create_automount("tracing", NULL,
9555                                            trace_automount, NULL);
9556
9557         return 0;
9558 }
9559
9560 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9561 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9562
9563 static struct workqueue_struct *eval_map_wq __initdata;
9564 static struct work_struct eval_map_work __initdata;
9565
9566 static void __init eval_map_work_func(struct work_struct *work)
9567 {
9568         int len;
9569
9570         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9571         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9572 }
9573
9574 static int __init trace_eval_init(void)
9575 {
9576         INIT_WORK(&eval_map_work, eval_map_work_func);
9577
9578         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9579         if (!eval_map_wq) {
9580                 pr_err("Unable to allocate eval_map_wq\n");
9581                 /* Workqueue allocation failed: do the work synchronously here */
9582                 eval_map_work_func(&eval_map_work);
9583                 return -ENOMEM;
9584         }
9585
9586         queue_work(eval_map_wq, &eval_map_work);
9587         return 0;
9588 }
9589
9590 static int __init trace_eval_sync(void)
9591 {
9592         /* Make sure the eval map updates are finished */
9593         if (eval_map_wq)
9594                 destroy_workqueue(eval_map_wq);
9595         return 0;
9596 }
9597
9598 late_initcall_sync(trace_eval_sync);
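
/*
 * For illustration, the generic shape of the deferral used by
 * trace_eval_init() and trace_eval_sync() above: the work is queued when
 * trace_eval_init() runs, and destroy_workqueue() at late_initcall_sync time
 * drains anything still pending before the workqueue is released. All names
 * below are invented for the example.
 *
 *	static struct workqueue_struct *deferred_wq __initdata;
 *	static struct work_struct deferred_work __initdata;
 *
 *	static void __init deferred_work_func(struct work_struct *work)
 *	{
 *		pr_info("deferred setup done\n");
 *	}
 *
 *	static int __init defer_init(void)
 *	{
 *		INIT_WORK(&deferred_work, deferred_work_func);
 *		deferred_wq = alloc_workqueue("deferred", WQ_UNBOUND, 0);
 *		if (!deferred_wq) {
 *			deferred_work_func(&deferred_work);	// synchronous fallback
 *			return -ENOMEM;
 *		}
 *		queue_work(deferred_wq, &deferred_work);
 *		return 0;
 *	}
 *
 *	static int __init defer_sync(void)
 *	{
 *		if (deferred_wq)
 *			destroy_workqueue(deferred_wq);	// waits for deferred_work
 *		return 0;
 *	}
 *	late_initcall_sync(defer_sync);
 */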
9599
9600
9601 #ifdef CONFIG_MODULES
9602 static void trace_module_add_evals(struct module *mod)
9603 {
9604         if (!mod->num_trace_evals)
9605                 return;
9606
9607         /*
9608          * Modules with bad taint do not have events created; do
9609          * not bother with their eval maps (enums) either.
9610          */
9611         if (trace_module_has_bad_taint(mod))
9612                 return;
9613
9614         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9615 }
9616
9617 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9618 static void trace_module_remove_evals(struct module *mod)
9619 {
9620         union trace_eval_map_item *map;
9621         union trace_eval_map_item **last = &trace_eval_maps;
9622
9623         if (!mod->num_trace_evals)
9624                 return;
9625
9626         mutex_lock(&trace_eval_mutex);
9627
9628         map = trace_eval_maps;
9629
9630         while (map) {
9631                 if (map->head.mod == mod)
9632                         break;
9633                 map = trace_eval_jmp_to_tail(map);
9634                 last = &map->tail.next;
9635                 map = map->tail.next;
9636         }
9637         if (!map)
9638                 goto out;
9639
9640         *last = trace_eval_jmp_to_tail(map)->tail.next;
9641         kfree(map);
9642  out:
9643         mutex_unlock(&trace_eval_mutex);
9644 }
9645 #else
9646 static inline void trace_module_remove_evals(struct module *mod) { }
9647 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
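
/*
 * For illustration, trace_module_remove_evals() above uses the classic
 * pointer-to-pointer unlink idiom: 'last' always points at the link that
 * references the current node, so the node can be spliced out without
 * special-casing the list head. The same idiom on a plain singly linked
 * list (struct item and the names below are invented for the example):
 *
 *	struct item { int key; struct item *next; };
 *
 *	static void remove_key(struct item **head, int key)
 *	{
 *		struct item **last = head;
 *		struct item *cur;
 *
 *		for (cur = *head; cur; cur = cur->next) {
 *			if (cur->key == key)
 *				break;
 *			last = &cur->next;
 *		}
 *		if (!cur)
 *			return;
 *		*last = cur->next;	// unlink without walking the list again
 *		kfree(cur);
 *	}
 */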
9648
9649 static int trace_module_notify(struct notifier_block *self,
9650                                unsigned long val, void *data)
9651 {
9652         struct module *mod = data;
9653
9654         switch (val) {
9655         case MODULE_STATE_COMING:
9656                 trace_module_add_evals(mod);
9657                 break;
9658         case MODULE_STATE_GOING:
9659                 trace_module_remove_evals(mod);
9660                 break;
9661         }
9662
9663         return NOTIFY_OK;
9664 }
9665
9666 static struct notifier_block trace_module_nb = {
9667         .notifier_call = trace_module_notify,
9668         .priority = 0,
9669 };
9670 #endif /* CONFIG_MODULES */
9671
9672 static __init int tracer_init_tracefs(void)
9673 {
9674         int ret;
9675
9676         trace_access_lock_init();
9677
9678         ret = tracing_init_dentry();
9679         if (ret)
9680                 return 0;
9681
9682         event_trace_init();
9683
9684         init_tracer_tracefs(&global_trace, NULL);
9685         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9686
9687         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9688                         &global_trace, &tracing_thresh_fops);
9689
9690         trace_create_file("README", TRACE_MODE_READ, NULL,
9691                         NULL, &tracing_readme_fops);
9692
9693         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9694                         NULL, &tracing_saved_cmdlines_fops);
9695
9696         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9697                           NULL, &tracing_saved_cmdlines_size_fops);
9698
9699         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9700                         NULL, &tracing_saved_tgids_fops);
9701
9702         trace_eval_init();
9703
9704         trace_create_eval_file(NULL);
9705
9706 #ifdef CONFIG_MODULES
9707         register_module_notifier(&trace_module_nb);
9708 #endif
9709
9710 #ifdef CONFIG_DYNAMIC_FTRACE
9711         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9712                         NULL, &tracing_dyn_info_fops);
9713 #endif
9714
9715         create_trace_instances(NULL);
9716
9717         update_tracer_options(&global_trace);
9718
9719         return 0;
9720 }
9721
9722 fs_initcall(tracer_init_tracefs);
9723
9724 static int trace_panic_handler(struct notifier_block *this,
9725                                unsigned long event, void *unused)
9726 {
9727         if (ftrace_dump_on_oops)
9728                 ftrace_dump(ftrace_dump_on_oops);
9729         return NOTIFY_OK;
9730 }
9731
9732 static struct notifier_block trace_panic_notifier = {
9733         .notifier_call  = trace_panic_handler,
9734         .next           = NULL,
9735         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9736 };
9737
9738 static int trace_die_handler(struct notifier_block *self,
9739                              unsigned long val,
9740                              void *data)
9741 {
9742         switch (val) {
9743         case DIE_OOPS:
9744                 if (ftrace_dump_on_oops)
9745                         ftrace_dump(ftrace_dump_on_oops);
9746                 break;
9747         default:
9748                 break;
9749         }
9750         return NOTIFY_OK;
9751 }
9752
9753 static struct notifier_block trace_die_notifier = {
9754         .notifier_call = trace_die_handler,
9755         .priority = 200
9756 };
9757
9758 /*
9759  * printk is set to a max of 1024; we really don't need it that big.
9760  * Nothing should be printing 1000 characters anyway.
9761  */
9762 #define TRACE_MAX_PRINT         1000
9763
9764 /*
9765  * Define here KERN_TRACE so that we have one place to modify
9766  * it if we decide to change what log level the ftrace dump
9767  * should be at.
9768  */
9769 #define KERN_TRACE              KERN_EMERG
9770
9771 void
9772 trace_printk_seq(struct trace_seq *s)
9773 {
9774         /* Probably should print a warning here. */
9775         if (s->seq.len >= TRACE_MAX_PRINT)
9776                 s->seq.len = TRACE_MAX_PRINT;
9777
9778         /*
9779          * More paranoid code. Although the buffer size is set to
9780          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9781          * an extra layer of protection.
9782          */
9783         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9784                 s->seq.len = s->seq.size - 1;
9785
9786         /* should already be NUL-terminated, but we are paranoid. */
9787         s->buffer[s->seq.len] = 0;
9788
9789         printk(KERN_TRACE "%s", s->buffer);
9790
9791         trace_seq_init(s);
9792 }
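
/*
 * For illustration, a minimal sketch of how a dump path builds a line in a
 * struct trace_seq and flushes it to the console with trace_printk_seq();
 * ftrace_dump() below does exactly this with the iterator's own seq. The
 * names example_seq and example_dump_line are invented for the example.
 *
 *	static struct trace_seq example_seq;
 *
 *	static void example_dump_line(int cpu, u64 ts)
 *	{
 *		trace_seq_init(&example_seq);
 *		trace_seq_printf(&example_seq, "cpu=%d ts=%llu\n", cpu, ts);
 *		trace_printk_seq(&example_seq);	// prints at KERN_TRACE, then re-inits
 *	}
 */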
9793
9794 void trace_init_global_iter(struct trace_iterator *iter)
9795 {
9796         iter->tr = &global_trace;
9797         iter->trace = iter->tr->current_trace;
9798         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9799         iter->array_buffer = &global_trace.array_buffer;
9800
9801         if (iter->trace && iter->trace->open)
9802                 iter->trace->open(iter);
9803
9804         /* Annotate start of buffers if we had overruns */
9805         if (ring_buffer_overruns(iter->array_buffer->buffer))
9806                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9807
9808         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9809         if (trace_clocks[iter->tr->clock_id].in_ns)
9810                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9811 }
9812
9813 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9814 {
9815         /* use static because iter can be a bit big for the stack */
9816         static struct trace_iterator iter;
9817         static atomic_t dump_running;
9818         struct trace_array *tr = &global_trace;
9819         unsigned int old_userobj;
9820         unsigned long flags;
9821         int cnt = 0, cpu;
9822
9823         /* Only allow one dump user at a time. */
9824         if (atomic_inc_return(&dump_running) != 1) {
9825                 atomic_dec(&dump_running);
9826                 return;
9827         }
9828
9829         /*
9830          * Always turn off tracing when we dump.
9831          * We don't need to show trace output of what happens
9832          * between multiple crashes.
9833          *
9834          * If the user does a sysrq-z, then they can re-enable
9835          * tracing with echo 1 > tracing_on.
9836          */
9837         tracing_off();
9838
9839         local_irq_save(flags);
9840
9841         /* Simulate the iterator */
9842         trace_init_global_iter(&iter);
9843         /* Cannot use kmalloc for iter.temp and iter.fmt: this may run in NMI/panic context */
9844         iter.temp = static_temp_buf;
9845         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9846         iter.fmt = static_fmt_buf;
9847         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9848
9849         for_each_tracing_cpu(cpu) {
9850                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9851         }
9852
9853         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9854
9855         /* don't look at user memory in panic mode */
9856         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9857
9858         switch (oops_dump_mode) {
9859         case DUMP_ALL:
9860                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9861                 break;
9862         case DUMP_ORIG:
9863                 iter.cpu_file = raw_smp_processor_id();
9864                 break;
9865         case DUMP_NONE:
9866                 goto out_enable;
9867         default:
9868                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9869                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9870         }
9871
9872         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9873
9874         /* Did function tracer already get disabled? */
9875         if (ftrace_is_dead()) {
9876                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9877                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9878         }
9879
9880         /*
9881          * We need to stop all tracing on all CPUs to read
9882          * the next buffer. This is a bit expensive, but is
9883          * not done often. We print everything we can read,
9884          * and then release the locks again.
9885          */
9886
9887         while (!trace_empty(&iter)) {
9888
9889                 if (!cnt)
9890                         printk(KERN_TRACE "---------------------------------\n");
9891
9892                 cnt++;
9893
9894                 trace_iterator_reset(&iter);
9895                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9896
9897                 if (trace_find_next_entry_inc(&iter) != NULL) {
9898                         int ret;
9899
9900                         ret = print_trace_line(&iter);
9901                         if (ret != TRACE_TYPE_NO_CONSUME)
9902                                 trace_consume(&iter);
9903                 }
9904                 touch_nmi_watchdog();
9905
9906                 trace_printk_seq(&iter.seq);
9907         }
9908
9909         if (!cnt)
9910                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9911         else
9912                 printk(KERN_TRACE "---------------------------------\n");
9913
9914  out_enable:
9915         tr->trace_flags |= old_userobj;
9916
9917         for_each_tracing_cpu(cpu) {
9918                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9919         }
9920         atomic_dec(&dump_running);
9921         local_irq_restore(flags);
9922 }
9923 EXPORT_SYMBOL_GPL(ftrace_dump);
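
/*
 * For illustration, ftrace_dump() is exported so other kernel code can spill
 * the ring buffer when it hits a fatal condition; the helper below and its
 * error condition are invented for the example. DUMP_ORIG limits the dump to
 * the CPU that called it, DUMP_ALL dumps every CPU. The same dump can be
 * triggered automatically on an oops by booting with ftrace_dump_on_oops or
 * ftrace_dump_on_oops=orig_cpu.
 *
 *	static void example_fatal_path(int err)
 *	{
 *		pr_emerg("example: unrecoverable error %d, dumping trace\n", err);
 *		ftrace_dump(DUMP_ORIG);
 *	}
 */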
9924
9925 #define WRITE_BUFSIZE  4096
9926
9927 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9928                                 size_t count, loff_t *ppos,
9929                                 int (*createfn)(const char *))
9930 {
9931         char *kbuf, *buf, *tmp;
9932         int ret = 0;
9933         size_t done = 0;
9934         size_t size;
9935
9936         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9937         if (!kbuf)
9938                 return -ENOMEM;
9939
9940         while (done < count) {
9941                 size = count - done;
9942
9943                 if (size >= WRITE_BUFSIZE)
9944                         size = WRITE_BUFSIZE - 1;
9945
9946                 if (copy_from_user(kbuf, buffer + done, size)) {
9947                         ret = -EFAULT;
9948                         goto out;
9949                 }
9950                 kbuf[size] = '\0';
9951                 buf = kbuf;
9952                 do {
9953                         tmp = strchr(buf, '\n');
9954                         if (tmp) {
9955                                 *tmp = '\0';
9956                                 size = tmp - buf + 1;
9957                         } else {
9958                                 size = strlen(buf);
9959                                 if (done + size < count) {
9960                                         if (buf != kbuf)
9961                                                 break;
9962                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9963                                         pr_warn("Line length is too long: Should be less than %d\n",
9964                                                 WRITE_BUFSIZE - 2);
9965                                         ret = -EINVAL;
9966                                         goto out;
9967                                 }
9968                         }
9969                         done += size;
9970
9971                         /* Remove comments */
9972                         tmp = strchr(buf, '#');
9973
9974                         if (tmp)
9975                                 *tmp = '\0';
9976
9977                         ret = createfn(buf);
9978                         if (ret)
9979                                 goto out;
9980                         buf += size;
9981
9982                 } while (done < count);
9983         }
9984         ret = done;
9985
9986 out:
9987         kfree(kbuf);
9988
9989         return ret;
9990 }
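
/*
 * For illustration, a minimal sketch of how a command-style tracefs file
 * (such as kprobe_events) wires its write() handler to
 * trace_parse_run_command(); the caller only supplies a per-line callback.
 * The names example_create_cmd and example_write are invented for the
 * example.
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		// called once per newline-separated, comment-stripped command
 *		pr_debug("got command: %s\n", raw_command);
 *		return 0;	// a non-zero return aborts the rest of the write
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buffer,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create_cmd);
 *	}
 */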
9991
9992 __init static int tracer_alloc_buffers(void)
9993 {
9994         int ring_buf_size;
9995         int ret = -ENOMEM;
9996
9997
9998         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9999                 pr_warn("Tracing disabled due to lockdown\n");
10000                 return -EPERM;
10001         }
10002
10003         /*
10004          * Make sure we don't accidentally add more trace options
10005          * than we have bits for.
10006          */
10007         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10008
10009         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10010                 goto out;
10011
10012         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10013                 goto out_free_buffer_mask;
10014
10015         /* Only allocate trace_printk buffers if a trace_printk exists */
10016         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10017                 /* Must be called before global_trace.buffer is allocated */
10018                 trace_printk_init_buffers();
10019
10020         /* To save memory, keep the ring buffer size to its minimum */
10021         if (ring_buffer_expanded)
10022                 ring_buf_size = trace_buf_size;
10023         else
10024                 ring_buf_size = 1;
10025
10026         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10027         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10028
10029         raw_spin_lock_init(&global_trace.start_lock);
10030
10031         /*
10032          * The prepare callback allocates some memory for the ring buffer. We
10033          * don't free the buffer if the CPU goes down. If we were to free
10034          * the buffer, then the user would lose any trace that was in the
10035          * buffer. The memory will be removed once the "instance" is removed.
10036          */
10037         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10038                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10039                                       NULL);
10040         if (ret < 0)
10041                 goto out_free_cpumask;
10042         /* Used for event triggers */
10043         ret = -ENOMEM;
10044         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10045         if (!temp_buffer)
10046                 goto out_rm_hp_state;
10047
10048         if (trace_create_savedcmd() < 0)
10049                 goto out_free_temp_buffer;
10050
10051         /* TODO: make the number of buffers hot pluggable with CPUS */
10052         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10053                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10054                 goto out_free_savedcmd;
10055         }
10056
10057         if (global_trace.buffer_disabled)
10058                 tracing_off();
10059
10060         if (trace_boot_clock) {
10061                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10062                 if (ret < 0)
10063                         pr_warn("Trace clock %s not defined, going back to default\n",
10064                                 trace_boot_clock);
10065         }
10066
10067         /*
10068          * register_tracer() might reference current_trace, so it
10069          * needs to be set before we register anything. This is
10070          * just a bootstrap of current_trace anyway.
10071          */
10072         global_trace.current_trace = &nop_trace;
10073
10074         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10075
10076         ftrace_init_global_array_ops(&global_trace);
10077
10078         init_trace_flags_index(&global_trace);
10079
10080         register_tracer(&nop_trace);
10081
10082         /* Function tracing may start here (via kernel command line) */
10083         init_function_trace();
10084
10085         /* All seems OK, enable tracing */
10086         tracing_disabled = 0;
10087
10088         atomic_notifier_chain_register(&panic_notifier_list,
10089                                        &trace_panic_notifier);
10090
10091         register_die_notifier(&trace_die_notifier);
10092
10093         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10094
10095         INIT_LIST_HEAD(&global_trace.systems);
10096         INIT_LIST_HEAD(&global_trace.events);
10097         INIT_LIST_HEAD(&global_trace.hist_vars);
10098         INIT_LIST_HEAD(&global_trace.err_log);
10099         list_add(&global_trace.list, &ftrace_trace_arrays);
10100
10101         apply_trace_boot_options();
10102
10103         register_snapshot_cmd();
10104
10105         test_can_verify();
10106
10107         return 0;
10108
10109 out_free_savedcmd:
10110         free_saved_cmdlines_buffer(savedcmd);
10111 out_free_temp_buffer:
10112         ring_buffer_free(temp_buffer);
10113 out_rm_hp_state:
10114         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10115 out_free_cpumask:
10116         free_cpumask_var(global_trace.tracing_cpumask);
10117 out_free_buffer_mask:
10118         free_cpumask_var(tracing_buffer_mask);
10119 out:
10120         return ret;
10121 }
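
/*
 * For illustration, tracer_alloc_buffers() above follows the standard kernel
 * goto-unwind idiom: each setup step gets an error label, and the labels
 * release resources in reverse order of acquisition so a failure at any step
 * frees exactly what was already allocated. Reduced to its shape (all names
 * below are invented for the example):
 *
 *	static void *a, *b, *c;
 *
 *	static int __init example_setup(void)
 *	{
 *		int ret = -ENOMEM;
 *
 *		a = kmalloc(64, GFP_KERNEL);
 *		if (!a)
 *			goto out;
 *		b = kmalloc(64, GFP_KERNEL);
 *		if (!b)
 *			goto out_free_a;
 *		c = kmalloc(64, GFP_KERNEL);
 *		if (!c)
 *			goto out_free_b;
 *		return 0;
 *
 *	out_free_b:
 *		kfree(b);
 *	out_free_a:
 *		kfree(a);
 *	out:
 *		return ret;
 *	}
 */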
10122
10123 void __init early_trace_init(void)
10124 {
10125         if (tracepoint_printk) {
10126                 tracepoint_print_iter =
10127                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10128                 if (MEM_FAIL(!tracepoint_print_iter,
10129                              "Failed to allocate trace iterator\n"))
10130                         tracepoint_printk = 0;
10131                 else
10132                         static_key_enable(&tracepoint_printk_key.key);
10133         }
10134         tracer_alloc_buffers();
10135 }
10136
10137 void __init trace_init(void)
10138 {
10139         trace_event_init();
10140 }
10141
10142 __init static void clear_boot_tracer(void)
10143 {
10144         /*
10145          * The default bootup tracer name points into an init section.
10146          * This function is called at late_initcall time. If the boot
10147          * tracer was never registered, clear the pointer out to prevent
10148          * a later registration from accessing the init memory that is
10149          * about to be freed.
10150          */
10151         if (!default_bootup_tracer)
10152                 return;
10153
10154         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10155                default_bootup_tracer);
10156         default_bootup_tracer = NULL;
10157 }
10158
10159 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10160 __init static void tracing_set_default_clock(void)
10161 {
10162         /* sched_clock_stable() is determined in late_initcall */
10163         if (!trace_boot_clock && !sched_clock_stable()) {
10164                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10165                         pr_warn("Can not set tracing clock due to lockdown\n");
10166                         return;
10167                 }
10168
10169                 printk(KERN_WARNING
10170                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10171                        "If you want to keep using the local clock, then add:\n"
10172                        "  \"trace_clock=local\"\n"
10173                        "on the kernel command line\n");
10174                 tracing_set_clock(&global_trace, "global");
10175         }
10176 }
10177 #else
10178 static inline void tracing_set_default_clock(void) { }
10179 #endif
10180
10181 __init static int late_trace_init(void)
10182 {
10183         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10184                 static_key_disable(&tracepoint_printk_key.key);
10185                 tracepoint_printk = 0;
10186         }
10187
10188         tracing_set_default_clock();
10189         clear_boot_tracer();
10190         return 0;
10191 }
10192
10193 late_initcall_sync(late_trace_init);