Linux 6.9-rc1
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will lurk into the ring-buffer to count the
61  * entries inserted during the selftest although some concurrent
62  * insertions into the ring-buffer such as trace_printk could occurred
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  * Set instance name if you want to dump the specific trace instance
134  * Multiple instance dump is also supported, and instances are seperated
135  * by commas.
136  */
137 /* Set to string format zero to disable by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149
150 union trace_eval_map_item;
151
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * than "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160
161 static DEFINE_MUTEX(trace_eval_mutex);
162
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
175
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195
196 static int __init set_cmdline_ftrace(char *str)
197 {
198         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199         default_bootup_tracer = bootup_tracer_buf;
200         /* We are using ftrace early, expand it */
201         trace_set_ring_buffer_expanded(NULL);
202         return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205
206 int ftrace_dump_on_oops_enabled(void)
207 {
208         if (!strcmp("0", ftrace_dump_on_oops))
209                 return 0;
210         else
211                 return 1;
212 }
213
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216         if (!*str) {
217                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218                 return 1;
219         }
220
221         if (*str == ',') {
222                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223                 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224                 return 1;
225         }
226
227         if (*str++ == '=') {
228                 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229                 return 1;
230         }
231
232         return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
235
236 static int __init stop_trace_on_warning(char *str)
237 {
238         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239                 __disable_trace_on_warning = 1;
240         return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243
244 static int __init boot_alloc_snapshot(char *str)
245 {
246         char *slot = boot_snapshot_info + boot_snapshot_index;
247         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248         int ret;
249
250         if (str[0] == '=') {
251                 str++;
252                 if (strlen(str) >= left)
253                         return -1;
254
255                 ret = snprintf(slot, left, "%s\t", str);
256                 boot_snapshot_index += ret;
257         } else {
258                 allocate_snapshot = true;
259                 /* We also need the main ring buffer expanded */
260                 trace_set_ring_buffer_expanded(NULL);
261         }
262         return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265
266
267 static int __init boot_snapshot(char *str)
268 {
269         snapshot_at_boot = true;
270         boot_alloc_snapshot(str);
271         return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274
275
276 static int __init boot_instance(char *str)
277 {
278         char *slot = boot_instance_info + boot_instance_index;
279         int left = sizeof(boot_instance_info) - boot_instance_index;
280         int ret;
281
282         if (strlen(str) >= left)
283                 return -1;
284
285         ret = snprintf(slot, left, "%s\t", str);
286         boot_instance_index += ret;
287
288         return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291
292
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294
295 static int __init set_trace_boot_options(char *str)
296 {
297         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298         return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304
305 static int __init set_trace_boot_clock(char *str)
306 {
307         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308         trace_boot_clock = trace_boot_clock_buf;
309         return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312
313 static int __init set_tracepoint_printk(char *str)
314 {
315         /* Ignore the "tp_printk_stop_on_boot" param */
316         if (*str == '_')
317                 return 0;
318
319         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320                 tracepoint_printk = 1;
321         return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327         tracepoint_printk_stop_on_boot = true;
328         return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331
332 unsigned long long ns2usecs(u64 nsec)
333 {
334         nsec += 500;
335         do_div(nsec, 1000);
336         return nsec;
337 }
338
339 static void
340 trace_process_export(struct trace_export *export,
341                struct ring_buffer_event *event, int flag)
342 {
343         struct trace_entry *entry;
344         unsigned int size = 0;
345
346         if (export->flags & flag) {
347                 entry = ring_buffer_event_data(event);
348                 size = ring_buffer_event_length(event);
349                 export->write(export, entry, size);
350         }
351 }
352
353 static DEFINE_MUTEX(ftrace_export_lock);
354
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363         if (export->flags & TRACE_EXPORT_FUNCTION)
364                 static_branch_inc(&trace_function_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_EVENT)
367                 static_branch_inc(&trace_event_exports_enabled);
368
369         if (export->flags & TRACE_EXPORT_MARKER)
370                 static_branch_inc(&trace_marker_exports_enabled);
371 }
372
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375         if (export->flags & TRACE_EXPORT_FUNCTION)
376                 static_branch_dec(&trace_function_exports_enabled);
377
378         if (export->flags & TRACE_EXPORT_EVENT)
379                 static_branch_dec(&trace_event_exports_enabled);
380
381         if (export->flags & TRACE_EXPORT_MARKER)
382                 static_branch_dec(&trace_marker_exports_enabled);
383 }
384
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387         struct trace_export *export;
388
389         preempt_disable_notrace();
390
391         export = rcu_dereference_raw_check(ftrace_exports_list);
392         while (export) {
393                 trace_process_export(export, event, flag);
394                 export = rcu_dereference_raw_check(export->next);
395         }
396
397         preempt_enable_notrace();
398 }
399
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403         rcu_assign_pointer(export->next, *list);
404         /*
405          * We are entering export into the list but another
406          * CPU might be walking that list. We need to make sure
407          * the export->next pointer is valid before another CPU sees
408          * the export pointer included into the list.
409          */
410         rcu_assign_pointer(*list, export);
411 }
412
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416         struct trace_export **p;
417
418         for (p = list; *p != NULL; p = &(*p)->next)
419                 if (*p == export)
420                         break;
421
422         if (*p != export)
423                 return -1;
424
425         rcu_assign_pointer(*p, (*p)->next);
426
427         return 0;
428 }
429
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433         ftrace_exports_enable(export);
434
435         add_trace_export(list, export);
436 }
437
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441         int ret;
442
443         ret = rm_trace_export(list, export);
444         ftrace_exports_disable(export);
445
446         return ret;
447 }
448
449 int register_ftrace_export(struct trace_export *export)
450 {
451         if (WARN_ON_ONCE(!export->write))
452                 return -1;
453
454         mutex_lock(&ftrace_export_lock);
455
456         add_ftrace_export(&ftrace_exports_list, export);
457
458         mutex_unlock(&ftrace_export_lock);
459
460         return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466         int ret;
467
468         mutex_lock(&ftrace_export_lock);
469
470         ret = rm_ftrace_export(&ftrace_exports_list, export);
471
472         mutex_unlock(&ftrace_export_lock);
473
474         return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
477
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS                                             \
480         (FUNCTION_DEFAULT_FLAGS |                                       \
481          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
482          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
483          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
484          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
485          TRACE_ITER_HASH_PTR)
486
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
489                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
494
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500         .trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502
503 void trace_set_ring_buffer_expanded(struct trace_array *tr)
504 {
505         if (!tr)
506                 tr = &global_trace;
507         tr->ring_buffer_expanded = true;
508 }
509
510 LIST_HEAD(ftrace_trace_arrays);
511
512 int trace_array_get(struct trace_array *this_tr)
513 {
514         struct trace_array *tr;
515         int ret = -ENODEV;
516
517         mutex_lock(&trace_types_lock);
518         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
519                 if (tr == this_tr) {
520                         tr->ref++;
521                         ret = 0;
522                         break;
523                 }
524         }
525         mutex_unlock(&trace_types_lock);
526
527         return ret;
528 }
529
530 static void __trace_array_put(struct trace_array *this_tr)
531 {
532         WARN_ON(!this_tr->ref);
533         this_tr->ref--;
534 }
535
536 /**
537  * trace_array_put - Decrement the reference counter for this trace array.
538  * @this_tr : pointer to the trace array
539  *
540  * NOTE: Use this when we no longer need the trace array returned by
541  * trace_array_get_by_name(). This ensures the trace array can be later
542  * destroyed.
543  *
544  */
545 void trace_array_put(struct trace_array *this_tr)
546 {
547         if (!this_tr)
548                 return;
549
550         mutex_lock(&trace_types_lock);
551         __trace_array_put(this_tr);
552         mutex_unlock(&trace_types_lock);
553 }
554 EXPORT_SYMBOL_GPL(trace_array_put);
555
556 int tracing_check_open_get_tr(struct trace_array *tr)
557 {
558         int ret;
559
560         ret = security_locked_down(LOCKDOWN_TRACEFS);
561         if (ret)
562                 return ret;
563
564         if (tracing_disabled)
565                 return -ENODEV;
566
567         if (tr && trace_array_get(tr) < 0)
568                 return -ENODEV;
569
570         return 0;
571 }
572
573 int call_filter_check_discard(struct trace_event_call *call, void *rec,
574                               struct trace_buffer *buffer,
575                               struct ring_buffer_event *event)
576 {
577         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
578             !filter_match_preds(call->filter, rec)) {
579                 __trace_event_discard_commit(buffer, event);
580                 return 1;
581         }
582
583         return 0;
584 }
585
586 /**
587  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
588  * @filtered_pids: The list of pids to check
589  * @search_pid: The PID to find in @filtered_pids
590  *
591  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
592  */
593 bool
594 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
595 {
596         return trace_pid_list_is_set(filtered_pids, search_pid);
597 }
598
599 /**
600  * trace_ignore_this_task - should a task be ignored for tracing
601  * @filtered_pids: The list of pids to check
602  * @filtered_no_pids: The list of pids not to be traced
603  * @task: The task that should be ignored if not filtered
604  *
605  * Checks if @task should be traced or not from @filtered_pids.
606  * Returns true if @task should *NOT* be traced.
607  * Returns false if @task should be traced.
608  */
609 bool
610 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
611                        struct trace_pid_list *filtered_no_pids,
612                        struct task_struct *task)
613 {
614         /*
615          * If filtered_no_pids is not empty, and the task's pid is listed
616          * in filtered_no_pids, then return true.
617          * Otherwise, if filtered_pids is empty, that means we can
618          * trace all tasks. If it has content, then only trace pids
619          * within filtered_pids.
620          */
621
622         return (filtered_pids &&
623                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
624                 (filtered_no_pids &&
625                  trace_find_filtered_pid(filtered_no_pids, task->pid));
626 }
627
628 /**
629  * trace_filter_add_remove_task - Add or remove a task from a pid_list
630  * @pid_list: The list to modify
631  * @self: The current task for fork or NULL for exit
632  * @task: The task to add or remove
633  *
634  * If adding a task, if @self is defined, the task is only added if @self
635  * is also included in @pid_list. This happens on fork and tasks should
636  * only be added when the parent is listed. If @self is NULL, then the
637  * @task pid will be removed from the list, which would happen on exit
638  * of a task.
639  */
640 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
641                                   struct task_struct *self,
642                                   struct task_struct *task)
643 {
644         if (!pid_list)
645                 return;
646
647         /* For forks, we only add if the forking task is listed */
648         if (self) {
649                 if (!trace_find_filtered_pid(pid_list, self->pid))
650                         return;
651         }
652
653         /* "self" is set for forks, and NULL for exits */
654         if (self)
655                 trace_pid_list_set(pid_list, task->pid);
656         else
657                 trace_pid_list_clear(pid_list, task->pid);
658 }
659
660 /**
661  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
662  * @pid_list: The pid list to show
663  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
664  * @pos: The position of the file
665  *
666  * This is used by the seq_file "next" operation to iterate the pids
667  * listed in a trace_pid_list structure.
668  *
669  * Returns the pid+1 as we want to display pid of zero, but NULL would
670  * stop the iteration.
671  */
672 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
673 {
674         long pid = (unsigned long)v;
675         unsigned int next;
676
677         (*pos)++;
678
679         /* pid already is +1 of the actual previous bit */
680         if (trace_pid_list_next(pid_list, pid, &next) < 0)
681                 return NULL;
682
683         pid = next;
684
685         /* Return pid + 1 to allow zero to be represented */
686         return (void *)(pid + 1);
687 }
688
689 /**
690  * trace_pid_start - Used for seq_file to start reading pid lists
691  * @pid_list: The pid list to show
692  * @pos: The position of the file
693  *
694  * This is used by seq_file "start" operation to start the iteration
695  * of listing pids.
696  *
697  * Returns the pid+1 as we want to display pid of zero, but NULL would
698  * stop the iteration.
699  */
700 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
701 {
702         unsigned long pid;
703         unsigned int first;
704         loff_t l = 0;
705
706         if (trace_pid_list_first(pid_list, &first) < 0)
707                 return NULL;
708
709         pid = first;
710
711         /* Return pid + 1 so that zero can be the exit value */
712         for (pid++; pid && l < *pos;
713              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
714                 ;
715         return (void *)pid;
716 }
717
718 /**
719  * trace_pid_show - show the current pid in seq_file processing
720  * @m: The seq_file structure to write into
721  * @v: A void pointer of the pid (+1) value to display
722  *
723  * Can be directly used by seq_file operations to display the current
724  * pid value.
725  */
726 int trace_pid_show(struct seq_file *m, void *v)
727 {
728         unsigned long pid = (unsigned long)v - 1;
729
730         seq_printf(m, "%lu\n", pid);
731         return 0;
732 }
733
734 /* 128 should be much more than enough */
735 #define PID_BUF_SIZE            127
736
737 int trace_pid_write(struct trace_pid_list *filtered_pids,
738                     struct trace_pid_list **new_pid_list,
739                     const char __user *ubuf, size_t cnt)
740 {
741         struct trace_pid_list *pid_list;
742         struct trace_parser parser;
743         unsigned long val;
744         int nr_pids = 0;
745         ssize_t read = 0;
746         ssize_t ret;
747         loff_t pos;
748         pid_t pid;
749
750         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
751                 return -ENOMEM;
752
753         /*
754          * Always recreate a new array. The write is an all or nothing
755          * operation. Always create a new array when adding new pids by
756          * the user. If the operation fails, then the current list is
757          * not modified.
758          */
759         pid_list = trace_pid_list_alloc();
760         if (!pid_list) {
761                 trace_parser_put(&parser);
762                 return -ENOMEM;
763         }
764
765         if (filtered_pids) {
766                 /* copy the current bits to the new max */
767                 ret = trace_pid_list_first(filtered_pids, &pid);
768                 while (!ret) {
769                         trace_pid_list_set(pid_list, pid);
770                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
771                         nr_pids++;
772                 }
773         }
774
775         ret = 0;
776         while (cnt > 0) {
777
778                 pos = 0;
779
780                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
781                 if (ret < 0)
782                         break;
783
784                 read += ret;
785                 ubuf += ret;
786                 cnt -= ret;
787
788                 if (!trace_parser_loaded(&parser))
789                         break;
790
791                 ret = -EINVAL;
792                 if (kstrtoul(parser.buffer, 0, &val))
793                         break;
794
795                 pid = (pid_t)val;
796
797                 if (trace_pid_list_set(pid_list, pid) < 0) {
798                         ret = -1;
799                         break;
800                 }
801                 nr_pids++;
802
803                 trace_parser_clear(&parser);
804                 ret = 0;
805         }
806         trace_parser_put(&parser);
807
808         if (ret < 0) {
809                 trace_pid_list_free(pid_list);
810                 return ret;
811         }
812
813         if (!nr_pids) {
814                 /* Cleared the list of pids */
815                 trace_pid_list_free(pid_list);
816                 pid_list = NULL;
817         }
818
819         *new_pid_list = pid_list;
820
821         return read;
822 }
823
824 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
825 {
826         u64 ts;
827
828         /* Early boot up does not have a buffer yet */
829         if (!buf->buffer)
830                 return trace_clock_local();
831
832         ts = ring_buffer_time_stamp(buf->buffer);
833         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
834
835         return ts;
836 }
837
838 u64 ftrace_now(int cpu)
839 {
840         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
841 }
842
843 /**
844  * tracing_is_enabled - Show if global_trace has been enabled
845  *
846  * Shows if the global trace has been enabled or not. It uses the
847  * mirror flag "buffer_disabled" to be used in fast paths such as for
848  * the irqsoff tracer. But it may be inaccurate due to races. If you
849  * need to know the accurate state, use tracing_is_on() which is a little
850  * slower, but accurate.
851  */
852 int tracing_is_enabled(void)
853 {
854         /*
855          * For quick access (irqsoff uses this in fast path), just
856          * return the mirror variable of the state of the ring buffer.
857          * It's a little racy, but we don't really care.
858          */
859         smp_rmb();
860         return !global_trace.buffer_disabled;
861 }
862
863 /*
864  * trace_buf_size is the size in bytes that is allocated
865  * for a buffer. Note, the number of bytes is always rounded
866  * to page size.
867  *
868  * This number is purposely set to a low number of 16384.
869  * If the dump on oops happens, it will be much appreciated
870  * to not have to wait for all that output. Anyway this can be
871  * boot time and run time configurable.
872  */
873 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
874
875 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
876
877 /* trace_types holds a link list of available tracers. */
878 static struct tracer            *trace_types __read_mostly;
879
880 /*
881  * trace_types_lock is used to protect the trace_types list.
882  */
883 DEFINE_MUTEX(trace_types_lock);
884
885 /*
886  * serialize the access of the ring buffer
887  *
888  * ring buffer serializes readers, but it is low level protection.
889  * The validity of the events (which returns by ring_buffer_peek() ..etc)
890  * are not protected by ring buffer.
891  *
892  * The content of events may become garbage if we allow other process consumes
893  * these events concurrently:
894  *   A) the page of the consumed events may become a normal page
895  *      (not reader page) in ring buffer, and this page will be rewritten
896  *      by events producer.
897  *   B) The page of the consumed events may become a page for splice_read,
898  *      and this page will be returned to system.
899  *
900  * These primitives allow multi process access to different cpu ring buffer
901  * concurrently.
902  *
903  * These primitives don't distinguish read-only and read-consume access.
904  * Multi read-only access are also serialized.
905  */
906
907 #ifdef CONFIG_SMP
908 static DECLARE_RWSEM(all_cpu_access_lock);
909 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
910
911 static inline void trace_access_lock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 /* gain it for accessing the whole ring buffer. */
915                 down_write(&all_cpu_access_lock);
916         } else {
917                 /* gain it for accessing a cpu ring buffer. */
918
919                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
920                 down_read(&all_cpu_access_lock);
921
922                 /* Secondly block other access to this @cpu ring buffer. */
923                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
924         }
925 }
926
927 static inline void trace_access_unlock(int cpu)
928 {
929         if (cpu == RING_BUFFER_ALL_CPUS) {
930                 up_write(&all_cpu_access_lock);
931         } else {
932                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
933                 up_read(&all_cpu_access_lock);
934         }
935 }
936
937 static inline void trace_access_lock_init(void)
938 {
939         int cpu;
940
941         for_each_possible_cpu(cpu)
942                 mutex_init(&per_cpu(cpu_access_lock, cpu));
943 }
944
945 #else
946
947 static DEFINE_MUTEX(access_lock);
948
949 static inline void trace_access_lock(int cpu)
950 {
951         (void)cpu;
952         mutex_lock(&access_lock);
953 }
954
955 static inline void trace_access_unlock(int cpu)
956 {
957         (void)cpu;
958         mutex_unlock(&access_lock);
959 }
960
961 static inline void trace_access_lock_init(void)
962 {
963 }
964
965 #endif
966
967 #ifdef CONFIG_STACKTRACE
968 static void __ftrace_trace_stack(struct trace_buffer *buffer,
969                                  unsigned int trace_ctx,
970                                  int skip, struct pt_regs *regs);
971 static inline void ftrace_trace_stack(struct trace_array *tr,
972                                       struct trace_buffer *buffer,
973                                       unsigned int trace_ctx,
974                                       int skip, struct pt_regs *regs);
975
976 #else
977 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
978                                         unsigned int trace_ctx,
979                                         int skip, struct pt_regs *regs)
980 {
981 }
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983                                       struct trace_buffer *buffer,
984                                       unsigned long trace_ctx,
985                                       int skip, struct pt_regs *regs)
986 {
987 }
988
989 #endif
990
991 static __always_inline void
992 trace_event_setup(struct ring_buffer_event *event,
993                   int type, unsigned int trace_ctx)
994 {
995         struct trace_entry *ent = ring_buffer_event_data(event);
996
997         tracing_generic_entry_update(ent, type, trace_ctx);
998 }
999
1000 static __always_inline struct ring_buffer_event *
1001 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1002                           int type,
1003                           unsigned long len,
1004                           unsigned int trace_ctx)
1005 {
1006         struct ring_buffer_event *event;
1007
1008         event = ring_buffer_lock_reserve(buffer, len);
1009         if (event != NULL)
1010                 trace_event_setup(event, type, trace_ctx);
1011
1012         return event;
1013 }
1014
1015 void tracer_tracing_on(struct trace_array *tr)
1016 {
1017         if (tr->array_buffer.buffer)
1018                 ring_buffer_record_on(tr->array_buffer.buffer);
1019         /*
1020          * This flag is looked at when buffers haven't been allocated
1021          * yet, or by some tracers (like irqsoff), that just want to
1022          * know if the ring buffer has been disabled, but it can handle
1023          * races of where it gets disabled but we still do a record.
1024          * As the check is in the fast path of the tracers, it is more
1025          * important to be fast than accurate.
1026          */
1027         tr->buffer_disabled = 0;
1028         /* Make the flag seen by readers */
1029         smp_wmb();
1030 }
1031
1032 /**
1033  * tracing_on - enable tracing buffers
1034  *
1035  * This function enables tracing buffers that may have been
1036  * disabled with tracing_off.
1037  */
1038 void tracing_on(void)
1039 {
1040         tracer_tracing_on(&global_trace);
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_on);
1043
1044
1045 static __always_inline void
1046 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1047 {
1048         __this_cpu_write(trace_taskinfo_save, true);
1049
1050         /* If this is the temp buffer, we need to commit fully */
1051         if (this_cpu_read(trace_buffered_event) == event) {
1052                 /* Length is in event->array[0] */
1053                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1054                 /* Release the temp buffer */
1055                 this_cpu_dec(trace_buffered_event_cnt);
1056                 /* ring_buffer_unlock_commit() enables preemption */
1057                 preempt_enable_notrace();
1058         } else
1059                 ring_buffer_unlock_commit(buffer);
1060 }
1061
1062 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1063                        const char *str, int size)
1064 {
1065         struct ring_buffer_event *event;
1066         struct trace_buffer *buffer;
1067         struct print_entry *entry;
1068         unsigned int trace_ctx;
1069         int alloc;
1070
1071         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running && tr == &global_trace))
1075                 return 0;
1076
1077         if (unlikely(tracing_disabled))
1078                 return 0;
1079
1080         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1081
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = tr->array_buffer.buffer;
1084         ring_buffer_nest_start(buffer);
1085         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1086                                             trace_ctx);
1087         if (!event) {
1088                 size = 0;
1089                 goto out;
1090         }
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip = ip;
1094
1095         memcpy(&entry->buf, str, size);
1096
1097         /* Add a newline if necessary */
1098         if (entry->buf[size - 1] != '\n') {
1099                 entry->buf[size] = '\n';
1100                 entry->buf[size + 1] = '\0';
1101         } else
1102                 entry->buf[size] = '\0';
1103
1104         __buffer_unlock_commit(buffer, event);
1105         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1106  out:
1107         ring_buffer_nest_end(buffer);
1108         return size;
1109 }
1110 EXPORT_SYMBOL_GPL(__trace_array_puts);
1111
1112 /**
1113  * __trace_puts - write a constant string into the trace buffer.
1114  * @ip:    The address of the caller
1115  * @str:   The constant string to write
1116  * @size:  The size of the string.
1117  */
1118 int __trace_puts(unsigned long ip, const char *str, int size)
1119 {
1120         return __trace_array_puts(&global_trace, ip, str, size);
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_puts);
1123
1124 /**
1125  * __trace_bputs - write the pointer to a constant string into trace buffer
1126  * @ip:    The address of the caller
1127  * @str:   The constant string to write to the buffer to
1128  */
1129 int __trace_bputs(unsigned long ip, const char *str)
1130 {
1131         struct ring_buffer_event *event;
1132         struct trace_buffer *buffer;
1133         struct bputs_entry *entry;
1134         unsigned int trace_ctx;
1135         int size = sizeof(struct bputs_entry);
1136         int ret = 0;
1137
1138         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1139                 return 0;
1140
1141         if (unlikely(tracing_selftest_running || tracing_disabled))
1142                 return 0;
1143
1144         trace_ctx = tracing_gen_ctx();
1145         buffer = global_trace.array_buffer.buffer;
1146
1147         ring_buffer_nest_start(buffer);
1148         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1149                                             trace_ctx);
1150         if (!event)
1151                 goto out;
1152
1153         entry = ring_buffer_event_data(event);
1154         entry->ip                       = ip;
1155         entry->str                      = str;
1156
1157         __buffer_unlock_commit(buffer, event);
1158         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1159
1160         ret = 1;
1161  out:
1162         ring_buffer_nest_end(buffer);
1163         return ret;
1164 }
1165 EXPORT_SYMBOL_GPL(__trace_bputs);
1166
1167 #ifdef CONFIG_TRACER_SNAPSHOT
1168 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1169                                            void *cond_data)
1170 {
1171         struct tracer *tracer = tr->current_trace;
1172         unsigned long flags;
1173
1174         if (in_nmi()) {
1175                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1176                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1177                 return;
1178         }
1179
1180         if (!tr->allocated_snapshot) {
1181                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1182                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1183                 tracer_tracing_off(tr);
1184                 return;
1185         }
1186
1187         /* Note, snapshot can not be used when the tracer uses it */
1188         if (tracer->use_max_tr) {
1189                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1190                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1191                 return;
1192         }
1193
1194         local_irq_save(flags);
1195         update_max_tr(tr, current, smp_processor_id(), cond_data);
1196         local_irq_restore(flags);
1197 }
1198
1199 void tracing_snapshot_instance(struct trace_array *tr)
1200 {
1201         tracing_snapshot_instance_cond(tr, NULL);
1202 }
1203
1204 /**
1205  * tracing_snapshot - take a snapshot of the current buffer.
1206  *
1207  * This causes a swap between the snapshot buffer and the current live
1208  * tracing buffer. You can use this to take snapshots of the live
1209  * trace when some condition is triggered, but continue to trace.
1210  *
1211  * Note, make sure to allocate the snapshot with either
1212  * a tracing_snapshot_alloc(), or by doing it manually
1213  * with: echo 1 > /sys/kernel/tracing/snapshot
1214  *
1215  * If the snapshot buffer is not allocated, it will stop tracing.
1216  * Basically making a permanent snapshot.
1217  */
1218 void tracing_snapshot(void)
1219 {
1220         struct trace_array *tr = &global_trace;
1221
1222         tracing_snapshot_instance(tr);
1223 }
1224 EXPORT_SYMBOL_GPL(tracing_snapshot);
1225
1226 /**
1227  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1228  * @tr:         The tracing instance to snapshot
1229  * @cond_data:  The data to be tested conditionally, and possibly saved
1230  *
1231  * This is the same as tracing_snapshot() except that the snapshot is
1232  * conditional - the snapshot will only happen if the
1233  * cond_snapshot.update() implementation receiving the cond_data
1234  * returns true, which means that the trace array's cond_snapshot
1235  * update() operation used the cond_data to determine whether the
1236  * snapshot should be taken, and if it was, presumably saved it along
1237  * with the snapshot.
1238  */
1239 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1240 {
1241         tracing_snapshot_instance_cond(tr, cond_data);
1242 }
1243 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1244
1245 /**
1246  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1247  * @tr:         The tracing instance
1248  *
1249  * When the user enables a conditional snapshot using
1250  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1251  * with the snapshot.  This accessor is used to retrieve it.
1252  *
1253  * Should not be called from cond_snapshot.update(), since it takes
1254  * the tr->max_lock lock, which the code calling
1255  * cond_snapshot.update() has already done.
1256  *
1257  * Returns the cond_data associated with the trace array's snapshot.
1258  */
1259 void *tracing_cond_snapshot_data(struct trace_array *tr)
1260 {
1261         void *cond_data = NULL;
1262
1263         local_irq_disable();
1264         arch_spin_lock(&tr->max_lock);
1265
1266         if (tr->cond_snapshot)
1267                 cond_data = tr->cond_snapshot->cond_data;
1268
1269         arch_spin_unlock(&tr->max_lock);
1270         local_irq_enable();
1271
1272         return cond_data;
1273 }
1274 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1275
1276 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1277                                         struct array_buffer *size_buf, int cpu_id);
1278 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1279
1280 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1281 {
1282         int order;
1283         int ret;
1284
1285         if (!tr->allocated_snapshot) {
1286
1287                 /* Make the snapshot buffer have the same order as main buffer */
1288                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1289                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1290                 if (ret < 0)
1291                         return ret;
1292
1293                 /* allocate spare buffer */
1294                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1295                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1296                 if (ret < 0)
1297                         return ret;
1298
1299                 tr->allocated_snapshot = true;
1300         }
1301
1302         return 0;
1303 }
1304
1305 static void free_snapshot(struct trace_array *tr)
1306 {
1307         /*
1308          * We don't free the ring buffer. instead, resize it because
1309          * The max_tr ring buffer has some state (e.g. ring->clock) and
1310          * we want preserve it.
1311          */
1312         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1313         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1314         set_buffer_entries(&tr->max_buffer, 1);
1315         tracing_reset_online_cpus(&tr->max_buffer);
1316         tr->allocated_snapshot = false;
1317 }
1318
1319 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1320 {
1321         int ret;
1322
1323         lockdep_assert_held(&trace_types_lock);
1324
1325         spin_lock(&tr->snapshot_trigger_lock);
1326         if (tr->snapshot == UINT_MAX) {
1327                 spin_unlock(&tr->snapshot_trigger_lock);
1328                 return -EBUSY;
1329         }
1330
1331         tr->snapshot++;
1332         spin_unlock(&tr->snapshot_trigger_lock);
1333
1334         ret = tracing_alloc_snapshot_instance(tr);
1335         if (ret) {
1336                 spin_lock(&tr->snapshot_trigger_lock);
1337                 tr->snapshot--;
1338                 spin_unlock(&tr->snapshot_trigger_lock);
1339         }
1340
1341         return ret;
1342 }
1343
1344 int tracing_arm_snapshot(struct trace_array *tr)
1345 {
1346         int ret;
1347
1348         mutex_lock(&trace_types_lock);
1349         ret = tracing_arm_snapshot_locked(tr);
1350         mutex_unlock(&trace_types_lock);
1351
1352         return ret;
1353 }
1354
1355 void tracing_disarm_snapshot(struct trace_array *tr)
1356 {
1357         spin_lock(&tr->snapshot_trigger_lock);
1358         if (!WARN_ON(!tr->snapshot))
1359                 tr->snapshot--;
1360         spin_unlock(&tr->snapshot_trigger_lock);
1361 }
1362
1363 /**
1364  * tracing_alloc_snapshot - allocate snapshot buffer.
1365  *
1366  * This only allocates the snapshot buffer if it isn't already
1367  * allocated - it doesn't also take a snapshot.
1368  *
1369  * This is meant to be used in cases where the snapshot buffer needs
1370  * to be set up for events that can't sleep but need to be able to
1371  * trigger a snapshot.
1372  */
1373 int tracing_alloc_snapshot(void)
1374 {
1375         struct trace_array *tr = &global_trace;
1376         int ret;
1377
1378         ret = tracing_alloc_snapshot_instance(tr);
1379         WARN_ON(ret < 0);
1380
1381         return ret;
1382 }
1383 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1384
1385 /**
1386  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1387  *
1388  * This is similar to tracing_snapshot(), but it will allocate the
1389  * snapshot buffer if it isn't already allocated. Use this only
1390  * where it is safe to sleep, as the allocation may sleep.
1391  *
1392  * This causes a swap between the snapshot buffer and the current live
1393  * tracing buffer. You can use this to take snapshots of the live
1394  * trace when some condition is triggered, but continue to trace.
1395  */
1396 void tracing_snapshot_alloc(void)
1397 {
1398         int ret;
1399
1400         ret = tracing_alloc_snapshot();
1401         if (ret < 0)
1402                 return;
1403
1404         tracing_snapshot();
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1407
1408 /**
1409  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1410  * @tr:         The tracing instance
1411  * @cond_data:  User data to associate with the snapshot
1412  * @update:     Implementation of the cond_snapshot update function
1413  *
1414  * Check whether the conditional snapshot for the given instance has
1415  * already been enabled, or if the current tracer is already using a
1416  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1417  * save the cond_data and update function inside.
1418  *
1419  * Returns 0 if successful, error otherwise.
1420  */
1421 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1422                                  cond_update_fn_t update)
1423 {
1424         struct cond_snapshot *cond_snapshot;
1425         int ret = 0;
1426
1427         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1428         if (!cond_snapshot)
1429                 return -ENOMEM;
1430
1431         cond_snapshot->cond_data = cond_data;
1432         cond_snapshot->update = update;
1433
1434         mutex_lock(&trace_types_lock);
1435
1436         if (tr->current_trace->use_max_tr) {
1437                 ret = -EBUSY;
1438                 goto fail_unlock;
1439         }
1440
1441         /*
1442          * The cond_snapshot can only change to NULL without the
1443          * trace_types_lock. We don't care if we race with it going
1444          * to NULL, but we want to make sure that it's not set to
1445          * something other than NULL when we get here, which we can
1446          * do safely with only holding the trace_types_lock and not
1447          * having to take the max_lock.
1448          */
1449         if (tr->cond_snapshot) {
1450                 ret = -EBUSY;
1451                 goto fail_unlock;
1452         }
1453
1454         ret = tracing_arm_snapshot_locked(tr);
1455         if (ret)
1456                 goto fail_unlock;
1457
1458         local_irq_disable();
1459         arch_spin_lock(&tr->max_lock);
1460         tr->cond_snapshot = cond_snapshot;
1461         arch_spin_unlock(&tr->max_lock);
1462         local_irq_enable();
1463
1464         mutex_unlock(&trace_types_lock);
1465
1466         return ret;
1467
1468  fail_unlock:
1469         mutex_unlock(&trace_types_lock);
1470         kfree(cond_snapshot);
1471         return ret;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1474
1475 /**
1476  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1477  * @tr:         The tracing instance
1478  *
1479  * Check whether the conditional snapshot for the given instance is
1480  * enabled; if so, free the cond_snapshot associated with it,
1481  * otherwise return -EINVAL.
1482  *
1483  * Returns 0 if successful, error otherwise.
1484  */
1485 int tracing_snapshot_cond_disable(struct trace_array *tr)
1486 {
1487         int ret = 0;
1488
1489         local_irq_disable();
1490         arch_spin_lock(&tr->max_lock);
1491
1492         if (!tr->cond_snapshot)
1493                 ret = -EINVAL;
1494         else {
1495                 kfree(tr->cond_snapshot);
1496                 tr->cond_snapshot = NULL;
1497         }
1498
1499         arch_spin_unlock(&tr->max_lock);
1500         local_irq_enable();
1501
1502         tracing_disarm_snapshot(tr);
1503
1504         return ret;
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1507 #else
1508 void tracing_snapshot(void)
1509 {
1510         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1511 }
1512 EXPORT_SYMBOL_GPL(tracing_snapshot);
1513 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1514 {
1515         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1516 }
1517 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1518 int tracing_alloc_snapshot(void)
1519 {
1520         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1521         return -ENODEV;
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1524 void tracing_snapshot_alloc(void)
1525 {
1526         /* Give warning */
1527         tracing_snapshot();
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1530 void *tracing_cond_snapshot_data(struct trace_array *tr)
1531 {
1532         return NULL;
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1535 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1536 {
1537         return -ENODEV;
1538 }
1539 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542         return false;
1543 }
1544 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1545 #define free_snapshot(tr)       do { } while (0)
1546 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1547 #endif /* CONFIG_TRACER_SNAPSHOT */
1548
1549 void tracer_tracing_off(struct trace_array *tr)
1550 {
1551         if (tr->array_buffer.buffer)
1552                 ring_buffer_record_off(tr->array_buffer.buffer);
1553         /*
1554          * This flag is looked at when buffers haven't been allocated
1555          * yet, or by some tracers (like irqsoff), that just want to
1556          * know if the ring buffer has been disabled, but it can handle
1557          * races of where it gets disabled but we still do a record.
1558          * As the check is in the fast path of the tracers, it is more
1559          * important to be fast than accurate.
1560          */
1561         tr->buffer_disabled = 1;
1562         /* Make the flag seen by readers */
1563         smp_wmb();
1564 }
1565
1566 /**
1567  * tracing_off - turn off tracing buffers
1568  *
1569  * This function stops the tracing buffers from recording data.
1570  * It does not disable any overhead the tracers themselves may
1571  * be causing. This function simply causes all recording to
1572  * the ring buffers to fail.
1573  */
1574 void tracing_off(void)
1575 {
1576         tracer_tracing_off(&global_trace);
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_off);
1579
1580 void disable_trace_on_warning(void)
1581 {
1582         if (__disable_trace_on_warning) {
1583                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1584                         "Disabling tracing due to warning\n");
1585                 tracing_off();
1586         }
1587 }
1588
1589 /**
1590  * tracer_tracing_is_on - show real state of ring buffer enabled
1591  * @tr : the trace array to know if ring buffer is enabled
1592  *
1593  * Shows real state of the ring buffer if it is enabled or not.
1594  */
1595 bool tracer_tracing_is_on(struct trace_array *tr)
1596 {
1597         if (tr->array_buffer.buffer)
1598                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1599         return !tr->buffer_disabled;
1600 }
1601
1602 /**
1603  * tracing_is_on - show state of ring buffers enabled
1604  */
1605 int tracing_is_on(void)
1606 {
1607         return tracer_tracing_is_on(&global_trace);
1608 }
1609 EXPORT_SYMBOL_GPL(tracing_is_on);
1610
1611 static int __init set_buf_size(char *str)
1612 {
1613         unsigned long buf_size;
1614
1615         if (!str)
1616                 return 0;
1617         buf_size = memparse(str, &str);
1618         /*
1619          * nr_entries can not be zero and the startup
1620          * tests require some buffer space. Therefore
1621          * ensure we have at least 4096 bytes of buffer.
1622          */
1623         trace_buf_size = max(4096UL, buf_size);
1624         return 1;
1625 }
1626 __setup("trace_buf_size=", set_buf_size);
1627
1628 static int __init set_tracing_thresh(char *str)
1629 {
1630         unsigned long threshold;
1631         int ret;
1632
1633         if (!str)
1634                 return 0;
1635         ret = kstrtoul(str, 0, &threshold);
1636         if (ret < 0)
1637                 return 0;
1638         tracing_thresh = threshold * 1000;
1639         return 1;
1640 }
1641 __setup("tracing_thresh=", set_tracing_thresh);
1642
1643 unsigned long nsecs_to_usecs(unsigned long nsecs)
1644 {
1645         return nsecs / 1000;
1646 }
1647
1648 /*
1649  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1650  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1651  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1652  * of strings in the order that the evals (enum) were defined.
1653  */
1654 #undef C
1655 #define C(a, b) b
1656
1657 /* These must match the bit positions in trace_iterator_flags */
1658 static const char *trace_options[] = {
1659         TRACE_FLAGS
1660         NULL
1661 };
1662
1663 static struct {
1664         u64 (*func)(void);
1665         const char *name;
1666         int in_ns;              /* is this clock in nanoseconds? */
1667 } trace_clocks[] = {
1668         { trace_clock_local,            "local",        1 },
1669         { trace_clock_global,           "global",       1 },
1670         { trace_clock_counter,          "counter",      0 },
1671         { trace_clock_jiffies,          "uptime",       0 },
1672         { trace_clock,                  "perf",         1 },
1673         { ktime_get_mono_fast_ns,       "mono",         1 },
1674         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1675         { ktime_get_boot_fast_ns,       "boot",         1 },
1676         { ktime_get_tai_fast_ns,        "tai",          1 },
1677         ARCH_TRACE_CLOCKS
1678 };
1679
1680 bool trace_clock_in_ns(struct trace_array *tr)
1681 {
1682         if (trace_clocks[tr->clock_id].in_ns)
1683                 return true;
1684
1685         return false;
1686 }
1687
1688 /*
1689  * trace_parser_get_init - gets the buffer for trace parser
1690  */
1691 int trace_parser_get_init(struct trace_parser *parser, int size)
1692 {
1693         memset(parser, 0, sizeof(*parser));
1694
1695         parser->buffer = kmalloc(size, GFP_KERNEL);
1696         if (!parser->buffer)
1697                 return 1;
1698
1699         parser->size = size;
1700         return 0;
1701 }
1702
1703 /*
1704  * trace_parser_put - frees the buffer for trace parser
1705  */
1706 void trace_parser_put(struct trace_parser *parser)
1707 {
1708         kfree(parser->buffer);
1709         parser->buffer = NULL;
1710 }
1711
1712 /*
1713  * trace_get_user - reads the user input string separated by  space
1714  * (matched by isspace(ch))
1715  *
1716  * For each string found the 'struct trace_parser' is updated,
1717  * and the function returns.
1718  *
1719  * Returns number of bytes read.
1720  *
1721  * See kernel/trace/trace.h for 'struct trace_parser' details.
1722  */
1723 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1724         size_t cnt, loff_t *ppos)
1725 {
1726         char ch;
1727         size_t read = 0;
1728         ssize_t ret;
1729
1730         if (!*ppos)
1731                 trace_parser_clear(parser);
1732
1733         ret = get_user(ch, ubuf++);
1734         if (ret)
1735                 goto out;
1736
1737         read++;
1738         cnt--;
1739
1740         /*
1741          * The parser is not finished with the last write,
1742          * continue reading the user input without skipping spaces.
1743          */
1744         if (!parser->cont) {
1745                 /* skip white space */
1746                 while (cnt && isspace(ch)) {
1747                         ret = get_user(ch, ubuf++);
1748                         if (ret)
1749                                 goto out;
1750                         read++;
1751                         cnt--;
1752                 }
1753
1754                 parser->idx = 0;
1755
1756                 /* only spaces were written */
1757                 if (isspace(ch) || !ch) {
1758                         *ppos += read;
1759                         ret = read;
1760                         goto out;
1761                 }
1762         }
1763
1764         /* read the non-space input */
1765         while (cnt && !isspace(ch) && ch) {
1766                 if (parser->idx < parser->size - 1)
1767                         parser->buffer[parser->idx++] = ch;
1768                 else {
1769                         ret = -EINVAL;
1770                         goto out;
1771                 }
1772                 ret = get_user(ch, ubuf++);
1773                 if (ret)
1774                         goto out;
1775                 read++;
1776                 cnt--;
1777         }
1778
1779         /* We either got finished input or we have to wait for another call. */
1780         if (isspace(ch) || !ch) {
1781                 parser->buffer[parser->idx] = 0;
1782                 parser->cont = false;
1783         } else if (parser->idx < parser->size - 1) {
1784                 parser->cont = true;
1785                 parser->buffer[parser->idx++] = ch;
1786                 /* Make sure the parsed string always terminates with '\0'. */
1787                 parser->buffer[parser->idx] = 0;
1788         } else {
1789                 ret = -EINVAL;
1790                 goto out;
1791         }
1792
1793         *ppos += read;
1794         ret = read;
1795
1796 out:
1797         return ret;
1798 }
1799
1800 /* TODO add a seq_buf_to_buffer() */
1801 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1802 {
1803         int len;
1804
1805         if (trace_seq_used(s) <= s->readpos)
1806                 return -EBUSY;
1807
1808         len = trace_seq_used(s) - s->readpos;
1809         if (cnt > len)
1810                 cnt = len;
1811         memcpy(buf, s->buffer + s->readpos, cnt);
1812
1813         s->readpos += cnt;
1814         return cnt;
1815 }
1816
1817 unsigned long __read_mostly     tracing_thresh;
1818
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820 static const struct file_operations tracing_max_lat_fops;
1821
1822 #ifdef LATENCY_FS_NOTIFY
1823
1824 static struct workqueue_struct *fsnotify_wq;
1825
1826 static void latency_fsnotify_workfn(struct work_struct *work)
1827 {
1828         struct trace_array *tr = container_of(work, struct trace_array,
1829                                               fsnotify_work);
1830         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1831 }
1832
1833 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1834 {
1835         struct trace_array *tr = container_of(iwork, struct trace_array,
1836                                               fsnotify_irqwork);
1837         queue_work(fsnotify_wq, &tr->fsnotify_work);
1838 }
1839
1840 static void trace_create_maxlat_file(struct trace_array *tr,
1841                                      struct dentry *d_tracer)
1842 {
1843         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1844         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1845         tr->d_max_latency = trace_create_file("tracing_max_latency",
1846                                               TRACE_MODE_WRITE,
1847                                               d_tracer, tr,
1848                                               &tracing_max_lat_fops);
1849 }
1850
1851 __init static int latency_fsnotify_init(void)
1852 {
1853         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1854                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1855         if (!fsnotify_wq) {
1856                 pr_err("Unable to allocate tr_max_lat_wq\n");
1857                 return -ENOMEM;
1858         }
1859         return 0;
1860 }
1861
1862 late_initcall_sync(latency_fsnotify_init);
1863
1864 void latency_fsnotify(struct trace_array *tr)
1865 {
1866         if (!fsnotify_wq)
1867                 return;
1868         /*
1869          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1870          * possible that we are called from __schedule() or do_idle(), which
1871          * could cause a deadlock.
1872          */
1873         irq_work_queue(&tr->fsnotify_irqwork);
1874 }
1875
1876 #else /* !LATENCY_FS_NOTIFY */
1877
1878 #define trace_create_maxlat_file(tr, d_tracer)                          \
1879         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1880                           d_tracer, tr, &tracing_max_lat_fops)
1881
1882 #endif
1883
1884 /*
1885  * Copy the new maximum trace into the separate maximum-trace
1886  * structure. (this way the maximum trace is permanently saved,
1887  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1888  */
1889 static void
1890 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1891 {
1892         struct array_buffer *trace_buf = &tr->array_buffer;
1893         struct array_buffer *max_buf = &tr->max_buffer;
1894         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1895         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1896
1897         max_buf->cpu = cpu;
1898         max_buf->time_start = data->preempt_timestamp;
1899
1900         max_data->saved_latency = tr->max_latency;
1901         max_data->critical_start = data->critical_start;
1902         max_data->critical_end = data->critical_end;
1903
1904         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1905         max_data->pid = tsk->pid;
1906         /*
1907          * If tsk == current, then use current_uid(), as that does not use
1908          * RCU. The irq tracer can be called out of RCU scope.
1909          */
1910         if (tsk == current)
1911                 max_data->uid = current_uid();
1912         else
1913                 max_data->uid = task_uid(tsk);
1914
1915         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1916         max_data->policy = tsk->policy;
1917         max_data->rt_priority = tsk->rt_priority;
1918
1919         /* record this tasks comm */
1920         tracing_record_cmdline(tsk);
1921         latency_fsnotify(tr);
1922 }
1923
1924 /**
1925  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1926  * @tr: tracer
1927  * @tsk: the task with the latency
1928  * @cpu: The cpu that initiated the trace.
1929  * @cond_data: User data associated with a conditional snapshot
1930  *
1931  * Flip the buffers between the @tr and the max_tr and record information
1932  * about which task was the cause of this latency.
1933  */
1934 void
1935 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1936               void *cond_data)
1937 {
1938         if (tr->stop_count)
1939                 return;
1940
1941         WARN_ON_ONCE(!irqs_disabled());
1942
1943         if (!tr->allocated_snapshot) {
1944                 /* Only the nop tracer should hit this when disabling */
1945                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1946                 return;
1947         }
1948
1949         arch_spin_lock(&tr->max_lock);
1950
1951         /* Inherit the recordable setting from array_buffer */
1952         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1953                 ring_buffer_record_on(tr->max_buffer.buffer);
1954         else
1955                 ring_buffer_record_off(tr->max_buffer.buffer);
1956
1957 #ifdef CONFIG_TRACER_SNAPSHOT
1958         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1959                 arch_spin_unlock(&tr->max_lock);
1960                 return;
1961         }
1962 #endif
1963         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1964
1965         __update_max_tr(tr, tsk, cpu);
1966
1967         arch_spin_unlock(&tr->max_lock);
1968
1969         /* Any waiters on the old snapshot buffer need to wake up */
1970         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1971 }
1972
1973 /**
1974  * update_max_tr_single - only copy one trace over, and reset the rest
1975  * @tr: tracer
1976  * @tsk: task with the latency
1977  * @cpu: the cpu of the buffer to copy.
1978  *
1979  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1980  */
1981 void
1982 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1983 {
1984         int ret;
1985
1986         if (tr->stop_count)
1987                 return;
1988
1989         WARN_ON_ONCE(!irqs_disabled());
1990         if (!tr->allocated_snapshot) {
1991                 /* Only the nop tracer should hit this when disabling */
1992                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1993                 return;
1994         }
1995
1996         arch_spin_lock(&tr->max_lock);
1997
1998         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1999
2000         if (ret == -EBUSY) {
2001                 /*
2002                  * We failed to swap the buffer due to a commit taking
2003                  * place on this CPU. We fail to record, but we reset
2004                  * the max trace buffer (no one writes directly to it)
2005                  * and flag that it failed.
2006                  * Another reason is resize is in progress.
2007                  */
2008                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2009                         "Failed to swap buffers due to commit or resize in progress\n");
2010         }
2011
2012         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2013
2014         __update_max_tr(tr, tsk, cpu);
2015         arch_spin_unlock(&tr->max_lock);
2016 }
2017
2018 #endif /* CONFIG_TRACER_MAX_TRACE */
2019
2020 struct pipe_wait {
2021         struct trace_iterator           *iter;
2022         int                             wait_index;
2023 };
2024
2025 static bool wait_pipe_cond(void *data)
2026 {
2027         struct pipe_wait *pwait = data;
2028         struct trace_iterator *iter = pwait->iter;
2029
2030         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2031                 return true;
2032
2033         return iter->closed;
2034 }
2035
2036 static int wait_on_pipe(struct trace_iterator *iter, int full)
2037 {
2038         struct pipe_wait pwait;
2039         int ret;
2040
2041         /* Iterators are static, they should be filled or empty */
2042         if (trace_buffer_iter(iter, iter->cpu_file))
2043                 return 0;
2044
2045         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2046         pwait.iter = iter;
2047
2048         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2049                                wait_pipe_cond, &pwait);
2050
2051 #ifdef CONFIG_TRACER_MAX_TRACE
2052         /*
2053          * Make sure this is still the snapshot buffer, as if a snapshot were
2054          * to happen, this would now be the main buffer.
2055          */
2056         if (iter->snapshot)
2057                 iter->array_buffer = &iter->tr->max_buffer;
2058 #endif
2059         return ret;
2060 }
2061
2062 #ifdef CONFIG_FTRACE_STARTUP_TEST
2063 static bool selftests_can_run;
2064
2065 struct trace_selftests {
2066         struct list_head                list;
2067         struct tracer                   *type;
2068 };
2069
2070 static LIST_HEAD(postponed_selftests);
2071
2072 static int save_selftest(struct tracer *type)
2073 {
2074         struct trace_selftests *selftest;
2075
2076         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2077         if (!selftest)
2078                 return -ENOMEM;
2079
2080         selftest->type = type;
2081         list_add(&selftest->list, &postponed_selftests);
2082         return 0;
2083 }
2084
2085 static int run_tracer_selftest(struct tracer *type)
2086 {
2087         struct trace_array *tr = &global_trace;
2088         struct tracer *saved_tracer = tr->current_trace;
2089         int ret;
2090
2091         if (!type->selftest || tracing_selftest_disabled)
2092                 return 0;
2093
2094         /*
2095          * If a tracer registers early in boot up (before scheduling is
2096          * initialized and such), then do not run its selftests yet.
2097          * Instead, run it a little later in the boot process.
2098          */
2099         if (!selftests_can_run)
2100                 return save_selftest(type);
2101
2102         if (!tracing_is_on()) {
2103                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2104                         type->name);
2105                 return 0;
2106         }
2107
2108         /*
2109          * Run a selftest on this tracer.
2110          * Here we reset the trace buffer, and set the current
2111          * tracer to be this tracer. The tracer can then run some
2112          * internal tracing to verify that everything is in order.
2113          * If we fail, we do not register this tracer.
2114          */
2115         tracing_reset_online_cpus(&tr->array_buffer);
2116
2117         tr->current_trace = type;
2118
2119 #ifdef CONFIG_TRACER_MAX_TRACE
2120         if (type->use_max_tr) {
2121                 /* If we expanded the buffers, make sure the max is expanded too */
2122                 if (tr->ring_buffer_expanded)
2123                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2124                                            RING_BUFFER_ALL_CPUS);
2125                 tr->allocated_snapshot = true;
2126         }
2127 #endif
2128
2129         /* the test is responsible for initializing and enabling */
2130         pr_info("Testing tracer %s: ", type->name);
2131         ret = type->selftest(type, tr);
2132         /* the test is responsible for resetting too */
2133         tr->current_trace = saved_tracer;
2134         if (ret) {
2135                 printk(KERN_CONT "FAILED!\n");
2136                 /* Add the warning after printing 'FAILED' */
2137                 WARN_ON(1);
2138                 return -1;
2139         }
2140         /* Only reset on passing, to avoid touching corrupted buffers */
2141         tracing_reset_online_cpus(&tr->array_buffer);
2142
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144         if (type->use_max_tr) {
2145                 tr->allocated_snapshot = false;
2146
2147                 /* Shrink the max buffer again */
2148                 if (tr->ring_buffer_expanded)
2149                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2150                                            RING_BUFFER_ALL_CPUS);
2151         }
2152 #endif
2153
2154         printk(KERN_CONT "PASSED\n");
2155         return 0;
2156 }
2157
2158 static int do_run_tracer_selftest(struct tracer *type)
2159 {
2160         int ret;
2161
2162         /*
2163          * Tests can take a long time, especially if they are run one after the
2164          * other, as does happen during bootup when all the tracers are
2165          * registered. This could cause the soft lockup watchdog to trigger.
2166          */
2167         cond_resched();
2168
2169         tracing_selftest_running = true;
2170         ret = run_tracer_selftest(type);
2171         tracing_selftest_running = false;
2172
2173         return ret;
2174 }
2175
2176 static __init int init_trace_selftests(void)
2177 {
2178         struct trace_selftests *p, *n;
2179         struct tracer *t, **last;
2180         int ret;
2181
2182         selftests_can_run = true;
2183
2184         mutex_lock(&trace_types_lock);
2185
2186         if (list_empty(&postponed_selftests))
2187                 goto out;
2188
2189         pr_info("Running postponed tracer tests:\n");
2190
2191         tracing_selftest_running = true;
2192         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2193                 /* This loop can take minutes when sanitizers are enabled, so
2194                  * lets make sure we allow RCU processing.
2195                  */
2196                 cond_resched();
2197                 ret = run_tracer_selftest(p->type);
2198                 /* If the test fails, then warn and remove from available_tracers */
2199                 if (ret < 0) {
2200                         WARN(1, "tracer: %s failed selftest, disabling\n",
2201                              p->type->name);
2202                         last = &trace_types;
2203                         for (t = trace_types; t; t = t->next) {
2204                                 if (t == p->type) {
2205                                         *last = t->next;
2206                                         break;
2207                                 }
2208                                 last = &t->next;
2209                         }
2210                 }
2211                 list_del(&p->list);
2212                 kfree(p);
2213         }
2214         tracing_selftest_running = false;
2215
2216  out:
2217         mutex_unlock(&trace_types_lock);
2218
2219         return 0;
2220 }
2221 core_initcall(init_trace_selftests);
2222 #else
2223 static inline int run_tracer_selftest(struct tracer *type)
2224 {
2225         return 0;
2226 }
2227 static inline int do_run_tracer_selftest(struct tracer *type)
2228 {
2229         return 0;
2230 }
2231 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2232
2233 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2234
2235 static void __init apply_trace_boot_options(void);
2236
2237 /**
2238  * register_tracer - register a tracer with the ftrace system.
2239  * @type: the plugin for the tracer
2240  *
2241  * Register a new plugin tracer.
2242  */
2243 int __init register_tracer(struct tracer *type)
2244 {
2245         struct tracer *t;
2246         int ret = 0;
2247
2248         if (!type->name) {
2249                 pr_info("Tracer must have a name\n");
2250                 return -1;
2251         }
2252
2253         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2254                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2255                 return -1;
2256         }
2257
2258         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2259                 pr_warn("Can not register tracer %s due to lockdown\n",
2260                            type->name);
2261                 return -EPERM;
2262         }
2263
2264         mutex_lock(&trace_types_lock);
2265
2266         for (t = trace_types; t; t = t->next) {
2267                 if (strcmp(type->name, t->name) == 0) {
2268                         /* already found */
2269                         pr_info("Tracer %s already registered\n",
2270                                 type->name);
2271                         ret = -1;
2272                         goto out;
2273                 }
2274         }
2275
2276         if (!type->set_flag)
2277                 type->set_flag = &dummy_set_flag;
2278         if (!type->flags) {
2279                 /*allocate a dummy tracer_flags*/
2280                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2281                 if (!type->flags) {
2282                         ret = -ENOMEM;
2283                         goto out;
2284                 }
2285                 type->flags->val = 0;
2286                 type->flags->opts = dummy_tracer_opt;
2287         } else
2288                 if (!type->flags->opts)
2289                         type->flags->opts = dummy_tracer_opt;
2290
2291         /* store the tracer for __set_tracer_option */
2292         type->flags->trace = type;
2293
2294         ret = do_run_tracer_selftest(type);
2295         if (ret < 0)
2296                 goto out;
2297
2298         type->next = trace_types;
2299         trace_types = type;
2300         add_tracer_options(&global_trace, type);
2301
2302  out:
2303         mutex_unlock(&trace_types_lock);
2304
2305         if (ret || !default_bootup_tracer)
2306                 goto out_unlock;
2307
2308         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2309                 goto out_unlock;
2310
2311         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2312         /* Do we want this tracer to start on bootup? */
2313         tracing_set_tracer(&global_trace, type->name);
2314         default_bootup_tracer = NULL;
2315
2316         apply_trace_boot_options();
2317
2318         /* disable other selftests, since this will break it. */
2319         disable_tracing_selftest("running a tracer");
2320
2321  out_unlock:
2322         return ret;
2323 }
2324
2325 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2326 {
2327         struct trace_buffer *buffer = buf->buffer;
2328
2329         if (!buffer)
2330                 return;
2331
2332         ring_buffer_record_disable(buffer);
2333
2334         /* Make sure all commits have finished */
2335         synchronize_rcu();
2336         ring_buffer_reset_cpu(buffer, cpu);
2337
2338         ring_buffer_record_enable(buffer);
2339 }
2340
2341 void tracing_reset_online_cpus(struct array_buffer *buf)
2342 {
2343         struct trace_buffer *buffer = buf->buffer;
2344
2345         if (!buffer)
2346                 return;
2347
2348         ring_buffer_record_disable(buffer);
2349
2350         /* Make sure all commits have finished */
2351         synchronize_rcu();
2352
2353         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2354
2355         ring_buffer_reset_online_cpus(buffer);
2356
2357         ring_buffer_record_enable(buffer);
2358 }
2359
2360 /* Must have trace_types_lock held */
2361 void tracing_reset_all_online_cpus_unlocked(void)
2362 {
2363         struct trace_array *tr;
2364
2365         lockdep_assert_held(&trace_types_lock);
2366
2367         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2368                 if (!tr->clear_trace)
2369                         continue;
2370                 tr->clear_trace = false;
2371                 tracing_reset_online_cpus(&tr->array_buffer);
2372 #ifdef CONFIG_TRACER_MAX_TRACE
2373                 tracing_reset_online_cpus(&tr->max_buffer);
2374 #endif
2375         }
2376 }
2377
2378 void tracing_reset_all_online_cpus(void)
2379 {
2380         mutex_lock(&trace_types_lock);
2381         tracing_reset_all_online_cpus_unlocked();
2382         mutex_unlock(&trace_types_lock);
2383 }
2384
2385 int is_tracing_stopped(void)
2386 {
2387         return global_trace.stop_count;
2388 }
2389
2390 static void tracing_start_tr(struct trace_array *tr)
2391 {
2392         struct trace_buffer *buffer;
2393         unsigned long flags;
2394
2395         if (tracing_disabled)
2396                 return;
2397
2398         raw_spin_lock_irqsave(&tr->start_lock, flags);
2399         if (--tr->stop_count) {
2400                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2401                         /* Someone screwed up their debugging */
2402                         tr->stop_count = 0;
2403                 }
2404                 goto out;
2405         }
2406
2407         /* Prevent the buffers from switching */
2408         arch_spin_lock(&tr->max_lock);
2409
2410         buffer = tr->array_buffer.buffer;
2411         if (buffer)
2412                 ring_buffer_record_enable(buffer);
2413
2414 #ifdef CONFIG_TRACER_MAX_TRACE
2415         buffer = tr->max_buffer.buffer;
2416         if (buffer)
2417                 ring_buffer_record_enable(buffer);
2418 #endif
2419
2420         arch_spin_unlock(&tr->max_lock);
2421
2422  out:
2423         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2424 }
2425
2426 /**
2427  * tracing_start - quick start of the tracer
2428  *
2429  * If tracing is enabled but was stopped by tracing_stop,
2430  * this will start the tracer back up.
2431  */
2432 void tracing_start(void)
2433
2434 {
2435         return tracing_start_tr(&global_trace);
2436 }
2437
2438 static void tracing_stop_tr(struct trace_array *tr)
2439 {
2440         struct trace_buffer *buffer;
2441         unsigned long flags;
2442
2443         raw_spin_lock_irqsave(&tr->start_lock, flags);
2444         if (tr->stop_count++)
2445                 goto out;
2446
2447         /* Prevent the buffers from switching */
2448         arch_spin_lock(&tr->max_lock);
2449
2450         buffer = tr->array_buffer.buffer;
2451         if (buffer)
2452                 ring_buffer_record_disable(buffer);
2453
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455         buffer = tr->max_buffer.buffer;
2456         if (buffer)
2457                 ring_buffer_record_disable(buffer);
2458 #endif
2459
2460         arch_spin_unlock(&tr->max_lock);
2461
2462  out:
2463         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2464 }
2465
2466 /**
2467  * tracing_stop - quick stop of the tracer
2468  *
2469  * Light weight way to stop tracing. Use in conjunction with
2470  * tracing_start.
2471  */
2472 void tracing_stop(void)
2473 {
2474         return tracing_stop_tr(&global_trace);
2475 }
2476
2477 /*
2478  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2479  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2480  * simplifies those functions and keeps them in sync.
2481  */
2482 enum print_line_t trace_handle_return(struct trace_seq *s)
2483 {
2484         return trace_seq_has_overflowed(s) ?
2485                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2486 }
2487 EXPORT_SYMBOL_GPL(trace_handle_return);
2488
2489 static unsigned short migration_disable_value(void)
2490 {
2491 #if defined(CONFIG_SMP)
2492         return current->migration_disabled;
2493 #else
2494         return 0;
2495 #endif
2496 }
2497
2498 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2499 {
2500         unsigned int trace_flags = irqs_status;
2501         unsigned int pc;
2502
2503         pc = preempt_count();
2504
2505         if (pc & NMI_MASK)
2506                 trace_flags |= TRACE_FLAG_NMI;
2507         if (pc & HARDIRQ_MASK)
2508                 trace_flags |= TRACE_FLAG_HARDIRQ;
2509         if (in_serving_softirq())
2510                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2511         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2512                 trace_flags |= TRACE_FLAG_BH_OFF;
2513
2514         if (tif_need_resched())
2515                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2516         if (test_preempt_need_resched())
2517                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2518         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2519                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2520 }
2521
2522 struct ring_buffer_event *
2523 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2524                           int type,
2525                           unsigned long len,
2526                           unsigned int trace_ctx)
2527 {
2528         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2529 }
2530
2531 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2532 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2533 static int trace_buffered_event_ref;
2534
2535 /**
2536  * trace_buffered_event_enable - enable buffering events
2537  *
2538  * When events are being filtered, it is quicker to use a temporary
2539  * buffer to write the event data into if there's a likely chance
2540  * that it will not be committed. The discard of the ring buffer
2541  * is not as fast as committing, and is much slower than copying
2542  * a commit.
2543  *
2544  * When an event is to be filtered, allocate per cpu buffers to
2545  * write the event data into, and if the event is filtered and discarded
2546  * it is simply dropped, otherwise, the entire data is to be committed
2547  * in one shot.
2548  */
2549 void trace_buffered_event_enable(void)
2550 {
2551         struct ring_buffer_event *event;
2552         struct page *page;
2553         int cpu;
2554
2555         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2556
2557         if (trace_buffered_event_ref++)
2558                 return;
2559
2560         for_each_tracing_cpu(cpu) {
2561                 page = alloc_pages_node(cpu_to_node(cpu),
2562                                         GFP_KERNEL | __GFP_NORETRY, 0);
2563                 /* This is just an optimization and can handle failures */
2564                 if (!page) {
2565                         pr_err("Failed to allocate event buffer\n");
2566                         break;
2567                 }
2568
2569                 event = page_address(page);
2570                 memset(event, 0, sizeof(*event));
2571
2572                 per_cpu(trace_buffered_event, cpu) = event;
2573
2574                 preempt_disable();
2575                 if (cpu == smp_processor_id() &&
2576                     __this_cpu_read(trace_buffered_event) !=
2577                     per_cpu(trace_buffered_event, cpu))
2578                         WARN_ON_ONCE(1);
2579                 preempt_enable();
2580         }
2581 }
2582
2583 static void enable_trace_buffered_event(void *data)
2584 {
2585         /* Probably not needed, but do it anyway */
2586         smp_rmb();
2587         this_cpu_dec(trace_buffered_event_cnt);
2588 }
2589
2590 static void disable_trace_buffered_event(void *data)
2591 {
2592         this_cpu_inc(trace_buffered_event_cnt);
2593 }
2594
2595 /**
2596  * trace_buffered_event_disable - disable buffering events
2597  *
2598  * When a filter is removed, it is faster to not use the buffered
2599  * events, and to commit directly into the ring buffer. Free up
2600  * the temp buffers when there are no more users. This requires
2601  * special synchronization with current events.
2602  */
2603 void trace_buffered_event_disable(void)
2604 {
2605         int cpu;
2606
2607         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2608
2609         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2610                 return;
2611
2612         if (--trace_buffered_event_ref)
2613                 return;
2614
2615         /* For each CPU, set the buffer as used. */
2616         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2617                          NULL, true);
2618
2619         /* Wait for all current users to finish */
2620         synchronize_rcu();
2621
2622         for_each_tracing_cpu(cpu) {
2623                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2624                 per_cpu(trace_buffered_event, cpu) = NULL;
2625         }
2626
2627         /*
2628          * Wait for all CPUs that potentially started checking if they can use
2629          * their event buffer only after the previous synchronize_rcu() call and
2630          * they still read a valid pointer from trace_buffered_event. It must be
2631          * ensured they don't see cleared trace_buffered_event_cnt else they
2632          * could wrongly decide to use the pointed-to buffer which is now freed.
2633          */
2634         synchronize_rcu();
2635
2636         /* For each CPU, relinquish the buffer */
2637         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2638                          true);
2639 }
2640
2641 static struct trace_buffer *temp_buffer;
2642
2643 struct ring_buffer_event *
2644 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2645                           struct trace_event_file *trace_file,
2646                           int type, unsigned long len,
2647                           unsigned int trace_ctx)
2648 {
2649         struct ring_buffer_event *entry;
2650         struct trace_array *tr = trace_file->tr;
2651         int val;
2652
2653         *current_rb = tr->array_buffer.buffer;
2654
2655         if (!tr->no_filter_buffering_ref &&
2656             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2657                 preempt_disable_notrace();
2658                 /*
2659                  * Filtering is on, so try to use the per cpu buffer first.
2660                  * This buffer will simulate a ring_buffer_event,
2661                  * where the type_len is zero and the array[0] will
2662                  * hold the full length.
2663                  * (see include/linux/ring-buffer.h for details on
2664                  *  how the ring_buffer_event is structured).
2665                  *
2666                  * Using a temp buffer during filtering and copying it
2667                  * on a matched filter is quicker than writing directly
2668                  * into the ring buffer and then discarding it when
2669                  * it doesn't match. That is because the discard
2670                  * requires several atomic operations to get right.
2671                  * Copying on match and doing nothing on a failed match
2672                  * is still quicker than no copy on match, but having
2673                  * to discard out of the ring buffer on a failed match.
2674                  */
2675                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2676                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2677
2678                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2679
2680                         /*
2681                          * Preemption is disabled, but interrupts and NMIs
2682                          * can still come in now. If that happens after
2683                          * the above increment, then it will have to go
2684                          * back to the old method of allocating the event
2685                          * on the ring buffer, and if the filter fails, it
2686                          * will have to call ring_buffer_discard_commit()
2687                          * to remove it.
2688                          *
2689                          * Need to also check the unlikely case that the
2690                          * length is bigger than the temp buffer size.
2691                          * If that happens, then the reserve is pretty much
2692                          * guaranteed to fail, as the ring buffer currently
2693                          * only allows events less than a page. But that may
2694                          * change in the future, so let the ring buffer reserve
2695                          * handle the failure in that case.
2696                          */
2697                         if (val == 1 && likely(len <= max_len)) {
2698                                 trace_event_setup(entry, type, trace_ctx);
2699                                 entry->array[0] = len;
2700                                 /* Return with preemption disabled */
2701                                 return entry;
2702                         }
2703                         this_cpu_dec(trace_buffered_event_cnt);
2704                 }
2705                 /* __trace_buffer_lock_reserve() disables preemption */
2706                 preempt_enable_notrace();
2707         }
2708
2709         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2710                                             trace_ctx);
2711         /*
2712          * If tracing is off, but we have triggers enabled
2713          * we still need to look at the event data. Use the temp_buffer
2714          * to store the trace event for the trigger to use. It's recursive
2715          * safe and will not be recorded anywhere.
2716          */
2717         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2718                 *current_rb = temp_buffer;
2719                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2720                                                     trace_ctx);
2721         }
2722         return entry;
2723 }
2724 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2725
2726 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2727 static DEFINE_MUTEX(tracepoint_printk_mutex);
2728
2729 static void output_printk(struct trace_event_buffer *fbuffer)
2730 {
2731         struct trace_event_call *event_call;
2732         struct trace_event_file *file;
2733         struct trace_event *event;
2734         unsigned long flags;
2735         struct trace_iterator *iter = tracepoint_print_iter;
2736
2737         /* We should never get here if iter is NULL */
2738         if (WARN_ON_ONCE(!iter))
2739                 return;
2740
2741         event_call = fbuffer->trace_file->event_call;
2742         if (!event_call || !event_call->event.funcs ||
2743             !event_call->event.funcs->trace)
2744                 return;
2745
2746         file = fbuffer->trace_file;
2747         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2748             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2749              !filter_match_preds(file->filter, fbuffer->entry)))
2750                 return;
2751
2752         event = &fbuffer->trace_file->event_call->event;
2753
2754         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2755         trace_seq_init(&iter->seq);
2756         iter->ent = fbuffer->entry;
2757         event_call->event.funcs->trace(iter, 0, event);
2758         trace_seq_putc(&iter->seq, 0);
2759         printk("%s", iter->seq.buffer);
2760
2761         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2762 }
2763
2764 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2765                              void *buffer, size_t *lenp,
2766                              loff_t *ppos)
2767 {
2768         int save_tracepoint_printk;
2769         int ret;
2770
2771         mutex_lock(&tracepoint_printk_mutex);
2772         save_tracepoint_printk = tracepoint_printk;
2773
2774         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2775
2776         /*
2777          * This will force exiting early, as tracepoint_printk
2778          * is always zero when tracepoint_printk_iter is not allocated
2779          */
2780         if (!tracepoint_print_iter)
2781                 tracepoint_printk = 0;
2782
2783         if (save_tracepoint_printk == tracepoint_printk)
2784                 goto out;
2785
2786         if (tracepoint_printk)
2787                 static_key_enable(&tracepoint_printk_key.key);
2788         else
2789                 static_key_disable(&tracepoint_printk_key.key);
2790
2791  out:
2792         mutex_unlock(&tracepoint_printk_mutex);
2793
2794         return ret;
2795 }
2796
2797 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2798 {
2799         enum event_trigger_type tt = ETT_NONE;
2800         struct trace_event_file *file = fbuffer->trace_file;
2801
2802         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2803                         fbuffer->entry, &tt))
2804                 goto discard;
2805
2806         if (static_key_false(&tracepoint_printk_key.key))
2807                 output_printk(fbuffer);
2808
2809         if (static_branch_unlikely(&trace_event_exports_enabled))
2810                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2811
2812         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2813                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2814
2815 discard:
2816         if (tt)
2817                 event_triggers_post_call(file, tt);
2818
2819 }
2820 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2821
2822 /*
2823  * Skip 3:
2824  *
2825  *   trace_buffer_unlock_commit_regs()
2826  *   trace_event_buffer_commit()
2827  *   trace_event_raw_event_xxx()
2828  */
2829 # define STACK_SKIP 3
2830
2831 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2832                                      struct trace_buffer *buffer,
2833                                      struct ring_buffer_event *event,
2834                                      unsigned int trace_ctx,
2835                                      struct pt_regs *regs)
2836 {
2837         __buffer_unlock_commit(buffer, event);
2838
2839         /*
2840          * If regs is not set, then skip the necessary functions.
2841          * Note, we can still get here via blktrace, wakeup tracer
2842          * and mmiotrace, but that's ok if they lose a function or
2843          * two. They are not that meaningful.
2844          */
2845         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2846         ftrace_trace_userstack(tr, buffer, trace_ctx);
2847 }
2848
2849 /*
2850  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2851  */
2852 void
2853 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2854                                    struct ring_buffer_event *event)
2855 {
2856         __buffer_unlock_commit(buffer, event);
2857 }
2858
2859 void
2860 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2861                parent_ip, unsigned int trace_ctx)
2862 {
2863         struct trace_event_call *call = &event_function;
2864         struct trace_buffer *buffer = tr->array_buffer.buffer;
2865         struct ring_buffer_event *event;
2866         struct ftrace_entry *entry;
2867
2868         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2869                                             trace_ctx);
2870         if (!event)
2871                 return;
2872         entry   = ring_buffer_event_data(event);
2873         entry->ip                       = ip;
2874         entry->parent_ip                = parent_ip;
2875
2876         if (!call_filter_check_discard(call, entry, buffer, event)) {
2877                 if (static_branch_unlikely(&trace_function_exports_enabled))
2878                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2879                 __buffer_unlock_commit(buffer, event);
2880         }
2881 }
2882
2883 #ifdef CONFIG_STACKTRACE
2884
2885 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2886 #define FTRACE_KSTACK_NESTING   4
2887
2888 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2889
2890 struct ftrace_stack {
2891         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2892 };
2893
2894
2895 struct ftrace_stacks {
2896         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2897 };
2898
2899 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2900 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2901
2902 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2903                                  unsigned int trace_ctx,
2904                                  int skip, struct pt_regs *regs)
2905 {
2906         struct trace_event_call *call = &event_kernel_stack;
2907         struct ring_buffer_event *event;
2908         unsigned int size, nr_entries;
2909         struct ftrace_stack *fstack;
2910         struct stack_entry *entry;
2911         int stackidx;
2912
2913         /*
2914          * Add one, for this function and the call to save_stack_trace()
2915          * If regs is set, then these functions will not be in the way.
2916          */
2917 #ifndef CONFIG_UNWINDER_ORC
2918         if (!regs)
2919                 skip++;
2920 #endif
2921
2922         preempt_disable_notrace();
2923
2924         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2925
2926         /* This should never happen. If it does, yell once and skip */
2927         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2928                 goto out;
2929
2930         /*
2931          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2932          * interrupt will either see the value pre increment or post
2933          * increment. If the interrupt happens pre increment it will have
2934          * restored the counter when it returns.  We just need a barrier to
2935          * keep gcc from moving things around.
2936          */
2937         barrier();
2938
2939         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2940         size = ARRAY_SIZE(fstack->calls);
2941
2942         if (regs) {
2943                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2944                                                    size, skip);
2945         } else {
2946                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2947         }
2948
2949         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2950                                     struct_size(entry, caller, nr_entries),
2951                                     trace_ctx);
2952         if (!event)
2953                 goto out;
2954         entry = ring_buffer_event_data(event);
2955
2956         entry->size = nr_entries;
2957         memcpy(&entry->caller, fstack->calls,
2958                flex_array_size(entry, caller, nr_entries));
2959
2960         if (!call_filter_check_discard(call, entry, buffer, event))
2961                 __buffer_unlock_commit(buffer, event);
2962
2963  out:
2964         /* Again, don't let gcc optimize things here */
2965         barrier();
2966         __this_cpu_dec(ftrace_stack_reserve);
2967         preempt_enable_notrace();
2968
2969 }
2970
2971 static inline void ftrace_trace_stack(struct trace_array *tr,
2972                                       struct trace_buffer *buffer,
2973                                       unsigned int trace_ctx,
2974                                       int skip, struct pt_regs *regs)
2975 {
2976         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2977                 return;
2978
2979         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
2980 }
2981
2982 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2983                    int skip)
2984 {
2985         struct trace_buffer *buffer = tr->array_buffer.buffer;
2986
2987         if (rcu_is_watching()) {
2988                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2989                 return;
2990         }
2991
2992         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2993                 return;
2994
2995         /*
2996          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2997          * but if the above rcu_is_watching() failed, then the NMI
2998          * triggered someplace critical, and ct_irq_enter() should
2999          * not be called from NMI.
3000          */
3001         if (unlikely(in_nmi()))
3002                 return;
3003
3004         ct_irq_enter_irqson();
3005         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3006         ct_irq_exit_irqson();
3007 }
3008
3009 /**
3010  * trace_dump_stack - record a stack back trace in the trace buffer
3011  * @skip: Number of functions to skip (helper handlers)
3012  */
3013 void trace_dump_stack(int skip)
3014 {
3015         if (tracing_disabled || tracing_selftest_running)
3016                 return;
3017
3018 #ifndef CONFIG_UNWINDER_ORC
3019         /* Skip 1 to skip this function. */
3020         skip++;
3021 #endif
3022         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3023                              tracing_gen_ctx(), skip, NULL);
3024 }
3025 EXPORT_SYMBOL_GPL(trace_dump_stack);
3026
3027 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3028 static DEFINE_PER_CPU(int, user_stack_count);
3029
3030 static void
3031 ftrace_trace_userstack(struct trace_array *tr,
3032                        struct trace_buffer *buffer, unsigned int trace_ctx)
3033 {
3034         struct trace_event_call *call = &event_user_stack;
3035         struct ring_buffer_event *event;
3036         struct userstack_entry *entry;
3037
3038         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3039                 return;
3040
3041         /*
3042          * NMIs can not handle page faults, even with fix ups.
3043          * The save user stack can (and often does) fault.
3044          */
3045         if (unlikely(in_nmi()))
3046                 return;
3047
3048         /*
3049          * prevent recursion, since the user stack tracing may
3050          * trigger other kernel events.
3051          */
3052         preempt_disable();
3053         if (__this_cpu_read(user_stack_count))
3054                 goto out;
3055
3056         __this_cpu_inc(user_stack_count);
3057
3058         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3059                                             sizeof(*entry), trace_ctx);
3060         if (!event)
3061                 goto out_drop_count;
3062         entry   = ring_buffer_event_data(event);
3063
3064         entry->tgid             = current->tgid;
3065         memset(&entry->caller, 0, sizeof(entry->caller));
3066
3067         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3068         if (!call_filter_check_discard(call, entry, buffer, event))
3069                 __buffer_unlock_commit(buffer, event);
3070
3071  out_drop_count:
3072         __this_cpu_dec(user_stack_count);
3073  out:
3074         preempt_enable();
3075 }
3076 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3077 static void ftrace_trace_userstack(struct trace_array *tr,
3078                                    struct trace_buffer *buffer,
3079                                    unsigned int trace_ctx)
3080 {
3081 }
3082 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3083
3084 #endif /* CONFIG_STACKTRACE */
3085
3086 static inline void
3087 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3088                           unsigned long long delta)
3089 {
3090         entry->bottom_delta_ts = delta & U32_MAX;
3091         entry->top_delta_ts = (delta >> 32);
3092 }
3093
3094 void trace_last_func_repeats(struct trace_array *tr,
3095                              struct trace_func_repeats *last_info,
3096                              unsigned int trace_ctx)
3097 {
3098         struct trace_buffer *buffer = tr->array_buffer.buffer;
3099         struct func_repeats_entry *entry;
3100         struct ring_buffer_event *event;
3101         u64 delta;
3102
3103         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3104                                             sizeof(*entry), trace_ctx);
3105         if (!event)
3106                 return;
3107
3108         delta = ring_buffer_event_time_stamp(buffer, event) -
3109                 last_info->ts_last_call;
3110
3111         entry = ring_buffer_event_data(event);
3112         entry->ip = last_info->ip;
3113         entry->parent_ip = last_info->parent_ip;
3114         entry->count = last_info->count;
3115         func_repeats_set_delta_ts(entry, delta);
3116
3117         __buffer_unlock_commit(buffer, event);
3118 }
3119
3120 /* created for use with alloc_percpu */
3121 struct trace_buffer_struct {
3122         int nesting;
3123         char buffer[4][TRACE_BUF_SIZE];
3124 };
3125
3126 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3127
3128 /*
3129  * This allows for lockless recording.  If we're nested too deeply, then
3130  * this returns NULL.
3131  */
3132 static char *get_trace_buf(void)
3133 {
3134         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3135
3136         if (!trace_percpu_buffer || buffer->nesting >= 4)
3137                 return NULL;
3138
3139         buffer->nesting++;
3140
3141         /* Interrupts must see nesting incremented before we use the buffer */
3142         barrier();
3143         return &buffer->buffer[buffer->nesting - 1][0];
3144 }
3145
3146 static void put_trace_buf(void)
3147 {
3148         /* Don't let the decrement of nesting leak before this */
3149         barrier();
3150         this_cpu_dec(trace_percpu_buffer->nesting);
3151 }
3152
3153 static int alloc_percpu_trace_buffer(void)
3154 {
3155         struct trace_buffer_struct __percpu *buffers;
3156
3157         if (trace_percpu_buffer)
3158                 return 0;
3159
3160         buffers = alloc_percpu(struct trace_buffer_struct);
3161         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3162                 return -ENOMEM;
3163
3164         trace_percpu_buffer = buffers;
3165         return 0;
3166 }
3167
3168 static int buffers_allocated;
3169
3170 void trace_printk_init_buffers(void)
3171 {
3172         if (buffers_allocated)
3173                 return;
3174
3175         if (alloc_percpu_trace_buffer())
3176                 return;
3177
3178         /* trace_printk() is for debug use only. Don't use it in production. */
3179
3180         pr_warn("\n");
3181         pr_warn("**********************************************************\n");
3182         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3183         pr_warn("**                                                      **\n");
3184         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3185         pr_warn("**                                                      **\n");
3186         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3187         pr_warn("** unsafe for production use.                           **\n");
3188         pr_warn("**                                                      **\n");
3189         pr_warn("** If you see this message and you are not debugging    **\n");
3190         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3191         pr_warn("**                                                      **\n");
3192         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3193         pr_warn("**********************************************************\n");
3194
3195         /* Expand the buffers to set size */
3196         tracing_update_buffers(&global_trace);
3197
3198         buffers_allocated = 1;
3199
3200         /*
3201          * trace_printk_init_buffers() can be called by modules.
3202          * If that happens, then we need to start cmdline recording
3203          * directly here. If the global_trace.buffer is already
3204          * allocated here, then this was called by module code.
3205          */
3206         if (global_trace.array_buffer.buffer)
3207                 tracing_start_cmdline_record();
3208 }
3209 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3210
3211 void trace_printk_start_comm(void)
3212 {
3213         /* Start tracing comms if trace printk is set */
3214         if (!buffers_allocated)
3215                 return;
3216         tracing_start_cmdline_record();
3217 }
3218
3219 static void trace_printk_start_stop_comm(int enabled)
3220 {
3221         if (!buffers_allocated)
3222                 return;
3223
3224         if (enabled)
3225                 tracing_start_cmdline_record();
3226         else
3227                 tracing_stop_cmdline_record();
3228 }
3229
3230 /**
3231  * trace_vbprintk - write binary msg to tracing buffer
3232  * @ip:    The address of the caller
3233  * @fmt:   The string format to write to the buffer
3234  * @args:  Arguments for @fmt
3235  */
3236 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3237 {
3238         struct trace_event_call *call = &event_bprint;
3239         struct ring_buffer_event *event;
3240         struct trace_buffer *buffer;
3241         struct trace_array *tr = &global_trace;
3242         struct bprint_entry *entry;
3243         unsigned int trace_ctx;
3244         char *tbuffer;
3245         int len = 0, size;
3246
3247         if (unlikely(tracing_selftest_running || tracing_disabled))
3248                 return 0;
3249
3250         /* Don't pollute graph traces with trace_vprintk internals */
3251         pause_graph_tracing();
3252
3253         trace_ctx = tracing_gen_ctx();
3254         preempt_disable_notrace();
3255
3256         tbuffer = get_trace_buf();
3257         if (!tbuffer) {
3258                 len = 0;
3259                 goto out_nobuffer;
3260         }
3261
3262         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3263
3264         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3265                 goto out_put;
3266
3267         size = sizeof(*entry) + sizeof(u32) * len;
3268         buffer = tr->array_buffer.buffer;
3269         ring_buffer_nest_start(buffer);
3270         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3271                                             trace_ctx);
3272         if (!event)
3273                 goto out;
3274         entry = ring_buffer_event_data(event);
3275         entry->ip                       = ip;
3276         entry->fmt                      = fmt;
3277
3278         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3279         if (!call_filter_check_discard(call, entry, buffer, event)) {
3280                 __buffer_unlock_commit(buffer, event);
3281                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3282         }
3283
3284 out:
3285         ring_buffer_nest_end(buffer);
3286 out_put:
3287         put_trace_buf();
3288
3289 out_nobuffer:
3290         preempt_enable_notrace();
3291         unpause_graph_tracing();
3292
3293         return len;
3294 }
3295 EXPORT_SYMBOL_GPL(trace_vbprintk);
3296
3297 __printf(3, 0)
3298 static int
3299 __trace_array_vprintk(struct trace_buffer *buffer,
3300                       unsigned long ip, const char *fmt, va_list args)
3301 {
3302         struct trace_event_call *call = &event_print;
3303         struct ring_buffer_event *event;
3304         int len = 0, size;
3305         struct print_entry *entry;
3306         unsigned int trace_ctx;
3307         char *tbuffer;
3308
3309         if (tracing_disabled)
3310                 return 0;
3311
3312         /* Don't pollute graph traces with trace_vprintk internals */
3313         pause_graph_tracing();
3314
3315         trace_ctx = tracing_gen_ctx();
3316         preempt_disable_notrace();
3317
3318
3319         tbuffer = get_trace_buf();
3320         if (!tbuffer) {
3321                 len = 0;
3322                 goto out_nobuffer;
3323         }
3324
3325         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3326
3327         size = sizeof(*entry) + len + 1;
3328         ring_buffer_nest_start(buffer);
3329         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3330                                             trace_ctx);
3331         if (!event)
3332                 goto out;
3333         entry = ring_buffer_event_data(event);
3334         entry->ip = ip;
3335
3336         memcpy(&entry->buf, tbuffer, len + 1);
3337         if (!call_filter_check_discard(call, entry, buffer, event)) {
3338                 __buffer_unlock_commit(buffer, event);
3339                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3340         }
3341
3342 out:
3343         ring_buffer_nest_end(buffer);
3344         put_trace_buf();
3345
3346 out_nobuffer:
3347         preempt_enable_notrace();
3348         unpause_graph_tracing();
3349
3350         return len;
3351 }
3352
3353 __printf(3, 0)
3354 int trace_array_vprintk(struct trace_array *tr,
3355                         unsigned long ip, const char *fmt, va_list args)
3356 {
3357         if (tracing_selftest_running && tr == &global_trace)
3358                 return 0;
3359
3360         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3361 }
3362
3363 /**
3364  * trace_array_printk - Print a message to a specific instance
3365  * @tr: The instance trace_array descriptor
3366  * @ip: The instruction pointer that this is called from.
3367  * @fmt: The format to print (printf format)
3368  *
3369  * If a subsystem sets up its own instance, they have the right to
3370  * printk strings into their tracing instance buffer using this
3371  * function. Note, this function will not write into the top level
3372  * buffer (use trace_printk() for that), as writing into the top level
3373  * buffer should only have events that can be individually disabled.
3374  * trace_printk() is only used for debugging a kernel, and should not
3375  * be ever incorporated in normal use.
3376  *
3377  * trace_array_printk() can be used, as it will not add noise to the
3378  * top level tracing buffer.
3379  *
3380  * Note, trace_array_init_printk() must be called on @tr before this
3381  * can be used.
3382  */
3383 __printf(3, 0)
3384 int trace_array_printk(struct trace_array *tr,
3385                        unsigned long ip, const char *fmt, ...)
3386 {
3387         int ret;
3388         va_list ap;
3389
3390         if (!tr)
3391                 return -ENOENT;
3392
3393         /* This is only allowed for created instances */
3394         if (tr == &global_trace)
3395                 return 0;
3396
3397         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3398                 return 0;
3399
3400         va_start(ap, fmt);
3401         ret = trace_array_vprintk(tr, ip, fmt, ap);
3402         va_end(ap);
3403         return ret;
3404 }
3405 EXPORT_SYMBOL_GPL(trace_array_printk);
3406
3407 /**
3408  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3409  * @tr: The trace array to initialize the buffers for
3410  *
3411  * As trace_array_printk() only writes into instances, they are OK to
3412  * have in the kernel (unlike trace_printk()). This needs to be called
3413  * before trace_array_printk() can be used on a trace_array.
3414  */
3415 int trace_array_init_printk(struct trace_array *tr)
3416 {
3417         if (!tr)
3418                 return -ENOENT;
3419
3420         /* This is only allowed for created instances */
3421         if (tr == &global_trace)
3422                 return -EINVAL;
3423
3424         return alloc_percpu_trace_buffer();
3425 }
3426 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3427
3428 __printf(3, 4)
3429 int trace_array_printk_buf(struct trace_buffer *buffer,
3430                            unsigned long ip, const char *fmt, ...)
3431 {
3432         int ret;
3433         va_list ap;
3434
3435         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3436                 return 0;
3437
3438         va_start(ap, fmt);
3439         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3440         va_end(ap);
3441         return ret;
3442 }
3443
3444 __printf(2, 0)
3445 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3446 {
3447         return trace_array_vprintk(&global_trace, ip, fmt, args);
3448 }
3449 EXPORT_SYMBOL_GPL(trace_vprintk);
3450
3451 static void trace_iterator_increment(struct trace_iterator *iter)
3452 {
3453         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3454
3455         iter->idx++;
3456         if (buf_iter)
3457                 ring_buffer_iter_advance(buf_iter);
3458 }
3459
3460 static struct trace_entry *
3461 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3462                 unsigned long *lost_events)
3463 {
3464         struct ring_buffer_event *event;
3465         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3466
3467         if (buf_iter) {
3468                 event = ring_buffer_iter_peek(buf_iter, ts);
3469                 if (lost_events)
3470                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3471                                 (unsigned long)-1 : 0;
3472         } else {
3473                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3474                                          lost_events);
3475         }
3476
3477         if (event) {
3478                 iter->ent_size = ring_buffer_event_length(event);
3479                 return ring_buffer_event_data(event);
3480         }
3481         iter->ent_size = 0;
3482         return NULL;
3483 }
3484
3485 static struct trace_entry *
3486 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3487                   unsigned long *missing_events, u64 *ent_ts)
3488 {
3489         struct trace_buffer *buffer = iter->array_buffer->buffer;
3490         struct trace_entry *ent, *next = NULL;
3491         unsigned long lost_events = 0, next_lost = 0;
3492         int cpu_file = iter->cpu_file;
3493         u64 next_ts = 0, ts;
3494         int next_cpu = -1;
3495         int next_size = 0;
3496         int cpu;
3497
3498         /*
3499          * If we are in a per_cpu trace file, don't bother by iterating over
3500          * all cpu and peek directly.
3501          */
3502         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3503                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3504                         return NULL;
3505                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3506                 if (ent_cpu)
3507                         *ent_cpu = cpu_file;
3508
3509                 return ent;
3510         }
3511
3512         for_each_tracing_cpu(cpu) {
3513
3514                 if (ring_buffer_empty_cpu(buffer, cpu))
3515                         continue;
3516
3517                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3518
3519                 /*
3520                  * Pick the entry with the smallest timestamp:
3521                  */
3522                 if (ent && (!next || ts < next_ts)) {
3523                         next = ent;
3524                         next_cpu = cpu;
3525                         next_ts = ts;
3526                         next_lost = lost_events;
3527                         next_size = iter->ent_size;
3528                 }
3529         }
3530
3531         iter->ent_size = next_size;
3532
3533         if (ent_cpu)
3534                 *ent_cpu = next_cpu;
3535
3536         if (ent_ts)
3537                 *ent_ts = next_ts;
3538
3539         if (missing_events)
3540                 *missing_events = next_lost;
3541
3542         return next;
3543 }
3544
3545 #define STATIC_FMT_BUF_SIZE     128
3546 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3547
3548 char *trace_iter_expand_format(struct trace_iterator *iter)
3549 {
3550         char *tmp;
3551
3552         /*
3553          * iter->tr is NULL when used with tp_printk, which makes
3554          * this get called where it is not safe to call krealloc().
3555          */
3556         if (!iter->tr || iter->fmt == static_fmt_buf)
3557                 return NULL;
3558
3559         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3560                        GFP_KERNEL);
3561         if (tmp) {
3562                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3563                 iter->fmt = tmp;
3564         }
3565
3566         return tmp;
3567 }
3568
3569 /* Returns true if the string is safe to dereference from an event */
3570 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3571                            bool star, int len)
3572 {
3573         unsigned long addr = (unsigned long)str;
3574         struct trace_event *trace_event;
3575         struct trace_event_call *event;
3576
3577         /* Ignore strings with no length */
3578         if (star && !len)
3579                 return true;
3580
3581         /* OK if part of the event data */
3582         if ((addr >= (unsigned long)iter->ent) &&
3583             (addr < (unsigned long)iter->ent + iter->ent_size))
3584                 return true;
3585
3586         /* OK if part of the temp seq buffer */
3587         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3588             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3589                 return true;
3590
3591         /* Core rodata can not be freed */
3592         if (is_kernel_rodata(addr))
3593                 return true;
3594
3595         if (trace_is_tracepoint_string(str))
3596                 return true;
3597
3598         /*
3599          * Now this could be a module event, referencing core module
3600          * data, which is OK.
3601          */
3602         if (!iter->ent)
3603                 return false;
3604
3605         trace_event = ftrace_find_event(iter->ent->type);
3606         if (!trace_event)
3607                 return false;
3608
3609         event = container_of(trace_event, struct trace_event_call, event);
3610         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3611                 return false;
3612
3613         /* Would rather have rodata, but this will suffice */
3614         if (within_module_core(addr, event->module))
3615                 return true;
3616
3617         return false;
3618 }
3619
3620 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3621
3622 static int test_can_verify_check(const char *fmt, ...)
3623 {
3624         char buf[16];
3625         va_list ap;
3626         int ret;
3627
3628         /*
3629          * The verifier is dependent on vsnprintf() modifies the va_list
3630          * passed to it, where it is sent as a reference. Some architectures
3631          * (like x86_32) passes it by value, which means that vsnprintf()
3632          * does not modify the va_list passed to it, and the verifier
3633          * would then need to be able to understand all the values that
3634          * vsnprintf can use. If it is passed by value, then the verifier
3635          * is disabled.
3636          */
3637         va_start(ap, fmt);
3638         vsnprintf(buf, 16, "%d", ap);
3639         ret = va_arg(ap, int);
3640         va_end(ap);
3641
3642         return ret;
3643 }
3644
3645 static void test_can_verify(void)
3646 {
3647         if (!test_can_verify_check("%d %d", 0, 1)) {
3648                 pr_info("trace event string verifier disabled\n");
3649                 static_branch_inc(&trace_no_verify);
3650         }
3651 }
3652
3653 /**
3654  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3655  * @iter: The iterator that holds the seq buffer and the event being printed
3656  * @fmt: The format used to print the event
3657  * @ap: The va_list holding the data to print from @fmt.
3658  *
3659  * This writes the data into the @iter->seq buffer using the data from
3660  * @fmt and @ap. If the format has a %s, then the source of the string
3661  * is examined to make sure it is safe to print, otherwise it will
3662  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3663  * pointer.
3664  */
3665 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3666                          va_list ap)
3667 {
3668         const char *p = fmt;
3669         const char *str;
3670         int i, j;
3671
3672         if (WARN_ON_ONCE(!fmt))
3673                 return;
3674
3675         if (static_branch_unlikely(&trace_no_verify))
3676                 goto print;
3677
3678         /* Don't bother checking when doing a ftrace_dump() */
3679         if (iter->fmt == static_fmt_buf)
3680                 goto print;
3681
3682         while (*p) {
3683                 bool star = false;
3684                 int len = 0;
3685
3686                 j = 0;
3687
3688                 /* We only care about %s and variants */
3689                 for (i = 0; p[i]; i++) {
3690                         if (i + 1 >= iter->fmt_size) {
3691                                 /*
3692                                  * If we can't expand the copy buffer,
3693                                  * just print it.
3694                                  */
3695                                 if (!trace_iter_expand_format(iter))
3696                                         goto print;
3697                         }
3698
3699                         if (p[i] == '\\' && p[i+1]) {
3700                                 i++;
3701                                 continue;
3702                         }
3703                         if (p[i] == '%') {
3704                                 /* Need to test cases like %08.*s */
3705                                 for (j = 1; p[i+j]; j++) {
3706                                         if (isdigit(p[i+j]) ||
3707                                             p[i+j] == '.')
3708                                                 continue;
3709                                         if (p[i+j] == '*') {
3710                                                 star = true;
3711                                                 continue;
3712                                         }
3713                                         break;
3714                                 }
3715                                 if (p[i+j] == 's')
3716                                         break;
3717                                 star = false;
3718                         }
3719                         j = 0;
3720                 }
3721                 /* If no %s found then just print normally */
3722                 if (!p[i])
3723                         break;
3724
3725                 /* Copy up to the %s, and print that */
3726                 strncpy(iter->fmt, p, i);
3727                 iter->fmt[i] = '\0';
3728                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3729
3730                 /*
3731                  * If iter->seq is full, the above call no longer guarantees
3732                  * that ap is in sync with fmt processing, and further calls
3733                  * to va_arg() can return wrong positional arguments.
3734                  *
3735                  * Ensure that ap is no longer used in this case.
3736                  */
3737                 if (iter->seq.full) {
3738                         p = "";
3739                         break;
3740                 }
3741
3742                 if (star)
3743                         len = va_arg(ap, int);
3744
3745                 /* The ap now points to the string data of the %s */
3746                 str = va_arg(ap, const char *);
3747
3748                 /*
3749                  * If you hit this warning, it is likely that the
3750                  * trace event in question used %s on a string that
3751                  * was saved at the time of the event, but may not be
3752                  * around when the trace is read. Use __string(),
3753                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3754                  * instead. See samples/trace_events/trace-events-sample.h
3755                  * for reference.
3756                  */
3757                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3758                               "fmt: '%s' current_buffer: '%s'",
3759                               fmt, seq_buf_str(&iter->seq.seq))) {
3760                         int ret;
3761
3762                         /* Try to safely read the string */
3763                         if (star) {
3764                                 if (len + 1 > iter->fmt_size)
3765                                         len = iter->fmt_size - 1;
3766                                 if (len < 0)
3767                                         len = 0;
3768                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3769                                 iter->fmt[len] = 0;
3770                                 star = false;
3771                         } else {
3772                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3773                                                                   iter->fmt_size);
3774                         }
3775                         if (ret < 0)
3776                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3777                         else
3778                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3779                                                  str, iter->fmt);
3780                         str = "[UNSAFE-MEMORY]";
3781                         strcpy(iter->fmt, "%s");
3782                 } else {
3783                         strncpy(iter->fmt, p + i, j + 1);
3784                         iter->fmt[j+1] = '\0';
3785                 }
3786                 if (star)
3787                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3788                 else
3789                         trace_seq_printf(&iter->seq, iter->fmt, str);
3790
3791                 p += i + j + 1;
3792         }
3793  print:
3794         if (*p)
3795                 trace_seq_vprintf(&iter->seq, p, ap);
3796 }
3797
3798 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3799 {
3800         const char *p, *new_fmt;
3801         char *q;
3802
3803         if (WARN_ON_ONCE(!fmt))
3804                 return fmt;
3805
3806         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3807                 return fmt;
3808
3809         p = fmt;
3810         new_fmt = q = iter->fmt;
3811         while (*p) {
3812                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3813                         if (!trace_iter_expand_format(iter))
3814                                 return fmt;
3815
3816                         q += iter->fmt - new_fmt;
3817                         new_fmt = iter->fmt;
3818                 }
3819
3820                 *q++ = *p++;
3821
3822                 /* Replace %p with %px */
3823                 if (p[-1] == '%') {
3824                         if (p[0] == '%') {
3825                                 *q++ = *p++;
3826                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3827                                 *q++ = *p++;
3828                                 *q++ = 'x';
3829                         }
3830                 }
3831         }
3832         *q = '\0';
3833
3834         return new_fmt;
3835 }
3836
3837 #define STATIC_TEMP_BUF_SIZE    128
3838 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3839
3840 /* Find the next real entry, without updating the iterator itself */
3841 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3842                                           int *ent_cpu, u64 *ent_ts)
3843 {
3844         /* __find_next_entry will reset ent_size */
3845         int ent_size = iter->ent_size;
3846         struct trace_entry *entry;
3847
3848         /*
3849          * If called from ftrace_dump(), then the iter->temp buffer
3850          * will be the static_temp_buf and not created from kmalloc.
3851          * If the entry size is greater than the buffer, we can
3852          * not save it. Just return NULL in that case. This is only
3853          * used to add markers when two consecutive events' time
3854          * stamps have a large delta. See trace_print_lat_context()
3855          */
3856         if (iter->temp == static_temp_buf &&
3857             STATIC_TEMP_BUF_SIZE < ent_size)
3858                 return NULL;
3859
3860         /*
3861          * The __find_next_entry() may call peek_next_entry(), which may
3862          * call ring_buffer_peek() that may make the contents of iter->ent
3863          * undefined. Need to copy iter->ent now.
3864          */
3865         if (iter->ent && iter->ent != iter->temp) {
3866                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3867                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3868                         void *temp;
3869                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3870                         if (!temp)
3871                                 return NULL;
3872                         kfree(iter->temp);
3873                         iter->temp = temp;
3874                         iter->temp_size = iter->ent_size;
3875                 }
3876                 memcpy(iter->temp, iter->ent, iter->ent_size);
3877                 iter->ent = iter->temp;
3878         }
3879         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3880         /* Put back the original ent_size */
3881         iter->ent_size = ent_size;
3882
3883         return entry;
3884 }
3885
3886 /* Find the next real entry, and increment the iterator to the next entry */
3887 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3888 {
3889         iter->ent = __find_next_entry(iter, &iter->cpu,
3890                                       &iter->lost_events, &iter->ts);
3891
3892         if (iter->ent)
3893                 trace_iterator_increment(iter);
3894
3895         return iter->ent ? iter : NULL;
3896 }
3897
3898 static void trace_consume(struct trace_iterator *iter)
3899 {
3900         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3901                             &iter->lost_events);
3902 }
3903
3904 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3905 {
3906         struct trace_iterator *iter = m->private;
3907         int i = (int)*pos;
3908         void *ent;
3909
3910         WARN_ON_ONCE(iter->leftover);
3911
3912         (*pos)++;
3913
3914         /* can't go backwards */
3915         if (iter->idx > i)
3916                 return NULL;
3917
3918         if (iter->idx < 0)
3919                 ent = trace_find_next_entry_inc(iter);
3920         else
3921                 ent = iter;
3922
3923         while (ent && iter->idx < i)
3924                 ent = trace_find_next_entry_inc(iter);
3925
3926         iter->pos = *pos;
3927
3928         return ent;
3929 }
3930
3931 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3932 {
3933         struct ring_buffer_iter *buf_iter;
3934         unsigned long entries = 0;
3935         u64 ts;
3936
3937         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3938
3939         buf_iter = trace_buffer_iter(iter, cpu);
3940         if (!buf_iter)
3941                 return;
3942
3943         ring_buffer_iter_reset(buf_iter);
3944
3945         /*
3946          * We could have the case with the max latency tracers
3947          * that a reset never took place on a cpu. This is evident
3948          * by the timestamp being before the start of the buffer.
3949          */
3950         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3951                 if (ts >= iter->array_buffer->time_start)
3952                         break;
3953                 entries++;
3954                 ring_buffer_iter_advance(buf_iter);
3955         }
3956
3957         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3958 }
3959
3960 /*
3961  * The current tracer is copied to avoid a global locking
3962  * all around.
3963  */
3964 static void *s_start(struct seq_file *m, loff_t *pos)
3965 {
3966         struct trace_iterator *iter = m->private;
3967         struct trace_array *tr = iter->tr;
3968         int cpu_file = iter->cpu_file;
3969         void *p = NULL;
3970         loff_t l = 0;
3971         int cpu;
3972
3973         mutex_lock(&trace_types_lock);
3974         if (unlikely(tr->current_trace != iter->trace)) {
3975                 /* Close iter->trace before switching to the new current tracer */
3976                 if (iter->trace->close)
3977                         iter->trace->close(iter);
3978                 iter->trace = tr->current_trace;
3979                 /* Reopen the new current tracer */
3980                 if (iter->trace->open)
3981                         iter->trace->open(iter);
3982         }
3983         mutex_unlock(&trace_types_lock);
3984
3985 #ifdef CONFIG_TRACER_MAX_TRACE
3986         if (iter->snapshot && iter->trace->use_max_tr)
3987                 return ERR_PTR(-EBUSY);
3988 #endif
3989
3990         if (*pos != iter->pos) {
3991                 iter->ent = NULL;
3992                 iter->cpu = 0;
3993                 iter->idx = -1;
3994
3995                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3996                         for_each_tracing_cpu(cpu)
3997                                 tracing_iter_reset(iter, cpu);
3998                 } else
3999                         tracing_iter_reset(iter, cpu_file);
4000
4001                 iter->leftover = 0;
4002                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4003                         ;
4004
4005         } else {
4006                 /*
4007                  * If we overflowed the seq_file before, then we want
4008                  * to just reuse the trace_seq buffer again.
4009                  */
4010                 if (iter->leftover)
4011                         p = iter;
4012                 else {
4013                         l = *pos - 1;
4014                         p = s_next(m, p, &l);
4015                 }
4016         }
4017
4018         trace_event_read_lock();
4019         trace_access_lock(cpu_file);
4020         return p;
4021 }
4022
4023 static void s_stop(struct seq_file *m, void *p)
4024 {
4025         struct trace_iterator *iter = m->private;
4026
4027 #ifdef CONFIG_TRACER_MAX_TRACE
4028         if (iter->snapshot && iter->trace->use_max_tr)
4029                 return;
4030 #endif
4031
4032         trace_access_unlock(iter->cpu_file);
4033         trace_event_read_unlock();
4034 }
4035
4036 static void
4037 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4038                       unsigned long *entries, int cpu)
4039 {
4040         unsigned long count;
4041
4042         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4043         /*
4044          * If this buffer has skipped entries, then we hold all
4045          * entries for the trace and we need to ignore the
4046          * ones before the time stamp.
4047          */
4048         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4049                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4050                 /* total is the same as the entries */
4051                 *total = count;
4052         } else
4053                 *total = count +
4054                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4055         *entries = count;
4056 }
4057
4058 static void
4059 get_total_entries(struct array_buffer *buf,
4060                   unsigned long *total, unsigned long *entries)
4061 {
4062         unsigned long t, e;
4063         int cpu;
4064
4065         *total = 0;
4066         *entries = 0;
4067
4068         for_each_tracing_cpu(cpu) {
4069                 get_total_entries_cpu(buf, &t, &e, cpu);
4070                 *total += t;
4071                 *entries += e;
4072         }
4073 }
4074
4075 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4076 {
4077         unsigned long total, entries;
4078
4079         if (!tr)
4080                 tr = &global_trace;
4081
4082         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4083
4084         return entries;
4085 }
4086
4087 unsigned long trace_total_entries(struct trace_array *tr)
4088 {
4089         unsigned long total, entries;
4090
4091         if (!tr)
4092                 tr = &global_trace;
4093
4094         get_total_entries(&tr->array_buffer, &total, &entries);
4095
4096         return entries;
4097 }
4098
4099 static void print_lat_help_header(struct seq_file *m)
4100 {
4101         seq_puts(m, "#                    _------=> CPU#            \n"
4102                     "#                   / _-----=> irqs-off/BH-disabled\n"
4103                     "#                  | / _----=> need-resched    \n"
4104                     "#                  || / _---=> hardirq/softirq \n"
4105                     "#                  ||| / _--=> preempt-depth   \n"
4106                     "#                  |||| / _-=> migrate-disable \n"
4107                     "#                  ||||| /     delay           \n"
4108                     "#  cmd     pid     |||||| time  |   caller     \n"
4109                     "#     \\   /        ||||||  \\    |    /       \n");
4110 }
4111
4112 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4113 {
4114         unsigned long total;
4115         unsigned long entries;
4116
4117         get_total_entries(buf, &total, &entries);
4118         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4119                    entries, total, num_online_cpus());
4120         seq_puts(m, "#\n");
4121 }
4122
4123 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4124                                    unsigned int flags)
4125 {
4126         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4127
4128         print_event_info(buf, m);
4129
4130         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4131         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4132 }
4133
4134 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4135                                        unsigned int flags)
4136 {
4137         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4138         static const char space[] = "            ";
4139         int prec = tgid ? 12 : 2;
4140
4141         print_event_info(buf, m);
4142
4143         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4144         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4145         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4146         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4147         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4148         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4149         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4150         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4151 }
4152
4153 void
4154 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4155 {
4156         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4157         struct array_buffer *buf = iter->array_buffer;
4158         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4159         struct tracer *type = iter->trace;
4160         unsigned long entries;
4161         unsigned long total;
4162         const char *name = type->name;
4163
4164         get_total_entries(buf, &total, &entries);
4165
4166         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4167                    name, init_utsname()->release);
4168         seq_puts(m, "# -----------------------------------"
4169                  "---------------------------------\n");
4170         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4171                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4172                    nsecs_to_usecs(data->saved_latency),
4173                    entries,
4174                    total,
4175                    buf->cpu,
4176                    preempt_model_none()      ? "server" :
4177                    preempt_model_voluntary() ? "desktop" :
4178                    preempt_model_full()      ? "preempt" :
4179                    preempt_model_rt()        ? "preempt_rt" :
4180                    "unknown",
4181                    /* These are reserved for later use */
4182                    0, 0, 0, 0);
4183 #ifdef CONFIG_SMP
4184         seq_printf(m, " #P:%d)\n", num_online_cpus());
4185 #else
4186         seq_puts(m, ")\n");
4187 #endif
4188         seq_puts(m, "#    -----------------\n");
4189         seq_printf(m, "#    | task: %.16s-%d "
4190                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4191                    data->comm, data->pid,
4192                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4193                    data->policy, data->rt_priority);
4194         seq_puts(m, "#    -----------------\n");
4195
4196         if (data->critical_start) {
4197                 seq_puts(m, "#  => started at: ");
4198                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4199                 trace_print_seq(m, &iter->seq);
4200                 seq_puts(m, "\n#  => ended at:   ");
4201                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4202                 trace_print_seq(m, &iter->seq);
4203                 seq_puts(m, "\n#\n");
4204         }
4205
4206         seq_puts(m, "#\n");
4207 }
4208
4209 static void test_cpu_buff_start(struct trace_iterator *iter)
4210 {
4211         struct trace_seq *s = &iter->seq;
4212         struct trace_array *tr = iter->tr;
4213
4214         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4215                 return;
4216
4217         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4218                 return;
4219
4220         if (cpumask_available(iter->started) &&
4221             cpumask_test_cpu(iter->cpu, iter->started))
4222                 return;
4223
4224         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4225                 return;
4226
4227         if (cpumask_available(iter->started))
4228                 cpumask_set_cpu(iter->cpu, iter->started);
4229
4230         /* Don't print started cpu buffer for the first entry of the trace */
4231         if (iter->idx > 1)
4232                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4233                                 iter->cpu);
4234 }
4235
4236 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4237 {
4238         struct trace_array *tr = iter->tr;
4239         struct trace_seq *s = &iter->seq;
4240         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4241         struct trace_entry *entry;
4242         struct trace_event *event;
4243
4244         entry = iter->ent;
4245
4246         test_cpu_buff_start(iter);
4247
4248         event = ftrace_find_event(entry->type);
4249
4250         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4251                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4252                         trace_print_lat_context(iter);
4253                 else
4254                         trace_print_context(iter);
4255         }
4256
4257         if (trace_seq_has_overflowed(s))
4258                 return TRACE_TYPE_PARTIAL_LINE;
4259
4260         if (event) {
4261                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4262                         return print_event_fields(iter, event);
4263                 return event->funcs->trace(iter, sym_flags, event);
4264         }
4265
4266         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4267
4268         return trace_handle_return(s);
4269 }
4270
4271 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4272 {
4273         struct trace_array *tr = iter->tr;
4274         struct trace_seq *s = &iter->seq;
4275         struct trace_entry *entry;
4276         struct trace_event *event;
4277
4278         entry = iter->ent;
4279
4280         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4281                 trace_seq_printf(s, "%d %d %llu ",
4282                                  entry->pid, iter->cpu, iter->ts);
4283
4284         if (trace_seq_has_overflowed(s))
4285                 return TRACE_TYPE_PARTIAL_LINE;
4286
4287         event = ftrace_find_event(entry->type);
4288         if (event)
4289                 return event->funcs->raw(iter, 0, event);
4290
4291         trace_seq_printf(s, "%d ?\n", entry->type);
4292
4293         return trace_handle_return(s);
4294 }
4295
4296 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4297 {
4298         struct trace_array *tr = iter->tr;
4299         struct trace_seq *s = &iter->seq;
4300         unsigned char newline = '\n';
4301         struct trace_entry *entry;
4302         struct trace_event *event;
4303
4304         entry = iter->ent;
4305
4306         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4307                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4308                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4309                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4310                 if (trace_seq_has_overflowed(s))
4311                         return TRACE_TYPE_PARTIAL_LINE;
4312         }
4313
4314         event = ftrace_find_event(entry->type);
4315         if (event) {
4316                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4317                 if (ret != TRACE_TYPE_HANDLED)
4318                         return ret;
4319         }
4320
4321         SEQ_PUT_FIELD(s, newline);
4322
4323         return trace_handle_return(s);
4324 }
4325
4326 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4327 {
4328         struct trace_array *tr = iter->tr;
4329         struct trace_seq *s = &iter->seq;
4330         struct trace_entry *entry;
4331         struct trace_event *event;
4332
4333         entry = iter->ent;
4334
4335         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4336                 SEQ_PUT_FIELD(s, entry->pid);
4337                 SEQ_PUT_FIELD(s, iter->cpu);
4338                 SEQ_PUT_FIELD(s, iter->ts);
4339                 if (trace_seq_has_overflowed(s))
4340                         return TRACE_TYPE_PARTIAL_LINE;
4341         }
4342
4343         event = ftrace_find_event(entry->type);
4344         return event ? event->funcs->binary(iter, 0, event) :
4345                 TRACE_TYPE_HANDLED;
4346 }
4347
4348 int trace_empty(struct trace_iterator *iter)
4349 {
4350         struct ring_buffer_iter *buf_iter;
4351         int cpu;
4352
4353         /* If we are looking at one CPU buffer, only check that one */
4354         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4355                 cpu = iter->cpu_file;
4356                 buf_iter = trace_buffer_iter(iter, cpu);
4357                 if (buf_iter) {
4358                         if (!ring_buffer_iter_empty(buf_iter))
4359                                 return 0;
4360                 } else {
4361                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4362                                 return 0;
4363                 }
4364                 return 1;
4365         }
4366
4367         for_each_tracing_cpu(cpu) {
4368                 buf_iter = trace_buffer_iter(iter, cpu);
4369                 if (buf_iter) {
4370                         if (!ring_buffer_iter_empty(buf_iter))
4371                                 return 0;
4372                 } else {
4373                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4374                                 return 0;
4375                 }
4376         }
4377
4378         return 1;
4379 }
4380
4381 /*  Called with trace_event_read_lock() held. */
4382 enum print_line_t print_trace_line(struct trace_iterator *iter)
4383 {
4384         struct trace_array *tr = iter->tr;
4385         unsigned long trace_flags = tr->trace_flags;
4386         enum print_line_t ret;
4387
4388         if (iter->lost_events) {
4389                 if (iter->lost_events == (unsigned long)-1)
4390                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4391                                          iter->cpu);
4392                 else
4393                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4394                                          iter->cpu, iter->lost_events);
4395                 if (trace_seq_has_overflowed(&iter->seq))
4396                         return TRACE_TYPE_PARTIAL_LINE;
4397         }
4398
4399         if (iter->trace && iter->trace->print_line) {
4400                 ret = iter->trace->print_line(iter);
4401                 if (ret != TRACE_TYPE_UNHANDLED)
4402                         return ret;
4403         }
4404
4405         if (iter->ent->type == TRACE_BPUTS &&
4406                         trace_flags & TRACE_ITER_PRINTK &&
4407                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4408                 return trace_print_bputs_msg_only(iter);
4409
4410         if (iter->ent->type == TRACE_BPRINT &&
4411                         trace_flags & TRACE_ITER_PRINTK &&
4412                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4413                 return trace_print_bprintk_msg_only(iter);
4414
4415         if (iter->ent->type == TRACE_PRINT &&
4416                         trace_flags & TRACE_ITER_PRINTK &&
4417                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4418                 return trace_print_printk_msg_only(iter);
4419
4420         if (trace_flags & TRACE_ITER_BIN)
4421                 return print_bin_fmt(iter);
4422
4423         if (trace_flags & TRACE_ITER_HEX)
4424                 return print_hex_fmt(iter);
4425
4426         if (trace_flags & TRACE_ITER_RAW)
4427                 return print_raw_fmt(iter);
4428
4429         return print_trace_fmt(iter);
4430 }
4431
4432 void trace_latency_header(struct seq_file *m)
4433 {
4434         struct trace_iterator *iter = m->private;
4435         struct trace_array *tr = iter->tr;
4436
4437         /* print nothing if the buffers are empty */
4438         if (trace_empty(iter))
4439                 return;
4440
4441         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4442                 print_trace_header(m, iter);
4443
4444         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4445                 print_lat_help_header(m);
4446 }
4447
4448 void trace_default_header(struct seq_file *m)
4449 {
4450         struct trace_iterator *iter = m->private;
4451         struct trace_array *tr = iter->tr;
4452         unsigned long trace_flags = tr->trace_flags;
4453
4454         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4455                 return;
4456
4457         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4458                 /* print nothing if the buffers are empty */
4459                 if (trace_empty(iter))
4460                         return;
4461                 print_trace_header(m, iter);
4462                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4463                         print_lat_help_header(m);
4464         } else {
4465                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4466                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4467                                 print_func_help_header_irq(iter->array_buffer,
4468                                                            m, trace_flags);
4469                         else
4470                                 print_func_help_header(iter->array_buffer, m,
4471                                                        trace_flags);
4472                 }
4473         }
4474 }
4475
4476 static void test_ftrace_alive(struct seq_file *m)
4477 {
4478         if (!ftrace_is_dead())
4479                 return;
4480         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4481                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4482 }
4483
4484 #ifdef CONFIG_TRACER_MAX_TRACE
4485 static void show_snapshot_main_help(struct seq_file *m)
4486 {
4487         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4488                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4489                     "#                      Takes a snapshot of the main buffer.\n"
4490                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4491                     "#                      (Doesn't have to be '2' works with any number that\n"
4492                     "#                       is not a '0' or '1')\n");
4493 }
4494
4495 static void show_snapshot_percpu_help(struct seq_file *m)
4496 {
4497         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4498 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4499         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4500                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4501 #else
4502         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4503                     "#                     Must use main snapshot file to allocate.\n");
4504 #endif
4505         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4506                     "#                      (Doesn't have to be '2' works with any number that\n"
4507                     "#                       is not a '0' or '1')\n");
4508 }
4509
4510 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4511 {
4512         if (iter->tr->allocated_snapshot)
4513                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4514         else
4515                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4516
4517         seq_puts(m, "# Snapshot commands:\n");
4518         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4519                 show_snapshot_main_help(m);
4520         else
4521                 show_snapshot_percpu_help(m);
4522 }
4523 #else
4524 /* Should never be called */
4525 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4526 #endif
4527
4528 static int s_show(struct seq_file *m, void *v)
4529 {
4530         struct trace_iterator *iter = v;
4531         int ret;
4532
4533         if (iter->ent == NULL) {
4534                 if (iter->tr) {
4535                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4536                         seq_puts(m, "#\n");
4537                         test_ftrace_alive(m);
4538                 }
4539                 if (iter->snapshot && trace_empty(iter))
4540                         print_snapshot_help(m, iter);
4541                 else if (iter->trace && iter->trace->print_header)
4542                         iter->trace->print_header(m);
4543                 else
4544                         trace_default_header(m);
4545
4546         } else if (iter->leftover) {
4547                 /*
4548                  * If we filled the seq_file buffer earlier, we
4549                  * want to just show it now.
4550                  */
4551                 ret = trace_print_seq(m, &iter->seq);
4552
4553                 /* ret should this time be zero, but you never know */
4554                 iter->leftover = ret;
4555
4556         } else {
4557                 ret = print_trace_line(iter);
4558                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4559                         iter->seq.full = 0;
4560                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4561                 }
4562                 ret = trace_print_seq(m, &iter->seq);
4563                 /*
4564                  * If we overflow the seq_file buffer, then it will
4565                  * ask us for this data again at start up.
4566                  * Use that instead.
4567                  *  ret is 0 if seq_file write succeeded.
4568                  *        -1 otherwise.
4569                  */
4570                 iter->leftover = ret;
4571         }
4572
4573         return 0;
4574 }
4575
4576 /*
4577  * Should be used after trace_array_get(), trace_types_lock
4578  * ensures that i_cdev was already initialized.
4579  */
4580 static inline int tracing_get_cpu(struct inode *inode)
4581 {
4582         if (inode->i_cdev) /* See trace_create_cpu_file() */
4583                 return (long)inode->i_cdev - 1;
4584         return RING_BUFFER_ALL_CPUS;
4585 }
4586
4587 static const struct seq_operations tracer_seq_ops = {
4588         .start          = s_start,
4589         .next           = s_next,
4590         .stop           = s_stop,
4591         .show           = s_show,
4592 };
4593
4594 /*
4595  * Note, as iter itself can be allocated and freed in different
4596  * ways, this function is only used to free its content, and not
4597  * the iterator itself. The only requirement to all the allocations
4598  * is that it must zero all fields (kzalloc), as freeing works with
4599  * ethier allocated content or NULL.
4600  */
4601 static void free_trace_iter_content(struct trace_iterator *iter)
4602 {
4603         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4604         if (iter->fmt != static_fmt_buf)
4605                 kfree(iter->fmt);
4606
4607         kfree(iter->temp);
4608         kfree(iter->buffer_iter);
4609         mutex_destroy(&iter->mutex);
4610         free_cpumask_var(iter->started);
4611 }
4612
4613 static struct trace_iterator *
4614 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4615 {
4616         struct trace_array *tr = inode->i_private;
4617         struct trace_iterator *iter;
4618         int cpu;
4619
4620         if (tracing_disabled)
4621                 return ERR_PTR(-ENODEV);
4622
4623         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4624         if (!iter)
4625                 return ERR_PTR(-ENOMEM);
4626
4627         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4628                                     GFP_KERNEL);
4629         if (!iter->buffer_iter)
4630                 goto release;
4631
4632         /*
4633          * trace_find_next_entry() may need to save off iter->ent.
4634          * It will place it into the iter->temp buffer. As most
4635          * events are less than 128, allocate a buffer of that size.
4636          * If one is greater, then trace_find_next_entry() will
4637          * allocate a new buffer to adjust for the bigger iter->ent.
4638          * It's not critical if it fails to get allocated here.
4639          */
4640         iter->temp = kmalloc(128, GFP_KERNEL);
4641         if (iter->temp)
4642                 iter->temp_size = 128;
4643
4644         /*
4645          * trace_event_printf() may need to modify given format
4646          * string to replace %p with %px so that it shows real address
4647          * instead of hash value. However, that is only for the event
4648          * tracing, other tracer may not need. Defer the allocation
4649          * until it is needed.
4650          */
4651         iter->fmt = NULL;
4652         iter->fmt_size = 0;
4653
4654         mutex_lock(&trace_types_lock);
4655         iter->trace = tr->current_trace;
4656
4657         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4658                 goto fail;
4659
4660         iter->tr = tr;
4661
4662 #ifdef CONFIG_TRACER_MAX_TRACE
4663         /* Currently only the top directory has a snapshot */
4664         if (tr->current_trace->print_max || snapshot)
4665                 iter->array_buffer = &tr->max_buffer;
4666         else
4667 #endif
4668                 iter->array_buffer = &tr->array_buffer;
4669         iter->snapshot = snapshot;
4670         iter->pos = -1;
4671         iter->cpu_file = tracing_get_cpu(inode);
4672         mutex_init(&iter->mutex);
4673
4674         /* Notify the tracer early; before we stop tracing. */
4675         if (iter->trace->open)
4676                 iter->trace->open(iter);
4677
4678         /* Annotate start of buffers if we had overruns */
4679         if (ring_buffer_overruns(iter->array_buffer->buffer))
4680                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4681
4682         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4683         if (trace_clocks[tr->clock_id].in_ns)
4684                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4685
4686         /*
4687          * If pause-on-trace is enabled, then stop the trace while
4688          * dumping, unless this is the "snapshot" file
4689          */
4690         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4691                 tracing_stop_tr(tr);
4692
4693         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4694                 for_each_tracing_cpu(cpu) {
4695                         iter->buffer_iter[cpu] =
4696                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4697                                                          cpu, GFP_KERNEL);
4698                 }
4699                 ring_buffer_read_prepare_sync();
4700                 for_each_tracing_cpu(cpu) {
4701                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4702                         tracing_iter_reset(iter, cpu);
4703                 }
4704         } else {
4705                 cpu = iter->cpu_file;
4706                 iter->buffer_iter[cpu] =
4707                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4708                                                  cpu, GFP_KERNEL);
4709                 ring_buffer_read_prepare_sync();
4710                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4711                 tracing_iter_reset(iter, cpu);
4712         }
4713
4714         mutex_unlock(&trace_types_lock);
4715
4716         return iter;
4717
4718  fail:
4719         mutex_unlock(&trace_types_lock);
4720         free_trace_iter_content(iter);
4721 release:
4722         seq_release_private(inode, file);
4723         return ERR_PTR(-ENOMEM);
4724 }
4725
4726 int tracing_open_generic(struct inode *inode, struct file *filp)
4727 {
4728         int ret;
4729
4730         ret = tracing_check_open_get_tr(NULL);
4731         if (ret)
4732                 return ret;
4733
4734         filp->private_data = inode->i_private;
4735         return 0;
4736 }
4737
4738 bool tracing_is_disabled(void)
4739 {
4740         return (tracing_disabled) ? true: false;
4741 }
4742
4743 /*
4744  * Open and update trace_array ref count.
4745  * Must have the current trace_array passed to it.
4746  */
4747 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4748 {
4749         struct trace_array *tr = inode->i_private;
4750         int ret;
4751
4752         ret = tracing_check_open_get_tr(tr);
4753         if (ret)
4754                 return ret;
4755
4756         filp->private_data = inode->i_private;
4757
4758         return 0;
4759 }
4760
4761 /*
4762  * The private pointer of the inode is the trace_event_file.
4763  * Update the tr ref count associated to it.
4764  */
4765 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4766 {
4767         struct trace_event_file *file = inode->i_private;
4768         int ret;
4769
4770         ret = tracing_check_open_get_tr(file->tr);
4771         if (ret)
4772                 return ret;
4773
4774         mutex_lock(&event_mutex);
4775
4776         /* Fail if the file is marked for removal */
4777         if (file->flags & EVENT_FILE_FL_FREED) {
4778                 trace_array_put(file->tr);
4779                 ret = -ENODEV;
4780         } else {
4781                 event_file_get(file);
4782         }
4783
4784         mutex_unlock(&event_mutex);
4785         if (ret)
4786                 return ret;
4787
4788         filp->private_data = inode->i_private;
4789
4790         return 0;
4791 }
4792
4793 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4794 {
4795         struct trace_event_file *file = inode->i_private;
4796
4797         trace_array_put(file->tr);
4798         event_file_put(file);
4799
4800         return 0;
4801 }
4802
4803 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4804 {
4805         tracing_release_file_tr(inode, filp);
4806         return single_release(inode, filp);
4807 }
4808
4809 static int tracing_mark_open(struct inode *inode, struct file *filp)
4810 {
4811         stream_open(inode, filp);
4812         return tracing_open_generic_tr(inode, filp);
4813 }
4814
4815 static int tracing_release(struct inode *inode, struct file *file)
4816 {
4817         struct trace_array *tr = inode->i_private;
4818         struct seq_file *m = file->private_data;
4819         struct trace_iterator *iter;
4820         int cpu;
4821
4822         if (!(file->f_mode & FMODE_READ)) {
4823                 trace_array_put(tr);
4824                 return 0;
4825         }
4826
4827         /* Writes do not use seq_file */
4828         iter = m->private;
4829         mutex_lock(&trace_types_lock);
4830
4831         for_each_tracing_cpu(cpu) {
4832                 if (iter->buffer_iter[cpu])
4833                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4834         }
4835
4836         if (iter->trace && iter->trace->close)
4837                 iter->trace->close(iter);
4838
4839         if (!iter->snapshot && tr->stop_count)
4840                 /* reenable tracing if it was previously enabled */
4841                 tracing_start_tr(tr);
4842
4843         __trace_array_put(tr);
4844
4845         mutex_unlock(&trace_types_lock);
4846
4847         free_trace_iter_content(iter);
4848         seq_release_private(inode, file);
4849
4850         return 0;
4851 }
4852
4853 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4854 {
4855         struct trace_array *tr = inode->i_private;
4856
4857         trace_array_put(tr);
4858         return 0;
4859 }
4860
4861 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4862 {
4863         struct trace_array *tr = inode->i_private;
4864
4865         trace_array_put(tr);
4866
4867         return single_release(inode, file);
4868 }
4869
4870 static int tracing_open(struct inode *inode, struct file *file)
4871 {
4872         struct trace_array *tr = inode->i_private;
4873         struct trace_iterator *iter;
4874         int ret;
4875
4876         ret = tracing_check_open_get_tr(tr);
4877         if (ret)
4878                 return ret;
4879
4880         /* If this file was open for write, then erase contents */
4881         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4882                 int cpu = tracing_get_cpu(inode);
4883                 struct array_buffer *trace_buf = &tr->array_buffer;
4884
4885 #ifdef CONFIG_TRACER_MAX_TRACE
4886                 if (tr->current_trace->print_max)
4887                         trace_buf = &tr->max_buffer;
4888 #endif
4889
4890                 if (cpu == RING_BUFFER_ALL_CPUS)
4891                         tracing_reset_online_cpus(trace_buf);
4892                 else
4893                         tracing_reset_cpu(trace_buf, cpu);
4894         }
4895
4896         if (file->f_mode & FMODE_READ) {
4897                 iter = __tracing_open(inode, file, false);
4898                 if (IS_ERR(iter))
4899                         ret = PTR_ERR(iter);
4900                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4901                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4902         }
4903
4904         if (ret < 0)
4905                 trace_array_put(tr);
4906
4907         return ret;
4908 }
4909
4910 /*
4911  * Some tracers are not suitable for instance buffers.
4912  * A tracer is always available for the global array (toplevel)
4913  * or if it explicitly states that it is.
4914  */
4915 static bool
4916 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4917 {
4918         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4919 }
4920
4921 /* Find the next tracer that this trace array may use */
4922 static struct tracer *
4923 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4924 {
4925         while (t && !trace_ok_for_array(t, tr))
4926                 t = t->next;
4927
4928         return t;
4929 }
4930
4931 static void *
4932 t_next(struct seq_file *m, void *v, loff_t *pos)
4933 {
4934         struct trace_array *tr = m->private;
4935         struct tracer *t = v;
4936
4937         (*pos)++;
4938
4939         if (t)
4940                 t = get_tracer_for_array(tr, t->next);
4941
4942         return t;
4943 }
4944
4945 static void *t_start(struct seq_file *m, loff_t *pos)
4946 {
4947         struct trace_array *tr = m->private;
4948         struct tracer *t;
4949         loff_t l = 0;
4950
4951         mutex_lock(&trace_types_lock);
4952
4953         t = get_tracer_for_array(tr, trace_types);
4954         for (; t && l < *pos; t = t_next(m, t, &l))
4955                         ;
4956
4957         return t;
4958 }
4959
4960 static void t_stop(struct seq_file *m, void *p)
4961 {
4962         mutex_unlock(&trace_types_lock);
4963 }
4964
4965 static int t_show(struct seq_file *m, void *v)
4966 {
4967         struct tracer *t = v;
4968
4969         if (!t)
4970                 return 0;
4971
4972         seq_puts(m, t->name);
4973         if (t->next)
4974                 seq_putc(m, ' ');
4975         else
4976                 seq_putc(m, '\n');
4977
4978         return 0;
4979 }
4980
4981 static const struct seq_operations show_traces_seq_ops = {
4982         .start          = t_start,
4983         .next           = t_next,
4984         .stop           = t_stop,
4985         .show           = t_show,
4986 };
4987
4988 static int show_traces_open(struct inode *inode, struct file *file)
4989 {
4990         struct trace_array *tr = inode->i_private;
4991         struct seq_file *m;
4992         int ret;
4993
4994         ret = tracing_check_open_get_tr(tr);
4995         if (ret)
4996                 return ret;
4997
4998         ret = seq_open(file, &show_traces_seq_ops);
4999         if (ret) {
5000                 trace_array_put(tr);
5001                 return ret;
5002         }
5003
5004         m = file->private_data;
5005         m->private = tr;
5006
5007         return 0;
5008 }
5009
5010 static int show_traces_release(struct inode *inode, struct file *file)
5011 {
5012         struct trace_array *tr = inode->i_private;
5013
5014         trace_array_put(tr);
5015         return seq_release(inode, file);
5016 }
5017
5018 static ssize_t
5019 tracing_write_stub(struct file *filp, const char __user *ubuf,
5020                    size_t count, loff_t *ppos)
5021 {
5022         return count;
5023 }
5024
5025 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5026 {
5027         int ret;
5028
5029         if (file->f_mode & FMODE_READ)
5030                 ret = seq_lseek(file, offset, whence);
5031         else
5032                 file->f_pos = ret = 0;
5033
5034         return ret;
5035 }
5036
5037 static const struct file_operations tracing_fops = {
5038         .open           = tracing_open,
5039         .read           = seq_read,
5040         .read_iter      = seq_read_iter,
5041         .splice_read    = copy_splice_read,
5042         .write          = tracing_write_stub,
5043         .llseek         = tracing_lseek,
5044         .release        = tracing_release,
5045 };
5046
5047 static const struct file_operations show_traces_fops = {
5048         .open           = show_traces_open,
5049         .read           = seq_read,
5050         .llseek         = seq_lseek,
5051         .release        = show_traces_release,
5052 };
5053
5054 static ssize_t
5055 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5056                      size_t count, loff_t *ppos)
5057 {
5058         struct trace_array *tr = file_inode(filp)->i_private;
5059         char *mask_str;
5060         int len;
5061
5062         len = snprintf(NULL, 0, "%*pb\n",
5063                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5064         mask_str = kmalloc(len, GFP_KERNEL);
5065         if (!mask_str)
5066                 return -ENOMEM;
5067
5068         len = snprintf(mask_str, len, "%*pb\n",
5069                        cpumask_pr_args(tr->tracing_cpumask));
5070         if (len >= count) {
5071                 count = -EINVAL;
5072                 goto out_err;
5073         }
5074         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5075
5076 out_err:
5077         kfree(mask_str);
5078
5079         return count;
5080 }
5081
5082 int tracing_set_cpumask(struct trace_array *tr,
5083                         cpumask_var_t tracing_cpumask_new)
5084 {
5085         int cpu;
5086
5087         if (!tr)
5088                 return -EINVAL;
5089
5090         local_irq_disable();
5091         arch_spin_lock(&tr->max_lock);
5092         for_each_tracing_cpu(cpu) {
5093                 /*
5094                  * Increase/decrease the disabled counter if we are
5095                  * about to flip a bit in the cpumask:
5096                  */
5097                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5098                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5099                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5100                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5101 #ifdef CONFIG_TRACER_MAX_TRACE
5102                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5103 #endif
5104                 }
5105                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5106                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5107                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5108                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5109 #ifdef CONFIG_TRACER_MAX_TRACE
5110                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5111 #endif
5112                 }
5113         }
5114         arch_spin_unlock(&tr->max_lock);
5115         local_irq_enable();
5116
5117         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5118
5119         return 0;
5120 }
5121
5122 static ssize_t
5123 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5124                       size_t count, loff_t *ppos)
5125 {
5126         struct trace_array *tr = file_inode(filp)->i_private;
5127         cpumask_var_t tracing_cpumask_new;
5128         int err;
5129
5130         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5131                 return -ENOMEM;
5132
5133         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5134         if (err)
5135                 goto err_free;
5136
5137         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5138         if (err)
5139                 goto err_free;
5140
5141         free_cpumask_var(tracing_cpumask_new);
5142
5143         return count;
5144
5145 err_free:
5146         free_cpumask_var(tracing_cpumask_new);
5147
5148         return err;
5149 }
5150
5151 static const struct file_operations tracing_cpumask_fops = {
5152         .open           = tracing_open_generic_tr,
5153         .read           = tracing_cpumask_read,
5154         .write          = tracing_cpumask_write,
5155         .release        = tracing_release_generic_tr,
5156         .llseek         = generic_file_llseek,
5157 };
5158
5159 static int tracing_trace_options_show(struct seq_file *m, void *v)
5160 {
5161         struct tracer_opt *trace_opts;
5162         struct trace_array *tr = m->private;
5163         u32 tracer_flags;
5164         int i;
5165
5166         mutex_lock(&trace_types_lock);
5167         tracer_flags = tr->current_trace->flags->val;
5168         trace_opts = tr->current_trace->flags->opts;
5169
5170         for (i = 0; trace_options[i]; i++) {
5171                 if (tr->trace_flags & (1 << i))
5172                         seq_printf(m, "%s\n", trace_options[i]);
5173                 else
5174                         seq_printf(m, "no%s\n", trace_options[i]);
5175         }
5176
5177         for (i = 0; trace_opts[i].name; i++) {
5178                 if (tracer_flags & trace_opts[i].bit)
5179                         seq_printf(m, "%s\n", trace_opts[i].name);
5180                 else
5181                         seq_printf(m, "no%s\n", trace_opts[i].name);
5182         }
5183         mutex_unlock(&trace_types_lock);
5184
5185         return 0;
5186 }
5187
5188 static int __set_tracer_option(struct trace_array *tr,
5189                                struct tracer_flags *tracer_flags,
5190                                struct tracer_opt *opts, int neg)
5191 {
5192         struct tracer *trace = tracer_flags->trace;
5193         int ret;
5194
5195         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5196         if (ret)
5197                 return ret;
5198
5199         if (neg)
5200                 tracer_flags->val &= ~opts->bit;
5201         else
5202                 tracer_flags->val |= opts->bit;
5203         return 0;
5204 }
5205
5206 /* Try to assign a tracer specific option */
5207 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5208 {
5209         struct tracer *trace = tr->current_trace;
5210         struct tracer_flags *tracer_flags = trace->flags;
5211         struct tracer_opt *opts = NULL;
5212         int i;
5213
5214         for (i = 0; tracer_flags->opts[i].name; i++) {
5215                 opts = &tracer_flags->opts[i];
5216
5217                 if (strcmp(cmp, opts->name) == 0)
5218                         return __set_tracer_option(tr, trace->flags, opts, neg);
5219         }
5220
5221         return -EINVAL;
5222 }
5223
5224 /* Some tracers require overwrite to stay enabled */
5225 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5226 {
5227         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5228                 return -1;
5229
5230         return 0;
5231 }
5232
5233 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5234 {
5235         if ((mask == TRACE_ITER_RECORD_TGID) ||
5236             (mask == TRACE_ITER_RECORD_CMD))
5237                 lockdep_assert_held(&event_mutex);
5238
5239         /* do nothing if flag is already set */
5240         if (!!(tr->trace_flags & mask) == !!enabled)
5241                 return 0;
5242
5243         /* Give the tracer a chance to approve the change */
5244         if (tr->current_trace->flag_changed)
5245                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5246                         return -EINVAL;
5247
5248         if (enabled)
5249                 tr->trace_flags |= mask;
5250         else
5251                 tr->trace_flags &= ~mask;
5252
5253         if (mask == TRACE_ITER_RECORD_CMD)
5254                 trace_event_enable_cmd_record(enabled);
5255
5256         if (mask == TRACE_ITER_RECORD_TGID) {
5257
5258                 if (trace_alloc_tgid_map() < 0) {
5259                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5260                         return -ENOMEM;
5261                 }
5262
5263                 trace_event_enable_tgid_record(enabled);
5264         }
5265
5266         if (mask == TRACE_ITER_EVENT_FORK)
5267                 trace_event_follow_fork(tr, enabled);
5268
5269         if (mask == TRACE_ITER_FUNC_FORK)
5270                 ftrace_pid_follow_fork(tr, enabled);
5271
5272         if (mask == TRACE_ITER_OVERWRITE) {
5273                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5274 #ifdef CONFIG_TRACER_MAX_TRACE
5275                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5276 #endif
5277         }
5278
5279         if (mask == TRACE_ITER_PRINTK) {
5280                 trace_printk_start_stop_comm(enabled);
5281                 trace_printk_control(enabled);
5282         }
5283
5284         return 0;
5285 }
5286
5287 int trace_set_options(struct trace_array *tr, char *option)
5288 {
5289         char *cmp;
5290         int neg = 0;
5291         int ret;
5292         size_t orig_len = strlen(option);
5293         int len;
5294
5295         cmp = strstrip(option);
5296
5297         len = str_has_prefix(cmp, "no");
5298         if (len)
5299                 neg = 1;
5300
5301         cmp += len;
5302
5303         mutex_lock(&event_mutex);
5304         mutex_lock(&trace_types_lock);
5305
5306         ret = match_string(trace_options, -1, cmp);
5307         /* If no option could be set, test the specific tracer options */
5308         if (ret < 0)
5309                 ret = set_tracer_option(tr, cmp, neg);
5310         else
5311                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5312
5313         mutex_unlock(&trace_types_lock);
5314         mutex_unlock(&event_mutex);
5315
5316         /*
5317          * If the first trailing whitespace is replaced with '\0' by strstrip,
5318          * turn it back into a space.
5319          */
5320         if (orig_len > strlen(option))
5321                 option[strlen(option)] = ' ';
5322
5323         return ret;
5324 }
5325
5326 static void __init apply_trace_boot_options(void)
5327 {
5328         char *buf = trace_boot_options_buf;
5329         char *option;
5330
5331         while (true) {
5332                 option = strsep(&buf, ",");
5333
5334                 if (!option)
5335                         break;
5336
5337                 if (*option)
5338                         trace_set_options(&global_trace, option);
5339
5340                 /* Put back the comma to allow this to be called again */
5341                 if (buf)
5342                         *(buf - 1) = ',';
5343         }
5344 }
5345
5346 static ssize_t
5347 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5348                         size_t cnt, loff_t *ppos)
5349 {
5350         struct seq_file *m = filp->private_data;
5351         struct trace_array *tr = m->private;
5352         char buf[64];
5353         int ret;
5354
5355         if (cnt >= sizeof(buf))
5356                 return -EINVAL;
5357
5358         if (copy_from_user(buf, ubuf, cnt))
5359                 return -EFAULT;
5360
5361         buf[cnt] = 0;
5362
5363         ret = trace_set_options(tr, buf);
5364         if (ret < 0)
5365                 return ret;
5366
5367         *ppos += cnt;
5368
5369         return cnt;
5370 }
5371
5372 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5373 {
5374         struct trace_array *tr = inode->i_private;
5375         int ret;
5376
5377         ret = tracing_check_open_get_tr(tr);
5378         if (ret)
5379                 return ret;
5380
5381         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5382         if (ret < 0)
5383                 trace_array_put(tr);
5384
5385         return ret;
5386 }
5387
5388 static const struct file_operations tracing_iter_fops = {
5389         .open           = tracing_trace_options_open,
5390         .read           = seq_read,
5391         .llseek         = seq_lseek,
5392         .release        = tracing_single_release_tr,
5393         .write          = tracing_trace_options_write,
5394 };
5395
5396 static const char readme_msg[] =
5397         "tracing mini-HOWTO:\n\n"
5398         "# echo 0 > tracing_on : quick way to disable tracing\n"
5399         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5400         " Important files:\n"
5401         "  trace\t\t\t- The static contents of the buffer\n"
5402         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5403         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5404         "  current_tracer\t- function and latency tracers\n"
5405         "  available_tracers\t- list of configured tracers for current_tracer\n"
5406         "  error_log\t- error log for failed commands (that support it)\n"
5407         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5408         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5409         "  trace_clock\t\t- change the clock used to order events\n"
5410         "       local:   Per cpu clock but may not be synced across CPUs\n"
5411         "      global:   Synced across CPUs but slows tracing down.\n"
5412         "     counter:   Not a clock, but just an increment\n"
5413         "      uptime:   Jiffy counter from time of boot\n"
5414         "        perf:   Same clock that perf events use\n"
5415 #ifdef CONFIG_X86_64
5416         "     x86-tsc:   TSC cycle counter\n"
5417 #endif
5418         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5419         "       delta:   Delta difference against a buffer-wide timestamp\n"
5420         "    absolute:   Absolute (standalone) timestamp\n"
5421         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5422         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5423         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5424         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5425         "\t\t\t  Remove sub-buffer with rmdir\n"
5426         "  trace_options\t\t- Set format or modify how tracing happens\n"
5427         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5428         "\t\t\t  option name\n"
5429         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5430 #ifdef CONFIG_DYNAMIC_FTRACE
5431         "\n  available_filter_functions - list of functions that can be filtered on\n"
5432         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5433         "\t\t\t  functions\n"
5434         "\t     accepts: func_full_name or glob-matching-pattern\n"
5435         "\t     modules: Can select a group via module\n"
5436         "\t      Format: :mod:<module-name>\n"
5437         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5438         "\t    triggers: a command to perform when function is hit\n"
5439         "\t      Format: <function>:<trigger>[:count]\n"
5440         "\t     trigger: traceon, traceoff\n"
5441         "\t\t      enable_event:<system>:<event>\n"
5442         "\t\t      disable_event:<system>:<event>\n"
5443 #ifdef CONFIG_STACKTRACE
5444         "\t\t      stacktrace\n"
5445 #endif
5446 #ifdef CONFIG_TRACER_SNAPSHOT
5447         "\t\t      snapshot\n"
5448 #endif
5449         "\t\t      dump\n"
5450         "\t\t      cpudump\n"
5451         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5452         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5453         "\t     The first one will disable tracing every time do_fault is hit\n"
5454         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5455         "\t       The first time do trap is hit and it disables tracing, the\n"
5456         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5457         "\t       the counter will not decrement. It only decrements when the\n"
5458         "\t       trigger did work\n"
5459         "\t     To remove trigger without count:\n"
5460         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5461         "\t     To remove trigger with a count:\n"
5462         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5463         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5464         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5465         "\t    modules: Can select a group via module command :mod:\n"
5466         "\t    Does not accept triggers\n"
5467 #endif /* CONFIG_DYNAMIC_FTRACE */
5468 #ifdef CONFIG_FUNCTION_TRACER
5469         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5470         "\t\t    (function)\n"
5471         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5472         "\t\t    (function)\n"
5473 #endif
5474 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5475         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5476         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5477         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5481         "\t\t\t  snapshot buffer. Read the contents for more\n"
5482         "\t\t\t  information\n"
5483 #endif
5484 #ifdef CONFIG_STACK_TRACER
5485         "  stack_trace\t\t- Shows the max stack trace when active\n"
5486         "  stack_max_size\t- Shows current max stack size that was traced\n"
5487         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5488         "\t\t\t  new trace)\n"
5489 #ifdef CONFIG_DYNAMIC_FTRACE
5490         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5491         "\t\t\t  traces\n"
5492 #endif
5493 #endif /* CONFIG_STACK_TRACER */
5494 #ifdef CONFIG_DYNAMIC_EVENTS
5495         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5496         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5497 #endif
5498 #ifdef CONFIG_KPROBE_EVENTS
5499         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5500         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5501 #endif
5502 #ifdef CONFIG_UPROBE_EVENTS
5503         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5504         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5505 #endif
5506 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5507     defined(CONFIG_FPROBE_EVENTS)
5508         "\t  accepts: event-definitions (one definition per line)\n"
5509 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5510         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5511         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5512 #endif
5513 #ifdef CONFIG_FPROBE_EVENTS
5514         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5515         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5516 #endif
5517 #ifdef CONFIG_HIST_TRIGGERS
5518         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5519 #endif
5520         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5521         "\t           -:[<group>/][<event>]\n"
5522 #ifdef CONFIG_KPROBE_EVENTS
5523         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5524   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5525 #endif
5526 #ifdef CONFIG_UPROBE_EVENTS
5527   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5528 #endif
5529         "\t     args: <name>=fetcharg[:type]\n"
5530         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5531 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5532         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5533 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5534         "\t           <argname>[->field[->field|.field...]],\n"
5535 #endif
5536 #else
5537         "\t           $stack<index>, $stack, $retval, $comm,\n"
5538 #endif
5539         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5540         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5541         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5542         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5543         "\t           symstr, <type>\\[<array-size>\\]\n"
5544 #ifdef CONFIG_HIST_TRIGGERS
5545         "\t    field: <stype> <name>;\n"
5546         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5547         "\t           [unsigned] char/int/long\n"
5548 #endif
5549         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5550         "\t            of the <attached-group>/<attached-event>.\n"
5551 #endif
5552         "  events/\t\t- Directory containing all trace event subsystems:\n"
5553         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5554         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5555         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5556         "\t\t\t  events\n"
5557         "      filter\t\t- If set, only events passing filter are traced\n"
5558         "  events/<system>/<event>/\t- Directory containing control files for\n"
5559         "\t\t\t  <event>:\n"
5560         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5561         "      filter\t\t- If set, only events passing filter are traced\n"
5562         "      trigger\t\t- If set, a command to perform when event is hit\n"
5563         "\t    Format: <trigger>[:count][if <filter>]\n"
5564         "\t   trigger: traceon, traceoff\n"
5565         "\t            enable_event:<system>:<event>\n"
5566         "\t            disable_event:<system>:<event>\n"
5567 #ifdef CONFIG_HIST_TRIGGERS
5568         "\t            enable_hist:<system>:<event>\n"
5569         "\t            disable_hist:<system>:<event>\n"
5570 #endif
5571 #ifdef CONFIG_STACKTRACE
5572         "\t\t    stacktrace\n"
5573 #endif
5574 #ifdef CONFIG_TRACER_SNAPSHOT
5575         "\t\t    snapshot\n"
5576 #endif
5577 #ifdef CONFIG_HIST_TRIGGERS
5578         "\t\t    hist (see below)\n"
5579 #endif
5580         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5581         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5582         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5583         "\t                  events/block/block_unplug/trigger\n"
5584         "\t   The first disables tracing every time block_unplug is hit.\n"
5585         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5586         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5587         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5588         "\t   Like function triggers, the counter is only decremented if it\n"
5589         "\t    enabled or disabled tracing.\n"
5590         "\t   To remove a trigger without a count:\n"
5591         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5592         "\t   To remove a trigger with a count:\n"
5593         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5594         "\t   Filters can be ignored when removing a trigger.\n"
5595 #ifdef CONFIG_HIST_TRIGGERS
5596         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5597         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5598         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5599         "\t            [:values=<field1[,field2,...]>]\n"
5600         "\t            [:sort=<field1[,field2,...]>]\n"
5601         "\t            [:size=#entries]\n"
5602         "\t            [:pause][:continue][:clear]\n"
5603         "\t            [:name=histname1]\n"
5604         "\t            [:nohitcount]\n"
5605         "\t            [:<handler>.<action>]\n"
5606         "\t            [if <filter>]\n\n"
5607         "\t    Note, special fields can be used as well:\n"
5608         "\t            common_timestamp - to record current timestamp\n"
5609         "\t            common_cpu - to record the CPU the event happened on\n"
5610         "\n"
5611         "\t    A hist trigger variable can be:\n"
5612         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5613         "\t        - a reference to another variable e.g. y=$x,\n"
5614         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5615         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5616         "\n"
5617         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5618         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5619         "\t    variable reference, field or numeric literal.\n"
5620         "\n"
5621         "\t    When a matching event is hit, an entry is added to a hash\n"
5622         "\t    table using the key(s) and value(s) named, and the value of a\n"
5623         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5624         "\t    correspond to fields in the event's format description.  Keys\n"
5625         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5626         "\t    Compound keys consisting of up to two fields can be specified\n"
5627         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5628         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5629         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5630         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5631         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5632         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5633         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5634         "\t    its histogram data will be shared with other triggers of the\n"
5635         "\t    same name, and trigger hits will update this common data.\n\n"
5636         "\t    Reading the 'hist' file for the event will dump the hash\n"
5637         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5638         "\t    triggers attached to an event, there will be a table for each\n"
5639         "\t    trigger in the output.  The table displayed for a named\n"
5640         "\t    trigger will be the same as any other instance having the\n"
5641         "\t    same name.  The default format used to display a given field\n"
5642         "\t    can be modified by appending any of the following modifiers\n"
5643         "\t    to the field name, as applicable:\n\n"
5644         "\t            .hex        display a number as a hex value\n"
5645         "\t            .sym        display an address as a symbol\n"
5646         "\t            .sym-offset display an address as a symbol and offset\n"
5647         "\t            .execname   display a common_pid as a program name\n"
5648         "\t            .syscall    display a syscall id as a syscall name\n"
5649         "\t            .log2       display log2 value rather than raw number\n"
5650         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5651         "\t            .usecs      display a common_timestamp in microseconds\n"
5652         "\t            .percent    display a number of percentage value\n"
5653         "\t            .graph      display a bar-graph of a value\n\n"
5654         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5655         "\t    trigger or to start a hist trigger but not log any events\n"
5656         "\t    until told to do so.  'continue' can be used to start or\n"
5657         "\t    restart a paused hist trigger.\n\n"
5658         "\t    The 'clear' parameter will clear the contents of a running\n"
5659         "\t    hist trigger and leave its current paused/active state\n"
5660         "\t    unchanged.\n\n"
5661         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5662         "\t    raw hitcount in the histogram.\n\n"
5663         "\t    The enable_hist and disable_hist triggers can be used to\n"
5664         "\t    have one event conditionally start and stop another event's\n"
5665         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5666         "\t    the enable_event and disable_event triggers.\n\n"
5667         "\t    Hist trigger handlers and actions are executed whenever a\n"
5668         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5669         "\t        <handler>.<action>\n\n"
5670         "\t    The available handlers are:\n\n"
5671         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5672         "\t        onmax(var)               - invoke if var exceeds current max\n"
5673         "\t        onchange(var)            - invoke action if var changes\n\n"
5674         "\t    The available actions are:\n\n"
5675         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5676         "\t        save(field,...)                      - save current event fields\n"
5677 #ifdef CONFIG_TRACER_SNAPSHOT
5678         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5679 #endif
5680 #ifdef CONFIG_SYNTH_EVENTS
5681         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5682         "\t  Write into this file to define/undefine new synthetic events.\n"
5683         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5684 #endif
5685 #endif
5686 ;
5687
5688 static ssize_t
5689 tracing_readme_read(struct file *filp, char __user *ubuf,
5690                        size_t cnt, loff_t *ppos)
5691 {
5692         return simple_read_from_buffer(ubuf, cnt, ppos,
5693                                         readme_msg, strlen(readme_msg));
5694 }
5695
5696 static const struct file_operations tracing_readme_fops = {
5697         .open           = tracing_open_generic,
5698         .read           = tracing_readme_read,
5699         .llseek         = generic_file_llseek,
5700 };
5701
5702 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5703 static union trace_eval_map_item *
5704 update_eval_map(union trace_eval_map_item *ptr)
5705 {
5706         if (!ptr->map.eval_string) {
5707                 if (ptr->tail.next) {
5708                         ptr = ptr->tail.next;
5709                         /* Set ptr to the next real item (skip head) */
5710                         ptr++;
5711                 } else
5712                         return NULL;
5713         }
5714         return ptr;
5715 }
5716
5717 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5718 {
5719         union trace_eval_map_item *ptr = v;
5720
5721         /*
5722          * Paranoid! If ptr points to end, we don't want to increment past it.
5723          * This really should never happen.
5724          */
5725         (*pos)++;
5726         ptr = update_eval_map(ptr);
5727         if (WARN_ON_ONCE(!ptr))
5728                 return NULL;
5729
5730         ptr++;
5731         ptr = update_eval_map(ptr);
5732
5733         return ptr;
5734 }
5735
5736 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5737 {
5738         union trace_eval_map_item *v;
5739         loff_t l = 0;
5740
5741         mutex_lock(&trace_eval_mutex);
5742
5743         v = trace_eval_maps;
5744         if (v)
5745                 v++;
5746
5747         while (v && l < *pos) {
5748                 v = eval_map_next(m, v, &l);
5749         }
5750
5751         return v;
5752 }
5753
5754 static void eval_map_stop(struct seq_file *m, void *v)
5755 {
5756         mutex_unlock(&trace_eval_mutex);
5757 }
5758
5759 static int eval_map_show(struct seq_file *m, void *v)
5760 {
5761         union trace_eval_map_item *ptr = v;
5762
5763         seq_printf(m, "%s %ld (%s)\n",
5764                    ptr->map.eval_string, ptr->map.eval_value,
5765                    ptr->map.system);
5766
5767         return 0;
5768 }
5769
5770 static const struct seq_operations tracing_eval_map_seq_ops = {
5771         .start          = eval_map_start,
5772         .next           = eval_map_next,
5773         .stop           = eval_map_stop,
5774         .show           = eval_map_show,
5775 };
5776
5777 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5778 {
5779         int ret;
5780
5781         ret = tracing_check_open_get_tr(NULL);
5782         if (ret)
5783                 return ret;
5784
5785         return seq_open(filp, &tracing_eval_map_seq_ops);
5786 }
5787
5788 static const struct file_operations tracing_eval_map_fops = {
5789         .open           = tracing_eval_map_open,
5790         .read           = seq_read,
5791         .llseek         = seq_lseek,
5792         .release        = seq_release,
5793 };
5794
5795 static inline union trace_eval_map_item *
5796 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5797 {
5798         /* Return tail of array given the head */
5799         return ptr + ptr->head.length + 1;
5800 }
5801
5802 static void
5803 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5804                            int len)
5805 {
5806         struct trace_eval_map **stop;
5807         struct trace_eval_map **map;
5808         union trace_eval_map_item *map_array;
5809         union trace_eval_map_item *ptr;
5810
5811         stop = start + len;
5812
5813         /*
5814          * The trace_eval_maps contains the map plus a head and tail item,
5815          * where the head holds the module and length of array, and the
5816          * tail holds a pointer to the next list.
5817          */
5818         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5819         if (!map_array) {
5820                 pr_warn("Unable to allocate trace eval mapping\n");
5821                 return;
5822         }
5823
5824         mutex_lock(&trace_eval_mutex);
5825
5826         if (!trace_eval_maps)
5827                 trace_eval_maps = map_array;
5828         else {
5829                 ptr = trace_eval_maps;
5830                 for (;;) {
5831                         ptr = trace_eval_jmp_to_tail(ptr);
5832                         if (!ptr->tail.next)
5833                                 break;
5834                         ptr = ptr->tail.next;
5835
5836                 }
5837                 ptr->tail.next = map_array;
5838         }
5839         map_array->head.mod = mod;
5840         map_array->head.length = len;
5841         map_array++;
5842
5843         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5844                 map_array->map = **map;
5845                 map_array++;
5846         }
5847         memset(map_array, 0, sizeof(*map_array));
5848
5849         mutex_unlock(&trace_eval_mutex);
5850 }
5851
5852 static void trace_create_eval_file(struct dentry *d_tracer)
5853 {
5854         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5855                           NULL, &tracing_eval_map_fops);
5856 }
5857
5858 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5859 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5860 static inline void trace_insert_eval_map_file(struct module *mod,
5861                               struct trace_eval_map **start, int len) { }
5862 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5863
5864 static void trace_insert_eval_map(struct module *mod,
5865                                   struct trace_eval_map **start, int len)
5866 {
5867         struct trace_eval_map **map;
5868
5869         if (len <= 0)
5870                 return;
5871
5872         map = start;
5873
5874         trace_event_eval_update(map, len);
5875
5876         trace_insert_eval_map_file(mod, start, len);
5877 }
5878
5879 static ssize_t
5880 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5881                        size_t cnt, loff_t *ppos)
5882 {
5883         struct trace_array *tr = filp->private_data;
5884         char buf[MAX_TRACER_SIZE+2];
5885         int r;
5886
5887         mutex_lock(&trace_types_lock);
5888         r = sprintf(buf, "%s\n", tr->current_trace->name);
5889         mutex_unlock(&trace_types_lock);
5890
5891         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5892 }
5893
5894 int tracer_init(struct tracer *t, struct trace_array *tr)
5895 {
5896         tracing_reset_online_cpus(&tr->array_buffer);
5897         return t->init(tr);
5898 }
5899
5900 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5901 {
5902         int cpu;
5903
5904         for_each_tracing_cpu(cpu)
5905                 per_cpu_ptr(buf->data, cpu)->entries = val;
5906 }
5907
5908 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5909 {
5910         if (cpu == RING_BUFFER_ALL_CPUS) {
5911                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5912         } else {
5913                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5914         }
5915 }
5916
5917 #ifdef CONFIG_TRACER_MAX_TRACE
5918 /* resize @tr's buffer to the size of @size_tr's entries */
5919 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5920                                         struct array_buffer *size_buf, int cpu_id)
5921 {
5922         int cpu, ret = 0;
5923
5924         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5925                 for_each_tracing_cpu(cpu) {
5926                         ret = ring_buffer_resize(trace_buf->buffer,
5927                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5928                         if (ret < 0)
5929                                 break;
5930                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5931                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5932                 }
5933         } else {
5934                 ret = ring_buffer_resize(trace_buf->buffer,
5935                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5936                 if (ret == 0)
5937                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5938                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5939         }
5940
5941         return ret;
5942 }
5943 #endif /* CONFIG_TRACER_MAX_TRACE */
5944
5945 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5946                                         unsigned long size, int cpu)
5947 {
5948         int ret;
5949
5950         /*
5951          * If kernel or user changes the size of the ring buffer
5952          * we use the size that was given, and we can forget about
5953          * expanding it later.
5954          */
5955         trace_set_ring_buffer_expanded(tr);
5956
5957         /* May be called before buffers are initialized */
5958         if (!tr->array_buffer.buffer)
5959                 return 0;
5960
5961         /* Do not allow tracing while resizing ring buffer */
5962         tracing_stop_tr(tr);
5963
5964         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5965         if (ret < 0)
5966                 goto out_start;
5967
5968 #ifdef CONFIG_TRACER_MAX_TRACE
5969         if (!tr->allocated_snapshot)
5970                 goto out;
5971
5972         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5973         if (ret < 0) {
5974                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5975                                                      &tr->array_buffer, cpu);
5976                 if (r < 0) {
5977                         /*
5978                          * AARGH! We are left with different
5979                          * size max buffer!!!!
5980                          * The max buffer is our "snapshot" buffer.
5981                          * When a tracer needs a snapshot (one of the
5982                          * latency tracers), it swaps the max buffer
5983                          * with the saved snap shot. We succeeded to
5984                          * update the size of the main buffer, but failed to
5985                          * update the size of the max buffer. But when we tried
5986                          * to reset the main buffer to the original size, we
5987                          * failed there too. This is very unlikely to
5988                          * happen, but if it does, warn and kill all
5989                          * tracing.
5990                          */
5991                         WARN_ON(1);
5992                         tracing_disabled = 1;
5993                 }
5994                 goto out_start;
5995         }
5996
5997         update_buffer_entries(&tr->max_buffer, cpu);
5998
5999  out:
6000 #endif /* CONFIG_TRACER_MAX_TRACE */
6001
6002         update_buffer_entries(&tr->array_buffer, cpu);
6003  out_start:
6004         tracing_start_tr(tr);
6005         return ret;
6006 }
6007
6008 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6009                                   unsigned long size, int cpu_id)
6010 {
6011         int ret;
6012
6013         mutex_lock(&trace_types_lock);
6014
6015         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6016                 /* make sure, this cpu is enabled in the mask */
6017                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6018                         ret = -EINVAL;
6019                         goto out;
6020                 }
6021         }
6022
6023         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6024         if (ret < 0)
6025                 ret = -ENOMEM;
6026
6027 out:
6028         mutex_unlock(&trace_types_lock);
6029
6030         return ret;
6031 }
6032
6033
6034 /**
6035  * tracing_update_buffers - used by tracing facility to expand ring buffers
6036  * @tr: The tracing instance
6037  *
6038  * To save on memory when the tracing is never used on a system with it
6039  * configured in. The ring buffers are set to a minimum size. But once
6040  * a user starts to use the tracing facility, then they need to grow
6041  * to their default size.
6042  *
6043  * This function is to be called when a tracer is about to be used.
6044  */
6045 int tracing_update_buffers(struct trace_array *tr)
6046 {
6047         int ret = 0;
6048
6049         mutex_lock(&trace_types_lock);
6050         if (!tr->ring_buffer_expanded)
6051                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6052                                                 RING_BUFFER_ALL_CPUS);
6053         mutex_unlock(&trace_types_lock);
6054
6055         return ret;
6056 }
6057
6058 struct trace_option_dentry;
6059
6060 static void
6061 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6062
6063 /*
6064  * Used to clear out the tracer before deletion of an instance.
6065  * Must have trace_types_lock held.
6066  */
6067 static void tracing_set_nop(struct trace_array *tr)
6068 {
6069         if (tr->current_trace == &nop_trace)
6070                 return;
6071         
6072         tr->current_trace->enabled--;
6073
6074         if (tr->current_trace->reset)
6075                 tr->current_trace->reset(tr);
6076
6077         tr->current_trace = &nop_trace;
6078 }
6079
6080 static bool tracer_options_updated;
6081
6082 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6083 {
6084         /* Only enable if the directory has been created already. */
6085         if (!tr->dir)
6086                 return;
6087
6088         /* Only create trace option files after update_tracer_options finish */
6089         if (!tracer_options_updated)
6090                 return;
6091
6092         create_trace_option_files(tr, t);
6093 }
6094
6095 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6096 {
6097         struct tracer *t;
6098 #ifdef CONFIG_TRACER_MAX_TRACE
6099         bool had_max_tr;
6100 #endif
6101         int ret = 0;
6102
6103         mutex_lock(&trace_types_lock);
6104
6105         if (!tr->ring_buffer_expanded) {
6106                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6107                                                 RING_BUFFER_ALL_CPUS);
6108                 if (ret < 0)
6109                         goto out;
6110                 ret = 0;
6111         }
6112
6113         for (t = trace_types; t; t = t->next) {
6114                 if (strcmp(t->name, buf) == 0)
6115                         break;
6116         }
6117         if (!t) {
6118                 ret = -EINVAL;
6119                 goto out;
6120         }
6121         if (t == tr->current_trace)
6122                 goto out;
6123
6124 #ifdef CONFIG_TRACER_SNAPSHOT
6125         if (t->use_max_tr) {
6126                 local_irq_disable();
6127                 arch_spin_lock(&tr->max_lock);
6128                 if (tr->cond_snapshot)
6129                         ret = -EBUSY;
6130                 arch_spin_unlock(&tr->max_lock);
6131                 local_irq_enable();
6132                 if (ret)
6133                         goto out;
6134         }
6135 #endif
6136         /* Some tracers won't work on kernel command line */
6137         if (system_state < SYSTEM_RUNNING && t->noboot) {
6138                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6139                         t->name);
6140                 goto out;
6141         }
6142
6143         /* Some tracers are only allowed for the top level buffer */
6144         if (!trace_ok_for_array(t, tr)) {
6145                 ret = -EINVAL;
6146                 goto out;
6147         }
6148
6149         /* If trace pipe files are being read, we can't change the tracer */
6150         if (tr->trace_ref) {
6151                 ret = -EBUSY;
6152                 goto out;
6153         }
6154
6155         trace_branch_disable();
6156
6157         tr->current_trace->enabled--;
6158
6159         if (tr->current_trace->reset)
6160                 tr->current_trace->reset(tr);
6161
6162 #ifdef CONFIG_TRACER_MAX_TRACE
6163         had_max_tr = tr->current_trace->use_max_tr;
6164
6165         /* Current trace needs to be nop_trace before synchronize_rcu */
6166         tr->current_trace = &nop_trace;
6167
6168         if (had_max_tr && !t->use_max_tr) {
6169                 /*
6170                  * We need to make sure that the update_max_tr sees that
6171                  * current_trace changed to nop_trace to keep it from
6172                  * swapping the buffers after we resize it.
6173                  * The update_max_tr is called from interrupts disabled
6174                  * so a synchronized_sched() is sufficient.
6175                  */
6176                 synchronize_rcu();
6177                 free_snapshot(tr);
6178                 tracing_disarm_snapshot(tr);
6179         }
6180
6181         if (!had_max_tr && t->use_max_tr) {
6182                 ret = tracing_arm_snapshot_locked(tr);
6183                 if (ret)
6184                         goto out;
6185         }
6186 #else
6187         tr->current_trace = &nop_trace;
6188 #endif
6189
6190         if (t->init) {
6191                 ret = tracer_init(t, tr);
6192                 if (ret) {
6193 #ifdef CONFIG_TRACER_MAX_TRACE
6194                         if (t->use_max_tr)
6195                                 tracing_disarm_snapshot(tr);
6196 #endif
6197                         goto out;
6198                 }
6199         }
6200
6201         tr->current_trace = t;
6202         tr->current_trace->enabled++;
6203         trace_branch_enable(tr);
6204  out:
6205         mutex_unlock(&trace_types_lock);
6206
6207         return ret;
6208 }
6209
6210 static ssize_t
6211 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6212                         size_t cnt, loff_t *ppos)
6213 {
6214         struct trace_array *tr = filp->private_data;
6215         char buf[MAX_TRACER_SIZE+1];
6216         char *name;
6217         size_t ret;
6218         int err;
6219
6220         ret = cnt;
6221
6222         if (cnt > MAX_TRACER_SIZE)
6223                 cnt = MAX_TRACER_SIZE;
6224
6225         if (copy_from_user(buf, ubuf, cnt))
6226                 return -EFAULT;
6227
6228         buf[cnt] = 0;
6229
6230         name = strim(buf);
6231
6232         err = tracing_set_tracer(tr, name);
6233         if (err)
6234                 return err;
6235
6236         *ppos += ret;
6237
6238         return ret;
6239 }
6240
6241 static ssize_t
6242 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6243                    size_t cnt, loff_t *ppos)
6244 {
6245         char buf[64];
6246         int r;
6247
6248         r = snprintf(buf, sizeof(buf), "%ld\n",
6249                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6250         if (r > sizeof(buf))
6251                 r = sizeof(buf);
6252         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6253 }
6254
6255 static ssize_t
6256 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6257                     size_t cnt, loff_t *ppos)
6258 {
6259         unsigned long val;
6260         int ret;
6261
6262         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6263         if (ret)
6264                 return ret;
6265
6266         *ptr = val * 1000;
6267
6268         return cnt;
6269 }
6270
6271 static ssize_t
6272 tracing_thresh_read(struct file *filp, char __user *ubuf,
6273                     size_t cnt, loff_t *ppos)
6274 {
6275         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6276 }
6277
6278 static ssize_t
6279 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6280                      size_t cnt, loff_t *ppos)
6281 {
6282         struct trace_array *tr = filp->private_data;
6283         int ret;
6284
6285         mutex_lock(&trace_types_lock);
6286         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6287         if (ret < 0)
6288                 goto out;
6289
6290         if (tr->current_trace->update_thresh) {
6291                 ret = tr->current_trace->update_thresh(tr);
6292                 if (ret < 0)
6293                         goto out;
6294         }
6295
6296         ret = cnt;
6297 out:
6298         mutex_unlock(&trace_types_lock);
6299
6300         return ret;
6301 }
6302
6303 #ifdef CONFIG_TRACER_MAX_TRACE
6304
6305 static ssize_t
6306 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6307                      size_t cnt, loff_t *ppos)
6308 {
6309         struct trace_array *tr = filp->private_data;
6310
6311         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6312 }
6313
6314 static ssize_t
6315 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6316                       size_t cnt, loff_t *ppos)
6317 {
6318         struct trace_array *tr = filp->private_data;
6319
6320         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6321 }
6322
6323 #endif
6324
6325 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6326 {
6327         if (cpu == RING_BUFFER_ALL_CPUS) {
6328                 if (cpumask_empty(tr->pipe_cpumask)) {
6329                         cpumask_setall(tr->pipe_cpumask);
6330                         return 0;
6331                 }
6332         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6333                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6334                 return 0;
6335         }
6336         return -EBUSY;
6337 }
6338
6339 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6340 {
6341         if (cpu == RING_BUFFER_ALL_CPUS) {
6342                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6343                 cpumask_clear(tr->pipe_cpumask);
6344         } else {
6345                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6346                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6347         }
6348 }
6349
6350 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6351 {
6352         struct trace_array *tr = inode->i_private;
6353         struct trace_iterator *iter;
6354         int cpu;
6355         int ret;
6356
6357         ret = tracing_check_open_get_tr(tr);
6358         if (ret)
6359                 return ret;
6360
6361         mutex_lock(&trace_types_lock);
6362         cpu = tracing_get_cpu(inode);
6363         ret = open_pipe_on_cpu(tr, cpu);
6364         if (ret)
6365                 goto fail_pipe_on_cpu;
6366
6367         /* create a buffer to store the information to pass to userspace */
6368         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6369         if (!iter) {
6370                 ret = -ENOMEM;
6371                 goto fail_alloc_iter;
6372         }
6373
6374         trace_seq_init(&iter->seq);
6375         iter->trace = tr->current_trace;
6376
6377         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6378                 ret = -ENOMEM;
6379                 goto fail;
6380         }
6381
6382         /* trace pipe does not show start of buffer */
6383         cpumask_setall(iter->started);
6384
6385         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6386                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6387
6388         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6389         if (trace_clocks[tr->clock_id].in_ns)
6390                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6391
6392         iter->tr = tr;
6393         iter->array_buffer = &tr->array_buffer;
6394         iter->cpu_file = cpu;
6395         mutex_init(&iter->mutex);
6396         filp->private_data = iter;
6397
6398         if (iter->trace->pipe_open)
6399                 iter->trace->pipe_open(iter);
6400
6401         nonseekable_open(inode, filp);
6402
6403         tr->trace_ref++;
6404
6405         mutex_unlock(&trace_types_lock);
6406         return ret;
6407
6408 fail:
6409         kfree(iter);
6410 fail_alloc_iter:
6411         close_pipe_on_cpu(tr, cpu);
6412 fail_pipe_on_cpu:
6413         __trace_array_put(tr);
6414         mutex_unlock(&trace_types_lock);
6415         return ret;
6416 }
6417
6418 static int tracing_release_pipe(struct inode *inode, struct file *file)
6419 {
6420         struct trace_iterator *iter = file->private_data;
6421         struct trace_array *tr = inode->i_private;
6422
6423         mutex_lock(&trace_types_lock);
6424
6425         tr->trace_ref--;
6426
6427         if (iter->trace->pipe_close)
6428                 iter->trace->pipe_close(iter);
6429         close_pipe_on_cpu(tr, iter->cpu_file);
6430         mutex_unlock(&trace_types_lock);
6431
6432         free_trace_iter_content(iter);
6433         kfree(iter);
6434
6435         trace_array_put(tr);
6436
6437         return 0;
6438 }
6439
6440 static __poll_t
6441 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6442 {
6443         struct trace_array *tr = iter->tr;
6444
6445         /* Iterators are static, they should be filled or empty */
6446         if (trace_buffer_iter(iter, iter->cpu_file))
6447                 return EPOLLIN | EPOLLRDNORM;
6448
6449         if (tr->trace_flags & TRACE_ITER_BLOCK)
6450                 /*
6451                  * Always select as readable when in blocking mode
6452                  */
6453                 return EPOLLIN | EPOLLRDNORM;
6454         else
6455                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6456                                              filp, poll_table, iter->tr->buffer_percent);
6457 }
6458
6459 static __poll_t
6460 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6461 {
6462         struct trace_iterator *iter = filp->private_data;
6463
6464         return trace_poll(iter, filp, poll_table);
6465 }
6466
6467 /* Must be called with iter->mutex held. */
6468 static int tracing_wait_pipe(struct file *filp)
6469 {
6470         struct trace_iterator *iter = filp->private_data;
6471         int ret;
6472
6473         while (trace_empty(iter)) {
6474
6475                 if ((filp->f_flags & O_NONBLOCK)) {
6476                         return -EAGAIN;
6477                 }
6478
6479                 /*
6480                  * We block until we read something and tracing is disabled.
6481                  * We still block if tracing is disabled, but we have never
6482                  * read anything. This allows a user to cat this file, and
6483                  * then enable tracing. But after we have read something,
6484                  * we give an EOF when tracing is again disabled.
6485                  *
6486                  * iter->pos will be 0 if we haven't read anything.
6487                  */
6488                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6489                         break;
6490
6491                 mutex_unlock(&iter->mutex);
6492
6493                 ret = wait_on_pipe(iter, 0);
6494
6495                 mutex_lock(&iter->mutex);
6496
6497                 if (ret)
6498                         return ret;
6499         }
6500
6501         return 1;
6502 }
6503
6504 /*
6505  * Consumer reader.
6506  */
6507 static ssize_t
6508 tracing_read_pipe(struct file *filp, char __user *ubuf,
6509                   size_t cnt, loff_t *ppos)
6510 {
6511         struct trace_iterator *iter = filp->private_data;
6512         ssize_t sret;
6513
6514         /*
6515          * Avoid more than one consumer on a single file descriptor
6516          * This is just a matter of traces coherency, the ring buffer itself
6517          * is protected.
6518          */
6519         mutex_lock(&iter->mutex);
6520
6521         /* return any leftover data */
6522         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6523         if (sret != -EBUSY)
6524                 goto out;
6525
6526         trace_seq_init(&iter->seq);
6527
6528         if (iter->trace->read) {
6529                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6530                 if (sret)
6531                         goto out;
6532         }
6533
6534 waitagain:
6535         sret = tracing_wait_pipe(filp);
6536         if (sret <= 0)
6537                 goto out;
6538
6539         /* stop when tracing is finished */
6540         if (trace_empty(iter)) {
6541                 sret = 0;
6542                 goto out;
6543         }
6544
6545         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6546                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6547
6548         /* reset all but tr, trace, and overruns */
6549         trace_iterator_reset(iter);
6550         cpumask_clear(iter->started);
6551         trace_seq_init(&iter->seq);
6552
6553         trace_event_read_lock();
6554         trace_access_lock(iter->cpu_file);
6555         while (trace_find_next_entry_inc(iter) != NULL) {
6556                 enum print_line_t ret;
6557                 int save_len = iter->seq.seq.len;
6558
6559                 ret = print_trace_line(iter);
6560                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6561                         /*
6562                          * If one print_trace_line() fills entire trace_seq in one shot,
6563                          * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6564                          * In this case, we need to consume it, otherwise, loop will peek
6565                          * this event next time, resulting in an infinite loop.
6566                          */
6567                         if (save_len == 0) {
6568                                 iter->seq.full = 0;
6569                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6570                                 trace_consume(iter);
6571                                 break;
6572                         }
6573
6574                         /* In other cases, don't print partial lines */
6575                         iter->seq.seq.len = save_len;
6576                         break;
6577                 }
6578                 if (ret != TRACE_TYPE_NO_CONSUME)
6579                         trace_consume(iter);
6580
6581                 if (trace_seq_used(&iter->seq) >= cnt)
6582                         break;
6583
6584                 /*
6585                  * Setting the full flag means we reached the trace_seq buffer
6586                  * size and we should leave by partial output condition above.
6587                  * One of the trace_seq_* functions is not used properly.
6588                  */
6589                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6590                           iter->ent->type);
6591         }
6592         trace_access_unlock(iter->cpu_file);
6593         trace_event_read_unlock();
6594
6595         /* Now copy what we have to the user */
6596         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6597         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6598                 trace_seq_init(&iter->seq);
6599
6600         /*
6601          * If there was nothing to send to user, in spite of consuming trace
6602          * entries, go back to wait for more entries.
6603          */
6604         if (sret == -EBUSY)
6605                 goto waitagain;
6606
6607 out:
6608         mutex_unlock(&iter->mutex);
6609
6610         return sret;
6611 }
6612
6613 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6614                                      unsigned int idx)
6615 {
6616         __free_page(spd->pages[idx]);
6617 }
6618
6619 static size_t
6620 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6621 {
6622         size_t count;
6623         int save_len;
6624         int ret;
6625
6626         /* Seq buffer is page-sized, exactly what we need. */
6627         for (;;) {
6628                 save_len = iter->seq.seq.len;
6629                 ret = print_trace_line(iter);
6630
6631                 if (trace_seq_has_overflowed(&iter->seq)) {
6632                         iter->seq.seq.len = save_len;
6633                         break;
6634                 }
6635
6636                 /*
6637                  * This should not be hit, because it should only
6638                  * be set if the iter->seq overflowed. But check it
6639                  * anyway to be safe.
6640                  */
6641                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6642                         iter->seq.seq.len = save_len;
6643                         break;
6644                 }
6645
6646                 count = trace_seq_used(&iter->seq) - save_len;
6647                 if (rem < count) {
6648                         rem = 0;
6649                         iter->seq.seq.len = save_len;
6650                         break;
6651                 }
6652
6653                 if (ret != TRACE_TYPE_NO_CONSUME)
6654                         trace_consume(iter);
6655                 rem -= count;
6656                 if (!trace_find_next_entry_inc(iter))   {
6657                         rem = 0;
6658                         iter->ent = NULL;
6659                         break;
6660                 }
6661         }
6662
6663         return rem;
6664 }
6665
6666 static ssize_t tracing_splice_read_pipe(struct file *filp,
6667                                         loff_t *ppos,
6668                                         struct pipe_inode_info *pipe,
6669                                         size_t len,
6670                                         unsigned int flags)
6671 {
6672         struct page *pages_def[PIPE_DEF_BUFFERS];
6673         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6674         struct trace_iterator *iter = filp->private_data;
6675         struct splice_pipe_desc spd = {
6676                 .pages          = pages_def,
6677                 .partial        = partial_def,
6678                 .nr_pages       = 0, /* This gets updated below. */
6679                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6680                 .ops            = &default_pipe_buf_ops,
6681                 .spd_release    = tracing_spd_release_pipe,
6682         };
6683         ssize_t ret;
6684         size_t rem;
6685         unsigned int i;
6686
6687         if (splice_grow_spd(pipe, &spd))
6688                 return -ENOMEM;
6689
6690         mutex_lock(&iter->mutex);
6691
6692         if (iter->trace->splice_read) {
6693                 ret = iter->trace->splice_read(iter, filp,
6694                                                ppos, pipe, len, flags);
6695                 if (ret)
6696                         goto out_err;
6697         }
6698
6699         ret = tracing_wait_pipe(filp);
6700         if (ret <= 0)
6701                 goto out_err;
6702
6703         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6704                 ret = -EFAULT;
6705                 goto out_err;
6706         }
6707
6708         trace_event_read_lock();
6709         trace_access_lock(iter->cpu_file);
6710
6711         /* Fill as many pages as possible. */
6712         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6713                 spd.pages[i] = alloc_page(GFP_KERNEL);
6714                 if (!spd.pages[i])
6715                         break;
6716
6717                 rem = tracing_fill_pipe_page(rem, iter);
6718
6719                 /* Copy the data into the page, so we can start over. */
6720                 ret = trace_seq_to_buffer(&iter->seq,
6721                                           page_address(spd.pages[i]),
6722                                           trace_seq_used(&iter->seq));
6723                 if (ret < 0) {
6724                         __free_page(spd.pages[i]);
6725                         break;
6726                 }
6727                 spd.partial[i].offset = 0;
6728                 spd.partial[i].len = trace_seq_used(&iter->seq);
6729
6730                 trace_seq_init(&iter->seq);
6731         }
6732
6733         trace_access_unlock(iter->cpu_file);
6734         trace_event_read_unlock();
6735         mutex_unlock(&iter->mutex);
6736
6737         spd.nr_pages = i;
6738
6739         if (i)
6740                 ret = splice_to_pipe(pipe, &spd);
6741         else
6742                 ret = 0;
6743 out:
6744         splice_shrink_spd(&spd);
6745         return ret;
6746
6747 out_err:
6748         mutex_unlock(&iter->mutex);
6749         goto out;
6750 }
6751
6752 static ssize_t
6753 tracing_entries_read(struct file *filp, char __user *ubuf,
6754                      size_t cnt, loff_t *ppos)
6755 {
6756         struct inode *inode = file_inode(filp);
6757         struct trace_array *tr = inode->i_private;
6758         int cpu = tracing_get_cpu(inode);
6759         char buf[64];
6760         int r = 0;
6761         ssize_t ret;
6762
6763         mutex_lock(&trace_types_lock);
6764
6765         if (cpu == RING_BUFFER_ALL_CPUS) {
6766                 int cpu, buf_size_same;
6767                 unsigned long size;
6768
6769                 size = 0;
6770                 buf_size_same = 1;
6771                 /* check if all cpu sizes are same */
6772                 for_each_tracing_cpu(cpu) {
6773                         /* fill in the size from first enabled cpu */
6774                         if (size == 0)
6775                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6776                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6777                                 buf_size_same = 0;
6778                                 break;
6779                         }
6780                 }
6781
6782                 if (buf_size_same) {
6783                         if (!tr->ring_buffer_expanded)
6784                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6785                                             size >> 10,
6786                                             trace_buf_size >> 10);
6787                         else
6788                                 r = sprintf(buf, "%lu\n", size >> 10);
6789                 } else
6790                         r = sprintf(buf, "X\n");
6791         } else
6792                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6793
6794         mutex_unlock(&trace_types_lock);
6795
6796         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6797         return ret;
6798 }
6799
6800 static ssize_t
6801 tracing_entries_write(struct file *filp, const char __user *ubuf,
6802                       size_t cnt, loff_t *ppos)
6803 {
6804         struct inode *inode = file_inode(filp);
6805         struct trace_array *tr = inode->i_private;
6806         unsigned long val;
6807         int ret;
6808
6809         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6810         if (ret)
6811                 return ret;
6812
6813         /* must have at least 1 entry */
6814         if (!val)
6815                 return -EINVAL;
6816
6817         /* value is in KB */
6818         val <<= 10;
6819         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6820         if (ret < 0)
6821                 return ret;
6822
6823         *ppos += cnt;
6824
6825         return cnt;
6826 }
6827
6828 static ssize_t
6829 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6830                                 size_t cnt, loff_t *ppos)
6831 {
6832         struct trace_array *tr = filp->private_data;
6833         char buf[64];
6834         int r, cpu;
6835         unsigned long size = 0, expanded_size = 0;
6836
6837         mutex_lock(&trace_types_lock);
6838         for_each_tracing_cpu(cpu) {
6839                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6840                 if (!tr->ring_buffer_expanded)
6841                         expanded_size += trace_buf_size >> 10;
6842         }
6843         if (tr->ring_buffer_expanded)
6844                 r = sprintf(buf, "%lu\n", size);
6845         else
6846                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6847         mutex_unlock(&trace_types_lock);
6848
6849         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6850 }
6851
6852 static ssize_t
6853 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6854                           size_t cnt, loff_t *ppos)
6855 {
6856         /*
6857          * There is no need to read what the user has written, this function
6858          * is just to make sure that there is no error when "echo" is used
6859          */
6860
6861         *ppos += cnt;
6862
6863         return cnt;
6864 }
6865
6866 static int
6867 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6868 {
6869         struct trace_array *tr = inode->i_private;
6870
6871         /* disable tracing ? */
6872         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6873                 tracer_tracing_off(tr);
6874         /* resize the ring buffer to 0 */
6875         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6876
6877         trace_array_put(tr);
6878
6879         return 0;
6880 }
6881
6882 #define TRACE_MARKER_MAX_SIZE           4096
6883
6884 static ssize_t
6885 tracing_mark_write(struct file *filp, const char __user *ubuf,
6886                                         size_t cnt, loff_t *fpos)
6887 {
6888         struct trace_array *tr = filp->private_data;
6889         struct ring_buffer_event *event;
6890         enum event_trigger_type tt = ETT_NONE;
6891         struct trace_buffer *buffer;
6892         struct print_entry *entry;
6893         int meta_size;
6894         ssize_t written;
6895         size_t size;
6896         int len;
6897
6898 /* Used in tracing_mark_raw_write() as well */
6899 #define FAULTED_STR "<faulted>"
6900 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6901
6902         if (tracing_disabled)
6903                 return -EINVAL;
6904
6905         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6906                 return -EINVAL;
6907
6908         if ((ssize_t)cnt < 0)
6909                 return -EINVAL;
6910
6911         if (cnt > TRACE_MARKER_MAX_SIZE)
6912                 cnt = TRACE_MARKER_MAX_SIZE;
6913
6914         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6915  again:
6916         size = cnt + meta_size;
6917
6918         /* If less than "<faulted>", then make sure we can still add that */
6919         if (cnt < FAULTED_SIZE)
6920                 size += FAULTED_SIZE - cnt;
6921
6922         buffer = tr->array_buffer.buffer;
6923         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6924                                             tracing_gen_ctx());
6925         if (unlikely(!event)) {
6926                 /*
6927                  * If the size was greater than what was allowed, then
6928                  * make it smaller and try again.
6929                  */
6930                 if (size > ring_buffer_max_event_size(buffer)) {
6931                         /* cnt < FAULTED size should never be bigger than max */
6932                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6933                                 return -EBADF;
6934                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
6935                         /* The above should only happen once */
6936                         if (WARN_ON_ONCE(cnt + meta_size == size))
6937                                 return -EBADF;
6938                         goto again;
6939                 }
6940
6941                 /* Ring buffer disabled, return as if not open for write */
6942                 return -EBADF;
6943         }
6944
6945         entry = ring_buffer_event_data(event);
6946         entry->ip = _THIS_IP_;
6947
6948         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6949         if (len) {
6950                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6951                 cnt = FAULTED_SIZE;
6952                 written = -EFAULT;
6953         } else
6954                 written = cnt;
6955
6956         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6957                 /* do not add \n before testing triggers, but add \0 */
6958                 entry->buf[cnt] = '\0';
6959                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6960         }
6961
6962         if (entry->buf[cnt - 1] != '\n') {
6963                 entry->buf[cnt] = '\n';
6964                 entry->buf[cnt + 1] = '\0';
6965         } else
6966                 entry->buf[cnt] = '\0';
6967
6968         if (static_branch_unlikely(&trace_marker_exports_enabled))
6969                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6970         __buffer_unlock_commit(buffer, event);
6971
6972         if (tt)
6973                 event_triggers_post_call(tr->trace_marker_file, tt);
6974
6975         return written;
6976 }
6977
6978 static ssize_t
6979 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6980                                         size_t cnt, loff_t *fpos)
6981 {
6982         struct trace_array *tr = filp->private_data;
6983         struct ring_buffer_event *event;
6984         struct trace_buffer *buffer;
6985         struct raw_data_entry *entry;
6986         ssize_t written;
6987         int size;
6988         int len;
6989
6990 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6991
6992         if (tracing_disabled)
6993                 return -EINVAL;
6994
6995         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6996                 return -EINVAL;
6997
6998         /* The marker must at least have a tag id */
6999         if (cnt < sizeof(unsigned int))
7000                 return -EINVAL;
7001
7002         size = sizeof(*entry) + cnt;
7003         if (cnt < FAULT_SIZE_ID)
7004                 size += FAULT_SIZE_ID - cnt;
7005
7006         buffer = tr->array_buffer.buffer;
7007
7008         if (size > ring_buffer_max_event_size(buffer))
7009                 return -EINVAL;
7010
7011         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7012                                             tracing_gen_ctx());
7013         if (!event)
7014                 /* Ring buffer disabled, return as if not open for write */
7015                 return -EBADF;
7016
7017         entry = ring_buffer_event_data(event);
7018
7019         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7020         if (len) {
7021                 entry->id = -1;
7022                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7023                 written = -EFAULT;
7024         } else
7025                 written = cnt;
7026
7027         __buffer_unlock_commit(buffer, event);
7028
7029         return written;
7030 }
7031
7032 static int tracing_clock_show(struct seq_file *m, void *v)
7033 {
7034         struct trace_array *tr = m->private;
7035         int i;
7036
7037         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7038                 seq_printf(m,
7039                         "%s%s%s%s", i ? " " : "",
7040                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7041                         i == tr->clock_id ? "]" : "");
7042         seq_putc(m, '\n');
7043
7044         return 0;
7045 }
7046
7047 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7048 {
7049         int i;
7050
7051         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7052                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7053                         break;
7054         }
7055         if (i == ARRAY_SIZE(trace_clocks))
7056                 return -EINVAL;
7057
7058         mutex_lock(&trace_types_lock);
7059
7060         tr->clock_id = i;
7061
7062         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7063
7064         /*
7065          * New clock may not be consistent with the previous clock.
7066          * Reset the buffer so that it doesn't have incomparable timestamps.
7067          */
7068         tracing_reset_online_cpus(&tr->array_buffer);
7069
7070 #ifdef CONFIG_TRACER_MAX_TRACE
7071         if (tr->max_buffer.buffer)
7072                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7073         tracing_reset_online_cpus(&tr->max_buffer);
7074 #endif
7075
7076         mutex_unlock(&trace_types_lock);
7077
7078         return 0;
7079 }
7080
7081 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7082                                    size_t cnt, loff_t *fpos)
7083 {
7084         struct seq_file *m = filp->private_data;
7085         struct trace_array *tr = m->private;
7086         char buf[64];
7087         const char *clockstr;
7088         int ret;
7089
7090         if (cnt >= sizeof(buf))
7091                 return -EINVAL;
7092
7093         if (copy_from_user(buf, ubuf, cnt))
7094                 return -EFAULT;
7095
7096         buf[cnt] = 0;
7097
7098         clockstr = strstrip(buf);
7099
7100         ret = tracing_set_clock(tr, clockstr);
7101         if (ret)
7102                 return ret;
7103
7104         *fpos += cnt;
7105
7106         return cnt;
7107 }
7108
7109 static int tracing_clock_open(struct inode *inode, struct file *file)
7110 {
7111         struct trace_array *tr = inode->i_private;
7112         int ret;
7113
7114         ret = tracing_check_open_get_tr(tr);
7115         if (ret)
7116                 return ret;
7117
7118         ret = single_open(file, tracing_clock_show, inode->i_private);
7119         if (ret < 0)
7120                 trace_array_put(tr);
7121
7122         return ret;
7123 }
7124
7125 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7126 {
7127         struct trace_array *tr = m->private;
7128
7129         mutex_lock(&trace_types_lock);
7130
7131         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7132                 seq_puts(m, "delta [absolute]\n");
7133         else
7134                 seq_puts(m, "[delta] absolute\n");
7135
7136         mutex_unlock(&trace_types_lock);
7137
7138         return 0;
7139 }
7140
7141 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7142 {
7143         struct trace_array *tr = inode->i_private;
7144         int ret;
7145
7146         ret = tracing_check_open_get_tr(tr);
7147         if (ret)
7148                 return ret;
7149
7150         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7151         if (ret < 0)
7152                 trace_array_put(tr);
7153
7154         return ret;
7155 }
7156
7157 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7158 {
7159         if (rbe == this_cpu_read(trace_buffered_event))
7160                 return ring_buffer_time_stamp(buffer);
7161
7162         return ring_buffer_event_time_stamp(buffer, rbe);
7163 }
7164
7165 /*
7166  * Set or disable using the per CPU trace_buffer_event when possible.
7167  */
7168 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7169 {
7170         int ret = 0;
7171
7172         mutex_lock(&trace_types_lock);
7173
7174         if (set && tr->no_filter_buffering_ref++)
7175                 goto out;
7176
7177         if (!set) {
7178                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7179                         ret = -EINVAL;
7180                         goto out;
7181                 }
7182
7183                 --tr->no_filter_buffering_ref;
7184         }
7185  out:
7186         mutex_unlock(&trace_types_lock);
7187
7188         return ret;
7189 }
7190
7191 struct ftrace_buffer_info {
7192         struct trace_iterator   iter;
7193         void                    *spare;
7194         unsigned int            spare_cpu;
7195         unsigned int            spare_size;
7196         unsigned int            read;
7197 };
7198
7199 #ifdef CONFIG_TRACER_SNAPSHOT
7200 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7201 {
7202         struct trace_array *tr = inode->i_private;
7203         struct trace_iterator *iter;
7204         struct seq_file *m;
7205         int ret;
7206
7207         ret = tracing_check_open_get_tr(tr);
7208         if (ret)
7209                 return ret;
7210
7211         if (file->f_mode & FMODE_READ) {
7212                 iter = __tracing_open(inode, file, true);
7213                 if (IS_ERR(iter))
7214                         ret = PTR_ERR(iter);
7215         } else {
7216                 /* Writes still need the seq_file to hold the private data */
7217                 ret = -ENOMEM;
7218                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7219                 if (!m)
7220                         goto out;
7221                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7222                 if (!iter) {
7223                         kfree(m);
7224                         goto out;
7225                 }
7226                 ret = 0;
7227
7228                 iter->tr = tr;
7229                 iter->array_buffer = &tr->max_buffer;
7230                 iter->cpu_file = tracing_get_cpu(inode);
7231                 m->private = iter;
7232                 file->private_data = m;
7233         }
7234 out:
7235         if (ret < 0)
7236                 trace_array_put(tr);
7237
7238         return ret;
7239 }
7240
7241 static void tracing_swap_cpu_buffer(void *tr)
7242 {
7243         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7244 }
7245
7246 static ssize_t
7247 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7248                        loff_t *ppos)
7249 {
7250         struct seq_file *m = filp->private_data;
7251         struct trace_iterator *iter = m->private;
7252         struct trace_array *tr = iter->tr;
7253         unsigned long val;
7254         int ret;
7255
7256         ret = tracing_update_buffers(tr);
7257         if (ret < 0)
7258                 return ret;
7259
7260         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7261         if (ret)
7262                 return ret;
7263
7264         mutex_lock(&trace_types_lock);
7265
7266         if (tr->current_trace->use_max_tr) {
7267                 ret = -EBUSY;
7268                 goto out;
7269         }
7270
7271         local_irq_disable();
7272         arch_spin_lock(&tr->max_lock);
7273         if (tr->cond_snapshot)
7274                 ret = -EBUSY;
7275         arch_spin_unlock(&tr->max_lock);
7276         local_irq_enable();
7277         if (ret)
7278                 goto out;
7279
7280         switch (val) {
7281         case 0:
7282                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7283                         ret = -EINVAL;
7284                         break;
7285                 }
7286                 if (tr->allocated_snapshot)
7287                         free_snapshot(tr);
7288                 break;
7289         case 1:
7290 /* Only allow per-cpu swap if the ring buffer supports it */
7291 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7292                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7293                         ret = -EINVAL;
7294                         break;
7295                 }
7296 #endif
7297                 if (tr->allocated_snapshot)
7298                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7299                                         &tr->array_buffer, iter->cpu_file);
7300
7301                 ret = tracing_arm_snapshot_locked(tr);
7302                 if (ret)
7303                         break;
7304
7305                 /* Now, we're going to swap */
7306                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7307                         local_irq_disable();
7308                         update_max_tr(tr, current, smp_processor_id(), NULL);
7309                         local_irq_enable();
7310                 } else {
7311                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7312                                                  (void *)tr, 1);
7313                 }
7314                 tracing_disarm_snapshot(tr);
7315                 break;
7316         default:
7317                 if (tr->allocated_snapshot) {
7318                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7319                                 tracing_reset_online_cpus(&tr->max_buffer);
7320                         else
7321                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7322                 }
7323                 break;
7324         }
7325
7326         if (ret >= 0) {
7327                 *ppos += cnt;
7328                 ret = cnt;
7329         }
7330 out:
7331         mutex_unlock(&trace_types_lock);
7332         return ret;
7333 }
7334
7335 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7336 {
7337         struct seq_file *m = file->private_data;
7338         int ret;
7339
7340         ret = tracing_release(inode, file);
7341
7342         if (file->f_mode & FMODE_READ)
7343                 return ret;
7344
7345         /* If write only, the seq_file is just a stub */
7346         if (m)
7347                 kfree(m->private);
7348         kfree(m);
7349
7350         return 0;
7351 }
7352
7353 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7354 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7355                                     size_t count, loff_t *ppos);
7356 static int tracing_buffers_release(struct inode *inode, struct file *file);
7357 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7358                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7359
7360 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7361 {
7362         struct ftrace_buffer_info *info;
7363         int ret;
7364
7365         /* The following checks for tracefs lockdown */
7366         ret = tracing_buffers_open(inode, filp);
7367         if (ret < 0)
7368                 return ret;
7369
7370         info = filp->private_data;
7371
7372         if (info->iter.trace->use_max_tr) {
7373                 tracing_buffers_release(inode, filp);
7374                 return -EBUSY;
7375         }
7376
7377         info->iter.snapshot = true;
7378         info->iter.array_buffer = &info->iter.tr->max_buffer;
7379
7380         return ret;
7381 }
7382
7383 #endif /* CONFIG_TRACER_SNAPSHOT */
7384
7385
7386 static const struct file_operations tracing_thresh_fops = {
7387         .open           = tracing_open_generic,
7388         .read           = tracing_thresh_read,
7389         .write          = tracing_thresh_write,
7390         .llseek         = generic_file_llseek,
7391 };
7392
7393 #ifdef CONFIG_TRACER_MAX_TRACE
7394 static const struct file_operations tracing_max_lat_fops = {
7395         .open           = tracing_open_generic_tr,
7396         .read           = tracing_max_lat_read,
7397         .write          = tracing_max_lat_write,
7398         .llseek         = generic_file_llseek,
7399         .release        = tracing_release_generic_tr,
7400 };
7401 #endif
7402
7403 static const struct file_operations set_tracer_fops = {
7404         .open           = tracing_open_generic_tr,
7405         .read           = tracing_set_trace_read,
7406         .write          = tracing_set_trace_write,
7407         .llseek         = generic_file_llseek,
7408         .release        = tracing_release_generic_tr,
7409 };
7410
7411 static const struct file_operations tracing_pipe_fops = {
7412         .open           = tracing_open_pipe,
7413         .poll           = tracing_poll_pipe,
7414         .read           = tracing_read_pipe,
7415         .splice_read    = tracing_splice_read_pipe,
7416         .release        = tracing_release_pipe,
7417         .llseek         = no_llseek,
7418 };
7419
7420 static const struct file_operations tracing_entries_fops = {
7421         .open           = tracing_open_generic_tr,
7422         .read           = tracing_entries_read,
7423         .write          = tracing_entries_write,
7424         .llseek         = generic_file_llseek,
7425         .release        = tracing_release_generic_tr,
7426 };
7427
7428 static const struct file_operations tracing_total_entries_fops = {
7429         .open           = tracing_open_generic_tr,
7430         .read           = tracing_total_entries_read,
7431         .llseek         = generic_file_llseek,
7432         .release        = tracing_release_generic_tr,
7433 };
7434
7435 static const struct file_operations tracing_free_buffer_fops = {
7436         .open           = tracing_open_generic_tr,
7437         .write          = tracing_free_buffer_write,
7438         .release        = tracing_free_buffer_release,
7439 };
7440
7441 static const struct file_operations tracing_mark_fops = {
7442         .open           = tracing_mark_open,
7443         .write          = tracing_mark_write,
7444         .release        = tracing_release_generic_tr,
7445 };
7446
7447 static const struct file_operations tracing_mark_raw_fops = {
7448         .open           = tracing_mark_open,
7449         .write          = tracing_mark_raw_write,
7450         .release        = tracing_release_generic_tr,
7451 };
7452
7453 static const struct file_operations trace_clock_fops = {
7454         .open           = tracing_clock_open,
7455         .read           = seq_read,
7456         .llseek         = seq_lseek,
7457         .release        = tracing_single_release_tr,
7458         .write          = tracing_clock_write,
7459 };
7460
7461 static const struct file_operations trace_time_stamp_mode_fops = {
7462         .open           = tracing_time_stamp_mode_open,
7463         .read           = seq_read,
7464         .llseek         = seq_lseek,
7465         .release        = tracing_single_release_tr,
7466 };
7467
7468 #ifdef CONFIG_TRACER_SNAPSHOT
7469 static const struct file_operations snapshot_fops = {
7470         .open           = tracing_snapshot_open,
7471         .read           = seq_read,
7472         .write          = tracing_snapshot_write,
7473         .llseek         = tracing_lseek,
7474         .release        = tracing_snapshot_release,
7475 };
7476
7477 static const struct file_operations snapshot_raw_fops = {
7478         .open           = snapshot_raw_open,
7479         .read           = tracing_buffers_read,
7480         .release        = tracing_buffers_release,
7481         .splice_read    = tracing_buffers_splice_read,
7482         .llseek         = no_llseek,
7483 };
7484
7485 #endif /* CONFIG_TRACER_SNAPSHOT */
7486
7487 /*
7488  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7489  * @filp: The active open file structure
7490  * @ubuf: The userspace provided buffer to read value into
7491  * @cnt: The maximum number of bytes to read
7492  * @ppos: The current "file" position
7493  *
7494  * This function implements the write interface for a struct trace_min_max_param.
7495  * The filp->private_data must point to a trace_min_max_param structure that
7496  * defines where to write the value, the min and the max acceptable values,
7497  * and a lock to protect the write.
7498  */
7499 static ssize_t
7500 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7501 {
7502         struct trace_min_max_param *param = filp->private_data;
7503         u64 val;
7504         int err;
7505
7506         if (!param)
7507                 return -EFAULT;
7508
7509         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7510         if (err)
7511                 return err;
7512
7513         if (param->lock)
7514                 mutex_lock(param->lock);
7515
7516         if (param->min && val < *param->min)
7517                 err = -EINVAL;
7518
7519         if (param->max && val > *param->max)
7520                 err = -EINVAL;
7521
7522         if (!err)
7523                 *param->val = val;
7524
7525         if (param->lock)
7526                 mutex_unlock(param->lock);
7527
7528         if (err)
7529                 return err;
7530
7531         return cnt;
7532 }
7533
7534 /*
7535  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7536  * @filp: The active open file structure
7537  * @ubuf: The userspace provided buffer to read value into
7538  * @cnt: The maximum number of bytes to read
7539  * @ppos: The current "file" position
7540  *
7541  * This function implements the read interface for a struct trace_min_max_param.
7542  * The filp->private_data must point to a trace_min_max_param struct with valid
7543  * data.
7544  */
7545 static ssize_t
7546 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7547 {
7548         struct trace_min_max_param *param = filp->private_data;
7549         char buf[U64_STR_SIZE];
7550         int len;
7551         u64 val;
7552
7553         if (!param)
7554                 return -EFAULT;
7555
7556         val = *param->val;
7557
7558         if (cnt > sizeof(buf))
7559                 cnt = sizeof(buf);
7560
7561         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7562
7563         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7564 }
7565
7566 const struct file_operations trace_min_max_fops = {
7567         .open           = tracing_open_generic,
7568         .read           = trace_min_max_read,
7569         .write          = trace_min_max_write,
7570 };
7571
7572 #define TRACING_LOG_ERRS_MAX    8
7573 #define TRACING_LOG_LOC_MAX     128
7574
7575 #define CMD_PREFIX "  Command: "
7576
7577 struct err_info {
7578         const char      **errs; /* ptr to loc-specific array of err strings */
7579         u8              type;   /* index into errs -> specific err string */
7580         u16             pos;    /* caret position */
7581         u64             ts;
7582 };
7583
7584 struct tracing_log_err {
7585         struct list_head        list;
7586         struct err_info         info;
7587         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7588         char                    *cmd;                     /* what caused err */
7589 };
7590
7591 static DEFINE_MUTEX(tracing_err_log_lock);
7592
7593 static struct tracing_log_err *alloc_tracing_log_err(int len)
7594 {
7595         struct tracing_log_err *err;
7596
7597         err = kzalloc(sizeof(*err), GFP_KERNEL);
7598         if (!err)
7599                 return ERR_PTR(-ENOMEM);
7600
7601         err->cmd = kzalloc(len, GFP_KERNEL);
7602         if (!err->cmd) {
7603                 kfree(err);
7604                 return ERR_PTR(-ENOMEM);
7605         }
7606
7607         return err;
7608 }
7609
7610 static void free_tracing_log_err(struct tracing_log_err *err)
7611 {
7612         kfree(err->cmd);
7613         kfree(err);
7614 }
7615
7616 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7617                                                    int len)
7618 {
7619         struct tracing_log_err *err;
7620         char *cmd;
7621
7622         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7623                 err = alloc_tracing_log_err(len);
7624                 if (PTR_ERR(err) != -ENOMEM)
7625                         tr->n_err_log_entries++;
7626
7627                 return err;
7628         }
7629         cmd = kzalloc(len, GFP_KERNEL);
7630         if (!cmd)
7631                 return ERR_PTR(-ENOMEM);
7632         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7633         kfree(err->cmd);
7634         err->cmd = cmd;
7635         list_del(&err->list);
7636
7637         return err;
7638 }
7639
7640 /**
7641  * err_pos - find the position of a string within a command for error careting
7642  * @cmd: The tracing command that caused the error
7643  * @str: The string to position the caret at within @cmd
7644  *
7645  * Finds the position of the first occurrence of @str within @cmd.  The
7646  * return value can be passed to tracing_log_err() for caret placement
7647  * within @cmd.
7648  *
7649  * Returns the index within @cmd of the first occurrence of @str or 0
7650  * if @str was not found.
7651  */
7652 unsigned int err_pos(char *cmd, const char *str)
7653 {
7654         char *found;
7655
7656         if (WARN_ON(!strlen(cmd)))
7657                 return 0;
7658
7659         found = strstr(cmd, str);
7660         if (found)
7661                 return found - cmd;
7662
7663         return 0;
7664 }
7665
7666 /**
7667  * tracing_log_err - write an error to the tracing error log
7668  * @tr: The associated trace array for the error (NULL for top level array)
7669  * @loc: A string describing where the error occurred
7670  * @cmd: The tracing command that caused the error
7671  * @errs: The array of loc-specific static error strings
7672  * @type: The index into errs[], which produces the specific static err string
7673  * @pos: The position the caret should be placed in the cmd
7674  *
7675  * Writes an error into tracing/error_log of the form:
7676  *
7677  * <loc>: error: <text>
7678  *   Command: <cmd>
7679  *              ^
7680  *
7681  * tracing/error_log is a small log file containing the last
7682  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7683  * unless there has been a tracing error, and the error log can be
7684  * cleared and have its memory freed by writing the empty string in
7685  * truncation mode to it i.e. echo > tracing/error_log.
7686  *
7687  * NOTE: the @errs array along with the @type param are used to
7688  * produce a static error string - this string is not copied and saved
7689  * when the error is logged - only a pointer to it is saved.  See
7690  * existing callers for examples of how static strings are typically
7691  * defined for use with tracing_log_err().
7692  */
7693 void tracing_log_err(struct trace_array *tr,
7694                      const char *loc, const char *cmd,
7695                      const char **errs, u8 type, u16 pos)
7696 {
7697         struct tracing_log_err *err;
7698         int len = 0;
7699
7700         if (!tr)
7701                 tr = &global_trace;
7702
7703         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7704
7705         mutex_lock(&tracing_err_log_lock);
7706         err = get_tracing_log_err(tr, len);
7707         if (PTR_ERR(err) == -ENOMEM) {
7708                 mutex_unlock(&tracing_err_log_lock);
7709                 return;
7710         }
7711
7712         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7713         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7714
7715         err->info.errs = errs;
7716         err->info.type = type;
7717         err->info.pos = pos;
7718         err->info.ts = local_clock();
7719
7720         list_add_tail(&err->list, &tr->err_log);
7721         mutex_unlock(&tracing_err_log_lock);
7722 }
7723
7724 static void clear_tracing_err_log(struct trace_array *tr)
7725 {
7726         struct tracing_log_err *err, *next;
7727
7728         mutex_lock(&tracing_err_log_lock);
7729         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7730                 list_del(&err->list);
7731                 free_tracing_log_err(err);
7732         }
7733
7734         tr->n_err_log_entries = 0;
7735         mutex_unlock(&tracing_err_log_lock);
7736 }
7737
7738 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7739 {
7740         struct trace_array *tr = m->private;
7741
7742         mutex_lock(&tracing_err_log_lock);
7743
7744         return seq_list_start(&tr->err_log, *pos);
7745 }
7746
7747 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7748 {
7749         struct trace_array *tr = m->private;
7750
7751         return seq_list_next(v, &tr->err_log, pos);
7752 }
7753
7754 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7755 {
7756         mutex_unlock(&tracing_err_log_lock);
7757 }
7758
7759 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7760 {
7761         u16 i;
7762
7763         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7764                 seq_putc(m, ' ');
7765         for (i = 0; i < pos; i++)
7766                 seq_putc(m, ' ');
7767         seq_puts(m, "^\n");
7768 }
7769
7770 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7771 {
7772         struct tracing_log_err *err = v;
7773
7774         if (err) {
7775                 const char *err_text = err->info.errs[err->info.type];
7776                 u64 sec = err->info.ts;
7777                 u32 nsec;
7778
7779                 nsec = do_div(sec, NSEC_PER_SEC);
7780                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7781                            err->loc, err_text);
7782                 seq_printf(m, "%s", err->cmd);
7783                 tracing_err_log_show_pos(m, err->info.pos);
7784         }
7785
7786         return 0;
7787 }
7788
7789 static const struct seq_operations tracing_err_log_seq_ops = {
7790         .start  = tracing_err_log_seq_start,
7791         .next   = tracing_err_log_seq_next,
7792         .stop   = tracing_err_log_seq_stop,
7793         .show   = tracing_err_log_seq_show
7794 };
7795
7796 static int tracing_err_log_open(struct inode *inode, struct file *file)
7797 {
7798         struct trace_array *tr = inode->i_private;
7799         int ret = 0;
7800
7801         ret = tracing_check_open_get_tr(tr);
7802         if (ret)
7803                 return ret;
7804
7805         /* If this file was opened for write, then erase contents */
7806         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7807                 clear_tracing_err_log(tr);
7808
7809         if (file->f_mode & FMODE_READ) {
7810                 ret = seq_open(file, &tracing_err_log_seq_ops);
7811                 if (!ret) {
7812                         struct seq_file *m = file->private_data;
7813                         m->private = tr;
7814                 } else {
7815                         trace_array_put(tr);
7816                 }
7817         }
7818         return ret;
7819 }
7820
7821 static ssize_t tracing_err_log_write(struct file *file,
7822                                      const char __user *buffer,
7823                                      size_t count, loff_t *ppos)
7824 {
7825         return count;
7826 }
7827
7828 static int tracing_err_log_release(struct inode *inode, struct file *file)
7829 {
7830         struct trace_array *tr = inode->i_private;
7831
7832         trace_array_put(tr);
7833
7834         if (file->f_mode & FMODE_READ)
7835                 seq_release(inode, file);
7836
7837         return 0;
7838 }
7839
7840 static const struct file_operations tracing_err_log_fops = {
7841         .open           = tracing_err_log_open,
7842         .write          = tracing_err_log_write,
7843         .read           = seq_read,
7844         .llseek         = tracing_lseek,
7845         .release        = tracing_err_log_release,
7846 };
7847
7848 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7849 {
7850         struct trace_array *tr = inode->i_private;
7851         struct ftrace_buffer_info *info;
7852         int ret;
7853
7854         ret = tracing_check_open_get_tr(tr);
7855         if (ret)
7856                 return ret;
7857
7858         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7859         if (!info) {
7860                 trace_array_put(tr);
7861                 return -ENOMEM;
7862         }
7863
7864         mutex_lock(&trace_types_lock);
7865
7866         info->iter.tr           = tr;
7867         info->iter.cpu_file     = tracing_get_cpu(inode);
7868         info->iter.trace        = tr->current_trace;
7869         info->iter.array_buffer = &tr->array_buffer;
7870         info->spare             = NULL;
7871         /* Force reading ring buffer for first read */
7872         info->read              = (unsigned int)-1;
7873
7874         filp->private_data = info;
7875
7876         tr->trace_ref++;
7877
7878         mutex_unlock(&trace_types_lock);
7879
7880         ret = nonseekable_open(inode, filp);
7881         if (ret < 0)
7882                 trace_array_put(tr);
7883
7884         return ret;
7885 }
7886
7887 static __poll_t
7888 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7889 {
7890         struct ftrace_buffer_info *info = filp->private_data;
7891         struct trace_iterator *iter = &info->iter;
7892
7893         return trace_poll(iter, filp, poll_table);
7894 }
7895
7896 static ssize_t
7897 tracing_buffers_read(struct file *filp, char __user *ubuf,
7898                      size_t count, loff_t *ppos)
7899 {
7900         struct ftrace_buffer_info *info = filp->private_data;
7901         struct trace_iterator *iter = &info->iter;
7902         void *trace_data;
7903         int page_size;
7904         ssize_t ret = 0;
7905         ssize_t size;
7906
7907         if (!count)
7908                 return 0;
7909
7910 #ifdef CONFIG_TRACER_MAX_TRACE
7911         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7912                 return -EBUSY;
7913 #endif
7914
7915         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7916
7917         /* Make sure the spare matches the current sub buffer size */
7918         if (info->spare) {
7919                 if (page_size != info->spare_size) {
7920                         ring_buffer_free_read_page(iter->array_buffer->buffer,
7921                                                    info->spare_cpu, info->spare);
7922                         info->spare = NULL;
7923                 }
7924         }
7925
7926         if (!info->spare) {
7927                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7928                                                           iter->cpu_file);
7929                 if (IS_ERR(info->spare)) {
7930                         ret = PTR_ERR(info->spare);
7931                         info->spare = NULL;
7932                 } else {
7933                         info->spare_cpu = iter->cpu_file;
7934                         info->spare_size = page_size;
7935                 }
7936         }
7937         if (!info->spare)
7938                 return ret;
7939
7940         /* Do we have previous read data to read? */
7941         if (info->read < page_size)
7942                 goto read;
7943
7944  again:
7945         trace_access_lock(iter->cpu_file);
7946         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7947                                     info->spare,
7948                                     count,
7949                                     iter->cpu_file, 0);
7950         trace_access_unlock(iter->cpu_file);
7951
7952         if (ret < 0) {
7953                 if (trace_empty(iter)) {
7954                         if ((filp->f_flags & O_NONBLOCK))
7955                                 return -EAGAIN;
7956
7957                         ret = wait_on_pipe(iter, 0);
7958                         if (ret)
7959                                 return ret;
7960
7961                         goto again;
7962                 }
7963                 return 0;
7964         }
7965
7966         info->read = 0;
7967  read:
7968         size = page_size - info->read;
7969         if (size > count)
7970                 size = count;
7971         trace_data = ring_buffer_read_page_data(info->spare);
7972         ret = copy_to_user(ubuf, trace_data + info->read, size);
7973         if (ret == size)
7974                 return -EFAULT;
7975
7976         size -= ret;
7977
7978         *ppos += size;
7979         info->read += size;
7980
7981         return size;
7982 }
7983
7984 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7985 {
7986         struct ftrace_buffer_info *info = file->private_data;
7987         struct trace_iterator *iter = &info->iter;
7988
7989         iter->closed = true;
7990         /* Make sure the waiters see the new wait_index */
7991         (void)atomic_fetch_inc_release(&iter->wait_index);
7992
7993         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7994
7995         return 0;
7996 }
7997
7998 static int tracing_buffers_release(struct inode *inode, struct file *file)
7999 {
8000         struct ftrace_buffer_info *info = file->private_data;
8001         struct trace_iterator *iter = &info->iter;
8002
8003         mutex_lock(&trace_types_lock);
8004
8005         iter->tr->trace_ref--;
8006
8007         __trace_array_put(iter->tr);
8008
8009         if (info->spare)
8010                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8011                                            info->spare_cpu, info->spare);
8012         kvfree(info);
8013
8014         mutex_unlock(&trace_types_lock);
8015
8016         return 0;
8017 }
8018
8019 struct buffer_ref {
8020         struct trace_buffer     *buffer;
8021         void                    *page;
8022         int                     cpu;
8023         refcount_t              refcount;
8024 };
8025
8026 static void buffer_ref_release(struct buffer_ref *ref)
8027 {
8028         if (!refcount_dec_and_test(&ref->refcount))
8029                 return;
8030         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8031         kfree(ref);
8032 }
8033
8034 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8035                                     struct pipe_buffer *buf)
8036 {
8037         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8038
8039         buffer_ref_release(ref);
8040         buf->private = 0;
8041 }
8042
8043 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8044                                 struct pipe_buffer *buf)
8045 {
8046         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8047
8048         if (refcount_read(&ref->refcount) > INT_MAX/2)
8049                 return false;
8050
8051         refcount_inc(&ref->refcount);
8052         return true;
8053 }
8054
8055 /* Pipe buffer operations for a buffer. */
8056 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8057         .release                = buffer_pipe_buf_release,
8058         .get                    = buffer_pipe_buf_get,
8059 };
8060
8061 /*
8062  * Callback from splice_to_pipe(), if we need to release some pages
8063  * at the end of the spd in case we error'ed out in filling the pipe.
8064  */
8065 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8066 {
8067         struct buffer_ref *ref =
8068                 (struct buffer_ref *)spd->partial[i].private;
8069
8070         buffer_ref_release(ref);
8071         spd->partial[i].private = 0;
8072 }
8073
8074 static ssize_t
8075 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8076                             struct pipe_inode_info *pipe, size_t len,
8077                             unsigned int flags)
8078 {
8079         struct ftrace_buffer_info *info = file->private_data;
8080         struct trace_iterator *iter = &info->iter;
8081         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8082         struct page *pages_def[PIPE_DEF_BUFFERS];
8083         struct splice_pipe_desc spd = {
8084                 .pages          = pages_def,
8085                 .partial        = partial_def,
8086                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8087                 .ops            = &buffer_pipe_buf_ops,
8088                 .spd_release    = buffer_spd_release,
8089         };
8090         struct buffer_ref *ref;
8091         bool woken = false;
8092         int page_size;
8093         int entries, i;
8094         ssize_t ret = 0;
8095
8096 #ifdef CONFIG_TRACER_MAX_TRACE
8097         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8098                 return -EBUSY;
8099 #endif
8100
8101         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8102         if (*ppos & (page_size - 1))
8103                 return -EINVAL;
8104
8105         if (len & (page_size - 1)) {
8106                 if (len < page_size)
8107                         return -EINVAL;
8108                 len &= (~(page_size - 1));
8109         }
8110
8111         if (splice_grow_spd(pipe, &spd))
8112                 return -ENOMEM;
8113
8114  again:
8115         trace_access_lock(iter->cpu_file);
8116         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8117
8118         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8119                 struct page *page;
8120                 int r;
8121
8122                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8123                 if (!ref) {
8124                         ret = -ENOMEM;
8125                         break;
8126                 }
8127
8128                 refcount_set(&ref->refcount, 1);
8129                 ref->buffer = iter->array_buffer->buffer;
8130                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8131                 if (IS_ERR(ref->page)) {
8132                         ret = PTR_ERR(ref->page);
8133                         ref->page = NULL;
8134                         kfree(ref);
8135                         break;
8136                 }
8137                 ref->cpu = iter->cpu_file;
8138
8139                 r = ring_buffer_read_page(ref->buffer, ref->page,
8140                                           len, iter->cpu_file, 1);
8141                 if (r < 0) {
8142                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8143                                                    ref->page);
8144                         kfree(ref);
8145                         break;
8146                 }
8147
8148                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8149
8150                 spd.pages[i] = page;
8151                 spd.partial[i].len = page_size;
8152                 spd.partial[i].offset = 0;
8153                 spd.partial[i].private = (unsigned long)ref;
8154                 spd.nr_pages++;
8155                 *ppos += page_size;
8156
8157                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8158         }
8159
8160         trace_access_unlock(iter->cpu_file);
8161         spd.nr_pages = i;
8162
8163         /* did we read anything? */
8164         if (!spd.nr_pages) {
8165
8166                 if (ret)
8167                         goto out;
8168
8169                 if (woken)
8170                         goto out;
8171
8172                 ret = -EAGAIN;
8173                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8174                         goto out;
8175
8176                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8177                 if (ret)
8178                         goto out;
8179
8180                 /* No need to wait after waking up when tracing is off */
8181                 if (!tracer_tracing_is_on(iter->tr))
8182                         goto out;
8183
8184                 /* Iterate one more time to collect any new data then exit */
8185                 woken = true;
8186
8187                 goto again;
8188         }
8189
8190         ret = splice_to_pipe(pipe, &spd);
8191 out:
8192         splice_shrink_spd(&spd);
8193
8194         return ret;
8195 }
8196
8197 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8198 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8199 {
8200         struct ftrace_buffer_info *info = file->private_data;
8201         struct trace_iterator *iter = &info->iter;
8202
8203         if (cmd)
8204                 return -ENOIOCTLCMD;
8205
8206         mutex_lock(&trace_types_lock);
8207
8208         /* Make sure the waiters see the new wait_index */
8209         (void)atomic_fetch_inc_release(&iter->wait_index);
8210
8211         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8212
8213         mutex_unlock(&trace_types_lock);
8214         return 0;
8215 }
8216
8217 static const struct file_operations tracing_buffers_fops = {
8218         .open           = tracing_buffers_open,
8219         .read           = tracing_buffers_read,
8220         .poll           = tracing_buffers_poll,
8221         .release        = tracing_buffers_release,
8222         .flush          = tracing_buffers_flush,
8223         .splice_read    = tracing_buffers_splice_read,
8224         .unlocked_ioctl = tracing_buffers_ioctl,
8225         .llseek         = no_llseek,
8226 };
8227
8228 static ssize_t
8229 tracing_stats_read(struct file *filp, char __user *ubuf,
8230                    size_t count, loff_t *ppos)
8231 {
8232         struct inode *inode = file_inode(filp);
8233         struct trace_array *tr = inode->i_private;
8234         struct array_buffer *trace_buf = &tr->array_buffer;
8235         int cpu = tracing_get_cpu(inode);
8236         struct trace_seq *s;
8237         unsigned long cnt;
8238         unsigned long long t;
8239         unsigned long usec_rem;
8240
8241         s = kmalloc(sizeof(*s), GFP_KERNEL);
8242         if (!s)
8243                 return -ENOMEM;
8244
8245         trace_seq_init(s);
8246
8247         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8248         trace_seq_printf(s, "entries: %ld\n", cnt);
8249
8250         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8251         trace_seq_printf(s, "overrun: %ld\n", cnt);
8252
8253         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8254         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8255
8256         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8257         trace_seq_printf(s, "bytes: %ld\n", cnt);
8258
8259         if (trace_clocks[tr->clock_id].in_ns) {
8260                 /* local or global for trace_clock */
8261                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8262                 usec_rem = do_div(t, USEC_PER_SEC);
8263                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8264                                                                 t, usec_rem);
8265
8266                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8267                 usec_rem = do_div(t, USEC_PER_SEC);
8268                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8269         } else {
8270                 /* counter or tsc mode for trace_clock */
8271                 trace_seq_printf(s, "oldest event ts: %llu\n",
8272                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8273
8274                 trace_seq_printf(s, "now ts: %llu\n",
8275                                 ring_buffer_time_stamp(trace_buf->buffer));
8276         }
8277
8278         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8279         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8280
8281         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8282         trace_seq_printf(s, "read events: %ld\n", cnt);
8283
8284         count = simple_read_from_buffer(ubuf, count, ppos,
8285                                         s->buffer, trace_seq_used(s));
8286
8287         kfree(s);
8288
8289         return count;
8290 }
8291
8292 static const struct file_operations tracing_stats_fops = {
8293         .open           = tracing_open_generic_tr,
8294         .read           = tracing_stats_read,
8295         .llseek         = generic_file_llseek,
8296         .release        = tracing_release_generic_tr,
8297 };
8298
8299 #ifdef CONFIG_DYNAMIC_FTRACE
8300
8301 static ssize_t
8302 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8303                   size_t cnt, loff_t *ppos)
8304 {
8305         ssize_t ret;
8306         char *buf;
8307         int r;
8308
8309         /* 256 should be plenty to hold the amount needed */
8310         buf = kmalloc(256, GFP_KERNEL);
8311         if (!buf)
8312                 return -ENOMEM;
8313
8314         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8315                       ftrace_update_tot_cnt,
8316                       ftrace_number_of_pages,
8317                       ftrace_number_of_groups);
8318
8319         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8320         kfree(buf);
8321         return ret;
8322 }
8323
8324 static const struct file_operations tracing_dyn_info_fops = {
8325         .open           = tracing_open_generic,
8326         .read           = tracing_read_dyn_info,
8327         .llseek         = generic_file_llseek,
8328 };
8329 #endif /* CONFIG_DYNAMIC_FTRACE */
8330
8331 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8332 static void
8333 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8334                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8335                 void *data)
8336 {
8337         tracing_snapshot_instance(tr);
8338 }
8339
8340 static void
8341 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8342                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8343                       void *data)
8344 {
8345         struct ftrace_func_mapper *mapper = data;
8346         long *count = NULL;
8347
8348         if (mapper)
8349                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8350
8351         if (count) {
8352
8353                 if (*count <= 0)
8354                         return;
8355
8356                 (*count)--;
8357         }
8358
8359         tracing_snapshot_instance(tr);
8360 }
8361
8362 static int
8363 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8364                       struct ftrace_probe_ops *ops, void *data)
8365 {
8366         struct ftrace_func_mapper *mapper = data;
8367         long *count = NULL;
8368
8369         seq_printf(m, "%ps:", (void *)ip);
8370
8371         seq_puts(m, "snapshot");
8372
8373         if (mapper)
8374                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8375
8376         if (count)
8377                 seq_printf(m, ":count=%ld\n", *count);
8378         else
8379                 seq_puts(m, ":unlimited\n");
8380
8381         return 0;
8382 }
8383
8384 static int
8385 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8386                      unsigned long ip, void *init_data, void **data)
8387 {
8388         struct ftrace_func_mapper *mapper = *data;
8389
8390         if (!mapper) {
8391                 mapper = allocate_ftrace_func_mapper();
8392                 if (!mapper)
8393                         return -ENOMEM;
8394                 *data = mapper;
8395         }
8396
8397         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8398 }
8399
8400 static void
8401 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8402                      unsigned long ip, void *data)
8403 {
8404         struct ftrace_func_mapper *mapper = data;
8405
8406         if (!ip) {
8407                 if (!mapper)
8408                         return;
8409                 free_ftrace_func_mapper(mapper, NULL);
8410                 return;
8411         }
8412
8413         ftrace_func_mapper_remove_ip(mapper, ip);
8414 }
8415
8416 static struct ftrace_probe_ops snapshot_probe_ops = {
8417         .func                   = ftrace_snapshot,
8418         .print                  = ftrace_snapshot_print,
8419 };
8420
8421 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8422         .func                   = ftrace_count_snapshot,
8423         .print                  = ftrace_snapshot_print,
8424         .init                   = ftrace_snapshot_init,
8425         .free                   = ftrace_snapshot_free,
8426 };
8427
8428 static int
8429 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8430                                char *glob, char *cmd, char *param, int enable)
8431 {
8432         struct ftrace_probe_ops *ops;
8433         void *count = (void *)-1;
8434         char *number;
8435         int ret;
8436
8437         if (!tr)
8438                 return -ENODEV;
8439
8440         /* hash funcs only work with set_ftrace_filter */
8441         if (!enable)
8442                 return -EINVAL;
8443
8444         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8445
8446         if (glob[0] == '!') {
8447                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8448                 if (!ret)
8449                         tracing_disarm_snapshot(tr);
8450
8451                 return ret;
8452         }
8453
8454         if (!param)
8455                 goto out_reg;
8456
8457         number = strsep(&param, ":");
8458
8459         if (!strlen(number))
8460                 goto out_reg;
8461
8462         /*
8463          * We use the callback data field (which is a pointer)
8464          * as our counter.
8465          */
8466         ret = kstrtoul(number, 0, (unsigned long *)&count);
8467         if (ret)
8468                 return ret;
8469
8470  out_reg:
8471         ret = tracing_arm_snapshot(tr);
8472         if (ret < 0)
8473                 goto out;
8474
8475         ret = register_ftrace_function_probe(glob, tr, ops, count);
8476         if (ret < 0)
8477                 tracing_disarm_snapshot(tr);
8478  out:
8479         return ret < 0 ? ret : 0;
8480 }
8481
8482 static struct ftrace_func_command ftrace_snapshot_cmd = {
8483         .name                   = "snapshot",
8484         .func                   = ftrace_trace_snapshot_callback,
8485 };
8486
8487 static __init int register_snapshot_cmd(void)
8488 {
8489         return register_ftrace_command(&ftrace_snapshot_cmd);
8490 }
8491 #else
8492 static inline __init int register_snapshot_cmd(void) { return 0; }
8493 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8494
8495 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8496 {
8497         if (WARN_ON(!tr->dir))
8498                 return ERR_PTR(-ENODEV);
8499
8500         /* Top directory uses NULL as the parent */
8501         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8502                 return NULL;
8503
8504         /* All sub buffers have a descriptor */
8505         return tr->dir;
8506 }
8507
8508 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8509 {
8510         struct dentry *d_tracer;
8511
8512         if (tr->percpu_dir)
8513                 return tr->percpu_dir;
8514
8515         d_tracer = tracing_get_dentry(tr);
8516         if (IS_ERR(d_tracer))
8517                 return NULL;
8518
8519         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8520
8521         MEM_FAIL(!tr->percpu_dir,
8522                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8523
8524         return tr->percpu_dir;
8525 }
8526
8527 static struct dentry *
8528 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8529                       void *data, long cpu, const struct file_operations *fops)
8530 {
8531         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8532
8533         if (ret) /* See tracing_get_cpu() */
8534                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8535         return ret;
8536 }
8537
8538 static void
8539 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8540 {
8541         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8542         struct dentry *d_cpu;
8543         char cpu_dir[30]; /* 30 characters should be more than enough */
8544
8545         if (!d_percpu)
8546                 return;
8547
8548         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8549         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8550         if (!d_cpu) {
8551                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8552                 return;
8553         }
8554
8555         /* per cpu trace_pipe */
8556         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8557                                 tr, cpu, &tracing_pipe_fops);
8558
8559         /* per cpu trace */
8560         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8561                                 tr, cpu, &tracing_fops);
8562
8563         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8564                                 tr, cpu, &tracing_buffers_fops);
8565
8566         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8567                                 tr, cpu, &tracing_stats_fops);
8568
8569         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8570                                 tr, cpu, &tracing_entries_fops);
8571
8572 #ifdef CONFIG_TRACER_SNAPSHOT
8573         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8574                                 tr, cpu, &snapshot_fops);
8575
8576         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8577                                 tr, cpu, &snapshot_raw_fops);
8578 #endif
8579 }
8580
8581 #ifdef CONFIG_FTRACE_SELFTEST
8582 /* Let selftest have access to static functions in this file */
8583 #include "trace_selftest.c"
8584 #endif
8585
8586 static ssize_t
8587 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8588                         loff_t *ppos)
8589 {
8590         struct trace_option_dentry *topt = filp->private_data;
8591         char *buf;
8592
8593         if (topt->flags->val & topt->opt->bit)
8594                 buf = "1\n";
8595         else
8596                 buf = "0\n";
8597
8598         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8599 }
8600
8601 static ssize_t
8602 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8603                          loff_t *ppos)
8604 {
8605         struct trace_option_dentry *topt = filp->private_data;
8606         unsigned long val;
8607         int ret;
8608
8609         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8610         if (ret)
8611                 return ret;
8612
8613         if (val != 0 && val != 1)
8614                 return -EINVAL;
8615
8616         if (!!(topt->flags->val & topt->opt->bit) != val) {
8617                 mutex_lock(&trace_types_lock);
8618                 ret = __set_tracer_option(topt->tr, topt->flags,
8619                                           topt->opt, !val);
8620                 mutex_unlock(&trace_types_lock);
8621                 if (ret)
8622                         return ret;
8623         }
8624
8625         *ppos += cnt;
8626
8627         return cnt;
8628 }
8629
8630 static int tracing_open_options(struct inode *inode, struct file *filp)
8631 {
8632         struct trace_option_dentry *topt = inode->i_private;
8633         int ret;
8634
8635         ret = tracing_check_open_get_tr(topt->tr);
8636         if (ret)
8637                 return ret;
8638
8639         filp->private_data = inode->i_private;
8640         return 0;
8641 }
8642
8643 static int tracing_release_options(struct inode *inode, struct file *file)
8644 {
8645         struct trace_option_dentry *topt = file->private_data;
8646
8647         trace_array_put(topt->tr);
8648         return 0;
8649 }
8650
8651 static const struct file_operations trace_options_fops = {
8652         .open = tracing_open_options,
8653         .read = trace_options_read,
8654         .write = trace_options_write,
8655         .llseek = generic_file_llseek,
8656         .release = tracing_release_options,
8657 };
8658
8659 /*
8660  * In order to pass in both the trace_array descriptor as well as the index
8661  * to the flag that the trace option file represents, the trace_array
8662  * has a character array of trace_flags_index[], which holds the index
8663  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8664  * The address of this character array is passed to the flag option file
8665  * read/write callbacks.
8666  *
8667  * In order to extract both the index and the trace_array descriptor,
8668  * get_tr_index() uses the following algorithm.
8669  *
8670  *   idx = *ptr;
8671  *
8672  * As the pointer itself contains the address of the index (remember
8673  * index[1] == 1).
8674  *
8675  * Then to get the trace_array descriptor, by subtracting that index
8676  * from the ptr, we get to the start of the index itself.
8677  *
8678  *   ptr - idx == &index[0]
8679  *
8680  * Then a simple container_of() from that pointer gets us to the
8681  * trace_array descriptor.
8682  */
8683 static void get_tr_index(void *data, struct trace_array **ptr,
8684                          unsigned int *pindex)
8685 {
8686         *pindex = *(unsigned char *)data;
8687
8688         *ptr = container_of(data - *pindex, struct trace_array,
8689                             trace_flags_index);
8690 }
8691
8692 static ssize_t
8693 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8694                         loff_t *ppos)
8695 {
8696         void *tr_index = filp->private_data;
8697         struct trace_array *tr;
8698         unsigned int index;
8699         char *buf;
8700
8701         get_tr_index(tr_index, &tr, &index);
8702
8703         if (tr->trace_flags & (1 << index))
8704                 buf = "1\n";
8705         else
8706                 buf = "0\n";
8707
8708         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8709 }
8710
8711 static ssize_t
8712 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8713                          loff_t *ppos)
8714 {
8715         void *tr_index = filp->private_data;
8716         struct trace_array *tr;
8717         unsigned int index;
8718         unsigned long val;
8719         int ret;
8720
8721         get_tr_index(tr_index, &tr, &index);
8722
8723         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8724         if (ret)
8725                 return ret;
8726
8727         if (val != 0 && val != 1)
8728                 return -EINVAL;
8729
8730         mutex_lock(&event_mutex);
8731         mutex_lock(&trace_types_lock);
8732         ret = set_tracer_flag(tr, 1 << index, val);
8733         mutex_unlock(&trace_types_lock);
8734         mutex_unlock(&event_mutex);
8735
8736         if (ret < 0)
8737                 return ret;
8738
8739         *ppos += cnt;
8740
8741         return cnt;
8742 }
8743
8744 static const struct file_operations trace_options_core_fops = {
8745         .open = tracing_open_generic,
8746         .read = trace_options_core_read,
8747         .write = trace_options_core_write,
8748         .llseek = generic_file_llseek,
8749 };
8750
8751 struct dentry *trace_create_file(const char *name,
8752                                  umode_t mode,
8753                                  struct dentry *parent,
8754                                  void *data,
8755                                  const struct file_operations *fops)
8756 {
8757         struct dentry *ret;
8758
8759         ret = tracefs_create_file(name, mode, parent, data, fops);
8760         if (!ret)
8761                 pr_warn("Could not create tracefs '%s' entry\n", name);
8762
8763         return ret;
8764 }
8765
8766
8767 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8768 {
8769         struct dentry *d_tracer;
8770
8771         if (tr->options)
8772                 return tr->options;
8773
8774         d_tracer = tracing_get_dentry(tr);
8775         if (IS_ERR(d_tracer))
8776                 return NULL;
8777
8778         tr->options = tracefs_create_dir("options", d_tracer);
8779         if (!tr->options) {
8780                 pr_warn("Could not create tracefs directory 'options'\n");
8781                 return NULL;
8782         }
8783
8784         return tr->options;
8785 }
8786
8787 static void
8788 create_trace_option_file(struct trace_array *tr,
8789                          struct trace_option_dentry *topt,
8790                          struct tracer_flags *flags,
8791                          struct tracer_opt *opt)
8792 {
8793         struct dentry *t_options;
8794
8795         t_options = trace_options_init_dentry(tr);
8796         if (!t_options)
8797                 return;
8798
8799         topt->flags = flags;
8800         topt->opt = opt;
8801         topt->tr = tr;
8802
8803         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8804                                         t_options, topt, &trace_options_fops);
8805
8806 }
8807
8808 static void
8809 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8810 {
8811         struct trace_option_dentry *topts;
8812         struct trace_options *tr_topts;
8813         struct tracer_flags *flags;
8814         struct tracer_opt *opts;
8815         int cnt;
8816         int i;
8817
8818         if (!tracer)
8819                 return;
8820
8821         flags = tracer->flags;
8822
8823         if (!flags || !flags->opts)
8824                 return;
8825
8826         /*
8827          * If this is an instance, only create flags for tracers
8828          * the instance may have.
8829          */
8830         if (!trace_ok_for_array(tracer, tr))
8831                 return;
8832
8833         for (i = 0; i < tr->nr_topts; i++) {
8834                 /* Make sure there's no duplicate flags. */
8835                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8836                         return;
8837         }
8838
8839         opts = flags->opts;
8840
8841         for (cnt = 0; opts[cnt].name; cnt++)
8842                 ;
8843
8844         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8845         if (!topts)
8846                 return;
8847
8848         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8849                             GFP_KERNEL);
8850         if (!tr_topts) {
8851                 kfree(topts);
8852                 return;
8853         }
8854
8855         tr->topts = tr_topts;
8856         tr->topts[tr->nr_topts].tracer = tracer;
8857         tr->topts[tr->nr_topts].topts = topts;
8858         tr->nr_topts++;
8859
8860         for (cnt = 0; opts[cnt].name; cnt++) {
8861                 create_trace_option_file(tr, &topts[cnt], flags,
8862                                          &opts[cnt]);
8863                 MEM_FAIL(topts[cnt].entry == NULL,
8864                           "Failed to create trace option: %s",
8865                           opts[cnt].name);
8866         }
8867 }
8868
8869 static struct dentry *
8870 create_trace_option_core_file(struct trace_array *tr,
8871                               const char *option, long index)
8872 {
8873         struct dentry *t_options;
8874
8875         t_options = trace_options_init_dentry(tr);
8876         if (!t_options)
8877                 return NULL;
8878
8879         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8880                                  (void *)&tr->trace_flags_index[index],
8881                                  &trace_options_core_fops);
8882 }
8883
8884 static void create_trace_options_dir(struct trace_array *tr)
8885 {
8886         struct dentry *t_options;
8887         bool top_level = tr == &global_trace;
8888         int i;
8889
8890         t_options = trace_options_init_dentry(tr);
8891         if (!t_options)
8892                 return;
8893
8894         for (i = 0; trace_options[i]; i++) {
8895                 if (top_level ||
8896                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8897                         create_trace_option_core_file(tr, trace_options[i], i);
8898         }
8899 }
8900
8901 static ssize_t
8902 rb_simple_read(struct file *filp, char __user *ubuf,
8903                size_t cnt, loff_t *ppos)
8904 {
8905         struct trace_array *tr = filp->private_data;
8906         char buf[64];
8907         int r;
8908
8909         r = tracer_tracing_is_on(tr);
8910         r = sprintf(buf, "%d\n", r);
8911
8912         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8913 }
8914
8915 static ssize_t
8916 rb_simple_write(struct file *filp, const char __user *ubuf,
8917                 size_t cnt, loff_t *ppos)
8918 {
8919         struct trace_array *tr = filp->private_data;
8920         struct trace_buffer *buffer = tr->array_buffer.buffer;
8921         unsigned long val;
8922         int ret;
8923
8924         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8925         if (ret)
8926                 return ret;
8927
8928         if (buffer) {
8929                 mutex_lock(&trace_types_lock);
8930                 if (!!val == tracer_tracing_is_on(tr)) {
8931                         val = 0; /* do nothing */
8932                 } else if (val) {
8933                         tracer_tracing_on(tr);
8934                         if (tr->current_trace->start)
8935                                 tr->current_trace->start(tr);
8936                 } else {
8937                         tracer_tracing_off(tr);
8938                         if (tr->current_trace->stop)
8939                                 tr->current_trace->stop(tr);
8940                         /* Wake up any waiters */
8941                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
8942                 }
8943                 mutex_unlock(&trace_types_lock);
8944         }
8945
8946         (*ppos)++;
8947
8948         return cnt;
8949 }
8950
8951 static const struct file_operations rb_simple_fops = {
8952         .open           = tracing_open_generic_tr,
8953         .read           = rb_simple_read,
8954         .write          = rb_simple_write,
8955         .release        = tracing_release_generic_tr,
8956         .llseek         = default_llseek,
8957 };
8958
8959 static ssize_t
8960 buffer_percent_read(struct file *filp, char __user *ubuf,
8961                     size_t cnt, loff_t *ppos)
8962 {
8963         struct trace_array *tr = filp->private_data;
8964         char buf[64];
8965         int r;
8966
8967         r = tr->buffer_percent;
8968         r = sprintf(buf, "%d\n", r);
8969
8970         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8971 }
8972
8973 static ssize_t
8974 buffer_percent_write(struct file *filp, const char __user *ubuf,
8975                      size_t cnt, loff_t *ppos)
8976 {
8977         struct trace_array *tr = filp->private_data;
8978         unsigned long val;
8979         int ret;
8980
8981         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8982         if (ret)
8983                 return ret;
8984
8985         if (val > 100)
8986                 return -EINVAL;
8987
8988         tr->buffer_percent = val;
8989
8990         (*ppos)++;
8991
8992         return cnt;
8993 }
8994
8995 static const struct file_operations buffer_percent_fops = {
8996         .open           = tracing_open_generic_tr,
8997         .read           = buffer_percent_read,
8998         .write          = buffer_percent_write,
8999         .release        = tracing_release_generic_tr,
9000         .llseek         = default_llseek,
9001 };
9002
9003 static ssize_t
9004 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9005 {
9006         struct trace_array *tr = filp->private_data;
9007         size_t size;
9008         char buf[64];
9009         int order;
9010         int r;
9011
9012         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9013         size = (PAGE_SIZE << order) / 1024;
9014
9015         r = sprintf(buf, "%zd\n", size);
9016
9017         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9018 }
9019
9020 static ssize_t
9021 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9022                          size_t cnt, loff_t *ppos)
9023 {
9024         struct trace_array *tr = filp->private_data;
9025         unsigned long val;
9026         int old_order;
9027         int order;
9028         int pages;
9029         int ret;
9030
9031         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9032         if (ret)
9033                 return ret;
9034
9035         val *= 1024; /* value passed in is in KB */
9036
9037         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9038         order = fls(pages - 1);
9039
9040         /* limit between 1 and 128 system pages */
9041         if (order < 0 || order > 7)
9042                 return -EINVAL;
9043
9044         /* Do not allow tracing while changing the order of the ring buffer */
9045         tracing_stop_tr(tr);
9046
9047         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9048         if (old_order == order)
9049                 goto out;
9050
9051         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9052         if (ret)
9053                 goto out;
9054
9055 #ifdef CONFIG_TRACER_MAX_TRACE
9056
9057         if (!tr->allocated_snapshot)
9058                 goto out_max;
9059
9060         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9061         if (ret) {
9062                 /* Put back the old order */
9063                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9064                 if (WARN_ON_ONCE(cnt)) {
9065                         /*
9066                          * AARGH! We are left with different orders!
9067                          * The max buffer is our "snapshot" buffer.
9068                          * When a tracer needs a snapshot (one of the
9069                          * latency tracers), it swaps the max buffer
9070                          * with the saved snap shot. We succeeded to
9071                          * update the order of the main buffer, but failed to
9072                          * update the order of the max buffer. But when we tried
9073                          * to reset the main buffer to the original size, we
9074                          * failed there too. This is very unlikely to
9075                          * happen, but if it does, warn and kill all
9076                          * tracing.
9077                          */
9078                         tracing_disabled = 1;
9079                 }
9080                 goto out;
9081         }
9082  out_max:
9083 #endif
9084         (*ppos)++;
9085  out:
9086         if (ret)
9087                 cnt = ret;
9088         tracing_start_tr(tr);
9089         return cnt;
9090 }
9091
9092 static const struct file_operations buffer_subbuf_size_fops = {
9093         .open           = tracing_open_generic_tr,
9094         .read           = buffer_subbuf_size_read,
9095         .write          = buffer_subbuf_size_write,
9096         .release        = tracing_release_generic_tr,
9097         .llseek         = default_llseek,
9098 };
9099
9100 static struct dentry *trace_instance_dir;
9101
9102 static void
9103 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9104
9105 static int
9106 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9107 {
9108         enum ring_buffer_flags rb_flags;
9109
9110         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9111
9112         buf->tr = tr;
9113
9114         buf->buffer = ring_buffer_alloc(size, rb_flags);
9115         if (!buf->buffer)
9116                 return -ENOMEM;
9117
9118         buf->data = alloc_percpu(struct trace_array_cpu);
9119         if (!buf->data) {
9120                 ring_buffer_free(buf->buffer);
9121                 buf->buffer = NULL;
9122                 return -ENOMEM;
9123         }
9124
9125         /* Allocate the first page for all buffers */
9126         set_buffer_entries(&tr->array_buffer,
9127                            ring_buffer_size(tr->array_buffer.buffer, 0));
9128
9129         return 0;
9130 }
9131
9132 static void free_trace_buffer(struct array_buffer *buf)
9133 {
9134         if (buf->buffer) {
9135                 ring_buffer_free(buf->buffer);
9136                 buf->buffer = NULL;
9137                 free_percpu(buf->data);
9138                 buf->data = NULL;
9139         }
9140 }
9141
9142 static int allocate_trace_buffers(struct trace_array *tr, int size)
9143 {
9144         int ret;
9145
9146         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9147         if (ret)
9148                 return ret;
9149
9150 #ifdef CONFIG_TRACER_MAX_TRACE
9151         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9152                                     allocate_snapshot ? size : 1);
9153         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9154                 free_trace_buffer(&tr->array_buffer);
9155                 return -ENOMEM;
9156         }
9157         tr->allocated_snapshot = allocate_snapshot;
9158
9159         allocate_snapshot = false;
9160 #endif
9161
9162         return 0;
9163 }
9164
9165 static void free_trace_buffers(struct trace_array *tr)
9166 {
9167         if (!tr)
9168                 return;
9169
9170         free_trace_buffer(&tr->array_buffer);
9171
9172 #ifdef CONFIG_TRACER_MAX_TRACE
9173         free_trace_buffer(&tr->max_buffer);
9174 #endif
9175 }
9176
9177 static void init_trace_flags_index(struct trace_array *tr)
9178 {
9179         int i;
9180
9181         /* Used by the trace options files */
9182         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9183                 tr->trace_flags_index[i] = i;
9184 }
9185
9186 static void __update_tracer_options(struct trace_array *tr)
9187 {
9188         struct tracer *t;
9189
9190         for (t = trace_types; t; t = t->next)
9191                 add_tracer_options(tr, t);
9192 }
9193
9194 static void update_tracer_options(struct trace_array *tr)
9195 {
9196         mutex_lock(&trace_types_lock);
9197         tracer_options_updated = true;
9198         __update_tracer_options(tr);
9199         mutex_unlock(&trace_types_lock);
9200 }
9201
9202 /* Must have trace_types_lock held */
9203 struct trace_array *trace_array_find(const char *instance)
9204 {
9205         struct trace_array *tr, *found = NULL;
9206
9207         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9208                 if (tr->name && strcmp(tr->name, instance) == 0) {
9209                         found = tr;
9210                         break;
9211                 }
9212         }
9213
9214         return found;
9215 }
9216
9217 struct trace_array *trace_array_find_get(const char *instance)
9218 {
9219         struct trace_array *tr;
9220
9221         mutex_lock(&trace_types_lock);
9222         tr = trace_array_find(instance);
9223         if (tr)
9224                 tr->ref++;
9225         mutex_unlock(&trace_types_lock);
9226
9227         return tr;
9228 }
9229
9230 static int trace_array_create_dir(struct trace_array *tr)
9231 {
9232         int ret;
9233
9234         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9235         if (!tr->dir)
9236                 return -EINVAL;
9237
9238         ret = event_trace_add_tracer(tr->dir, tr);
9239         if (ret) {
9240                 tracefs_remove(tr->dir);
9241                 return ret;
9242         }
9243
9244         init_tracer_tracefs(tr, tr->dir);
9245         __update_tracer_options(tr);
9246
9247         return ret;
9248 }
9249
9250 static struct trace_array *
9251 trace_array_create_systems(const char *name, const char *systems)
9252 {
9253         struct trace_array *tr;
9254         int ret;
9255
9256         ret = -ENOMEM;
9257         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9258         if (!tr)
9259                 return ERR_PTR(ret);
9260
9261         tr->name = kstrdup(name, GFP_KERNEL);
9262         if (!tr->name)
9263                 goto out_free_tr;
9264
9265         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9266                 goto out_free_tr;
9267
9268         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9269                 goto out_free_tr;
9270
9271         if (systems) {
9272                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9273                 if (!tr->system_names)
9274                         goto out_free_tr;
9275         }
9276
9277         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9278
9279         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9280
9281         raw_spin_lock_init(&tr->start_lock);
9282
9283         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9284 #ifdef CONFIG_TRACER_MAX_TRACE
9285         spin_lock_init(&tr->snapshot_trigger_lock);
9286 #endif
9287         tr->current_trace = &nop_trace;
9288
9289         INIT_LIST_HEAD(&tr->systems);
9290         INIT_LIST_HEAD(&tr->events);
9291         INIT_LIST_HEAD(&tr->hist_vars);
9292         INIT_LIST_HEAD(&tr->err_log);
9293
9294         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9295                 goto out_free_tr;
9296
9297         /* The ring buffer is defaultly expanded */
9298         trace_set_ring_buffer_expanded(tr);
9299
9300         if (ftrace_allocate_ftrace_ops(tr) < 0)
9301                 goto out_free_tr;
9302
9303         ftrace_init_trace_array(tr);
9304
9305         init_trace_flags_index(tr);
9306
9307         if (trace_instance_dir) {
9308                 ret = trace_array_create_dir(tr);
9309                 if (ret)
9310                         goto out_free_tr;
9311         } else
9312                 __trace_early_add_events(tr);
9313
9314         list_add(&tr->list, &ftrace_trace_arrays);
9315
9316         tr->ref++;
9317
9318         return tr;
9319
9320  out_free_tr:
9321         ftrace_free_ftrace_ops(tr);
9322         free_trace_buffers(tr);
9323         free_cpumask_var(tr->pipe_cpumask);
9324         free_cpumask_var(tr->tracing_cpumask);
9325         kfree_const(tr->system_names);
9326         kfree(tr->name);
9327         kfree(tr);
9328
9329         return ERR_PTR(ret);
9330 }
9331
9332 static struct trace_array *trace_array_create(const char *name)
9333 {
9334         return trace_array_create_systems(name, NULL);
9335 }
9336
9337 static int instance_mkdir(const char *name)
9338 {
9339         struct trace_array *tr;
9340         int ret;
9341
9342         mutex_lock(&event_mutex);
9343         mutex_lock(&trace_types_lock);
9344
9345         ret = -EEXIST;
9346         if (trace_array_find(name))
9347                 goto out_unlock;
9348
9349         tr = trace_array_create(name);
9350
9351         ret = PTR_ERR_OR_ZERO(tr);
9352
9353 out_unlock:
9354         mutex_unlock(&trace_types_lock);
9355         mutex_unlock(&event_mutex);
9356         return ret;
9357 }
9358
9359 /**
9360  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9361  * @name: The name of the trace array to be looked up/created.
9362  * @systems: A list of systems to create event directories for (NULL for all)
9363  *
9364  * Returns pointer to trace array with given name.
9365  * NULL, if it cannot be created.
9366  *
9367  * NOTE: This function increments the reference counter associated with the
9368  * trace array returned. This makes sure it cannot be freed while in use.
9369  * Use trace_array_put() once the trace array is no longer needed.
9370  * If the trace_array is to be freed, trace_array_destroy() needs to
9371  * be called after the trace_array_put(), or simply let user space delete
9372  * it from the tracefs instances directory. But until the
9373  * trace_array_put() is called, user space can not delete it.
9374  *
9375  */
9376 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9377 {
9378         struct trace_array *tr;
9379
9380         mutex_lock(&event_mutex);
9381         mutex_lock(&trace_types_lock);
9382
9383         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9384                 if (tr->name && strcmp(tr->name, name) == 0)
9385                         goto out_unlock;
9386         }
9387
9388         tr = trace_array_create_systems(name, systems);
9389
9390         if (IS_ERR(tr))
9391                 tr = NULL;
9392 out_unlock:
9393         if (tr)
9394                 tr->ref++;
9395
9396         mutex_unlock(&trace_types_lock);
9397         mutex_unlock(&event_mutex);
9398         return tr;
9399 }
9400 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9401
9402 static int __remove_instance(struct trace_array *tr)
9403 {
9404         int i;
9405
9406         /* Reference counter for a newly created trace array = 1. */
9407         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9408                 return -EBUSY;
9409
9410         list_del(&tr->list);
9411
9412         /* Disable all the flags that were enabled coming in */
9413         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9414                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9415                         set_tracer_flag(tr, 1 << i, 0);
9416         }
9417
9418         tracing_set_nop(tr);
9419         clear_ftrace_function_probes(tr);
9420         event_trace_del_tracer(tr);
9421         ftrace_clear_pids(tr);
9422         ftrace_destroy_function_files(tr);
9423         tracefs_remove(tr->dir);
9424         free_percpu(tr->last_func_repeats);
9425         free_trace_buffers(tr);
9426         clear_tracing_err_log(tr);
9427
9428         for (i = 0; i < tr->nr_topts; i++) {
9429                 kfree(tr->topts[i].topts);
9430         }
9431         kfree(tr->topts);
9432
9433         free_cpumask_var(tr->pipe_cpumask);
9434         free_cpumask_var(tr->tracing_cpumask);
9435         kfree_const(tr->system_names);
9436         kfree(tr->name);
9437         kfree(tr);
9438
9439         return 0;
9440 }
9441
9442 int trace_array_destroy(struct trace_array *this_tr)
9443 {
9444         struct trace_array *tr;
9445         int ret;
9446
9447         if (!this_tr)
9448                 return -EINVAL;
9449
9450         mutex_lock(&event_mutex);
9451         mutex_lock(&trace_types_lock);
9452
9453         ret = -ENODEV;
9454
9455         /* Making sure trace array exists before destroying it. */
9456         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9457                 if (tr == this_tr) {
9458                         ret = __remove_instance(tr);
9459                         break;
9460                 }
9461         }
9462
9463         mutex_unlock(&trace_types_lock);
9464         mutex_unlock(&event_mutex);
9465
9466         return ret;
9467 }
9468 EXPORT_SYMBOL_GPL(trace_array_destroy);
9469
9470 static int instance_rmdir(const char *name)
9471 {
9472         struct trace_array *tr;
9473         int ret;
9474
9475         mutex_lock(&event_mutex);
9476         mutex_lock(&trace_types_lock);
9477
9478         ret = -ENODEV;
9479         tr = trace_array_find(name);
9480         if (tr)
9481                 ret = __remove_instance(tr);
9482
9483         mutex_unlock(&trace_types_lock);
9484         mutex_unlock(&event_mutex);
9485
9486         return ret;
9487 }
9488
9489 static __init void create_trace_instances(struct dentry *d_tracer)
9490 {
9491         struct trace_array *tr;
9492
9493         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9494                                                          instance_mkdir,
9495                                                          instance_rmdir);
9496         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9497                 return;
9498
9499         mutex_lock(&event_mutex);
9500         mutex_lock(&trace_types_lock);
9501
9502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9503                 if (!tr->name)
9504                         continue;
9505                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9506                              "Failed to create instance directory\n"))
9507                         break;
9508         }
9509
9510         mutex_unlock(&trace_types_lock);
9511         mutex_unlock(&event_mutex);
9512 }
9513
9514 static void
9515 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9516 {
9517         int cpu;
9518
9519         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9520                         tr, &show_traces_fops);
9521
9522         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9523                         tr, &set_tracer_fops);
9524
9525         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9526                           tr, &tracing_cpumask_fops);
9527
9528         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9529                           tr, &tracing_iter_fops);
9530
9531         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9532                           tr, &tracing_fops);
9533
9534         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9535                           tr, &tracing_pipe_fops);
9536
9537         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9538                           tr, &tracing_entries_fops);
9539
9540         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9541                           tr, &tracing_total_entries_fops);
9542
9543         trace_create_file("free_buffer", 0200, d_tracer,
9544                           tr, &tracing_free_buffer_fops);
9545
9546         trace_create_file("trace_marker", 0220, d_tracer,
9547                           tr, &tracing_mark_fops);
9548
9549         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9550
9551         trace_create_file("trace_marker_raw", 0220, d_tracer,
9552                           tr, &tracing_mark_raw_fops);
9553
9554         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9555                           &trace_clock_fops);
9556
9557         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9558                           tr, &rb_simple_fops);
9559
9560         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9561                           &trace_time_stamp_mode_fops);
9562
9563         tr->buffer_percent = 50;
9564
9565         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9566                         tr, &buffer_percent_fops);
9567
9568         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9569                           tr, &buffer_subbuf_size_fops);
9570
9571         create_trace_options_dir(tr);
9572
9573 #ifdef CONFIG_TRACER_MAX_TRACE
9574         trace_create_maxlat_file(tr, d_tracer);
9575 #endif
9576
9577         if (ftrace_create_function_files(tr, d_tracer))
9578                 MEM_FAIL(1, "Could not allocate function filter files");
9579
9580 #ifdef CONFIG_TRACER_SNAPSHOT
9581         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9582                           tr, &snapshot_fops);
9583 #endif
9584
9585         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9586                           tr, &tracing_err_log_fops);
9587
9588         for_each_tracing_cpu(cpu)
9589                 tracing_init_tracefs_percpu(tr, cpu);
9590
9591         ftrace_init_tracefs(tr, d_tracer);
9592 }
9593
9594 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9595 {
9596         struct vfsmount *mnt;
9597         struct file_system_type *type;
9598
9599         /*
9600          * To maintain backward compatibility for tools that mount
9601          * debugfs to get to the tracing facility, tracefs is automatically
9602          * mounted to the debugfs/tracing directory.
9603          */
9604         type = get_fs_type("tracefs");
9605         if (!type)
9606                 return NULL;
9607         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9608         put_filesystem(type);
9609         if (IS_ERR(mnt))
9610                 return NULL;
9611         mntget(mnt);
9612
9613         return mnt;
9614 }
9615
9616 /**
9617  * tracing_init_dentry - initialize top level trace array
9618  *
9619  * This is called when creating files or directories in the tracing
9620  * directory. It is called via fs_initcall() by any of the boot up code
9621  * and expects to return the dentry of the top level tracing directory.
9622  */
9623 int tracing_init_dentry(void)
9624 {
9625         struct trace_array *tr = &global_trace;
9626
9627         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9628                 pr_warn("Tracing disabled due to lockdown\n");
9629                 return -EPERM;
9630         }
9631
9632         /* The top level trace array uses  NULL as parent */
9633         if (tr->dir)
9634                 return 0;
9635
9636         if (WARN_ON(!tracefs_initialized()))
9637                 return -ENODEV;
9638
9639         /*
9640          * As there may still be users that expect the tracing
9641          * files to exist in debugfs/tracing, we must automount
9642          * the tracefs file system there, so older tools still
9643          * work with the newer kernel.
9644          */
9645         tr->dir = debugfs_create_automount("tracing", NULL,
9646                                            trace_automount, NULL);
9647
9648         return 0;
9649 }
9650
9651 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9652 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9653
9654 static struct workqueue_struct *eval_map_wq __initdata;
9655 static struct work_struct eval_map_work __initdata;
9656 static struct work_struct tracerfs_init_work __initdata;
9657
9658 static void __init eval_map_work_func(struct work_struct *work)
9659 {
9660         int len;
9661
9662         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9663         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9664 }
9665
9666 static int __init trace_eval_init(void)
9667 {
9668         INIT_WORK(&eval_map_work, eval_map_work_func);
9669
9670         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9671         if (!eval_map_wq) {
9672                 pr_err("Unable to allocate eval_map_wq\n");
9673                 /* Do work here */
9674                 eval_map_work_func(&eval_map_work);
9675                 return -ENOMEM;
9676         }
9677
9678         queue_work(eval_map_wq, &eval_map_work);
9679         return 0;
9680 }
9681
9682 subsys_initcall(trace_eval_init);
9683
9684 static int __init trace_eval_sync(void)
9685 {
9686         /* Make sure the eval map updates are finished */
9687         if (eval_map_wq)
9688                 destroy_workqueue(eval_map_wq);
9689         return 0;
9690 }
9691
9692 late_initcall_sync(trace_eval_sync);
9693
9694
9695 #ifdef CONFIG_MODULES
9696 static void trace_module_add_evals(struct module *mod)
9697 {
9698         if (!mod->num_trace_evals)
9699                 return;
9700
9701         /*
9702          * Modules with bad taint do not have events created, do
9703          * not bother with enums either.
9704          */
9705         if (trace_module_has_bad_taint(mod))
9706                 return;
9707
9708         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9709 }
9710
9711 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9712 static void trace_module_remove_evals(struct module *mod)
9713 {
9714         union trace_eval_map_item *map;
9715         union trace_eval_map_item **last = &trace_eval_maps;
9716
9717         if (!mod->num_trace_evals)
9718                 return;
9719
9720         mutex_lock(&trace_eval_mutex);
9721
9722         map = trace_eval_maps;
9723
9724         while (map) {
9725                 if (map->head.mod == mod)
9726                         break;
9727                 map = trace_eval_jmp_to_tail(map);
9728                 last = &map->tail.next;
9729                 map = map->tail.next;
9730         }
9731         if (!map)
9732                 goto out;
9733
9734         *last = trace_eval_jmp_to_tail(map)->tail.next;
9735         kfree(map);
9736  out:
9737         mutex_unlock(&trace_eval_mutex);
9738 }
9739 #else
9740 static inline void trace_module_remove_evals(struct module *mod) { }
9741 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9742
9743 static int trace_module_notify(struct notifier_block *self,
9744                                unsigned long val, void *data)
9745 {
9746         struct module *mod = data;
9747
9748         switch (val) {
9749         case MODULE_STATE_COMING:
9750                 trace_module_add_evals(mod);
9751                 break;
9752         case MODULE_STATE_GOING:
9753                 trace_module_remove_evals(mod);
9754                 break;
9755         }
9756
9757         return NOTIFY_OK;
9758 }
9759
9760 static struct notifier_block trace_module_nb = {
9761         .notifier_call = trace_module_notify,
9762         .priority = 0,
9763 };
9764 #endif /* CONFIG_MODULES */
9765
9766 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9767 {
9768
9769         event_trace_init();
9770
9771         init_tracer_tracefs(&global_trace, NULL);
9772         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9773
9774         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9775                         &global_trace, &tracing_thresh_fops);
9776
9777         trace_create_file("README", TRACE_MODE_READ, NULL,
9778                         NULL, &tracing_readme_fops);
9779
9780         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9781                         NULL, &tracing_saved_cmdlines_fops);
9782
9783         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9784                           NULL, &tracing_saved_cmdlines_size_fops);
9785
9786         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9787                         NULL, &tracing_saved_tgids_fops);
9788
9789         trace_create_eval_file(NULL);
9790
9791 #ifdef CONFIG_MODULES
9792         register_module_notifier(&trace_module_nb);
9793 #endif
9794
9795 #ifdef CONFIG_DYNAMIC_FTRACE
9796         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9797                         NULL, &tracing_dyn_info_fops);
9798 #endif
9799
9800         create_trace_instances(NULL);
9801
9802         update_tracer_options(&global_trace);
9803 }
9804
9805 static __init int tracer_init_tracefs(void)
9806 {
9807         int ret;
9808
9809         trace_access_lock_init();
9810
9811         ret = tracing_init_dentry();
9812         if (ret)
9813                 return 0;
9814
9815         if (eval_map_wq) {
9816                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9817                 queue_work(eval_map_wq, &tracerfs_init_work);
9818         } else {
9819                 tracer_init_tracefs_work_func(NULL);
9820         }
9821
9822         rv_init_interface();
9823
9824         return 0;
9825 }
9826
9827 fs_initcall(tracer_init_tracefs);
9828
9829 static int trace_die_panic_handler(struct notifier_block *self,
9830                                 unsigned long ev, void *unused);
9831
9832 static struct notifier_block trace_panic_notifier = {
9833         .notifier_call = trace_die_panic_handler,
9834         .priority = INT_MAX - 1,
9835 };
9836
9837 static struct notifier_block trace_die_notifier = {
9838         .notifier_call = trace_die_panic_handler,
9839         .priority = INT_MAX - 1,
9840 };
9841
9842 /*
9843  * The idea is to execute the following die/panic callback early, in order
9844  * to avoid showing irrelevant information in the trace (like other panic
9845  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9846  * warnings get disabled (to prevent potential log flooding).
9847  */
9848 static int trace_die_panic_handler(struct notifier_block *self,
9849                                 unsigned long ev, void *unused)
9850 {
9851         if (!ftrace_dump_on_oops_enabled())
9852                 return NOTIFY_DONE;
9853
9854         /* The die notifier requires DIE_OOPS to trigger */
9855         if (self == &trace_die_notifier && ev != DIE_OOPS)
9856                 return NOTIFY_DONE;
9857
9858         ftrace_dump(DUMP_PARAM);
9859
9860         return NOTIFY_DONE;
9861 }
9862
9863 /*
9864  * printk is set to max of 1024, we really don't need it that big.
9865  * Nothing should be printing 1000 characters anyway.
9866  */
9867 #define TRACE_MAX_PRINT         1000
9868
9869 /*
9870  * Define here KERN_TRACE so that we have one place to modify
9871  * it if we decide to change what log level the ftrace dump
9872  * should be at.
9873  */
9874 #define KERN_TRACE              KERN_EMERG
9875
9876 void
9877 trace_printk_seq(struct trace_seq *s)
9878 {
9879         /* Probably should print a warning here. */
9880         if (s->seq.len >= TRACE_MAX_PRINT)
9881                 s->seq.len = TRACE_MAX_PRINT;
9882
9883         /*
9884          * More paranoid code. Although the buffer size is set to
9885          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9886          * an extra layer of protection.
9887          */
9888         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9889                 s->seq.len = s->seq.size - 1;
9890
9891         /* should be zero ended, but we are paranoid. */
9892         s->buffer[s->seq.len] = 0;
9893
9894         printk(KERN_TRACE "%s", s->buffer);
9895
9896         trace_seq_init(s);
9897 }
9898
9899 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
9900 {
9901         iter->tr = tr;
9902         iter->trace = iter->tr->current_trace;
9903         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9904         iter->array_buffer = &tr->array_buffer;
9905
9906         if (iter->trace && iter->trace->open)
9907                 iter->trace->open(iter);
9908
9909         /* Annotate start of buffers if we had overruns */
9910         if (ring_buffer_overruns(iter->array_buffer->buffer))
9911                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9912
9913         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9914         if (trace_clocks[iter->tr->clock_id].in_ns)
9915                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9916
9917         /* Can not use kmalloc for iter.temp and iter.fmt */
9918         iter->temp = static_temp_buf;
9919         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9920         iter->fmt = static_fmt_buf;
9921         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9922 }
9923
9924 void trace_init_global_iter(struct trace_iterator *iter)
9925 {
9926         trace_init_iter(iter, &global_trace);
9927 }
9928
9929 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
9930 {
9931         /* use static because iter can be a bit big for the stack */
9932         static struct trace_iterator iter;
9933         unsigned int old_userobj;
9934         unsigned long flags;
9935         int cnt = 0, cpu;
9936
9937         /*
9938          * Always turn off tracing when we dump.
9939          * We don't need to show trace output of what happens
9940          * between multiple crashes.
9941          *
9942          * If the user does a sysrq-z, then they can re-enable
9943          * tracing with echo 1 > tracing_on.
9944          */
9945         tracer_tracing_off(tr);
9946
9947         local_irq_save(flags);
9948
9949         /* Simulate the iterator */
9950         trace_init_iter(&iter, tr);
9951
9952         for_each_tracing_cpu(cpu) {
9953                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9954         }
9955
9956         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9957
9958         /* don't look at user memory in panic mode */
9959         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9960
9961         if (dump_mode == DUMP_ORIG)
9962                 iter.cpu_file = raw_smp_processor_id();
9963         else
9964                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9965
9966         if (tr == &global_trace)
9967                 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9968         else
9969                 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
9970
9971         /* Did function tracer already get disabled? */
9972         if (ftrace_is_dead()) {
9973                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9974                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9975         }
9976
9977         /*
9978          * We need to stop all tracing on all CPUS to read
9979          * the next buffer. This is a bit expensive, but is
9980          * not done often. We fill all what we can read,
9981          * and then release the locks again.
9982          */
9983
9984         while (!trace_empty(&iter)) {
9985
9986                 if (!cnt)
9987                         printk(KERN_TRACE "---------------------------------\n");
9988
9989                 cnt++;
9990
9991                 trace_iterator_reset(&iter);
9992                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9993
9994                 if (trace_find_next_entry_inc(&iter) != NULL) {
9995                         int ret;
9996
9997                         ret = print_trace_line(&iter);
9998                         if (ret != TRACE_TYPE_NO_CONSUME)
9999                                 trace_consume(&iter);
10000                 }
10001                 touch_nmi_watchdog();
10002
10003                 trace_printk_seq(&iter.seq);
10004         }
10005
10006         if (!cnt)
10007                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10008         else
10009                 printk(KERN_TRACE "---------------------------------\n");
10010
10011         tr->trace_flags |= old_userobj;
10012
10013         for_each_tracing_cpu(cpu) {
10014                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10015         }
10016         local_irq_restore(flags);
10017 }
10018
10019 static void ftrace_dump_by_param(void)
10020 {
10021         bool first_param = true;
10022         char dump_param[MAX_TRACER_SIZE];
10023         char *buf, *token, *inst_name;
10024         struct trace_array *tr;
10025
10026         strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10027         buf = dump_param;
10028
10029         while ((token = strsep(&buf, ",")) != NULL) {
10030                 if (first_param) {
10031                         first_param = false;
10032                         if (!strcmp("0", token))
10033                                 continue;
10034                         else if (!strcmp("1", token)) {
10035                                 ftrace_dump_one(&global_trace, DUMP_ALL);
10036                                 continue;
10037                         }
10038                         else if (!strcmp("2", token) ||
10039                           !strcmp("orig_cpu", token)) {
10040                                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10041                                 continue;
10042                         }
10043                 }
10044
10045                 inst_name = strsep(&token, "=");
10046                 tr = trace_array_find(inst_name);
10047                 if (!tr) {
10048                         printk(KERN_TRACE "Instance %s not found\n", inst_name);
10049                         continue;
10050                 }
10051
10052                 if (token && (!strcmp("2", token) ||
10053                           !strcmp("orig_cpu", token)))
10054                         ftrace_dump_one(tr, DUMP_ORIG);
10055                 else
10056                         ftrace_dump_one(tr, DUMP_ALL);
10057         }
10058 }
10059
10060 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10061 {
10062         static atomic_t dump_running;
10063
10064         /* Only allow one dump user at a time. */
10065         if (atomic_inc_return(&dump_running) != 1) {
10066                 atomic_dec(&dump_running);
10067                 return;
10068         }
10069
10070         switch (oops_dump_mode) {
10071         case DUMP_ALL:
10072                 ftrace_dump_one(&global_trace, DUMP_ALL);
10073                 break;
10074         case DUMP_ORIG:
10075                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10076                 break;
10077         case DUMP_PARAM:
10078                 ftrace_dump_by_param();
10079                 break;
10080         case DUMP_NONE:
10081                 break;
10082         default:
10083                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10084                 ftrace_dump_one(&global_trace, DUMP_ALL);
10085         }
10086
10087         atomic_dec(&dump_running);
10088 }
10089 EXPORT_SYMBOL_GPL(ftrace_dump);
10090
10091 #define WRITE_BUFSIZE  4096
10092
10093 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10094                                 size_t count, loff_t *ppos,
10095                                 int (*createfn)(const char *))
10096 {
10097         char *kbuf, *buf, *tmp;
10098         int ret = 0;
10099         size_t done = 0;
10100         size_t size;
10101
10102         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10103         if (!kbuf)
10104                 return -ENOMEM;
10105
10106         while (done < count) {
10107                 size = count - done;
10108
10109                 if (size >= WRITE_BUFSIZE)
10110                         size = WRITE_BUFSIZE - 1;
10111
10112                 if (copy_from_user(kbuf, buffer + done, size)) {
10113                         ret = -EFAULT;
10114                         goto out;
10115                 }
10116                 kbuf[size] = '\0';
10117                 buf = kbuf;
10118                 do {
10119                         tmp = strchr(buf, '\n');
10120                         if (tmp) {
10121                                 *tmp = '\0';
10122                                 size = tmp - buf + 1;
10123                         } else {
10124                                 size = strlen(buf);
10125                                 if (done + size < count) {
10126                                         if (buf != kbuf)
10127                                                 break;
10128                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10129                                         pr_warn("Line length is too long: Should be less than %d\n",
10130                                                 WRITE_BUFSIZE - 2);
10131                                         ret = -EINVAL;
10132                                         goto out;
10133                                 }
10134                         }
10135                         done += size;
10136
10137                         /* Remove comments */
10138                         tmp = strchr(buf, '#');
10139
10140                         if (tmp)
10141                                 *tmp = '\0';
10142
10143                         ret = createfn(buf);
10144                         if (ret)
10145                                 goto out;
10146                         buf += size;
10147
10148                 } while (done < count);
10149         }
10150         ret = done;
10151
10152 out:
10153         kfree(kbuf);
10154
10155         return ret;
10156 }
10157
10158 #ifdef CONFIG_TRACER_MAX_TRACE
10159 __init static bool tr_needs_alloc_snapshot(const char *name)
10160 {
10161         char *test;
10162         int len = strlen(name);
10163         bool ret;
10164
10165         if (!boot_snapshot_index)
10166                 return false;
10167
10168         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10169             boot_snapshot_info[len] == '\t')
10170                 return true;
10171
10172         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10173         if (!test)
10174                 return false;
10175
10176         sprintf(test, "\t%s\t", name);
10177         ret = strstr(boot_snapshot_info, test) == NULL;
10178         kfree(test);
10179         return ret;
10180 }
10181
10182 __init static void do_allocate_snapshot(const char *name)
10183 {
10184         if (!tr_needs_alloc_snapshot(name))
10185                 return;
10186
10187         /*
10188          * When allocate_snapshot is set, the next call to
10189          * allocate_trace_buffers() (called by trace_array_get_by_name())
10190          * will allocate the snapshot buffer. That will alse clear
10191          * this flag.
10192          */
10193         allocate_snapshot = true;
10194 }
10195 #else
10196 static inline void do_allocate_snapshot(const char *name) { }
10197 #endif
10198
10199 __init static void enable_instances(void)
10200 {
10201         struct trace_array *tr;
10202         char *curr_str;
10203         char *str;
10204         char *tok;
10205
10206         /* A tab is always appended */
10207         boot_instance_info[boot_instance_index - 1] = '\0';
10208         str = boot_instance_info;
10209
10210         while ((curr_str = strsep(&str, "\t"))) {
10211
10212                 tok = strsep(&curr_str, ",");
10213
10214                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10215                         do_allocate_snapshot(tok);
10216
10217                 tr = trace_array_get_by_name(tok, NULL);
10218                 if (!tr) {
10219                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10220                         continue;
10221                 }
10222                 /* Allow user space to delete it */
10223                 trace_array_put(tr);
10224
10225                 while ((tok = strsep(&curr_str, ","))) {
10226                         early_enable_events(tr, tok, true);
10227                 }
10228         }
10229 }
10230
10231 __init static int tracer_alloc_buffers(void)
10232 {
10233         int ring_buf_size;
10234         int ret = -ENOMEM;
10235
10236
10237         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10238                 pr_warn("Tracing disabled due to lockdown\n");
10239                 return -EPERM;
10240         }
10241
10242         /*
10243          * Make sure we don't accidentally add more trace options
10244          * than we have bits for.
10245          */
10246         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10247
10248         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10249                 goto out;
10250
10251         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10252                 goto out_free_buffer_mask;
10253
10254         /* Only allocate trace_printk buffers if a trace_printk exists */
10255         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10256                 /* Must be called before global_trace.buffer is allocated */
10257                 trace_printk_init_buffers();
10258
10259         /* To save memory, keep the ring buffer size to its minimum */
10260         if (global_trace.ring_buffer_expanded)
10261                 ring_buf_size = trace_buf_size;
10262         else
10263                 ring_buf_size = 1;
10264
10265         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10266         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10267
10268         raw_spin_lock_init(&global_trace.start_lock);
10269
10270         /*
10271          * The prepare callbacks allocates some memory for the ring buffer. We
10272          * don't free the buffer if the CPU goes down. If we were to free
10273          * the buffer, then the user would lose any trace that was in the
10274          * buffer. The memory will be removed once the "instance" is removed.
10275          */
10276         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10277                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10278                                       NULL);
10279         if (ret < 0)
10280                 goto out_free_cpumask;
10281         /* Used for event triggers */
10282         ret = -ENOMEM;
10283         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10284         if (!temp_buffer)
10285                 goto out_rm_hp_state;
10286
10287         if (trace_create_savedcmd() < 0)
10288                 goto out_free_temp_buffer;
10289
10290         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10291                 goto out_free_savedcmd;
10292
10293         /* TODO: make the number of buffers hot pluggable with CPUS */
10294         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10295                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10296                 goto out_free_pipe_cpumask;
10297         }
10298         if (global_trace.buffer_disabled)
10299                 tracing_off();
10300
10301         if (trace_boot_clock) {
10302                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10303                 if (ret < 0)
10304                         pr_warn("Trace clock %s not defined, going back to default\n",
10305                                 trace_boot_clock);
10306         }
10307
10308         /*
10309          * register_tracer() might reference current_trace, so it
10310          * needs to be set before we register anything. This is
10311          * just a bootstrap of current_trace anyway.
10312          */
10313         global_trace.current_trace = &nop_trace;
10314
10315         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10316 #ifdef CONFIG_TRACER_MAX_TRACE
10317         spin_lock_init(&global_trace.snapshot_trigger_lock);
10318 #endif
10319         ftrace_init_global_array_ops(&global_trace);
10320
10321         init_trace_flags_index(&global_trace);
10322
10323         register_tracer(&nop_trace);
10324
10325         /* Function tracing may start here (via kernel command line) */
10326         init_function_trace();
10327
10328         /* All seems OK, enable tracing */
10329         tracing_disabled = 0;
10330
10331         atomic_notifier_chain_register(&panic_notifier_list,
10332                                        &trace_panic_notifier);
10333
10334         register_die_notifier(&trace_die_notifier);
10335
10336         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10337
10338         INIT_LIST_HEAD(&global_trace.systems);
10339         INIT_LIST_HEAD(&global_trace.events);
10340         INIT_LIST_HEAD(&global_trace.hist_vars);
10341         INIT_LIST_HEAD(&global_trace.err_log);
10342         list_add(&global_trace.list, &ftrace_trace_arrays);
10343
10344         apply_trace_boot_options();
10345
10346         register_snapshot_cmd();
10347
10348         test_can_verify();
10349
10350         return 0;
10351
10352 out_free_pipe_cpumask:
10353         free_cpumask_var(global_trace.pipe_cpumask);
10354 out_free_savedcmd:
10355         trace_free_saved_cmdlines_buffer();
10356 out_free_temp_buffer:
10357         ring_buffer_free(temp_buffer);
10358 out_rm_hp_state:
10359         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10360 out_free_cpumask:
10361         free_cpumask_var(global_trace.tracing_cpumask);
10362 out_free_buffer_mask:
10363         free_cpumask_var(tracing_buffer_mask);
10364 out:
10365         return ret;
10366 }
10367
10368 void __init ftrace_boot_snapshot(void)
10369 {
10370 #ifdef CONFIG_TRACER_MAX_TRACE
10371         struct trace_array *tr;
10372
10373         if (!snapshot_at_boot)
10374                 return;
10375
10376         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10377                 if (!tr->allocated_snapshot)
10378                         continue;
10379
10380                 tracing_snapshot_instance(tr);
10381                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10382         }
10383 #endif
10384 }
10385
10386 void __init early_trace_init(void)
10387 {
10388         if (tracepoint_printk) {
10389                 tracepoint_print_iter =
10390                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10391                 if (MEM_FAIL(!tracepoint_print_iter,
10392                              "Failed to allocate trace iterator\n"))
10393                         tracepoint_printk = 0;
10394                 else
10395                         static_key_enable(&tracepoint_printk_key.key);
10396         }
10397         tracer_alloc_buffers();
10398
10399         init_events();
10400 }
10401
10402 void __init trace_init(void)
10403 {
10404         trace_event_init();
10405
10406         if (boot_instance_index)
10407                 enable_instances();
10408 }
10409
10410 __init static void clear_boot_tracer(void)
10411 {
10412         /*
10413          * The default tracer at boot buffer is an init section.
10414          * This function is called in lateinit. If we did not
10415          * find the boot tracer, then clear it out, to prevent
10416          * later registration from accessing the buffer that is
10417          * about to be freed.
10418          */
10419         if (!default_bootup_tracer)
10420                 return;
10421
10422         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10423                default_bootup_tracer);
10424         default_bootup_tracer = NULL;
10425 }
10426
10427 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10428 __init static void tracing_set_default_clock(void)
10429 {
10430         /* sched_clock_stable() is determined in late_initcall */
10431         if (!trace_boot_clock && !sched_clock_stable()) {
10432                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10433                         pr_warn("Can not set tracing clock due to lockdown\n");
10434                         return;
10435                 }
10436
10437                 printk(KERN_WARNING
10438                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10439                        "If you want to keep using the local clock, then add:\n"
10440                        "  \"trace_clock=local\"\n"
10441                        "on the kernel command line\n");
10442                 tracing_set_clock(&global_trace, "global");
10443         }
10444 }
10445 #else
10446 static inline void tracing_set_default_clock(void) { }
10447 #endif
10448
10449 __init static int late_trace_init(void)
10450 {
10451         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10452                 static_key_disable(&tracepoint_printk_key.key);
10453                 tracepoint_printk = 0;
10454         }
10455
10456         tracing_set_default_clock();
10457         clear_boot_tracer();
10458         return 0;
10459 }
10460
10461 late_initcall_sync(late_trace_init);