1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will look into the ring-buffer to count the
66  * entries inserted during the selftest, although concurrent
67  * insertions into the ring-buffer, such as trace_printk(), could
68  * occur at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77
78 #ifdef CONFIG_FTRACE_STARTUP_TEST
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #endif
87
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
93
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
96         { }
97 };
98
99 static int
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
101 {
102         return 0;
103 }
104
105 /*
106  * To prevent the comm cache from being overwritten when no
107  * tracing is active, only save the comm when a trace event
108  * occurred.
109  */
110 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
111
112 /*
113  * Kill all tracing for good (never come back).
114  * It is initialized to 1 but will turn to zero if the initialization
115  * of the tracer is successful. But that is the only place that sets
116  * this back to zero.
117  */
118 static int tracing_disabled = 1;
119
120 cpumask_var_t __read_mostly     tracing_buffer_mask;
121
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputing it to a
129  * serial console.
130  *
131  * It is default off, but you can enable it with either specifying
132  * "ftrace_dump_on_oops" in the kernel command line, or setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set 1 if you want to dump buffers of all CPUs
135  * Set 2 if you want to dump the buffer of the CPU that triggered oops
136  */
137
138 enum ftrace_dump_mode ftrace_dump_on_oops;
139
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149
150 union trace_eval_map_item;
151
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * than "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160
161 static DEFINE_MUTEX(trace_eval_mutex);
162
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
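
/*
 * For illustration only (not part of the original file): with the layout
 * described above, one saved array of N maps looks roughly like
 *
 *   item[0]    head  (.length = N, .mod = owning module or NULL)
 *   item[1]    map
 *   ...
 *   item[N]    map
 *   item[N+1]  tail  (.next = first item of the next saved array, or NULL)
 */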
175
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183
184 #define MAX_TRACER_SIZE         100
185 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
186 static char *default_bootup_tracer;
187
188 static bool allocate_snapshot;
189 static bool snapshot_at_boot;
190
191 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_instance_index;
193
194 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_snapshot_index;
196
197 static int __init set_cmdline_ftrace(char *str)
198 {
199         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
200         default_bootup_tracer = bootup_tracer_buf;
201         /* We are using ftrace early, expand it */
202         ring_buffer_expanded = true;
203         return 1;
204 }
205 __setup("ftrace=", set_cmdline_ftrace);
206
207 static int __init set_ftrace_dump_on_oops(char *str)
208 {
209         if (*str++ != '=' || !*str || !strcmp("1", str)) {
210                 ftrace_dump_on_oops = DUMP_ALL;
211                 return 1;
212         }
213
214         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
215                 ftrace_dump_on_oops = DUMP_ORIG;
216                 return 1;
217         }
218
219         return 0;
220 }
221 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
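
/*
 * Example command line usage (illustration derived from the parsing above):
 *   ftrace_dump_on_oops               selects DUMP_ALL
 *   ftrace_dump_on_oops=1             selects DUMP_ALL
 *   ftrace_dump_on_oops=orig_cpu      selects DUMP_ORIG (oopsing CPU only)
 *   ftrace_dump_on_oops=2             selects DUMP_ORIG
 */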
222
223 static int __init stop_trace_on_warning(char *str)
224 {
225         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
226                 __disable_trace_on_warning = 1;
227         return 1;
228 }
229 __setup("traceoff_on_warning", stop_trace_on_warning);
230
231 static int __init boot_alloc_snapshot(char *str)
232 {
233         char *slot = boot_snapshot_info + boot_snapshot_index;
234         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
235         int ret;
236
237         if (str[0] == '=') {
238                 str++;
239                 if (strlen(str) >= left)
240                         return -1;
241
242                 ret = snprintf(slot, left, "%s\t", str);
243                 boot_snapshot_index += ret;
244         } else {
245                 allocate_snapshot = true;
246                 /* We also need the main ring buffer expanded */
247                 ring_buffer_expanded = true;
248         }
249         return 1;
250 }
251 __setup("alloc_snapshot", boot_alloc_snapshot);
252
253
254 static int __init boot_snapshot(char *str)
255 {
256         snapshot_at_boot = true;
257         boot_alloc_snapshot(str);
258         return 1;
259 }
260 __setup("ftrace_boot_snapshot", boot_snapshot);
261
262
263 static int __init boot_instance(char *str)
264 {
265         char *slot = boot_instance_info + boot_instance_index;
266         int left = sizeof(boot_instance_info) - boot_instance_index;
267         int ret;
268
269         if (strlen(str) >= left)
270                 return -1;
271
272         ret = snprintf(slot, left, "%s\t", str);
273         boot_instance_index += ret;
274
275         return 1;
276 }
277 __setup("trace_instance=", boot_instance);
278
279
280 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
281
282 static int __init set_trace_boot_options(char *str)
283 {
284         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
285         return 1;
286 }
287 __setup("trace_options=", set_trace_boot_options);
288
289 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
290 static char *trace_boot_clock __initdata;
291
292 static int __init set_trace_boot_clock(char *str)
293 {
294         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
295         trace_boot_clock = trace_boot_clock_buf;
296         return 1;
297 }
298 __setup("trace_clock=", set_trace_boot_clock);
299
300 static int __init set_tracepoint_printk(char *str)
301 {
302         /* Ignore the "tp_printk_stop_on_boot" param */
303         if (*str == '_')
304                 return 0;
305
306         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
307                 tracepoint_printk = 1;
308         return 1;
309 }
310 __setup("tp_printk", set_tracepoint_printk);
311
312 static int __init set_tracepoint_printk_stop(char *str)
313 {
314         tracepoint_printk_stop_on_boot = true;
315         return 1;
316 }
317 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
318
319 unsigned long long ns2usecs(u64 nsec)
320 {
321         nsec += 500;
322         do_div(nsec, 1000);
323         return nsec;
324 }
325
326 static void
327 trace_process_export(struct trace_export *export,
328                struct ring_buffer_event *event, int flag)
329 {
330         struct trace_entry *entry;
331         unsigned int size = 0;
332
333         if (export->flags & flag) {
334                 entry = ring_buffer_event_data(event);
335                 size = ring_buffer_event_length(event);
336                 export->write(export, entry, size);
337         }
338 }
339
340 static DEFINE_MUTEX(ftrace_export_lock);
341
342 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
343
344 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
345 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
346 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
347
348 static inline void ftrace_exports_enable(struct trace_export *export)
349 {
350         if (export->flags & TRACE_EXPORT_FUNCTION)
351                 static_branch_inc(&trace_function_exports_enabled);
352
353         if (export->flags & TRACE_EXPORT_EVENT)
354                 static_branch_inc(&trace_event_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_MARKER)
357                 static_branch_inc(&trace_marker_exports_enabled);
358 }
359
360 static inline void ftrace_exports_disable(struct trace_export *export)
361 {
362         if (export->flags & TRACE_EXPORT_FUNCTION)
363                 static_branch_dec(&trace_function_exports_enabled);
364
365         if (export->flags & TRACE_EXPORT_EVENT)
366                 static_branch_dec(&trace_event_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_MARKER)
369                 static_branch_dec(&trace_marker_exports_enabled);
370 }
371
372 static void ftrace_exports(struct ring_buffer_event *event, int flag)
373 {
374         struct trace_export *export;
375
376         preempt_disable_notrace();
377
378         export = rcu_dereference_raw_check(ftrace_exports_list);
379         while (export) {
380                 trace_process_export(export, event, flag);
381                 export = rcu_dereference_raw_check(export->next);
382         }
383
384         preempt_enable_notrace();
385 }
386
387 static inline void
388 add_trace_export(struct trace_export **list, struct trace_export *export)
389 {
390         rcu_assign_pointer(export->next, *list);
391         /*
392          * We are inserting export into the list, but another
393          * CPU might be walking that list. We need to make sure
394          * the export->next pointer is valid before another CPU sees
395          * the export pointer included in the list.
396          */
397         rcu_assign_pointer(*list, export);
398 }
399
400 static inline int
401 rm_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403         struct trace_export **p;
404
405         for (p = list; *p != NULL; p = &(*p)->next)
406                 if (*p == export)
407                         break;
408
409         if (*p != export)
410                 return -1;
411
412         rcu_assign_pointer(*p, (*p)->next);
413
414         return 0;
415 }
416
417 static inline void
418 add_ftrace_export(struct trace_export **list, struct trace_export *export)
419 {
420         ftrace_exports_enable(export);
421
422         add_trace_export(list, export);
423 }
424
425 static inline int
426 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
427 {
428         int ret;
429
430         ret = rm_trace_export(list, export);
431         ftrace_exports_disable(export);
432
433         return ret;
434 }
435
436 int register_ftrace_export(struct trace_export *export)
437 {
438         if (WARN_ON_ONCE(!export->write))
439                 return -1;
440
441         mutex_lock(&ftrace_export_lock);
442
443         add_ftrace_export(&ftrace_exports_list, export);
444
445         mutex_unlock(&ftrace_export_lock);
446
447         return 0;
448 }
449 EXPORT_SYMBOL_GPL(register_ftrace_export);
450
451 int unregister_ftrace_export(struct trace_export *export)
452 {
453         int ret;
454
455         mutex_lock(&ftrace_export_lock);
456
457         ret = rm_ftrace_export(&ftrace_exports_list, export);
458
459         mutex_unlock(&ftrace_export_lock);
460
461         return ret;
462 }
463 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
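
/*
 * Illustrative sketch (not part of the original file) of how a caller might
 * hook into the export mechanism above. The callback signature follows
 * struct trace_export; all names below are made up.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		... push the raw trace entry to some external sink ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */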
464
465 /* trace_flags holds trace_options default values */
466 #define TRACE_DEFAULT_FLAGS                                             \
467         (FUNCTION_DEFAULT_FLAGS |                                       \
468          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
469          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
470          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
471          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
472          TRACE_ITER_HASH_PTR)
473
474 /* trace_options that are only supported by global_trace */
475 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
476                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
477
478 /* trace_flags that are default zero for instances */
479 #define ZEROED_TRACE_FLAGS \
480         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
481
482 /*
483  * The global_trace is the descriptor that holds the top-level tracing
484  * buffers for the live tracing.
485  */
486 static struct trace_array global_trace = {
487         .trace_flags = TRACE_DEFAULT_FLAGS,
488 };
489
490 LIST_HEAD(ftrace_trace_arrays);
491
492 int trace_array_get(struct trace_array *this_tr)
493 {
494         struct trace_array *tr;
495         int ret = -ENODEV;
496
497         mutex_lock(&trace_types_lock);
498         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
499                 if (tr == this_tr) {
500                         tr->ref++;
501                         ret = 0;
502                         break;
503                 }
504         }
505         mutex_unlock(&trace_types_lock);
506
507         return ret;
508 }
509
510 static void __trace_array_put(struct trace_array *this_tr)
511 {
512         WARN_ON(!this_tr->ref);
513         this_tr->ref--;
514 }
515
516 /**
517  * trace_array_put - Decrement the reference counter for this trace array.
518  * @this_tr : pointer to the trace array
519  *
520  * NOTE: Use this when we no longer need the trace array returned by
521  * trace_array_get_by_name(). This ensures the trace array can be later
522  * destroyed.
523  *
524  */
525 void trace_array_put(struct trace_array *this_tr)
526 {
527         if (!this_tr)
528                 return;
529
530         mutex_lock(&trace_types_lock);
531         __trace_array_put(this_tr);
532         mutex_unlock(&trace_types_lock);
533 }
534 EXPORT_SYMBOL_GPL(trace_array_put);
535
536 int tracing_check_open_get_tr(struct trace_array *tr)
537 {
538         int ret;
539
540         ret = security_locked_down(LOCKDOWN_TRACEFS);
541         if (ret)
542                 return ret;
543
544         if (tracing_disabled)
545                 return -ENODEV;
546
547         if (tr && trace_array_get(tr) < 0)
548                 return -ENODEV;
549
550         return 0;
551 }
552
553 int call_filter_check_discard(struct trace_event_call *call, void *rec,
554                               struct trace_buffer *buffer,
555                               struct ring_buffer_event *event)
556 {
557         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
558             !filter_match_preds(call->filter, rec)) {
559                 __trace_event_discard_commit(buffer, event);
560                 return 1;
561         }
562
563         return 0;
564 }
565
566 /**
567  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
568  * @filtered_pids: The list of pids to check
569  * @search_pid: The PID to find in @filtered_pids
570  *
571  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
572  */
573 bool
574 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
575 {
576         return trace_pid_list_is_set(filtered_pids, search_pid);
577 }
578
579 /**
580  * trace_ignore_this_task - should a task be ignored for tracing
581  * @filtered_pids: The list of pids to check
582  * @filtered_no_pids: The list of pids not to be traced
583  * @task: The task that should be ignored if not filtered
584  *
585  * Checks if @task should be traced or not from @filtered_pids.
586  * Returns true if @task should *NOT* be traced.
587  * Returns false if @task should be traced.
588  */
589 bool
590 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
591                        struct trace_pid_list *filtered_no_pids,
592                        struct task_struct *task)
593 {
594         /*
595          * If filtered_no_pids is not empty, and the task's pid is listed
596          * in filtered_no_pids, then return true.
597          * Otherwise, if filtered_pids is empty, that means we can
598          * trace all tasks. If it has content, then only trace pids
599          * within filtered_pids.
600          */
601
602         return (filtered_pids &&
603                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
604                 (filtered_no_pids &&
605                  trace_find_filtered_pid(filtered_no_pids, task->pid));
606 }
607
608 /**
609  * trace_filter_add_remove_task - Add or remove a task from a pid_list
610  * @pid_list: The list to modify
611  * @self: The current task for fork or NULL for exit
612  * @task: The task to add or remove
613  *
614  * If adding a task, if @self is defined, the task is only added if @self
615  * is also included in @pid_list. This happens on fork and tasks should
616  * only be added when the parent is listed. If @self is NULL, then the
617  * @task pid will be removed from the list, which would happen on exit
618  * of a task.
619  */
620 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
621                                   struct task_struct *self,
622                                   struct task_struct *task)
623 {
624         if (!pid_list)
625                 return;
626
627         /* For forks, we only add if the forking task is listed */
628         if (self) {
629                 if (!trace_find_filtered_pid(pid_list, self->pid))
630                         return;
631         }
632
633         /* "self" is set for forks, and NULL for exits */
634         if (self)
635                 trace_pid_list_set(pid_list, task->pid);
636         else
637                 trace_pid_list_clear(pid_list, task->pid);
638 }
639
640 /**
641  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
642  * @pid_list: The pid list to show
643  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
644  * @pos: The position of the file
645  *
646  * This is used by the seq_file "next" operation to iterate the pids
647  * listed in a trace_pid_list structure.
648  *
649  * Returns the pid+1 as we want to display pid of zero, but NULL would
650  * stop the iteration.
651  */
652 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
653 {
654         long pid = (unsigned long)v;
655         unsigned int next;
656
657         (*pos)++;
658
659         /* pid already is +1 of the actual previous bit */
660         if (trace_pid_list_next(pid_list, pid, &next) < 0)
661                 return NULL;
662
663         pid = next;
664
665         /* Return pid + 1 to allow zero to be represented */
666         return (void *)(pid + 1);
667 }
668
669 /**
670  * trace_pid_start - Used for seq_file to start reading pid lists
671  * @pid_list: The pid list to show
672  * @pos: The position of the file
673  *
674  * This is used by seq_file "start" operation to start the iteration
675  * of listing pids.
676  *
677  * Returns the pid+1 as we want to display pid of zero, but NULL would
678  * stop the iteration.
679  */
680 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
681 {
682         unsigned long pid;
683         unsigned int first;
684         loff_t l = 0;
685
686         if (trace_pid_list_first(pid_list, &first) < 0)
687                 return NULL;
688
689         pid = first;
690
691         /* Return pid + 1 so that zero can be the exit value */
692         for (pid++; pid && l < *pos;
693              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
694                 ;
695         return (void *)pid;
696 }
697
698 /**
699  * trace_pid_show - show the current pid in seq_file processing
700  * @m: The seq_file structure to write into
701  * @v: A void pointer of the pid (+1) value to display
702  *
703  * Can be directly used by seq_file operations to display the current
704  * pid value.
705  */
706 int trace_pid_show(struct seq_file *m, void *v)
707 {
708         unsigned long pid = (unsigned long)v - 1;
709
710         seq_printf(m, "%lu\n", pid);
711         return 0;
712 }
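
/*
 * Illustrative sketch (not from the original source): the three helpers
 * above are meant to back a seq_file, roughly like the ftrace pid filter
 * files do. The wrapper names are made up; real users resolve the
 * trace_pid_list from their own private data inside the wrappers.
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,	(calls trace_pid_start())
 *		.next	= example_pid_next,	(calls trace_pid_next())
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */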
713
714 /* 128 should be much more than enough */
715 #define PID_BUF_SIZE            127
716
717 int trace_pid_write(struct trace_pid_list *filtered_pids,
718                     struct trace_pid_list **new_pid_list,
719                     const char __user *ubuf, size_t cnt)
720 {
721         struct trace_pid_list *pid_list;
722         struct trace_parser parser;
723         unsigned long val;
724         int nr_pids = 0;
725         ssize_t read = 0;
726         ssize_t ret;
727         loff_t pos;
728         pid_t pid;
729
730         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
731                 return -ENOMEM;
732
733         /*
734          * Always create a new list. The write is an all-or-nothing
735          * operation: a new list is always built when the user adds
736          * pids, and if the operation fails, the current list is
737          * not modified.
738          */
739         pid_list = trace_pid_list_alloc();
740         if (!pid_list) {
741                 trace_parser_put(&parser);
742                 return -ENOMEM;
743         }
744
745         if (filtered_pids) {
746                 /* copy the current bits to the new max */
747                 ret = trace_pid_list_first(filtered_pids, &pid);
748                 while (!ret) {
749                         trace_pid_list_set(pid_list, pid);
750                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
751                         nr_pids++;
752                 }
753         }
754
755         ret = 0;
756         while (cnt > 0) {
757
758                 pos = 0;
759
760                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
761                 if (ret < 0)
762                         break;
763
764                 read += ret;
765                 ubuf += ret;
766                 cnt -= ret;
767
768                 if (!trace_parser_loaded(&parser))
769                         break;
770
771                 ret = -EINVAL;
772                 if (kstrtoul(parser.buffer, 0, &val))
773                         break;
774
775                 pid = (pid_t)val;
776
777                 if (trace_pid_list_set(pid_list, pid) < 0) {
778                         ret = -1;
779                         break;
780                 }
781                 nr_pids++;
782
783                 trace_parser_clear(&parser);
784                 ret = 0;
785         }
786         trace_parser_put(&parser);
787
788         if (ret < 0) {
789                 trace_pid_list_free(pid_list);
790                 return ret;
791         }
792
793         if (!nr_pids) {
794                 /* Cleared the list of pids */
795                 trace_pid_list_free(pid_list);
796                 pid_list = NULL;
797         }
798
799         *new_pid_list = pid_list;
800
801         return read;
802 }
803
804 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
805 {
806         u64 ts;
807
808         /* Early boot up does not have a buffer yet */
809         if (!buf->buffer)
810                 return trace_clock_local();
811
812         ts = ring_buffer_time_stamp(buf->buffer);
813         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
814
815         return ts;
816 }
817
818 u64 ftrace_now(int cpu)
819 {
820         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
821 }
822
823 /**
824  * tracing_is_enabled - Show if global_trace has been enabled
825  *
826  * Shows if the global trace has been enabled or not. It uses the
827  * mirror flag "buffer_disabled" to be used in fast paths such as for
828  * the irqsoff tracer. But it may be inaccurate due to races. If you
829  * need to know the accurate state, use tracing_is_on() which is a little
830  * slower, but accurate.
831  */
832 int tracing_is_enabled(void)
833 {
834         /*
835          * For quick access (irqsoff uses this in fast path), just
836          * return the mirror variable of the state of the ring buffer.
837          * It's a little racy, but we don't really care.
838          */
839         smp_rmb();
840         return !global_trace.buffer_disabled;
841 }
842
843 /*
844  * trace_buf_size is the size in bytes that is allocated
845  * for a buffer. Note, the number of bytes is always rounded
846  * to page size.
847  *
848  * This number is purposely set to a low value of 16384 entries.
849  * If a dump on oops happens, it is much better not to have to
850  * wait for all that output. In any case, this is configurable
851  * at both boot time and run time.
852  */
853 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
854
855 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
856
857 /* trace_types holds a link list of available tracers. */
858 static struct tracer            *trace_types __read_mostly;
859
860 /*
861  * trace_types_lock is used to protect the trace_types list.
862  */
863 DEFINE_MUTEX(trace_types_lock);
864
865 /*
866  * serialize access to the ring buffer
867  *
868  * The ring buffer serializes readers, but that is only low level protection.
869  * The validity of the events (returned by ring_buffer_peek() etc.)
870  * is not protected by the ring buffer.
871  *
872  * The content of events may become garbage if we allow other processes to
873  * consume these events concurrently:
874  *   A) the page of the consumed events may become a normal page
875  *      (not a reader page) in the ring buffer, and this page will be
876  *      rewritten by the events producer.
877  *   B) the page of the consumed events may become a page for splice_read,
878  *      and this page will be returned to the system.
879  *
880  * These primitives allow multiple processes to access different cpu ring
881  * buffers concurrently.
882  *
883  * These primitives don't distinguish read-only and read-consume access.
884  * Multiple read-only accesses are also serialized.
885  */
886
887 #ifdef CONFIG_SMP
888 static DECLARE_RWSEM(all_cpu_access_lock);
889 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         if (cpu == RING_BUFFER_ALL_CPUS) {
894                 /* gain it for accessing the whole ring buffer. */
895                 down_write(&all_cpu_access_lock);
896         } else {
897                 /* gain it for accessing a cpu ring buffer. */
898
899                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
900                 down_read(&all_cpu_access_lock);
901
902                 /* Secondly block other access to this @cpu ring buffer. */
903                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
904         }
905 }
906
907 static inline void trace_access_unlock(int cpu)
908 {
909         if (cpu == RING_BUFFER_ALL_CPUS) {
910                 up_write(&all_cpu_access_lock);
911         } else {
912                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
913                 up_read(&all_cpu_access_lock);
914         }
915 }
916
917 static inline void trace_access_lock_init(void)
918 {
919         int cpu;
920
921         for_each_possible_cpu(cpu)
922                 mutex_init(&per_cpu(cpu_access_lock, cpu));
923 }
924
925 #else
926
927 static DEFINE_MUTEX(access_lock);
928
929 static inline void trace_access_lock(int cpu)
930 {
931         (void)cpu;
932         mutex_lock(&access_lock);
933 }
934
935 static inline void trace_access_unlock(int cpu)
936 {
937         (void)cpu;
938         mutex_unlock(&access_lock);
939 }
940
941 static inline void trace_access_lock_init(void)
942 {
943 }
944
945 #endif
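
/*
 * Typical usage pattern (illustration, not from the original source): a
 * reader consuming events from one cpu buffer brackets the access with the
 * helpers above, e.g.
 *
 *	trace_access_lock(cpu);
 *	... read or consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively
 * for all cpus.
 */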
946
947 #ifdef CONFIG_STACKTRACE
948 static void __ftrace_trace_stack(struct trace_buffer *buffer,
949                                  unsigned int trace_ctx,
950                                  int skip, struct pt_regs *regs);
951 static inline void ftrace_trace_stack(struct trace_array *tr,
952                                       struct trace_buffer *buffer,
953                                       unsigned int trace_ctx,
954                                       int skip, struct pt_regs *regs);
955
956 #else
957 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
958                                         unsigned int trace_ctx,
959                                         int skip, struct pt_regs *regs)
960 {
961 }
962 static inline void ftrace_trace_stack(struct trace_array *tr,
963                                       struct trace_buffer *buffer,
964                                       unsigned long trace_ctx,
965                                       int skip, struct pt_regs *regs)
966 {
967 }
968
969 #endif
970
971 static __always_inline void
972 trace_event_setup(struct ring_buffer_event *event,
973                   int type, unsigned int trace_ctx)
974 {
975         struct trace_entry *ent = ring_buffer_event_data(event);
976
977         tracing_generic_entry_update(ent, type, trace_ctx);
978 }
979
980 static __always_inline struct ring_buffer_event *
981 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
982                           int type,
983                           unsigned long len,
984                           unsigned int trace_ctx)
985 {
986         struct ring_buffer_event *event;
987
988         event = ring_buffer_lock_reserve(buffer, len);
989         if (event != NULL)
990                 trace_event_setup(event, type, trace_ctx);
991
992         return event;
993 }
994
995 void tracer_tracing_on(struct trace_array *tr)
996 {
997         if (tr->array_buffer.buffer)
998                 ring_buffer_record_on(tr->array_buffer.buffer);
999         /*
1000          * This flag is looked at when buffers haven't been allocated
1001          * yet, or by some tracers (like irqsoff), that just want to
1002          * know if the ring buffer has been disabled, but it can handle
1003          * races where it gets disabled but we still do a record.
1004          * As the check is in the fast path of the tracers, it is more
1005          * important to be fast than accurate.
1006          */
1007         tr->buffer_disabled = 0;
1008         /* Make the flag seen by readers */
1009         smp_wmb();
1010 }
1011
1012 /**
1013  * tracing_on - enable tracing buffers
1014  *
1015  * This function enables tracing buffers that may have been
1016  * disabled with tracing_off.
1017  */
1018 void tracing_on(void)
1019 {
1020         tracer_tracing_on(&global_trace);
1021 }
1022 EXPORT_SYMBOL_GPL(tracing_on);
1023
1024
1025 static __always_inline void
1026 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1027 {
1028         __this_cpu_write(trace_taskinfo_save, true);
1029
1030         /* If this is the temp buffer, we need to commit fully */
1031         if (this_cpu_read(trace_buffered_event) == event) {
1032                 /* Length is in event->array[0] */
1033                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1034                 /* Release the temp buffer */
1035                 this_cpu_dec(trace_buffered_event_cnt);
1036                 /* ring_buffer_unlock_commit() enables preemption */
1037                 preempt_enable_notrace();
1038         } else
1039                 ring_buffer_unlock_commit(buffer);
1040 }
1041
1042 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1043                        const char *str, int size)
1044 {
1045         struct ring_buffer_event *event;
1046         struct trace_buffer *buffer;
1047         struct print_entry *entry;
1048         unsigned int trace_ctx;
1049         int alloc;
1050
1051         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1052                 return 0;
1053
1054         if (unlikely(tracing_selftest_running || tracing_disabled))
1055                 return 0;
1056
1057         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1058
1059         trace_ctx = tracing_gen_ctx();
1060         buffer = tr->array_buffer.buffer;
1061         ring_buffer_nest_start(buffer);
1062         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1063                                             trace_ctx);
1064         if (!event) {
1065                 size = 0;
1066                 goto out;
1067         }
1068
1069         entry = ring_buffer_event_data(event);
1070         entry->ip = ip;
1071
1072         memcpy(&entry->buf, str, size);
1073
1074         /* Add a newline if necessary */
1075         if (entry->buf[size - 1] != '\n') {
1076                 entry->buf[size] = '\n';
1077                 entry->buf[size + 1] = '\0';
1078         } else
1079                 entry->buf[size] = '\0';
1080
1081         __buffer_unlock_commit(buffer, event);
1082         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1083  out:
1084         ring_buffer_nest_end(buffer);
1085         return size;
1086 }
1087 EXPORT_SYMBOL_GPL(__trace_array_puts);
1088
1089 /**
1090  * __trace_puts - write a constant string into the trace buffer.
1091  * @ip:    The address of the caller
1092  * @str:   The constant string to write
1093  * @size:  The size of the string.
1094  */
1095 int __trace_puts(unsigned long ip, const char *str, int size)
1096 {
1097         return __trace_array_puts(&global_trace, ip, str, size);
1098 }
1099 EXPORT_SYMBOL_GPL(__trace_puts);
1100
1101 /**
1102  * __trace_bputs - write the pointer to a constant string into trace buffer
1103  * @ip:    The address of the caller
1104  * @str:   The constant string to write to the buffer to
1105  */
1106 int __trace_bputs(unsigned long ip, const char *str)
1107 {
1108         struct ring_buffer_event *event;
1109         struct trace_buffer *buffer;
1110         struct bputs_entry *entry;
1111         unsigned int trace_ctx;
1112         int size = sizeof(struct bputs_entry);
1113         int ret = 0;
1114
1115         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1116                 return 0;
1117
1118         if (unlikely(tracing_selftest_running || tracing_disabled))
1119                 return 0;
1120
1121         trace_ctx = tracing_gen_ctx();
1122         buffer = global_trace.array_buffer.buffer;
1123
1124         ring_buffer_nest_start(buffer);
1125         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1126                                             trace_ctx);
1127         if (!event)
1128                 goto out;
1129
1130         entry = ring_buffer_event_data(event);
1131         entry->ip                       = ip;
1132         entry->str                      = str;
1133
1134         __buffer_unlock_commit(buffer, event);
1135         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1136
1137         ret = 1;
1138  out:
1139         ring_buffer_nest_end(buffer);
1140         return ret;
1141 }
1142 EXPORT_SYMBOL_GPL(__trace_bputs);
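
/*
 * Note (illustration, not from the original source): callers normally do
 * not use __trace_puts()/__trace_bputs() directly, but go through the
 * trace_puts() macro, which selects __trace_bputs() for string literals
 * (recording only the pointer) and falls back to __trace_puts() otherwise.
 *
 *	trace_puts("reached the slow path\n");
 */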
1143
1144 #ifdef CONFIG_TRACER_SNAPSHOT
1145 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1146                                            void *cond_data)
1147 {
1148         struct tracer *tracer = tr->current_trace;
1149         unsigned long flags;
1150
1151         if (in_nmi()) {
1152                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1153                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1154                 return;
1155         }
1156
1157         if (!tr->allocated_snapshot) {
1158                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1159                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1160                 tracer_tracing_off(tr);
1161                 return;
1162         }
1163
1164         /* Note, snapshot can not be used when the tracer uses it */
1165         if (tracer->use_max_tr) {
1166                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1167                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1168                 return;
1169         }
1170
1171         local_irq_save(flags);
1172         update_max_tr(tr, current, smp_processor_id(), cond_data);
1173         local_irq_restore(flags);
1174 }
1175
1176 void tracing_snapshot_instance(struct trace_array *tr)
1177 {
1178         tracing_snapshot_instance_cond(tr, NULL);
1179 }
1180
1181 /**
1182  * tracing_snapshot - take a snapshot of the current buffer.
1183  *
1184  * This causes a swap between the snapshot buffer and the current live
1185  * tracing buffer. You can use this to take snapshots of the live
1186  * trace when some condition is triggered, but continue to trace.
1187  *
1188  * Note, make sure to allocate the snapshot either with
1189  * tracing_snapshot_alloc(), or manually
1190  * with: echo 1 > /sys/kernel/tracing/snapshot
1191  *
1192  * If the snapshot buffer is not allocated, it will stop tracing.
1193  * Basically making a permanent snapshot.
1194  */
1195 void tracing_snapshot(void)
1196 {
1197         struct trace_array *tr = &global_trace;
1198
1199         tracing_snapshot_instance(tr);
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot);
1202
1203 /**
1204  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1205  * @tr:         The tracing instance to snapshot
1206  * @cond_data:  The data to be tested conditionally, and possibly saved
1207  *
1208  * This is the same as tracing_snapshot() except that the snapshot is
1209  * conditional - the snapshot will only happen if the
1210  * cond_snapshot.update() implementation receiving the cond_data
1211  * returns true, which means that the trace array's cond_snapshot
1212  * update() operation used the cond_data to determine whether the
1213  * snapshot should be taken, and if it was, presumably saved it along
1214  * with the snapshot.
1215  */
1216 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1217 {
1218         tracing_snapshot_instance_cond(tr, cond_data);
1219 }
1220 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1221
1222 /**
1223  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1224  * @tr:         The tracing instance
1225  *
1226  * When the user enables a conditional snapshot using
1227  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1228  * with the snapshot.  This accessor is used to retrieve it.
1229  *
1230  * Should not be called from cond_snapshot.update(), since it takes
1231  * the tr->max_lock, which the code calling
1232  * cond_snapshot.update() has already taken.
1233  *
1234  * Returns the cond_data associated with the trace array's snapshot.
1235  */
1236 void *tracing_cond_snapshot_data(struct trace_array *tr)
1237 {
1238         void *cond_data = NULL;
1239
1240         local_irq_disable();
1241         arch_spin_lock(&tr->max_lock);
1242
1243         if (tr->cond_snapshot)
1244                 cond_data = tr->cond_snapshot->cond_data;
1245
1246         arch_spin_unlock(&tr->max_lock);
1247         local_irq_enable();
1248
1249         return cond_data;
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1252
1253 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1254                                         struct array_buffer *size_buf, int cpu_id);
1255 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1256
1257 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1258 {
1259         int ret;
1260
1261         if (!tr->allocated_snapshot) {
1262
1263                 /* allocate spare buffer */
1264                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1265                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1266                 if (ret < 0)
1267                         return ret;
1268
1269                 tr->allocated_snapshot = true;
1270         }
1271
1272         return 0;
1273 }
1274
1275 static void free_snapshot(struct trace_array *tr)
1276 {
1277         /*
1278          * We don't free the ring buffer. Instead, we resize it because
1279          * the max_tr ring buffer has some state (e.g. ring->clock) and
1280          * we want to preserve it.
1281          */
1282         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1283         set_buffer_entries(&tr->max_buffer, 1);
1284         tracing_reset_online_cpus(&tr->max_buffer);
1285         tr->allocated_snapshot = false;
1286 }
1287
1288 /**
1289  * tracing_alloc_snapshot - allocate snapshot buffer.
1290  *
1291  * This only allocates the snapshot buffer if it isn't already
1292  * allocated - it doesn't also take a snapshot.
1293  *
1294  * This is meant to be used in cases where the snapshot buffer needs
1295  * to be set up for events that can't sleep but need to be able to
1296  * trigger a snapshot.
1297  */
1298 int tracing_alloc_snapshot(void)
1299 {
1300         struct trace_array *tr = &global_trace;
1301         int ret;
1302
1303         ret = tracing_alloc_snapshot_instance(tr);
1304         WARN_ON(ret < 0);
1305
1306         return ret;
1307 }
1308 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1309
1310 /**
1311  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1312  *
1313  * This is similar to tracing_snapshot(), but it will allocate the
1314  * snapshot buffer if it isn't already allocated. Use this only
1315  * where it is safe to sleep, as the allocation may sleep.
1316  *
1317  * This causes a swap between the snapshot buffer and the current live
1318  * tracing buffer. You can use this to take snapshots of the live
1319  * trace when some condition is triggered, but continue to trace.
1320  */
1321 void tracing_snapshot_alloc(void)
1322 {
1323         int ret;
1324
1325         ret = tracing_alloc_snapshot();
1326         if (ret < 0)
1327                 return;
1328
1329         tracing_snapshot();
1330 }
1331 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
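
/*
 * Illustrative usage (based on the kernel-doc above, not part of the
 * original file): allocate the snapshot buffer once from a context that
 * may sleep, then take snapshots from the hot path.
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		tracing_snapshot();	(at the interesting event)
 *	}
 */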
1332
1333 /**
1334  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1335  * @tr:         The tracing instance
1336  * @cond_data:  User data to associate with the snapshot
1337  * @update:     Implementation of the cond_snapshot update function
1338  *
1339  * Check whether the conditional snapshot for the given instance has
1340  * already been enabled, or if the current tracer is already using a
1341  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1342  * save the cond_data and update function inside.
1343  *
1344  * Returns 0 if successful, error otherwise.
1345  */
1346 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1347                                  cond_update_fn_t update)
1348 {
1349         struct cond_snapshot *cond_snapshot;
1350         int ret = 0;
1351
1352         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1353         if (!cond_snapshot)
1354                 return -ENOMEM;
1355
1356         cond_snapshot->cond_data = cond_data;
1357         cond_snapshot->update = update;
1358
1359         mutex_lock(&trace_types_lock);
1360
1361         ret = tracing_alloc_snapshot_instance(tr);
1362         if (ret)
1363                 goto fail_unlock;
1364
1365         if (tr->current_trace->use_max_tr) {
1366                 ret = -EBUSY;
1367                 goto fail_unlock;
1368         }
1369
1370         /*
1371          * The cond_snapshot can only change to NULL without the
1372          * trace_types_lock. We don't care if we race with it going
1373          * to NULL, but we want to make sure that it's not set to
1374          * something other than NULL when we get here, which we can
1375          * do safely with only holding the trace_types_lock and not
1376          * having to take the max_lock.
1377          */
1378         if (tr->cond_snapshot) {
1379                 ret = -EBUSY;
1380                 goto fail_unlock;
1381         }
1382
1383         local_irq_disable();
1384         arch_spin_lock(&tr->max_lock);
1385         tr->cond_snapshot = cond_snapshot;
1386         arch_spin_unlock(&tr->max_lock);
1387         local_irq_enable();
1388
1389         mutex_unlock(&trace_types_lock);
1390
1391         return ret;
1392
1393  fail_unlock:
1394         mutex_unlock(&trace_types_lock);
1395         kfree(cond_snapshot);
1396         return ret;
1397 }
1398 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
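
/*
 * Illustrative sketch (all names made up, not part of the original file)
 * of a conditional snapshot user. Per the kernel-doc above, the update
 * callback receives the cond_data and decides whether the snapshot is
 * actually taken.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	(true means: take the snapshot)
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */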
1399
1400 /**
1401  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1402  * @tr:         The tracing instance
1403  *
1404  * Check whether the conditional snapshot for the given instance is
1405  * enabled; if so, free the cond_snapshot associated with it,
1406  * otherwise return -EINVAL.
1407  *
1408  * Returns 0 if successful, error otherwise.
1409  */
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         int ret = 0;
1413
1414         local_irq_disable();
1415         arch_spin_lock(&tr->max_lock);
1416
1417         if (!tr->cond_snapshot)
1418                 ret = -EINVAL;
1419         else {
1420                 kfree(tr->cond_snapshot);
1421                 tr->cond_snapshot = NULL;
1422         }
1423
1424         arch_spin_unlock(&tr->max_lock);
1425         local_irq_enable();
1426
1427         return ret;
1428 }
1429 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1430 #else
1431 void tracing_snapshot(void)
1432 {
1433         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot);
1436 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1437 {
1438         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1439 }
1440 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1441 int tracing_alloc_snapshot(void)
1442 {
1443         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1444         return -ENODEV;
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1447 void tracing_snapshot_alloc(void)
1448 {
1449         /* Give warning */
1450         tracing_snapshot();
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1453 void *tracing_cond_snapshot_data(struct trace_array *tr)
1454 {
1455         return NULL;
1456 }
1457 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1458 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1459 {
1460         return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1463 int tracing_snapshot_cond_disable(struct trace_array *tr)
1464 {
1465         return false;
1466 }
1467 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1468 #define free_snapshot(tr)       do { } while (0)
1469 #endif /* CONFIG_TRACER_SNAPSHOT */
1470
1471 void tracer_tracing_off(struct trace_array *tr)
1472 {
1473         if (tr->array_buffer.buffer)
1474                 ring_buffer_record_off(tr->array_buffer.buffer);
1475         /*
1476          * This flag is looked at when buffers haven't been allocated
1477          * yet, or by some tracers (like irqsoff), that just want to
1478          * know if the ring buffer has been disabled, but it can handle
1479          * races where it gets disabled but we still do a record.
1480          * As the check is in the fast path of the tracers, it is more
1481          * important to be fast than accurate.
1482          */
1483         tr->buffer_disabled = 1;
1484         /* Make the flag seen by readers */
1485         smp_wmb();
1486 }
1487
1488 /**
1489  * tracing_off - turn off tracing buffers
1490  *
1491  * This function stops the tracing buffers from recording data.
1492  * It does not disable any overhead the tracers themselves may
1493  * be causing. This function simply causes all recording to
1494  * the ring buffers to fail.
1495  */
1496 void tracing_off(void)
1497 {
1498         tracer_tracing_off(&global_trace);
1499 }
1500 EXPORT_SYMBOL_GPL(tracing_off);
1501
1502 void disable_trace_on_warning(void)
1503 {
1504         if (__disable_trace_on_warning) {
1505                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1506                         "Disabling tracing due to warning\n");
1507                 tracing_off();
1508         }
1509 }
1510
1511 /**
1512  * tracer_tracing_is_on - show real state of ring buffer enabled
1513  * @tr : the trace array to know if ring buffer is enabled
1514  *
1515  * Shows real state of the ring buffer if it is enabled or not.
1516  */
1517 bool tracer_tracing_is_on(struct trace_array *tr)
1518 {
1519         if (tr->array_buffer.buffer)
1520                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1521         return !tr->buffer_disabled;
1522 }
1523
1524 /**
1525  * tracing_is_on - show state of ring buffers enabled
1526  */
1527 int tracing_is_on(void)
1528 {
1529         return tracer_tracing_is_on(&global_trace);
1530 }
1531 EXPORT_SYMBOL_GPL(tracing_is_on);
1532
1533 static int __init set_buf_size(char *str)
1534 {
1535         unsigned long buf_size;
1536
1537         if (!str)
1538                 return 0;
1539         buf_size = memparse(str, &str);
1540         /*
1541          * nr_entries can not be zero and the startup
1542          * tests require some buffer space. Therefore
1543          * ensure we have at least 4096 bytes of buffer.
1544          */
1545         trace_buf_size = max(4096UL, buf_size);
1546         return 1;
1547 }
1548 __setup("trace_buf_size=", set_buf_size);
1549
1550 static int __init set_tracing_thresh(char *str)
1551 {
1552         unsigned long threshold;
1553         int ret;
1554
1555         if (!str)
1556                 return 0;
1557         ret = kstrtoul(str, 0, &threshold);
1558         if (ret < 0)
1559                 return 0;
1560         tracing_thresh = threshold * 1000;
1561         return 1;
1562 }
1563 __setup("tracing_thresh=", set_tracing_thresh);
1564
1565 unsigned long nsecs_to_usecs(unsigned long nsecs)
1566 {
1567         return nsecs / 1000;
1568 }
1569
1570 /*
1571  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1572  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1573  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1574  * of strings in the order that the evals (enum) were defined.
1575  */
1576 #undef C
1577 #define C(a, b) b
1578
1579 /* These must match the bit positions in trace_iterator_flags */
1580 static const char *trace_options[] = {
1581         TRACE_FLAGS
1582         NULL
1583 };
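
/*
 * For illustration (not part of the original file): TRACE_FLAGS is a list
 * of C(ENUM_NAME, "string") entries, so with the "C(a, b) b" definition
 * above the array expands to just the option strings, roughly
 *
 *	static const char *trace_options[] = { "print-parent", ..., NULL };
 *
 * while trace.h redefines C() elsewhere to build the matching
 * TRACE_ITER_* bit definitions.
 */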
1584
1585 static struct {
1586         u64 (*func)(void);
1587         const char *name;
1588         int in_ns;              /* is this clock in nanoseconds? */
1589 } trace_clocks[] = {
1590         { trace_clock_local,            "local",        1 },
1591         { trace_clock_global,           "global",       1 },
1592         { trace_clock_counter,          "counter",      0 },
1593         { trace_clock_jiffies,          "uptime",       0 },
1594         { trace_clock,                  "perf",         1 },
1595         { ktime_get_mono_fast_ns,       "mono",         1 },
1596         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1597         { ktime_get_boot_fast_ns,       "boot",         1 },
1598         { ktime_get_tai_fast_ns,        "tai",          1 },
1599         ARCH_TRACE_CLOCKS
1600 };
1601
1602 bool trace_clock_in_ns(struct trace_array *tr)
1603 {
1604         if (trace_clocks[tr->clock_id].in_ns)
1605                 return true;
1606
1607         return false;
1608 }
1609
1610 /*
1611  * trace_parser_get_init - gets the buffer for trace parser
1612  */
1613 int trace_parser_get_init(struct trace_parser *parser, int size)
1614 {
1615         memset(parser, 0, sizeof(*parser));
1616
1617         parser->buffer = kmalloc(size, GFP_KERNEL);
1618         if (!parser->buffer)
1619                 return 1;
1620
1621         parser->size = size;
1622         return 0;
1623 }
1624
1625 /*
1626  * trace_parser_put - frees the buffer for trace parser
1627  */
1628 void trace_parser_put(struct trace_parser *parser)
1629 {
1630         kfree(parser->buffer);
1631         parser->buffer = NULL;
1632 }
1633
1634 /*
1635  * trace_get_user - reads the user input string separated by space
1636  * (matched by isspace(ch))
1637  *
1638  * For each string found the 'struct trace_parser' is updated,
1639  * and the function returns.
1640  *
1641  * Returns number of bytes read.
1642  *
1643  * See kernel/trace/trace.h for 'struct trace_parser' details.
1644  */
1645 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1646         size_t cnt, loff_t *ppos)
1647 {
1648         char ch;
1649         size_t read = 0;
1650         ssize_t ret;
1651
1652         if (!*ppos)
1653                 trace_parser_clear(parser);
1654
1655         ret = get_user(ch, ubuf++);
1656         if (ret)
1657                 goto out;
1658
1659         read++;
1660         cnt--;
1661
1662         /*
1663          * The parser is not finished with the last write,
1664          * continue reading the user input without skipping spaces.
1665          */
1666         if (!parser->cont) {
1667                 /* skip white space */
1668                 while (cnt && isspace(ch)) {
1669                         ret = get_user(ch, ubuf++);
1670                         if (ret)
1671                                 goto out;
1672                         read++;
1673                         cnt--;
1674                 }
1675
1676                 parser->idx = 0;
1677
1678                 /* only spaces were written */
1679                 if (isspace(ch) || !ch) {
1680                         *ppos += read;
1681                         ret = read;
1682                         goto out;
1683                 }
1684         }
1685
1686         /* read the non-space input */
1687         while (cnt && !isspace(ch) && ch) {
1688                 if (parser->idx < parser->size - 1)
1689                         parser->buffer[parser->idx++] = ch;
1690                 else {
1691                         ret = -EINVAL;
1692                         goto out;
1693                 }
1694                 ret = get_user(ch, ubuf++);
1695                 if (ret)
1696                         goto out;
1697                 read++;
1698                 cnt--;
1699         }
1700
1701         /* We either finished the input or have to wait for another call. */
1702         if (isspace(ch) || !ch) {
1703                 parser->buffer[parser->idx] = 0;
1704                 parser->cont = false;
1705         } else if (parser->idx < parser->size - 1) {
1706                 parser->cont = true;
1707                 parser->buffer[parser->idx++] = ch;
1708                 /* Make sure the parsed string always terminates with '\0'. */
1709                 parser->buffer[parser->idx] = 0;
1710         } else {
1711                 ret = -EINVAL;
1712                 goto out;
1713         }
1714
1715         *ppos += read;
1716         ret = read;
1717
1718 out:
1719         return ret;
1720 }
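
/*
 * A minimal sketch of how a write() handler can use the parser helpers
 * above to pull one space-separated token out of a user buffer (the
 * function name here is hypothetical; see ftrace_regex_write() for a
 * real user):
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && trace_parser_loaded(&parser))
 *			pr_info("token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */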
1721
1722 /* TODO add a seq_buf_to_buffer() */
1723 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1724 {
1725         int len;
1726
1727         if (trace_seq_used(s) <= s->seq.readpos)
1728                 return -EBUSY;
1729
1730         len = trace_seq_used(s) - s->seq.readpos;
1731         if (cnt > len)
1732                 cnt = len;
1733         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1734
1735         s->seq.readpos += cnt;
1736         return cnt;
1737 }
1738
1739 unsigned long __read_mostly     tracing_thresh;
1740
1741 #ifdef CONFIG_TRACER_MAX_TRACE
1742 static const struct file_operations tracing_max_lat_fops;
1743
1744 #ifdef LATENCY_FS_NOTIFY
1745
1746 static struct workqueue_struct *fsnotify_wq;
1747
1748 static void latency_fsnotify_workfn(struct work_struct *work)
1749 {
1750         struct trace_array *tr = container_of(work, struct trace_array,
1751                                               fsnotify_work);
1752         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1753 }
1754
1755 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1756 {
1757         struct trace_array *tr = container_of(iwork, struct trace_array,
1758                                               fsnotify_irqwork);
1759         queue_work(fsnotify_wq, &tr->fsnotify_work);
1760 }
1761
1762 static void trace_create_maxlat_file(struct trace_array *tr,
1763                                      struct dentry *d_tracer)
1764 {
1765         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1766         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1767         tr->d_max_latency = trace_create_file("tracing_max_latency",
1768                                               TRACE_MODE_WRITE,
1769                                               d_tracer, &tr->max_latency,
1770                                               &tracing_max_lat_fops);
1771 }
1772
1773 __init static int latency_fsnotify_init(void)
1774 {
1775         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1776                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1777         if (!fsnotify_wq) {
1778                 pr_err("Unable to allocate tr_max_lat_wq\n");
1779                 return -ENOMEM;
1780         }
1781         return 0;
1782 }
1783
1784 late_initcall_sync(latency_fsnotify_init);
1785
1786 void latency_fsnotify(struct trace_array *tr)
1787 {
1788         if (!fsnotify_wq)
1789                 return;
1790         /*
1791          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1792          * possible that we are called from __schedule() or do_idle(), which
1793          * could cause a deadlock.
1794          */
1795         irq_work_queue(&tr->fsnotify_irqwork);
1796 }
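
/*
 * The resulting notification chain is:
 *
 *	latency_fsnotify()			// any context, even from __schedule()
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	    -> latency_fsnotify_workfn_irq()	// hard irq context
 *	      -> queue_work(fsnotify_wq, ...)
 *	        -> latency_fsnotify_workfn()	// process context
 *	          -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
 */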
1797
1798 #else /* !LATENCY_FS_NOTIFY */
1799
1800 #define trace_create_maxlat_file(tr, d_tracer)                          \
1801         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1802                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1803
1804 #endif
1805
1806 /*
1807  * Copy the new maximum trace into the separate maximum-trace
1808  * structure. (this way the maximum trace is permanently saved,
1809  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1810  */
1811 static void
1812 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1813 {
1814         struct array_buffer *trace_buf = &tr->array_buffer;
1815         struct array_buffer *max_buf = &tr->max_buffer;
1816         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1817         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1818
1819         max_buf->cpu = cpu;
1820         max_buf->time_start = data->preempt_timestamp;
1821
1822         max_data->saved_latency = tr->max_latency;
1823         max_data->critical_start = data->critical_start;
1824         max_data->critical_end = data->critical_end;
1825
1826         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1827         max_data->pid = tsk->pid;
1828         /*
1829          * If tsk == current, then use current_uid(), as that does not use
1830          * RCU. The irq tracer can be called out of RCU scope.
1831          */
1832         if (tsk == current)
1833                 max_data->uid = current_uid();
1834         else
1835                 max_data->uid = task_uid(tsk);
1836
1837         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1838         max_data->policy = tsk->policy;
1839         max_data->rt_priority = tsk->rt_priority;
1840
1841         /* record this task's comm */
1842         tracing_record_cmdline(tsk);
1843         latency_fsnotify(tr);
1844 }
1845
1846 /**
1847  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1848  * @tr: trace array to snapshot
1849  * @tsk: the task with the latency
1850  * @cpu: The cpu that initiated the trace.
1851  * @cond_data: User data associated with a conditional snapshot
1852  *
1853  * Flip the buffers between the @tr and the max_tr and record information
1854  * about which task was the cause of this latency.
1855  */
1856 void
1857 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1858               void *cond_data)
1859 {
1860         if (tr->stop_count)
1861                 return;
1862
1863         WARN_ON_ONCE(!irqs_disabled());
1864
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         /* Inherit the recordable setting from array_buffer */
1874         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1875                 ring_buffer_record_on(tr->max_buffer.buffer);
1876         else
1877                 ring_buffer_record_off(tr->max_buffer.buffer);
1878
1879 #ifdef CONFIG_TRACER_SNAPSHOT
1880         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1881                 arch_spin_unlock(&tr->max_lock);
1882                 return;
1883         }
1884 #endif
1885         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1886
1887         __update_max_tr(tr, tsk, cpu);
1888
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891
1892 /**
1893  * update_max_tr_single - only copy one trace over, and reset the rest
1894  * @tr: trace array to snapshot
1895  * @tsk: task with the latency
1896  * @cpu: the cpu of the buffer to copy.
1897  *
1898  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1899  */
1900 void
1901 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1902 {
1903         int ret;
1904
1905         if (tr->stop_count)
1906                 return;
1907
1908         WARN_ON_ONCE(!irqs_disabled());
1909         if (!tr->allocated_snapshot) {
1910                 /* Only the nop tracer should hit this when disabling */
1911                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1912                 return;
1913         }
1914
1915         arch_spin_lock(&tr->max_lock);
1916
1917         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1918
1919         if (ret == -EBUSY) {
1920                 /*
1921                  * We failed to swap the buffer due to a commit taking
1922                  * place on this CPU. We fail to record, but we reset
1923                  * the max trace buffer (no one writes directly to it)
1924                  * and flag that it failed.
1925                  */
1926                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1927                         "Failed to swap buffers due to commit in progress\n");
1928         }
1929
1930         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1931
1932         __update_max_tr(tr, tsk, cpu);
1933         arch_spin_unlock(&tr->max_lock);
1934 }
1935
1936 #endif /* CONFIG_TRACER_MAX_TRACE */
1937
1938 static int wait_on_pipe(struct trace_iterator *iter, int full)
1939 {
1940         /* Iterators are static, they should be filled or empty */
1941         if (trace_buffer_iter(iter, iter->cpu_file))
1942                 return 0;
1943
1944         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1945                                 full);
1946 }
1947
1948 #ifdef CONFIG_FTRACE_STARTUP_TEST
1949 static bool selftests_can_run;
1950
1951 struct trace_selftests {
1952         struct list_head                list;
1953         struct tracer                   *type;
1954 };
1955
1956 static LIST_HEAD(postponed_selftests);
1957
1958 static int save_selftest(struct tracer *type)
1959 {
1960         struct trace_selftests *selftest;
1961
1962         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1963         if (!selftest)
1964                 return -ENOMEM;
1965
1966         selftest->type = type;
1967         list_add(&selftest->list, &postponed_selftests);
1968         return 0;
1969 }
1970
1971 static int run_tracer_selftest(struct tracer *type)
1972 {
1973         struct trace_array *tr = &global_trace;
1974         struct tracer *saved_tracer = tr->current_trace;
1975         int ret;
1976
1977         if (!type->selftest || tracing_selftest_disabled)
1978                 return 0;
1979
1980         /*
1981          * If a tracer registers early in boot up (before scheduling is
1982          * initialized and such), then do not run its selftests yet.
1983          * Instead, run it a little later in the boot process.
1984          */
1985         if (!selftests_can_run)
1986                 return save_selftest(type);
1987
1988         if (!tracing_is_on()) {
1989                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1990                         type->name);
1991                 return 0;
1992         }
1993
1994         /*
1995          * Run a selftest on this tracer.
1996          * Here we reset the trace buffer, and set the current
1997          * tracer to be this tracer. The tracer can then run some
1998          * internal tracing to verify that everything is in order.
1999          * If we fail, we do not register this tracer.
2000          */
2001         tracing_reset_online_cpus(&tr->array_buffer);
2002
2003         tr->current_trace = type;
2004
2005 #ifdef CONFIG_TRACER_MAX_TRACE
2006         if (type->use_max_tr) {
2007                 /* If we expanded the buffers, make sure the max is expanded too */
2008                 if (ring_buffer_expanded)
2009                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2010                                            RING_BUFFER_ALL_CPUS);
2011                 tr->allocated_snapshot = true;
2012         }
2013 #endif
2014
2015         /* the test is responsible for initializing and enabling */
2016         pr_info("Testing tracer %s: ", type->name);
2017         ret = type->selftest(type, tr);
2018         /* the test is responsible for resetting too */
2019         tr->current_trace = saved_tracer;
2020         if (ret) {
2021                 printk(KERN_CONT "FAILED!\n");
2022                 /* Add the warning after printing 'FAILED' */
2023                 WARN_ON(1);
2024                 return -1;
2025         }
2026         /* Only reset on passing, to avoid touching corrupted buffers */
2027         tracing_reset_online_cpus(&tr->array_buffer);
2028
2029 #ifdef CONFIG_TRACER_MAX_TRACE
2030         if (type->use_max_tr) {
2031                 tr->allocated_snapshot = false;
2032
2033                 /* Shrink the max buffer again */
2034                 if (ring_buffer_expanded)
2035                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2036                                            RING_BUFFER_ALL_CPUS);
2037         }
2038 #endif
2039
2040         printk(KERN_CONT "PASSED\n");
2041         return 0;
2042 }
2043
2044 static __init int init_trace_selftests(void)
2045 {
2046         struct trace_selftests *p, *n;
2047         struct tracer *t, **last;
2048         int ret;
2049
2050         selftests_can_run = true;
2051
2052         mutex_lock(&trace_types_lock);
2053
2054         if (list_empty(&postponed_selftests))
2055                 goto out;
2056
2057         pr_info("Running postponed tracer tests:\n");
2058
2059         tracing_selftest_running = true;
2060         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2061                 /* This loop can take minutes when sanitizers are enabled, so
2062                  * let's make sure we allow RCU processing.
2063                  */
2064                 cond_resched();
2065                 ret = run_tracer_selftest(p->type);
2066                 /* If the test fails, then warn and remove from available_tracers */
2067                 if (ret < 0) {
2068                         WARN(1, "tracer: %s failed selftest, disabling\n",
2069                              p->type->name);
2070                         last = &trace_types;
2071                         for (t = trace_types; t; t = t->next) {
2072                                 if (t == p->type) {
2073                                         *last = t->next;
2074                                         break;
2075                                 }
2076                                 last = &t->next;
2077                         }
2078                 }
2079                 list_del(&p->list);
2080                 kfree(p);
2081         }
2082         tracing_selftest_running = false;
2083
2084  out:
2085         mutex_unlock(&trace_types_lock);
2086
2087         return 0;
2088 }
2089 core_initcall(init_trace_selftests);
2090 #else
2091 static inline int run_tracer_selftest(struct tracer *type)
2092 {
2093         return 0;
2094 }
2095 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2096
2097 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2098
2099 static void __init apply_trace_boot_options(void);
2100
2101 /**
2102  * register_tracer - register a tracer with the ftrace system.
2103  * @type: the plugin for the tracer
2104  *
2105  * Register a new plugin tracer.
2106  */
2107 int __init register_tracer(struct tracer *type)
2108 {
2109         struct tracer *t;
2110         int ret = 0;
2111
2112         if (!type->name) {
2113                 pr_info("Tracer must have a name\n");
2114                 return -1;
2115         }
2116
2117         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2118                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2119                 return -1;
2120         }
2121
2122         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2123                 pr_warn("Can not register tracer %s due to lockdown\n",
2124                            type->name);
2125                 return -EPERM;
2126         }
2127
2128         mutex_lock(&trace_types_lock);
2129
2130         tracing_selftest_running = true;
2131
2132         for (t = trace_types; t; t = t->next) {
2133                 if (strcmp(type->name, t->name) == 0) {
2134                         /* already found */
2135                         pr_info("Tracer %s already registered\n",
2136                                 type->name);
2137                         ret = -1;
2138                         goto out;
2139                 }
2140         }
2141
2142         if (!type->set_flag)
2143                 type->set_flag = &dummy_set_flag;
2144         if (!type->flags) {
2145                 /* allocate a dummy tracer_flags */
2146                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2147                 if (!type->flags) {
2148                         ret = -ENOMEM;
2149                         goto out;
2150                 }
2151                 type->flags->val = 0;
2152                 type->flags->opts = dummy_tracer_opt;
2153         } else
2154                 if (!type->flags->opts)
2155                         type->flags->opts = dummy_tracer_opt;
2156
2157         /* store the tracer for __set_tracer_option */
2158         type->flags->trace = type;
2159
2160         ret = run_tracer_selftest(type);
2161         if (ret < 0)
2162                 goto out;
2163
2164         type->next = trace_types;
2165         trace_types = type;
2166         add_tracer_options(&global_trace, type);
2167
2168  out:
2169         tracing_selftest_running = false;
2170         mutex_unlock(&trace_types_lock);
2171
2172         if (ret || !default_bootup_tracer)
2173                 goto out_unlock;
2174
2175         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2176                 goto out_unlock;
2177
2178         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2179         /* Do we want this tracer to start on bootup? */
2180         tracing_set_tracer(&global_trace, type->name);
2181         default_bootup_tracer = NULL;
2182
2183         apply_trace_boot_options();
2184
2185         /* disable other selftests, since this tracer will break them. */
2186         disable_tracing_selftest("running a tracer");
2187
2188  out_unlock:
2189         return ret;
2190 }
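
/*
 * A minimal sketch of what registering a tracer can look like (the
 * names here are hypothetical; a real tracer usually also supplies
 * selftest, flag and output callbacks):
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */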
2191
2192 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2193 {
2194         struct trace_buffer *buffer = buf->buffer;
2195
2196         if (!buffer)
2197                 return;
2198
2199         ring_buffer_record_disable(buffer);
2200
2201         /* Make sure all commits have finished */
2202         synchronize_rcu();
2203         ring_buffer_reset_cpu(buffer, cpu);
2204
2205         ring_buffer_record_enable(buffer);
2206 }
2207
2208 void tracing_reset_online_cpus(struct array_buffer *buf)
2209 {
2210         struct trace_buffer *buffer = buf->buffer;
2211
2212         if (!buffer)
2213                 return;
2214
2215         ring_buffer_record_disable(buffer);
2216
2217         /* Make sure all commits have finished */
2218         synchronize_rcu();
2219
2220         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2221
2222         ring_buffer_reset_online_cpus(buffer);
2223
2224         ring_buffer_record_enable(buffer);
2225 }
2226
2227 /* Must have trace_types_lock held */
2228 void tracing_reset_all_online_cpus_unlocked(void)
2229 {
2230         struct trace_array *tr;
2231
2232         lockdep_assert_held(&trace_types_lock);
2233
2234         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2235                 if (!tr->clear_trace)
2236                         continue;
2237                 tr->clear_trace = false;
2238                 tracing_reset_online_cpus(&tr->array_buffer);
2239 #ifdef CONFIG_TRACER_MAX_TRACE
2240                 tracing_reset_online_cpus(&tr->max_buffer);
2241 #endif
2242         }
2243 }
2244
2245 void tracing_reset_all_online_cpus(void)
2246 {
2247         mutex_lock(&trace_types_lock);
2248         tracing_reset_all_online_cpus_unlocked();
2249         mutex_unlock(&trace_types_lock);
2250 }
2251
2252 /*
2253  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2254  * is the tgid last observed corresponding to pid=i.
2255  */
2256 static int *tgid_map;
2257
2258 /* The maximum valid index into tgid_map. */
2259 static size_t tgid_map_max;
2260
2261 #define SAVED_CMDLINES_DEFAULT 128
2262 #define NO_CMDLINE_MAP UINT_MAX
2263 /*
2264  * Preemption must be disabled before acquiring trace_cmdline_lock.
2265  * The various trace_arrays' max_lock must be acquired in a context
2266  * where interrupt is disabled.
2267  */
2268 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2269 struct saved_cmdlines_buffer {
2270         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2271         unsigned *map_cmdline_to_pid;
2272         unsigned cmdline_num;
2273         int cmdline_idx;
2274         char *saved_cmdlines;
2275 };
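
/*
 * The cmdline cache works as a small ring of cmdline_num slots: a pid,
 * masked by (PID_MAX_DEFAULT - 1), indexes map_pid_to_cmdline[] to find
 * its slot, map_cmdline_to_pid[] remembers which pid currently owns each
 * slot, and saved_cmdlines[] holds the TASK_COMM_LEN-sized comm strings
 * back to back.
 */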
2276 static struct saved_cmdlines_buffer *savedcmd;
2277
2278 static inline char *get_saved_cmdlines(int idx)
2279 {
2280         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2281 }
2282
2283 static inline void set_cmdline(int idx, const char *cmdline)
2284 {
2285         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2286 }
2287
2288 static int allocate_cmdlines_buffer(unsigned int val,
2289                                     struct saved_cmdlines_buffer *s)
2290 {
2291         s->map_cmdline_to_pid = kmalloc_array(val,
2292                                               sizeof(*s->map_cmdline_to_pid),
2293                                               GFP_KERNEL);
2294         if (!s->map_cmdline_to_pid)
2295                 return -ENOMEM;
2296
2297         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2298         if (!s->saved_cmdlines) {
2299                 kfree(s->map_cmdline_to_pid);
2300                 return -ENOMEM;
2301         }
2302
2303         s->cmdline_idx = 0;
2304         s->cmdline_num = val;
2305         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2306                sizeof(s->map_pid_to_cmdline));
2307         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2308                val * sizeof(*s->map_cmdline_to_pid));
2309
2310         return 0;
2311 }
2312
2313 static int trace_create_savedcmd(void)
2314 {
2315         int ret;
2316
2317         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2318         if (!savedcmd)
2319                 return -ENOMEM;
2320
2321         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2322         if (ret < 0) {
2323                 kfree(savedcmd);
2324                 savedcmd = NULL;
2325                 return -ENOMEM;
2326         }
2327
2328         return 0;
2329 }
2330
2331 int is_tracing_stopped(void)
2332 {
2333         return global_trace.stop_count;
2334 }
2335
2336 /**
2337  * tracing_start - quick start of the tracer
2338  *
2339  * If tracing is enabled but was stopped by tracing_stop,
2340  * this will start the tracer back up.
2341  */
2342 void tracing_start(void)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         if (tracing_disabled)
2348                 return;
2349
2350         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2351         if (--global_trace.stop_count) {
2352                 if (global_trace.stop_count < 0) {
2353                         /* Someone screwed up their debugging */
2354                         WARN_ON_ONCE(1);
2355                         global_trace.stop_count = 0;
2356                 }
2357                 goto out;
2358         }
2359
2360         /* Prevent the buffers from switching */
2361         arch_spin_lock(&global_trace.max_lock);
2362
2363         buffer = global_trace.array_buffer.buffer;
2364         if (buffer)
2365                 ring_buffer_record_enable(buffer);
2366
2367 #ifdef CONFIG_TRACER_MAX_TRACE
2368         buffer = global_trace.max_buffer.buffer;
2369         if (buffer)
2370                 ring_buffer_record_enable(buffer);
2371 #endif
2372
2373         arch_spin_unlock(&global_trace.max_lock);
2374
2375  out:
2376         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2377 }
2378
2379 static void tracing_start_tr(struct trace_array *tr)
2380 {
2381         struct trace_buffer *buffer;
2382         unsigned long flags;
2383
2384         if (tracing_disabled)
2385                 return;
2386
2387         /* If global, we need to also start the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_start();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392
2393         if (--tr->stop_count) {
2394                 if (tr->stop_count < 0) {
2395                         /* Someone screwed up their debugging */
2396                         WARN_ON_ONCE(1);
2397                         tr->stop_count = 0;
2398                 }
2399                 goto out;
2400         }
2401
2402         buffer = tr->array_buffer.buffer;
2403         if (buffer)
2404                 ring_buffer_record_enable(buffer);
2405
2406  out:
2407         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2408 }
2409
2410 /**
2411  * tracing_stop - quick stop of the tracer
2412  *
2413  * Light weight way to stop tracing. Use in conjunction with
2414  * tracing_start.
2415  */
2416 void tracing_stop(void)
2417 {
2418         struct trace_buffer *buffer;
2419         unsigned long flags;
2420
2421         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2422         if (global_trace.stop_count++)
2423                 goto out;
2424
2425         /* Prevent the buffers from switching */
2426         arch_spin_lock(&global_trace.max_lock);
2427
2428         buffer = global_trace.array_buffer.buffer;
2429         if (buffer)
2430                 ring_buffer_record_disable(buffer);
2431
2432 #ifdef CONFIG_TRACER_MAX_TRACE
2433         buffer = global_trace.max_buffer.buffer;
2434         if (buffer)
2435                 ring_buffer_record_disable(buffer);
2436 #endif
2437
2438         arch_spin_unlock(&global_trace.max_lock);
2439
2440  out:
2441         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2442 }
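
/*
 * Example: code that wants a quiescent view of the trace buffers can
 * bracket its critical section with the stop/start pair, e.g.:
 *
 *	tracing_stop();
 *	examine_trace_buffers();	// hypothetical consumer
 *	tracing_start();
 */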
2443
2444 static void tracing_stop_tr(struct trace_array *tr)
2445 {
2446         struct trace_buffer *buffer;
2447         unsigned long flags;
2448
2449         /* If global, we need to also stop the max tracer */
2450         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2451                 return tracing_stop();
2452
2453         raw_spin_lock_irqsave(&tr->start_lock, flags);
2454         if (tr->stop_count++)
2455                 goto out;
2456
2457         buffer = tr->array_buffer.buffer;
2458         if (buffer)
2459                 ring_buffer_record_disable(buffer);
2460
2461  out:
2462         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2463 }
2464
2465 static int trace_save_cmdline(struct task_struct *tsk)
2466 {
2467         unsigned tpid, idx;
2468
2469         /* treat recording of idle task as a success */
2470         if (!tsk->pid)
2471                 return 1;
2472
2473         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2474
2475         /*
2476          * It's not the end of the world if we don't get
2477          * the lock, but we also don't want to spin
2478          * nor do we want to disable interrupts,
2479          * so if we miss here, then better luck next time.
2480          *
2481          * This is called within the scheduler and wakeup paths, so
2482          * interrupts had better be disabled and the run queue lock held.
2483          */
2484         lockdep_assert_preemption_disabled();
2485         if (!arch_spin_trylock(&trace_cmdline_lock))
2486                 return 0;
2487
2488         idx = savedcmd->map_pid_to_cmdline[tpid];
2489         if (idx == NO_CMDLINE_MAP) {
2490                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2491
2492                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2493                 savedcmd->cmdline_idx = idx;
2494         }
2495
2496         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2497         set_cmdline(idx, tsk->comm);
2498
2499         arch_spin_unlock(&trace_cmdline_lock);
2500
2501         return 1;
2502 }
2503
2504 static void __trace_find_cmdline(int pid, char comm[])
2505 {
2506         unsigned map;
2507         int tpid;
2508
2509         if (!pid) {
2510                 strcpy(comm, "<idle>");
2511                 return;
2512         }
2513
2514         if (WARN_ON_ONCE(pid < 0)) {
2515                 strcpy(comm, "<XXX>");
2516                 return;
2517         }
2518
2519         tpid = pid & (PID_MAX_DEFAULT - 1);
2520         map = savedcmd->map_pid_to_cmdline[tpid];
2521         if (map != NO_CMDLINE_MAP) {
2522                 tpid = savedcmd->map_cmdline_to_pid[map];
2523                 if (tpid == pid) {
2524                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2525                         return;
2526                 }
2527         }
2528         strcpy(comm, "<...>");
2529 }
2530
2531 void trace_find_cmdline(int pid, char comm[])
2532 {
2533         preempt_disable();
2534         arch_spin_lock(&trace_cmdline_lock);
2535
2536         __trace_find_cmdline(pid, comm);
2537
2538         arch_spin_unlock(&trace_cmdline_lock);
2539         preempt_enable();
2540 }
2541
2542 static int *trace_find_tgid_ptr(int pid)
2543 {
2544         /*
2545          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2546          * if we observe a non-NULL tgid_map then we also observe the correct
2547          * tgid_map_max.
2548          */
2549         int *map = smp_load_acquire(&tgid_map);
2550
2551         if (unlikely(!map || pid > tgid_map_max))
2552                 return NULL;
2553
2554         return &map[pid];
2555 }
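
/*
 * For reference, the writer side (in set_tracer_flag()) publishes the
 * map roughly like this, so that the acquire above pairs with a release:
 *
 *	tgid_map_max = pid_max;
 *	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL);
 *	smp_store_release(&tgid_map, map);	// pairs with the acquire above
 */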
2556
2557 int trace_find_tgid(int pid)
2558 {
2559         int *ptr = trace_find_tgid_ptr(pid);
2560
2561         return ptr ? *ptr : 0;
2562 }
2563
2564 static int trace_save_tgid(struct task_struct *tsk)
2565 {
2566         int *ptr;
2567
2568         /* treat recording of idle task as a success */
2569         if (!tsk->pid)
2570                 return 1;
2571
2572         ptr = trace_find_tgid_ptr(tsk->pid);
2573         if (!ptr)
2574                 return 0;
2575
2576         *ptr = tsk->tgid;
2577         return 1;
2578 }
2579
2580 static bool tracing_record_taskinfo_skip(int flags)
2581 {
2582         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2583                 return true;
2584         if (!__this_cpu_read(trace_taskinfo_save))
2585                 return true;
2586         return false;
2587 }
2588
2589 /**
2590  * tracing_record_taskinfo - record the task info of a task
2591  *
2592  * @task:  task to record
2593  * @flags: TRACE_RECORD_CMDLINE for recording comm
2594  *         TRACE_RECORD_TGID for recording tgid
2595  */
2596 void tracing_record_taskinfo(struct task_struct *task, int flags)
2597 {
2598         bool done;
2599
2600         if (tracing_record_taskinfo_skip(flags))
2601                 return;
2602
2603         /*
2604          * Record as much task information as possible. If some fail, continue
2605          * to try to record the others.
2606          */
2607         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2608         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2609
2610         /* If recording any information failed, try again soon. */
2611         if (!done)
2612                 return;
2613
2614         __this_cpu_write(trace_taskinfo_save, false);
2615 }
2616
2617 /**
2618  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2619  *
2620  * @prev: previous task during sched_switch
2621  * @next: next task during sched_switch
2622  * @flags: TRACE_RECORD_CMDLINE for recording comm
2623  *         TRACE_RECORD_TGID for recording tgid
2624  */
2625 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2626                                           struct task_struct *next, int flags)
2627 {
2628         bool done;
2629
2630         if (tracing_record_taskinfo_skip(flags))
2631                 return;
2632
2633         /*
2634          * Record as much task information as possible. If some fail, continue
2635          * to try to record the others.
2636          */
2637         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2638         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2639         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2640         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2641
2642         /* If recording any information failed, try again soon. */
2643         if (!done)
2644                 return;
2645
2646         __this_cpu_write(trace_taskinfo_save, false);
2647 }
2648
2649 /* Helpers to record a specific task information */
2650 void tracing_record_cmdline(struct task_struct *task)
2651 {
2652         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2653 }
2654
2655 void tracing_record_tgid(struct task_struct *task)
2656 {
2657         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2658 }
2659
2660 /*
2661  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2662  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2663  * simplifies those functions and keeps them in sync.
2664  */
2665 enum print_line_t trace_handle_return(struct trace_seq *s)
2666 {
2667         return trace_seq_has_overflowed(s) ?
2668                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2669 }
2670 EXPORT_SYMBOL_GPL(trace_handle_return);
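
/*
 * A minimal sketch of how a trace_event output callback typically uses
 * this helper (the event and function names here are hypothetical):
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		trace_seq_puts(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */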
2671
2672 static unsigned short migration_disable_value(void)
2673 {
2674 #if defined(CONFIG_SMP)
2675         return current->migration_disabled;
2676 #else
2677         return 0;
2678 #endif
2679 }
2680
2681 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2682 {
2683         unsigned int trace_flags = irqs_status;
2684         unsigned int pc;
2685
2686         pc = preempt_count();
2687
2688         if (pc & NMI_MASK)
2689                 trace_flags |= TRACE_FLAG_NMI;
2690         if (pc & HARDIRQ_MASK)
2691                 trace_flags |= TRACE_FLAG_HARDIRQ;
2692         if (in_serving_softirq())
2693                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2694         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2695                 trace_flags |= TRACE_FLAG_BH_OFF;
2696
2697         if (tif_need_resched())
2698                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2699         if (test_preempt_need_resched())
2700                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
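        /*
         * Pack everything into one word: bits 0-3 hold the (clamped)
         * preemption count, bits 4-7 the (clamped) migration-disable
         * depth, and bits 16 and up the irq/NMI/softirq/resched flags.
         */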
2701         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2702                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2703 }
2704
2705 struct ring_buffer_event *
2706 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2707                           int type,
2708                           unsigned long len,
2709                           unsigned int trace_ctx)
2710 {
2711         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2712 }
2713
2714 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2715 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2716 static int trace_buffered_event_ref;
2717
2718 /**
2719  * trace_buffered_event_enable - enable buffering events
2720  *
2721  * When events are being filtered, it is quicker to use a temporary
2722  * buffer to write the event data into if there's a likely chance
2723  * that it will not be committed. Discarding an event from the ring
2724  * buffer is not as fast as committing one, and is much slower than
2725  * copying the data on a successful commit.
2726  *
2727  * When an event is to be filtered, per-CPU buffers are allocated to
2728  * write the event data into. If the event is filtered and discarded,
2729  * it is simply dropped; otherwise the entire data is committed in
2730  * one shot.
2731  */
2732 void trace_buffered_event_enable(void)
2733 {
2734         struct ring_buffer_event *event;
2735         struct page *page;
2736         int cpu;
2737
2738         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2739
2740         if (trace_buffered_event_ref++)
2741                 return;
2742
2743         for_each_tracing_cpu(cpu) {
2744                 page = alloc_pages_node(cpu_to_node(cpu),
2745                                         GFP_KERNEL | __GFP_NORETRY, 0);
2746                 if (!page)
2747                         goto failed;
2748
2749                 event = page_address(page);
2750                 memset(event, 0, sizeof(*event));
2751
2752                 per_cpu(trace_buffered_event, cpu) = event;
2753
2754                 preempt_disable();
2755                 if (cpu == smp_processor_id() &&
2756                     __this_cpu_read(trace_buffered_event) !=
2757                     per_cpu(trace_buffered_event, cpu))
2758                         WARN_ON_ONCE(1);
2759                 preempt_enable();
2760         }
2761
2762         return;
2763  failed:
2764         trace_buffered_event_disable();
2765 }
2766
2767 static void enable_trace_buffered_event(void *data)
2768 {
2769         /* Probably not needed, but do it anyway */
2770         smp_rmb();
2771         this_cpu_dec(trace_buffered_event_cnt);
2772 }
2773
2774 static void disable_trace_buffered_event(void *data)
2775 {
2776         this_cpu_inc(trace_buffered_event_cnt);
2777 }
2778
2779 /**
2780  * trace_buffered_event_disable - disable buffering events
2781  *
2782  * When a filter is removed, it is faster to not use the buffered
2783  * events, and to commit directly into the ring buffer. Free up
2784  * the temp buffers when there are no more users. This requires
2785  * special synchronization with current events.
2786  */
2787 void trace_buffered_event_disable(void)
2788 {
2789         int cpu;
2790
2791         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2792
2793         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2794                 return;
2795
2796         if (--trace_buffered_event_ref)
2797                 return;
2798
2799         preempt_disable();
2800         /* For each CPU, set the buffer as used. */
2801         smp_call_function_many(tracing_buffer_mask,
2802                                disable_trace_buffered_event, NULL, 1);
2803         preempt_enable();
2804
2805         /* Wait for all current users to finish */
2806         synchronize_rcu();
2807
2808         for_each_tracing_cpu(cpu) {
2809                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2810                 per_cpu(trace_buffered_event, cpu) = NULL;
2811         }
2812         /*
2813          * Make sure trace_buffered_event is NULL before clearing
2814          * trace_buffered_event_cnt.
2815          */
2816         smp_wmb();
2817
2818         preempt_disable();
2819         /* Do the work on each cpu */
2820         smp_call_function_many(tracing_buffer_mask,
2821                                enable_trace_buffered_event, NULL, 1);
2822         preempt_enable();
2823 }
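
/*
 * A minimal sketch of how the enable/disable pair is used around adding
 * and removing an event filter (the filter helpers named here are
 * hypothetical; both calls must be made under event_mutex):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	attach_example_filter(file);
 *	mutex_unlock(&event_mutex);
 *
 *	...
 *
 *	mutex_lock(&event_mutex);
 *	detach_example_filter(file);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */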
2824
2825 static struct trace_buffer *temp_buffer;
2826
2827 struct ring_buffer_event *
2828 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2829                           struct trace_event_file *trace_file,
2830                           int type, unsigned long len,
2831                           unsigned int trace_ctx)
2832 {
2833         struct ring_buffer_event *entry;
2834         struct trace_array *tr = trace_file->tr;
2835         int val;
2836
2837         *current_rb = tr->array_buffer.buffer;
2838
2839         if (!tr->no_filter_buffering_ref &&
2840             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2841                 preempt_disable_notrace();
2842                 /*
2843                  * Filtering is on, so try to use the per cpu buffer first.
2844                  * This buffer will simulate a ring_buffer_event,
2845                  * where the type_len is zero and the array[0] will
2846                  * hold the full length.
2847          * (see include/linux/ring_buffer.h for details on
2848                  *  how the ring_buffer_event is structured).
2849                  *
2850                  * Using a temp buffer during filtering and copying it
2851                  * on a matched filter is quicker than writing directly
2852                  * into the ring buffer and then discarding it when
2853                  * it doesn't match. That is because the discard
2854                  * requires several atomic operations to get right.
2855                  * Copying on a match and doing nothing on a failed match
2856                  * is still quicker than avoiding the copy on a match but
2857                  * having to discard out of the ring buffer on a failed match.
2858                  */
2859                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2860                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2861
2862                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2863
2864                         /*
2865                          * Preemption is disabled, but interrupts and NMIs
2866                          * can still come in now. If that happens after
2867                          * the above increment, then it will have to go
2868                          * back to the old method of allocating the event
2869                          * on the ring buffer, and if the filter fails, it
2870                          * will have to call ring_buffer_discard_commit()
2871                          * to remove it.
2872                          *
2873                          * Need to also check the unlikely case that the
2874                          * length is bigger than the temp buffer size.
2875                          * If that happens, then the reserve is pretty much
2876                          * guaranteed to fail, as the ring buffer currently
2877                          * only allows events less than a page. But that may
2878                          * change in the future, so let the ring buffer reserve
2879                          * handle the failure in that case.
2880                          */
2881                         if (val == 1 && likely(len <= max_len)) {
2882                                 trace_event_setup(entry, type, trace_ctx);
2883                                 entry->array[0] = len;
2884                                 /* Return with preemption disabled */
2885                                 return entry;
2886                         }
2887                         this_cpu_dec(trace_buffered_event_cnt);
2888                 }
2889                 /* __trace_buffer_lock_reserve() disables preemption */
2890                 preempt_enable_notrace();
2891         }
2892
2893         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2894                                             trace_ctx);
2895         /*
2896          * If tracing is off, but we have triggers enabled
2897          * we still need to look at the event data. Use the temp_buffer
2898          * to store the trace event for the trigger to use. It's recursive
2899          * safe and will not be recorded anywhere.
2900          */
2901         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2902                 *current_rb = temp_buffer;
2903                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2904                                                     trace_ctx);
2905         }
2906         return entry;
2907 }
2908 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2909
2910 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2911 static DEFINE_MUTEX(tracepoint_printk_mutex);
2912
2913 static void output_printk(struct trace_event_buffer *fbuffer)
2914 {
2915         struct trace_event_call *event_call;
2916         struct trace_event_file *file;
2917         struct trace_event *event;
2918         unsigned long flags;
2919         struct trace_iterator *iter = tracepoint_print_iter;
2920
2921         /* We should never get here if iter is NULL */
2922         if (WARN_ON_ONCE(!iter))
2923                 return;
2924
2925         event_call = fbuffer->trace_file->event_call;
2926         if (!event_call || !event_call->event.funcs ||
2927             !event_call->event.funcs->trace)
2928                 return;
2929
2930         file = fbuffer->trace_file;
2931         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2932             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2933              !filter_match_preds(file->filter, fbuffer->entry)))
2934                 return;
2935
2936         event = &fbuffer->trace_file->event_call->event;
2937
2938         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2939         trace_seq_init(&iter->seq);
2940         iter->ent = fbuffer->entry;
2941         event_call->event.funcs->trace(iter, 0, event);
2942         trace_seq_putc(&iter->seq, 0);
2943         printk("%s", iter->seq.buffer);
2944
2945         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2946 }
2947
2948 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2949                              void *buffer, size_t *lenp,
2950                              loff_t *ppos)
2951 {
2952         int save_tracepoint_printk;
2953         int ret;
2954
2955         mutex_lock(&tracepoint_printk_mutex);
2956         save_tracepoint_printk = tracepoint_printk;
2957
2958         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2959
2960         /*
2961          * This will force exiting early, as tracepoint_printk
2962          * is always zero when tracepoint_print_iter is not allocated.
2963          */
2964         if (!tracepoint_print_iter)
2965                 tracepoint_printk = 0;
2966
2967         if (save_tracepoint_printk == tracepoint_printk)
2968                 goto out;
2969
2970         if (tracepoint_printk)
2971                 static_key_enable(&tracepoint_printk_key.key);
2972         else
2973                 static_key_disable(&tracepoint_printk_key.key);
2974
2975  out:
2976         mutex_unlock(&tracepoint_printk_mutex);
2977
2978         return ret;
2979 }
2980
2981 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2982 {
2983         enum event_trigger_type tt = ETT_NONE;
2984         struct trace_event_file *file = fbuffer->trace_file;
2985
2986         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2987                         fbuffer->entry, &tt))
2988                 goto discard;
2989
2990         if (static_key_false(&tracepoint_printk_key.key))
2991                 output_printk(fbuffer);
2992
2993         if (static_branch_unlikely(&trace_event_exports_enabled))
2994                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2995
2996         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2997                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2998
2999 discard:
3000         if (tt)
3001                 event_triggers_post_call(file, tt);
3002
3003 }
3004 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3005
3006 /*
3007  * Skip 3:
3008  *
3009  *   trace_buffer_unlock_commit_regs()
3010  *   trace_event_buffer_commit()
3011  *   trace_event_raw_event_xxx()
3012  */
3013 # define STACK_SKIP 3
3014
3015 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3016                                      struct trace_buffer *buffer,
3017                                      struct ring_buffer_event *event,
3018                                      unsigned int trace_ctx,
3019                                      struct pt_regs *regs)
3020 {
3021         __buffer_unlock_commit(buffer, event);
3022
3023         /*
3024          * If regs is not set, then skip the necessary functions.
3025          * Note, we can still get here via blktrace, wakeup tracer
3026          * and mmiotrace, but that's ok if they lose a function or
3027          * two. They are not that meaningful.
3028          */
3029         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3030         ftrace_trace_userstack(tr, buffer, trace_ctx);
3031 }
3032
3033 /*
3034  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3035  */
3036 void
3037 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3038                                    struct ring_buffer_event *event)
3039 {
3040         __buffer_unlock_commit(buffer, event);
3041 }
3042
3043 void
3044 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3045                parent_ip, unsigned int trace_ctx)
3046 {
3047         struct trace_event_call *call = &event_function;
3048         struct trace_buffer *buffer = tr->array_buffer.buffer;
3049         struct ring_buffer_event *event;
3050         struct ftrace_entry *entry;
3051
3052         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3053                                             trace_ctx);
3054         if (!event)
3055                 return;
3056         entry   = ring_buffer_event_data(event);
3057         entry->ip                       = ip;
3058         entry->parent_ip                = parent_ip;
3059
3060         if (!call_filter_check_discard(call, entry, buffer, event)) {
3061                 if (static_branch_unlikely(&trace_function_exports_enabled))
3062                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3063                 __buffer_unlock_commit(buffer, event);
3064         }
3065 }
3066
3067 #ifdef CONFIG_STACKTRACE
3068
3069 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3070 #define FTRACE_KSTACK_NESTING   4
3071
3072 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3073
3074 struct ftrace_stack {
3075         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3076 };
3077
3078
3079 struct ftrace_stacks {
3080         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3081 };
3082
3083 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3084 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
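
/*
 * ftrace_stack_reserve tracks the current stack-trace nesting depth on
 * this CPU: nesting level N (normal, softirq, irq, NMI) uses
 * ftrace_stacks.stacks[N] as its scratch space, so nested contexts never
 * clobber each other's entries.
 */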
3085
3086 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3087                                  unsigned int trace_ctx,
3088                                  int skip, struct pt_regs *regs)
3089 {
3090         struct trace_event_call *call = &event_kernel_stack;
3091         struct ring_buffer_event *event;
3092         unsigned int size, nr_entries;
3093         struct ftrace_stack *fstack;
3094         struct stack_entry *entry;
3095         int stackidx;
3096
3097         /*
3098          * Add one, for this function and the call to stack_trace_save().
3099          * If regs is set, then these functions will not be in the way.
3100          */
3101 #ifndef CONFIG_UNWINDER_ORC
3102         if (!regs)
3103                 skip++;
3104 #endif
3105
3106         preempt_disable_notrace();
3107
3108         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3109
3110         /* This should never happen. If it does, yell once and skip */
3111         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3112                 goto out;
3113
3114         /*
3115          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3116          * interrupt will either see the value pre increment or post
3117          * increment. If the interrupt happens pre increment it will have
3118          * restored the counter when it returns.  We just need a barrier to
3119          * keep gcc from moving things around.
3120          */
3121         barrier();
3122
3123         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3124         size = ARRAY_SIZE(fstack->calls);
3125
3126         if (regs) {
3127                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3128                                                    size, skip);
3129         } else {
3130                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3131         }
3132
3133         size = nr_entries * sizeof(unsigned long);
3134         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3135                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3136                                     trace_ctx);
3137         if (!event)
3138                 goto out;
3139         entry = ring_buffer_event_data(event);
3140
3141         memcpy(&entry->caller, fstack->calls, size);
3142         entry->size = nr_entries;
3143
3144         if (!call_filter_check_discard(call, entry, buffer, event))
3145                 __buffer_unlock_commit(buffer, event);
3146
3147  out:
3148         /* Again, don't let gcc optimize things here */
3149         barrier();
3150         __this_cpu_dec(ftrace_stack_reserve);
3151         preempt_enable_notrace();
3152
3153 }
3154
3155 static inline void ftrace_trace_stack(struct trace_array *tr,
3156                                       struct trace_buffer *buffer,
3157                                       unsigned int trace_ctx,
3158                                       int skip, struct pt_regs *regs)
3159 {
3160         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3161                 return;
3162
3163         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3164 }
3165
3166 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3167                    int skip)
3168 {
3169         struct trace_buffer *buffer = tr->array_buffer.buffer;
3170
3171         if (rcu_is_watching()) {
3172                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3173                 return;
3174         }
3175
3176         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3177                 return;
3178
3179         /*
3180          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3181          * but if the above rcu_is_watching() failed, then the NMI
3182          * triggered someplace critical, and ct_irq_enter() should
3183          * not be called from NMI.
3184          */
3185         if (unlikely(in_nmi()))
3186                 return;
3187
3188         ct_irq_enter_irqson();
3189         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3190         ct_irq_exit_irqson();
3191 }
3192
3193 /**
3194  * trace_dump_stack - record a stack back trace in the trace buffer
3195  * @skip: Number of functions to skip (helper handlers)
3196  */
3197 void trace_dump_stack(int skip)
3198 {
3199         if (tracing_disabled || tracing_selftest_running)
3200                 return;
3201
3202 #ifndef CONFIG_UNWINDER_ORC
3203         /* Skip 1 to skip this function. */
3204         skip++;
3205 #endif
3206         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3207                              tracing_gen_ctx(), skip, NULL);
3208 }
3209 EXPORT_SYMBOL_GPL(trace_dump_stack);
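
/*
 * Example: a debugging site can record its current call chain into the
 * trace buffer instead of the console, e.g.:
 *
 *	if (unexpected_condition)	// hypothetical check
 *		trace_dump_stack(0);
 */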
3210
3211 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3212 static DEFINE_PER_CPU(int, user_stack_count);
3213
3214 static void
3215 ftrace_trace_userstack(struct trace_array *tr,
3216                        struct trace_buffer *buffer, unsigned int trace_ctx)
3217 {
3218         struct trace_event_call *call = &event_user_stack;
3219         struct ring_buffer_event *event;
3220         struct userstack_entry *entry;
3221
3222         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3223                 return;
3224
3225         /*
3226          * NMIs cannot handle page faults, even with fixups.
3227          * Saving the user stack can (and often does) fault.
3228          */
3229         if (unlikely(in_nmi()))
3230                 return;
3231
3232         /*
3233          * prevent recursion, since the user stack tracing may
3234          * trigger other kernel events.
3235          */
3236         preempt_disable();
3237         if (__this_cpu_read(user_stack_count))
3238                 goto out;
3239
3240         __this_cpu_inc(user_stack_count);
3241
3242         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3243                                             sizeof(*entry), trace_ctx);
3244         if (!event)
3245                 goto out_drop_count;
3246         entry   = ring_buffer_event_data(event);
3247
3248         entry->tgid             = current->tgid;
3249         memset(&entry->caller, 0, sizeof(entry->caller));
3250
3251         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3252         if (!call_filter_check_discard(call, entry, buffer, event))
3253                 __buffer_unlock_commit(buffer, event);
3254
3255  out_drop_count:
3256         __this_cpu_dec(user_stack_count);
3257  out:
3258         preempt_enable();
3259 }
3260 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3261 static void ftrace_trace_userstack(struct trace_array *tr,
3262                                    struct trace_buffer *buffer,
3263                                    unsigned int trace_ctx)
3264 {
3265 }
3266 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3267
3268 #endif /* CONFIG_STACKTRACE */
3269
3270 static inline void
3271 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3272                           unsigned long long delta)
3273 {
3274         entry->bottom_delta_ts = delta & U32_MAX;
3275         entry->top_delta_ts = (delta >> 32);
3276 }
3277
3278 void trace_last_func_repeats(struct trace_array *tr,
3279                              struct trace_func_repeats *last_info,
3280                              unsigned int trace_ctx)
3281 {
3282         struct trace_buffer *buffer = tr->array_buffer.buffer;
3283         struct func_repeats_entry *entry;
3284         struct ring_buffer_event *event;
3285         u64 delta;
3286
3287         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3288                                             sizeof(*entry), trace_ctx);
3289         if (!event)
3290                 return;
3291
3292         delta = ring_buffer_event_time_stamp(buffer, event) -
3293                 last_info->ts_last_call;
3294
3295         entry = ring_buffer_event_data(event);
3296         entry->ip = last_info->ip;
3297         entry->parent_ip = last_info->parent_ip;
3298         entry->count = last_info->count;
3299         func_repeats_set_delta_ts(entry, delta);
3300
3301         __buffer_unlock_commit(buffer, event);
3302 }
3303
3304 /* created for use with alloc_percpu */
3305 struct trace_buffer_struct {
3306         int nesting;
3307         char buffer[4][TRACE_BUF_SIZE];
3308 };
3309
3310 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3311
3312 /*
3313  * This allows for lockless recording.  If we're nested more deeply
3314  * than the four per-CPU buffers allow, this returns NULL.
3315  */
3316 static char *get_trace_buf(void)
3317 {
3318         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3319
3320         if (!trace_percpu_buffer || buffer->nesting >= 4)
3321                 return NULL;
3322
3323         buffer->nesting++;
3324
3325         /* Interrupts must see nesting incremented before we use the buffer */
3326         barrier();
3327         return &buffer->buffer[buffer->nesting - 1][0];
3328 }
3329
3330 static void put_trace_buf(void)
3331 {
3332         /* Don't let the decrement of nesting leak before this */
3333         barrier();
3334         this_cpu_dec(trace_percpu_buffer->nesting);
3335 }
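
/*
 * Usage sketch for the nesting scheme above (illustrative only): every
 * successful get_trace_buf() is paired with a put_trace_buf() on the
 * same CPU, with preemption disabled across the pair.  The four nesting
 * levels roughly cover one slot per context level (normal, softirq,
 * irq, NMI), so an interrupting context gets its own buffer instead of
 * corrupting the one belonging to the context it interrupted.
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */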
3336
3337 static int alloc_percpu_trace_buffer(void)
3338 {
3339         struct trace_buffer_struct __percpu *buffers;
3340
3341         if (trace_percpu_buffer)
3342                 return 0;
3343
3344         buffers = alloc_percpu(struct trace_buffer_struct);
3345         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3346                 return -ENOMEM;
3347
3348         trace_percpu_buffer = buffers;
3349         return 0;
3350 }
3351
3352 static int buffers_allocated;
3353
3354 void trace_printk_init_buffers(void)
3355 {
3356         if (buffers_allocated)
3357                 return;
3358
3359         if (alloc_percpu_trace_buffer())
3360                 return;
3361
3362         /* trace_printk() is for debug use only. Don't use it in production. */
3363
3364         pr_warn("\n");
3365         pr_warn("**********************************************************\n");
3366         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3367         pr_warn("**                                                      **\n");
3368         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3369         pr_warn("**                                                      **\n");
3370         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3371         pr_warn("** unsafe for production use.                           **\n");
3372         pr_warn("**                                                      **\n");
3373         pr_warn("** If you see this message and you are not debugging    **\n");
3374         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3375         pr_warn("**                                                      **\n");
3376         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3377         pr_warn("**********************************************************\n");
3378
3379         /* Expand the buffers to set size */
3380         tracing_update_buffers();
3381
3382         buffers_allocated = 1;
3383
3384         /*
3385          * trace_printk_init_buffers() can be called by modules.
3386          * If that happens, then we need to start cmdline recording
3387          * directly here. If the global_trace.array_buffer.buffer is
3388          * already allocated, then this was called by module code.
3389          */
3390         if (global_trace.array_buffer.buffer)
3391                 tracing_start_cmdline_record();
3392 }
3393 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3394
3395 void trace_printk_start_comm(void)
3396 {
3397         /* Start tracing comms if trace printk is set */
3398         if (!buffers_allocated)
3399                 return;
3400         tracing_start_cmdline_record();
3401 }
3402
3403 static void trace_printk_start_stop_comm(int enabled)
3404 {
3405         if (!buffers_allocated)
3406                 return;
3407
3408         if (enabled)
3409                 tracing_start_cmdline_record();
3410         else
3411                 tracing_stop_cmdline_record();
3412 }
3413
3414 /**
3415  * trace_vbprintk - write a binary message to the tracing buffer
3416  * @ip:    The address of the caller
3417  * @fmt:   The string format to write to the buffer
3418  * @args:  Arguments for @fmt
3419  */
3420 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3421 {
3422         struct trace_event_call *call = &event_bprint;
3423         struct ring_buffer_event *event;
3424         struct trace_buffer *buffer;
3425         struct trace_array *tr = &global_trace;
3426         struct bprint_entry *entry;
3427         unsigned int trace_ctx;
3428         char *tbuffer;
3429         int len = 0, size;
3430
3431         if (unlikely(tracing_selftest_running || tracing_disabled))
3432                 return 0;
3433
3434         /* Don't pollute graph traces with trace_vprintk internals */
3435         pause_graph_tracing();
3436
3437         trace_ctx = tracing_gen_ctx();
3438         preempt_disable_notrace();
3439
3440         tbuffer = get_trace_buf();
3441         if (!tbuffer) {
3442                 len = 0;
3443                 goto out_nobuffer;
3444         }
3445
3446         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3447
3448         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3449                 goto out_put;
3450
3451         size = sizeof(*entry) + sizeof(u32) * len;
3452         buffer = tr->array_buffer.buffer;
3453         ring_buffer_nest_start(buffer);
3454         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3455                                             trace_ctx);
3456         if (!event)
3457                 goto out;
3458         entry = ring_buffer_event_data(event);
3459         entry->ip                       = ip;
3460         entry->fmt                      = fmt;
3461
3462         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3463         if (!call_filter_check_discard(call, entry, buffer, event)) {
3464                 __buffer_unlock_commit(buffer, event);
3465                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3466         }
3467
3468 out:
3469         ring_buffer_nest_end(buffer);
3470 out_put:
3471         put_trace_buf();
3472
3473 out_nobuffer:
3474         preempt_enable_notrace();
3475         unpause_graph_tracing();
3476
3477         return len;
3478 }
3479 EXPORT_SYMBOL_GPL(trace_vbprintk);
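
/*
 * Minimal sketch of a varargs wrapper around trace_vbprintk()
 * (illustrative only; the wrapper name is hypothetical and the real
 * trace_printk() plumbing lives elsewhere, in trace_printk.c):
 *
 *	static int my_trace_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */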
3480
3481 __printf(3, 0)
3482 static int
3483 __trace_array_vprintk(struct trace_buffer *buffer,
3484                       unsigned long ip, const char *fmt, va_list args)
3485 {
3486         struct trace_event_call *call = &event_print;
3487         struct ring_buffer_event *event;
3488         int len = 0, size;
3489         struct print_entry *entry;
3490         unsigned int trace_ctx;
3491         char *tbuffer;
3492
3493         if (tracing_disabled || tracing_selftest_running)
3494                 return 0;
3495
3496         /* Don't pollute graph traces with trace_vprintk internals */
3497         pause_graph_tracing();
3498
3499         trace_ctx = tracing_gen_ctx();
3500         preempt_disable_notrace();
3501
3502
3503         tbuffer = get_trace_buf();
3504         if (!tbuffer) {
3505                 len = 0;
3506                 goto out_nobuffer;
3507         }
3508
3509         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3510
3511         size = sizeof(*entry) + len + 1;
3512         ring_buffer_nest_start(buffer);
3513         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3514                                             trace_ctx);
3515         if (!event)
3516                 goto out;
3517         entry = ring_buffer_event_data(event);
3518         entry->ip = ip;
3519
3520         memcpy(&entry->buf, tbuffer, len + 1);
3521         if (!call_filter_check_discard(call, entry, buffer, event)) {
3522                 __buffer_unlock_commit(buffer, event);
3523                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3524         }
3525
3526 out:
3527         ring_buffer_nest_end(buffer);
3528         put_trace_buf();
3529
3530 out_nobuffer:
3531         preempt_enable_notrace();
3532         unpause_graph_tracing();
3533
3534         return len;
3535 }
3536
3537 __printf(3, 0)
3538 int trace_array_vprintk(struct trace_array *tr,
3539                         unsigned long ip, const char *fmt, va_list args)
3540 {
3541         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3542 }
3543
3544 /**
3545  * trace_array_printk - Print a message to a specific instance
3546  * @tr: The instance trace_array descriptor
3547  * @ip: The instruction pointer that this is called from.
3548  * @fmt: The format to print (printf format)
3549  *
3550  * If a subsystem sets up its own instance, it may printk strings into
3551  * its tracing instance buffer using this function. Note, this function
3552  * will not write into the top level buffer (use trace_printk() for
3553  * that), as the top level buffer should only contain events that can
3554  * be individually disabled. trace_printk() is only for debugging a
3555  * kernel, and should never be left in for normal use.
3556  *
3557  * trace_array_printk() can be used instead, as it will not add noise
3558  * to the top level tracing buffer. See the usage sketch below the
3559  * function.
3560  *
3561  * Note, trace_array_init_printk() must be called on @tr before this
3562  * can be used.
3563  */
3564 __printf(3, 0)
3565 int trace_array_printk(struct trace_array *tr,
3566                        unsigned long ip, const char *fmt, ...)
3567 {
3568         int ret;
3569         va_list ap;
3570
3571         if (!tr)
3572                 return -ENOENT;
3573
3574         /* This is only allowed for created instances */
3575         if (tr == &global_trace)
3576                 return 0;
3577
3578         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3579                 return 0;
3580
3581         va_start(ap, fmt);
3582         ret = trace_array_vprintk(tr, ip, fmt, ap);
3583         va_end(ap);
3584         return ret;
3585 }
3586 EXPORT_SYMBOL_GPL(trace_array_printk);
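
/*
 * Usage sketch (illustrative only), assuming @tr was obtained and
 * initialized as described at trace_array_init_printk() below; the
 * value printed here is hypothetical:
 *
 *	trace_array_printk(tr, _THIS_IP_, "queue depth now %d\n", depth);
 *
 * The message then typically shows up under
 * /sys/kernel/tracing/instances/<name>/trace rather than in the top
 * level trace file.
 */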
3587
3588 /**
3589  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3590  * @tr: The trace array to initialize the buffers for
3591  *
3592  * As trace_array_printk() only writes into instances, calls to it are
3593  * OK to keep in the kernel (unlike trace_printk()). This needs to be
3594  * called before trace_array_printk() can be used on a trace_array.
3595  */
3596 int trace_array_init_printk(struct trace_array *tr)
3597 {
3598         if (!tr)
3599                 return -ENOENT;
3600
3601         /* This is only allowed for created instances */
3602         if (tr == &global_trace)
3603                 return -EINVAL;
3604
3605         return alloc_percpu_trace_buffer();
3606 }
3607 EXPORT_SYMBOL_GPL(trace_array_init_printk);
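
/*
 * A minimal sketch of how a module might set this up (illustrative
 * only; the instance name "my_subsys" is hypothetical, error handling
 * is simplified, and trace_array_get_by_name()/trace_array_put() are
 * assumed to be available to the module):
 *
 *	static struct trace_array *my_tr;
 *
 *	static int __init my_init(void)
 *	{
 *		my_tr = trace_array_get_by_name("my_subsys");
 *		if (!my_tr)
 *			return -ENOMEM;
 *		return trace_array_init_printk(my_tr);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		trace_array_put(my_tr);
 *	}
 */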
3608
3609 __printf(3, 4)
3610 int trace_array_printk_buf(struct trace_buffer *buffer,
3611                            unsigned long ip, const char *fmt, ...)
3612 {
3613         int ret;
3614         va_list ap;
3615
3616         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3617                 return 0;
3618
3619         va_start(ap, fmt);
3620         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3621         va_end(ap);
3622         return ret;
3623 }
3624
3625 __printf(2, 0)
3626 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3627 {
3628         return trace_array_vprintk(&global_trace, ip, fmt, args);
3629 }
3630 EXPORT_SYMBOL_GPL(trace_vprintk);
3631
3632 static void trace_iterator_increment(struct trace_iterator *iter)
3633 {
3634         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3635
3636         iter->idx++;
3637         if (buf_iter)
3638                 ring_buffer_iter_advance(buf_iter);
3639 }
3640
3641 static struct trace_entry *
3642 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3643                 unsigned long *lost_events)
3644 {
3645         struct ring_buffer_event *event;
3646         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3647
3648         if (buf_iter) {
3649                 event = ring_buffer_iter_peek(buf_iter, ts);
3650                 if (lost_events)
3651                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3652                                 (unsigned long)-1 : 0;
3653         } else {
3654                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3655                                          lost_events);
3656         }
3657
3658         if (event) {
3659                 iter->ent_size = ring_buffer_event_length(event);
3660                 return ring_buffer_event_data(event);
3661         }
3662         iter->ent_size = 0;
3663         return NULL;
3664 }
3665
3666 static struct trace_entry *
3667 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3668                   unsigned long *missing_events, u64 *ent_ts)
3669 {
3670         struct trace_buffer *buffer = iter->array_buffer->buffer;
3671         struct trace_entry *ent, *next = NULL;
3672         unsigned long lost_events = 0, next_lost = 0;
3673         int cpu_file = iter->cpu_file;
3674         u64 next_ts = 0, ts;
3675         int next_cpu = -1;
3676         int next_size = 0;
3677         int cpu;
3678
3679         /*
3680          * If we are in a per_cpu trace file, don't bother iterating over
3681          * all CPUs; peek at that CPU directly.
3682          */
3683         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3684                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3685                         return NULL;
3686                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3687                 if (ent_cpu)
3688                         *ent_cpu = cpu_file;
3689
3690                 return ent;
3691         }
3692
3693         for_each_tracing_cpu(cpu) {
3694
3695                 if (ring_buffer_empty_cpu(buffer, cpu))
3696                         continue;
3697
3698                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3699
3700                 /*
3701                  * Pick the entry with the smallest timestamp:
3702                  */
3703                 if (ent && (!next || ts < next_ts)) {
3704                         next = ent;
3705                         next_cpu = cpu;
3706                         next_ts = ts;
3707                         next_lost = lost_events;
3708                         next_size = iter->ent_size;
3709                 }
3710         }
3711
3712         iter->ent_size = next_size;
3713
3714         if (ent_cpu)
3715                 *ent_cpu = next_cpu;
3716
3717         if (ent_ts)
3718                 *ent_ts = next_ts;
3719
3720         if (missing_events)
3721                 *missing_events = next_lost;
3722
3723         return next;
3724 }
3725
3726 #define STATIC_FMT_BUF_SIZE     128
3727 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3728
3729 char *trace_iter_expand_format(struct trace_iterator *iter)
3730 {
3731         char *tmp;
3732
3733         /*
3734          * iter->tr is NULL when used with tp_printk, in which case this
3735          * can be called where it is not safe to call krealloc().
3736          */
3737         if (!iter->tr || iter->fmt == static_fmt_buf)
3738                 return NULL;
3739
3740         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3741                        GFP_KERNEL);
3742         if (tmp) {
3743                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3744                 iter->fmt = tmp;
3745         }
3746
3747         return tmp;
3748 }
3749
3750 /* Returns true if the string is safe to dereference from an event */
3751 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3752                            bool star, int len)
3753 {
3754         unsigned long addr = (unsigned long)str;
3755         struct trace_event *trace_event;
3756         struct trace_event_call *event;
3757
3758         /* Ignore strings with no length */
3759         if (star && !len)
3760                 return true;
3761
3762         /* OK if part of the event data */
3763         if ((addr >= (unsigned long)iter->ent) &&
3764             (addr < (unsigned long)iter->ent + iter->ent_size))
3765                 return true;
3766
3767         /* OK if part of the temp seq buffer */
3768         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3769             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3770                 return true;
3771
3772         /* Core rodata cannot be freed */
3773         if (is_kernel_rodata(addr))
3774                 return true;
3775
3776         if (trace_is_tracepoint_string(str))
3777                 return true;
3778
3779         /*
3780          * Now this could be a module event, referencing core module
3781          * data, which is OK.
3782          */
3783         if (!iter->ent)
3784                 return false;
3785
3786         trace_event = ftrace_find_event(iter->ent->type);
3787         if (!trace_event)
3788                 return false;
3789
3790         event = container_of(trace_event, struct trace_event_call, event);
3791         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3792                 return false;
3793
3794         /* Would rather have rodata, but this will suffice */
3795         if (within_module_core(addr, event->module))
3796                 return true;
3797
3798         return false;
3799 }
3800
3801 static const char *show_buffer(struct trace_seq *s)
3802 {
3803         struct seq_buf *seq = &s->seq;
3804
3805         seq_buf_terminate(seq);
3806
3807         return seq->buffer;
3808 }
3809
3810 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3811
3812 static int test_can_verify_check(const char *fmt, ...)
3813 {
3814         char buf[16];
3815         va_list ap;
3816         int ret;
3817
3818         /*
3819          * The verifier depends on vsnprintf() modifying the va_list
3820          * passed to it, i.e. on the va_list being passed by reference.
3821          * Some architectures (like x86_32) pass it by value, which
3822          * means that vsnprintf() does not modify the caller's va_list,
3823          * and the verifier would then need to be able to understand
3824          * all the values that vsnprintf can use. If it is passed by
3825          * value, then the verifier is disabled.
3826          */
3827         va_start(ap, fmt);
3828         vsnprintf(buf, 16, "%d", ap);
3829         ret = va_arg(ap, int);
3830         va_end(ap);
3831
3832         return ret;
3833 }
3834
3835 static void test_can_verify(void)
3836 {
3837         if (!test_can_verify_check("%d %d", 0, 1)) {
3838                 pr_info("trace event string verifier disabled\n");
3839                 static_branch_inc(&trace_no_verify);
3840         }
3841 }
3842
3843 /**
3844  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3845  * @iter: The iterator that holds the seq buffer and the event being printed
3846  * @fmt: The format used to print the event
3847  * @ap: The va_list holding the data to print from @fmt.
3848  *
3849  * This writes the data into the @iter->seq buffer using the data from
3850  * @fmt and @ap. If the format has a %s, then the source of the string
3851  * is examined to make sure it is safe to print, otherwise it will
3852  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3853  * pointer.
3854  */
3855 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3856                          va_list ap)
3857 {
3858         const char *p = fmt;
3859         const char *str;
3860         int i, j;
3861
3862         if (WARN_ON_ONCE(!fmt))
3863                 return;
3864
3865         if (static_branch_unlikely(&trace_no_verify))
3866                 goto print;
3867
3868         /* Don't bother checking when doing an ftrace_dump() */
3869         if (iter->fmt == static_fmt_buf)
3870                 goto print;
3871
3872         while (*p) {
3873                 bool star = false;
3874                 int len = 0;
3875
3876                 j = 0;
3877
3878                 /* We only care about %s and variants */
3879                 for (i = 0; p[i]; i++) {
3880                         if (i + 1 >= iter->fmt_size) {
3881                                 /*
3882                                  * If we can't expand the copy buffer,
3883                                  * just print it.
3884                                  */
3885                                 if (!trace_iter_expand_format(iter))
3886                                         goto print;
3887                         }
3888
3889                         if (p[i] == '\\' && p[i+1]) {
3890                                 i++;
3891                                 continue;
3892                         }
3893                         if (p[i] == '%') {
3894                                 /* Need to test cases like %08.*s */
3895                                 for (j = 1; p[i+j]; j++) {
3896                                         if (isdigit(p[i+j]) ||
3897                                             p[i+j] == '.')
3898                                                 continue;
3899                                         if (p[i+j] == '*') {
3900                                                 star = true;
3901                                                 continue;
3902                                         }
3903                                         break;
3904                                 }
3905                                 if (p[i+j] == 's')
3906                                         break;
3907                                 star = false;
3908                         }
3909                         j = 0;
3910                 }
3911                 /* If no %s found then just print normally */
3912                 if (!p[i])
3913                         break;
3914
3915                 /* Copy up to the %s, and print that */
3916                 strncpy(iter->fmt, p, i);
3917                 iter->fmt[i] = '\0';
3918                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3919
3920                 /*
3921                  * If iter->seq is full, the above call no longer guarantees
3922                  * that ap is in sync with fmt processing, and further calls
3923                  * to va_arg() can return wrong positional arguments.
3924                  *
3925                  * Ensure that ap is no longer used in this case.
3926                  */
3927                 if (iter->seq.full) {
3928                         p = "";
3929                         break;
3930                 }
3931
3932                 if (star)
3933                         len = va_arg(ap, int);
3934
3935                 /* The ap now points to the string data of the %s */
3936                 str = va_arg(ap, const char *);
3937
3938                 /*
3939                  * If you hit this warning, it is likely that the
3940                  * trace event in question used %s on a string that
3941                  * was saved at the time of the event, but may not be
3942                  * around when the trace is read. Use __string(),
3943                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3944                  * instead. See samples/trace_events/trace-events-sample.h
3945                  * for reference.
3946                  */
3947                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3948                               "fmt: '%s' current_buffer: '%s'",
3949                               fmt, show_buffer(&iter->seq))) {
3950                         int ret;
3951
3952                         /* Try to safely read the string */
3953                         if (star) {
3954                                 if (len + 1 > iter->fmt_size)
3955                                         len = iter->fmt_size - 1;
3956                                 if (len < 0)
3957                                         len = 0;
3958                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3959                                 iter->fmt[len] = 0;
3960                                 star = false;
3961                         } else {
3962                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3963                                                                   iter->fmt_size);
3964                         }
3965                         if (ret < 0)
3966                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3967                         else
3968                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3969                                                  str, iter->fmt);
3970                         str = "[UNSAFE-MEMORY]";
3971                         strcpy(iter->fmt, "%s");
3972                 } else {
3973                         strncpy(iter->fmt, p + i, j + 1);
3974                         iter->fmt[j+1] = '\0';
3975                 }
3976                 if (star)
3977                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3978                 else
3979                         trace_seq_printf(&iter->seq, iter->fmt, str);
3980
3981                 p += i + j + 1;
3982         }
3983  print:
3984         if (*p)
3985                 trace_seq_vprintf(&iter->seq, p, ap);
3986 }
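
/*
 * For reference, a sketch of the TRACE_EVENT() pieces that keep a
 * string safe for the check above: copy the string into the event
 * itself with __string()/__assign_str() and print it with __get_str()
 * (fragment only; "msg" is a hypothetical argument name):
 *
 *	TP_STRUCT__entry(
 *		__string(msg, msg)
 *	),
 *	TP_fast_assign(
 *		__assign_str(msg, msg);
 *	),
 *	TP_printk("%s", __get_str(msg))
 */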
3987
3988 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3989 {
3990         const char *p, *new_fmt;
3991         char *q;
3992
3993         if (WARN_ON_ONCE(!fmt))
3994                 return fmt;
3995
3996         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3997                 return fmt;
3998
3999         p = fmt;
4000         new_fmt = q = iter->fmt;
4001         while (*p) {
4002                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4003                         if (!trace_iter_expand_format(iter))
4004                                 return fmt;
4005
4006                         q += iter->fmt - new_fmt;
4007                         new_fmt = iter->fmt;
4008                 }
4009
4010                 *q++ = *p++;
4011
4012                 /* Replace %p with %px */
4013                 if (p[-1] == '%') {
4014                         if (p[0] == '%') {
4015                                 *q++ = *p++;
4016                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4017                                 *q++ = *p++;
4018                                 *q++ = 'x';
4019                         }
4020                 }
4021         }
4022         *q = '\0';
4023
4024         return new_fmt;
4025 }
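
/*
 * Example of the rewrite above (illustrative): with hash-ptr disabled,
 * a format like
 *
 *	"skb=%p len=%d"
 *
 * is emitted as
 *
 *	"skb=%px len=%d"
 *
 * while extended specifiers such as "%pS" are left untouched, since
 * '%p' is only rewritten when it is not followed by an alphanumeric
 * character.
 */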
4026
4027 #define STATIC_TEMP_BUF_SIZE    128
4028 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4029
4030 /* Find the next real entry, without updating the iterator itself */
4031 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4032                                           int *ent_cpu, u64 *ent_ts)
4033 {
4034         /* __find_next_entry will reset ent_size */
4035         int ent_size = iter->ent_size;
4036         struct trace_entry *entry;
4037
4038         /*
4039          * If called from ftrace_dump(), then the iter->temp buffer
4040          * will be the static_temp_buf and not created from kmalloc.
4041          * If the entry size is greater than the buffer, we cannot
4042          * save it. Just return NULL in that case. This is only
4043          * used to add markers when two consecutive events' time
4044          * stamps have a large delta. See trace_print_lat_context().
4045          */
4046         if (iter->temp == static_temp_buf &&
4047             STATIC_TEMP_BUF_SIZE < ent_size)
4048                 return NULL;
4049
4050         /*
4051          * The __find_next_entry() may call peek_next_entry(), which may
4052          * call ring_buffer_peek(), which can make the contents of
4053          * iter->ent undefined. Need to copy iter->ent now.
4054          */
4055         if (iter->ent && iter->ent != iter->temp) {
4056                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4057                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4058                         void *temp;
4059                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4060                         if (!temp)
4061                                 return NULL;
4062                         kfree(iter->temp);
4063                         iter->temp = temp;
4064                         iter->temp_size = iter->ent_size;
4065                 }
4066                 memcpy(iter->temp, iter->ent, iter->ent_size);
4067                 iter->ent = iter->temp;
4068         }
4069         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4070         /* Put back the original ent_size */
4071         iter->ent_size = ent_size;
4072
4073         return entry;
4074 }
4075
4076 /* Find the next real entry, and increment the iterator to the next entry */
4077 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4078 {
4079         iter->ent = __find_next_entry(iter, &iter->cpu,
4080                                       &iter->lost_events, &iter->ts);
4081
4082         if (iter->ent)
4083                 trace_iterator_increment(iter);
4084
4085         return iter->ent ? iter : NULL;
4086 }
4087
4088 static void trace_consume(struct trace_iterator *iter)
4089 {
4090         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4091                             &iter->lost_events);
4092 }
4093
4094 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4095 {
4096         struct trace_iterator *iter = m->private;
4097         int i = (int)*pos;
4098         void *ent;
4099
4100         WARN_ON_ONCE(iter->leftover);
4101
4102         (*pos)++;
4103
4104         /* can't go backwards */
4105         if (iter->idx > i)
4106                 return NULL;
4107
4108         if (iter->idx < 0)
4109                 ent = trace_find_next_entry_inc(iter);
4110         else
4111                 ent = iter;
4112
4113         while (ent && iter->idx < i)
4114                 ent = trace_find_next_entry_inc(iter);
4115
4116         iter->pos = *pos;
4117
4118         return ent;
4119 }
4120
4121 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4122 {
4123         struct ring_buffer_iter *buf_iter;
4124         unsigned long entries = 0;
4125         u64 ts;
4126
4127         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4128
4129         buf_iter = trace_buffer_iter(iter, cpu);
4130         if (!buf_iter)
4131                 return;
4132
4133         ring_buffer_iter_reset(buf_iter);
4134
4135         /*
4136          * With the max latency tracers, we could have the case
4137          * that a reset never took place on a cpu. This is evident
4138          * when the timestamp is before the start of the buffer.
4139          */
4140         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4141                 if (ts >= iter->array_buffer->time_start)
4142                         break;
4143                 entries++;
4144                 ring_buffer_iter_advance(buf_iter);
4145         }
4146
4147         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4148 }
4149
4150 /*
4151  * The current tracer is copied to avoid holding a global lock
4152  * the whole time.
4153  */
4154 static void *s_start(struct seq_file *m, loff_t *pos)
4155 {
4156         struct trace_iterator *iter = m->private;
4157         struct trace_array *tr = iter->tr;
4158         int cpu_file = iter->cpu_file;
4159         void *p = NULL;
4160         loff_t l = 0;
4161         int cpu;
4162
4163         /*
4164          * Copy the tracer to avoid using a global lock all around.
4165          * iter->trace is a copy of current_trace; the name pointer
4166          * may be compared instead of using strcmp(), as iter->trace->name
4167          * will point to the same string as current_trace->name.
4168          */
4169         mutex_lock(&trace_types_lock);
4170         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4171                 *iter->trace = *tr->current_trace;
4172         mutex_unlock(&trace_types_lock);
4173
4174 #ifdef CONFIG_TRACER_MAX_TRACE
4175         if (iter->snapshot && iter->trace->use_max_tr)
4176                 return ERR_PTR(-EBUSY);
4177 #endif
4178
4179         if (*pos != iter->pos) {
4180                 iter->ent = NULL;
4181                 iter->cpu = 0;
4182                 iter->idx = -1;
4183
4184                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4185                         for_each_tracing_cpu(cpu)
4186                                 tracing_iter_reset(iter, cpu);
4187                 } else
4188                         tracing_iter_reset(iter, cpu_file);
4189
4190                 iter->leftover = 0;
4191                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4192                         ;
4193
4194         } else {
4195                 /*
4196                  * If we overflowed the seq_file before, then we want
4197                  * to just reuse the trace_seq buffer again.
4198                  */
4199                 if (iter->leftover)
4200                         p = iter;
4201                 else {
4202                         l = *pos - 1;
4203                         p = s_next(m, p, &l);
4204                 }
4205         }
4206
4207         trace_event_read_lock();
4208         trace_access_lock(cpu_file);
4209         return p;
4210 }
4211
4212 static void s_stop(struct seq_file *m, void *p)
4213 {
4214         struct trace_iterator *iter = m->private;
4215
4216 #ifdef CONFIG_TRACER_MAX_TRACE
4217         if (iter->snapshot && iter->trace->use_max_tr)
4218                 return;
4219 #endif
4220
4221         trace_access_unlock(iter->cpu_file);
4222         trace_event_read_unlock();
4223 }
4224
4225 static void
4226 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4227                       unsigned long *entries, int cpu)
4228 {
4229         unsigned long count;
4230
4231         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4232         /*
4233          * If this buffer has skipped entries, then we hold all
4234          * entries for the trace and we need to ignore the
4235          * ones before the time stamp.
4236          */
4237         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4238                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4239                 /* total is the same as the entries */
4240                 *total = count;
4241         } else
4242                 *total = count +
4243                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4244         *entries = count;
4245 }
4246
4247 static void
4248 get_total_entries(struct array_buffer *buf,
4249                   unsigned long *total, unsigned long *entries)
4250 {
4251         unsigned long t, e;
4252         int cpu;
4253
4254         *total = 0;
4255         *entries = 0;
4256
4257         for_each_tracing_cpu(cpu) {
4258                 get_total_entries_cpu(buf, &t, &e, cpu);
4259                 *total += t;
4260                 *entries += e;
4261         }
4262 }
4263
4264 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4265 {
4266         unsigned long total, entries;
4267
4268         if (!tr)
4269                 tr = &global_trace;
4270
4271         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4272
4273         return entries;
4274 }
4275
4276 unsigned long trace_total_entries(struct trace_array *tr)
4277 {
4278         unsigned long total, entries;
4279
4280         if (!tr)
4281                 tr = &global_trace;
4282
4283         get_total_entries(&tr->array_buffer, &total, &entries);
4284
4285         return entries;
4286 }
4287
4288 static void print_lat_help_header(struct seq_file *m)
4289 {
4290         seq_puts(m, "#                    _------=> CPU#            \n"
4291                     "#                   / _-----=> irqs-off/BH-disabled\n"
4292                     "#                  | / _----=> need-resched    \n"
4293                     "#                  || / _---=> hardirq/softirq \n"
4294                     "#                  ||| / _--=> preempt-depth   \n"
4295                     "#                  |||| / _-=> migrate-disable \n"
4296                     "#                  ||||| /     delay           \n"
4297                     "#  cmd     pid     |||||| time  |   caller     \n"
4298                     "#     \\   /        ||||||  \\    |    /       \n");
4299 }
4300
4301 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4302 {
4303         unsigned long total;
4304         unsigned long entries;
4305
4306         get_total_entries(buf, &total, &entries);
4307         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4308                    entries, total, num_online_cpus());
4309         seq_puts(m, "#\n");
4310 }
4311
4312 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4313                                    unsigned int flags)
4314 {
4315         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4316
4317         print_event_info(buf, m);
4318
4319         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4320         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4321 }
4322
4323 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4324                                        unsigned int flags)
4325 {
4326         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4327         static const char space[] = "            ";
4328         int prec = tgid ? 12 : 2;
4329
4330         print_event_info(buf, m);
4331
4332         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4333         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4334         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4335         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4336         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4337         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4338         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4339         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4340 }
4341
4342 void
4343 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4344 {
4345         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4346         struct array_buffer *buf = iter->array_buffer;
4347         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4348         struct tracer *type = iter->trace;
4349         unsigned long entries;
4350         unsigned long total;
4351         const char *name = type->name;
4352
4353         get_total_entries(buf, &total, &entries);
4354
4355         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4356                    name, UTS_RELEASE);
4357         seq_puts(m, "# -----------------------------------"
4358                  "---------------------------------\n");
4359         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4360                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4361                    nsecs_to_usecs(data->saved_latency),
4362                    entries,
4363                    total,
4364                    buf->cpu,
4365                    preempt_model_none()      ? "server" :
4366                    preempt_model_voluntary() ? "desktop" :
4367                    preempt_model_full()      ? "preempt" :
4368                    preempt_model_rt()        ? "preempt_rt" :
4369                    "unknown",
4370                    /* These are reserved for later use */
4371                    0, 0, 0, 0);
4372 #ifdef CONFIG_SMP
4373         seq_printf(m, " #P:%d)\n", num_online_cpus());
4374 #else
4375         seq_puts(m, ")\n");
4376 #endif
4377         seq_puts(m, "#    -----------------\n");
4378         seq_printf(m, "#    | task: %.16s-%d "
4379                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4380                    data->comm, data->pid,
4381                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4382                    data->policy, data->rt_priority);
4383         seq_puts(m, "#    -----------------\n");
4384
4385         if (data->critical_start) {
4386                 seq_puts(m, "#  => started at: ");
4387                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4388                 trace_print_seq(m, &iter->seq);
4389                 seq_puts(m, "\n#  => ended at:   ");
4390                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4391                 trace_print_seq(m, &iter->seq);
4392                 seq_puts(m, "\n#\n");
4393         }
4394
4395         seq_puts(m, "#\n");
4396 }
4397
4398 static void test_cpu_buff_start(struct trace_iterator *iter)
4399 {
4400         struct trace_seq *s = &iter->seq;
4401         struct trace_array *tr = iter->tr;
4402
4403         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4404                 return;
4405
4406         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4407                 return;
4408
4409         if (cpumask_available(iter->started) &&
4410             cpumask_test_cpu(iter->cpu, iter->started))
4411                 return;
4412
4413         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4414                 return;
4415
4416         if (cpumask_available(iter->started))
4417                 cpumask_set_cpu(iter->cpu, iter->started);
4418
4419         /* Don't print started cpu buffer for the first entry of the trace */
4420         if (iter->idx > 1)
4421                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4422                                 iter->cpu);
4423 }
4424
4425 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4426 {
4427         struct trace_array *tr = iter->tr;
4428         struct trace_seq *s = &iter->seq;
4429         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4430         struct trace_entry *entry;
4431         struct trace_event *event;
4432
4433         entry = iter->ent;
4434
4435         test_cpu_buff_start(iter);
4436
4437         event = ftrace_find_event(entry->type);
4438
4439         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4440                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4441                         trace_print_lat_context(iter);
4442                 else
4443                         trace_print_context(iter);
4444         }
4445
4446         if (trace_seq_has_overflowed(s))
4447                 return TRACE_TYPE_PARTIAL_LINE;
4448
4449         if (event) {
4450                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4451                         return print_event_fields(iter, event);
4452                 return event->funcs->trace(iter, sym_flags, event);
4453         }
4454
4455         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4456
4457         return trace_handle_return(s);
4458 }
4459
4460 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4461 {
4462         struct trace_array *tr = iter->tr;
4463         struct trace_seq *s = &iter->seq;
4464         struct trace_entry *entry;
4465         struct trace_event *event;
4466
4467         entry = iter->ent;
4468
4469         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4470                 trace_seq_printf(s, "%d %d %llu ",
4471                                  entry->pid, iter->cpu, iter->ts);
4472
4473         if (trace_seq_has_overflowed(s))
4474                 return TRACE_TYPE_PARTIAL_LINE;
4475
4476         event = ftrace_find_event(entry->type);
4477         if (event)
4478                 return event->funcs->raw(iter, 0, event);
4479
4480         trace_seq_printf(s, "%d ?\n", entry->type);
4481
4482         return trace_handle_return(s);
4483 }
4484
4485 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4486 {
4487         struct trace_array *tr = iter->tr;
4488         struct trace_seq *s = &iter->seq;
4489         unsigned char newline = '\n';
4490         struct trace_entry *entry;
4491         struct trace_event *event;
4492
4493         entry = iter->ent;
4494
4495         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4496                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4497                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4498                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4499                 if (trace_seq_has_overflowed(s))
4500                         return TRACE_TYPE_PARTIAL_LINE;
4501         }
4502
4503         event = ftrace_find_event(entry->type);
4504         if (event) {
4505                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4506                 if (ret != TRACE_TYPE_HANDLED)
4507                         return ret;
4508         }
4509
4510         SEQ_PUT_FIELD(s, newline);
4511
4512         return trace_handle_return(s);
4513 }
4514
4515 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4516 {
4517         struct trace_array *tr = iter->tr;
4518         struct trace_seq *s = &iter->seq;
4519         struct trace_entry *entry;
4520         struct trace_event *event;
4521
4522         entry = iter->ent;
4523
4524         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4525                 SEQ_PUT_FIELD(s, entry->pid);
4526                 SEQ_PUT_FIELD(s, iter->cpu);
4527                 SEQ_PUT_FIELD(s, iter->ts);
4528                 if (trace_seq_has_overflowed(s))
4529                         return TRACE_TYPE_PARTIAL_LINE;
4530         }
4531
4532         event = ftrace_find_event(entry->type);
4533         return event ? event->funcs->binary(iter, 0, event) :
4534                 TRACE_TYPE_HANDLED;
4535 }
4536
4537 int trace_empty(struct trace_iterator *iter)
4538 {
4539         struct ring_buffer_iter *buf_iter;
4540         int cpu;
4541
4542         /* If we are looking at one CPU buffer, only check that one */
4543         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4544                 cpu = iter->cpu_file;
4545                 buf_iter = trace_buffer_iter(iter, cpu);
4546                 if (buf_iter) {
4547                         if (!ring_buffer_iter_empty(buf_iter))
4548                                 return 0;
4549                 } else {
4550                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4551                                 return 0;
4552                 }
4553                 return 1;
4554         }
4555
4556         for_each_tracing_cpu(cpu) {
4557                 buf_iter = trace_buffer_iter(iter, cpu);
4558                 if (buf_iter) {
4559                         if (!ring_buffer_iter_empty(buf_iter))
4560                                 return 0;
4561                 } else {
4562                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4563                                 return 0;
4564                 }
4565         }
4566
4567         return 1;
4568 }
4569
4570 /*  Called with trace_event_read_lock() held. */
4571 enum print_line_t print_trace_line(struct trace_iterator *iter)
4572 {
4573         struct trace_array *tr = iter->tr;
4574         unsigned long trace_flags = tr->trace_flags;
4575         enum print_line_t ret;
4576
4577         if (iter->lost_events) {
4578                 if (iter->lost_events == (unsigned long)-1)
4579                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4580                                          iter->cpu);
4581                 else
4582                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4583                                          iter->cpu, iter->lost_events);
4584                 if (trace_seq_has_overflowed(&iter->seq))
4585                         return TRACE_TYPE_PARTIAL_LINE;
4586         }
4587
4588         if (iter->trace && iter->trace->print_line) {
4589                 ret = iter->trace->print_line(iter);
4590                 if (ret != TRACE_TYPE_UNHANDLED)
4591                         return ret;
4592         }
4593
4594         if (iter->ent->type == TRACE_BPUTS &&
4595                         trace_flags & TRACE_ITER_PRINTK &&
4596                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4597                 return trace_print_bputs_msg_only(iter);
4598
4599         if (iter->ent->type == TRACE_BPRINT &&
4600                         trace_flags & TRACE_ITER_PRINTK &&
4601                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4602                 return trace_print_bprintk_msg_only(iter);
4603
4604         if (iter->ent->type == TRACE_PRINT &&
4605                         trace_flags & TRACE_ITER_PRINTK &&
4606                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4607                 return trace_print_printk_msg_only(iter);
4608
4609         if (trace_flags & TRACE_ITER_BIN)
4610                 return print_bin_fmt(iter);
4611
4612         if (trace_flags & TRACE_ITER_HEX)
4613                 return print_hex_fmt(iter);
4614
4615         if (trace_flags & TRACE_ITER_RAW)
4616                 return print_raw_fmt(iter);
4617
4618         return print_trace_fmt(iter);
4619 }
4620
4621 void trace_latency_header(struct seq_file *m)
4622 {
4623         struct trace_iterator *iter = m->private;
4624         struct trace_array *tr = iter->tr;
4625
4626         /* print nothing if the buffers are empty */
4627         if (trace_empty(iter))
4628                 return;
4629
4630         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4631                 print_trace_header(m, iter);
4632
4633         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4634                 print_lat_help_header(m);
4635 }
4636
4637 void trace_default_header(struct seq_file *m)
4638 {
4639         struct trace_iterator *iter = m->private;
4640         struct trace_array *tr = iter->tr;
4641         unsigned long trace_flags = tr->trace_flags;
4642
4643         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4644                 return;
4645
4646         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4647                 /* print nothing if the buffers are empty */
4648                 if (trace_empty(iter))
4649                         return;
4650                 print_trace_header(m, iter);
4651                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4652                         print_lat_help_header(m);
4653         } else {
4654                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4655                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4656                                 print_func_help_header_irq(iter->array_buffer,
4657                                                            m, trace_flags);
4658                         else
4659                                 print_func_help_header(iter->array_buffer, m,
4660                                                        trace_flags);
4661                 }
4662         }
4663 }
4664
4665 static void test_ftrace_alive(struct seq_file *m)
4666 {
4667         if (!ftrace_is_dead())
4668                 return;
4669         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4670                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4671 }
4672
4673 #ifdef CONFIG_TRACER_MAX_TRACE
4674 static void show_snapshot_main_help(struct seq_file *m)
4675 {
4676         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4677                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4678                     "#                      Takes a snapshot of the main buffer.\n"
4679                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4680                     "#                      (Doesn't have to be '2'; works with any number that\n"
4681                     "#                       is not a '0' or '1')\n");
4682 }
4683
4684 static void show_snapshot_percpu_help(struct seq_file *m)
4685 {
4686         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4687 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4688         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4689                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4690 #else
4691         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4692                     "#                     Must use main snapshot file to allocate.\n");
4693 #endif
4694         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4695                     "#                      (Doesn't have to be '2'; works with any number that\n"
4696                     "#                       is not a '0' or '1')\n");
4697 }
4698
4699 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4700 {
4701         if (iter->tr->allocated_snapshot)
4702                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4703         else
4704                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4705
4706         seq_puts(m, "# Snapshot commands:\n");
4707         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4708                 show_snapshot_main_help(m);
4709         else
4710                 show_snapshot_percpu_help(m);
4711 }
4712 #else
4713 /* Should never be called */
4714 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4715 #endif
4716
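/*
 * seq_file ->show() callback used by tracer_seq_ops (the "trace" file).
 * With no current entry (iter->ent == NULL) it emits the header block;
 * if a previous call overflowed the seq_file buffer (iter->leftover) it
 * flushes that partial line first; otherwise it formats the next entry
 * into iter->seq and copies it out.
 */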
4717 static int s_show(struct seq_file *m, void *v)
4718 {
4719         struct trace_iterator *iter = v;
4720         int ret;
4721
4722         if (iter->ent == NULL) {
4723                 if (iter->tr) {
4724                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4725                         seq_puts(m, "#\n");
4726                         test_ftrace_alive(m);
4727                 }
4728                 if (iter->snapshot && trace_empty(iter))
4729                         print_snapshot_help(m, iter);
4730                 else if (iter->trace && iter->trace->print_header)
4731                         iter->trace->print_header(m);
4732                 else
4733                         trace_default_header(m);
4734
4735         } else if (iter->leftover) {
4736                 /*
4737                  * If we filled the seq_file buffer earlier, we
4738                  * want to just show it now.
4739                  */
4740                 ret = trace_print_seq(m, &iter->seq);
4741
4742                 /* ret should this time be zero, but you never know */
4743                 iter->leftover = ret;
4744
4745         } else {
4746                 print_trace_line(iter);
4747                 ret = trace_print_seq(m, &iter->seq);
4748                 /*
4749                  * If we overflow the seq_file buffer, then it will
4750                  * ask us for this data again at start up.
4751                  * Use that instead.
4752                  *  ret is 0 if seq_file write succeeded.
4753                  *        -1 otherwise.
4754                  */
4755                 iter->leftover = ret;
4756         }
4757
4758         return 0;
4759 }
4760
4761 /*
4762  * Should be used after trace_array_get(), trace_types_lock
4763  * ensures that i_cdev was already initialized.
4764  */
4765 static inline int tracing_get_cpu(struct inode *inode)
4766 {
4767         if (inode->i_cdev) /* See trace_create_cpu_file() */
4768                 return (long)inode->i_cdev - 1;
4769         return RING_BUFFER_ALL_CPUS;
4770 }
4771
4772 static const struct seq_operations tracer_seq_ops = {
4773         .start          = s_start,
4774         .next           = s_next,
4775         .stop           = s_stop,
4776         .show           = s_show,
4777 };
4778
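/*
 * Set up a trace_iterator for reading a buffer: allocate the per-CPU
 * ring buffer iterators, take a private copy of the current tracer so
 * it cannot change mid-read, and stop tracing while the dump is in
 * progress, unless this is the snapshot file or pause-on-trace is
 * disabled.
 */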
4779 static struct trace_iterator *
4780 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4781 {
4782         struct trace_array *tr = inode->i_private;
4783         struct trace_iterator *iter;
4784         int cpu;
4785
4786         if (tracing_disabled)
4787                 return ERR_PTR(-ENODEV);
4788
4789         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4790         if (!iter)
4791                 return ERR_PTR(-ENOMEM);
4792
4793         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4794                                     GFP_KERNEL);
4795         if (!iter->buffer_iter)
4796                 goto release;
4797
4798         /*
4799          * trace_find_next_entry() may need to save off iter->ent.
4800          * It will place it into the iter->temp buffer. As most
4801          * events are less than 128 bytes, allocate a buffer of that size.
4802          * If one is greater, then trace_find_next_entry() will
4803          * allocate a new buffer to adjust for the bigger iter->ent.
4804          * It's not critical if it fails to get allocated here.
4805          */
4806         iter->temp = kmalloc(128, GFP_KERNEL);
4807         if (iter->temp)
4808                 iter->temp_size = 128;
4809
4810         /*
4811          * trace_event_printf() may need to modify given format
4812          * string to replace %p with %px so that it shows real address
4813          * instead of a hash value. However, that is only needed for event
4814          * tracing; other tracers may not need it. Defer the allocation
4815          * until it is needed.
4816          */
4817         iter->fmt = NULL;
4818         iter->fmt_size = 0;
4819
4820         /*
4821          * We make a copy of the current tracer to avoid concurrent
4822          * changes on it while we are reading.
4823          */
4824         mutex_lock(&trace_types_lock);
4825         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4826         if (!iter->trace)
4827                 goto fail;
4828
4829         *iter->trace = *tr->current_trace;
4830
4831         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4832                 goto fail;
4833
4834         iter->tr = tr;
4835
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837         /* Currently only the top directory has a snapshot */
4838         if (tr->current_trace->print_max || snapshot)
4839                 iter->array_buffer = &tr->max_buffer;
4840         else
4841 #endif
4842                 iter->array_buffer = &tr->array_buffer;
4843         iter->snapshot = snapshot;
4844         iter->pos = -1;
4845         iter->cpu_file = tracing_get_cpu(inode);
4846         mutex_init(&iter->mutex);
4847
4848         /* Notify the tracer early, before we stop tracing. */
4849         if (iter->trace->open)
4850                 iter->trace->open(iter);
4851
4852         /* Annotate start of buffers if we had overruns */
4853         if (ring_buffer_overruns(iter->array_buffer->buffer))
4854                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4855
4856         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4857         if (trace_clocks[tr->clock_id].in_ns)
4858                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4859
4860         /*
4861          * If pause-on-trace is enabled, then stop the trace while
4862          * dumping, unless this is the "snapshot" file
4863          */
4864         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4865                 tracing_stop_tr(tr);
4866
4867         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4868                 for_each_tracing_cpu(cpu) {
4869                         iter->buffer_iter[cpu] =
4870                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4871                                                          cpu, GFP_KERNEL);
4872                 }
4873                 ring_buffer_read_prepare_sync();
4874                 for_each_tracing_cpu(cpu) {
4875                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4876                         tracing_iter_reset(iter, cpu);
4877                 }
4878         } else {
4879                 cpu = iter->cpu_file;
4880                 iter->buffer_iter[cpu] =
4881                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4882                                                  cpu, GFP_KERNEL);
4883                 ring_buffer_read_prepare_sync();
4884                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4885                 tracing_iter_reset(iter, cpu);
4886         }
4887
4888         mutex_unlock(&trace_types_lock);
4889
4890         return iter;
4891
4892  fail:
4893         mutex_unlock(&trace_types_lock);
4894         kfree(iter->trace);
4895         kfree(iter->temp);
4896         kfree(iter->buffer_iter);
4897 release:
4898         seq_release_private(inode, file);
4899         return ERR_PTR(-ENOMEM);
4900 }
4901
4902 int tracing_open_generic(struct inode *inode, struct file *filp)
4903 {
4904         int ret;
4905
4906         ret = tracing_check_open_get_tr(NULL);
4907         if (ret)
4908                 return ret;
4909
4910         filp->private_data = inode->i_private;
4911         return 0;
4912 }
4913
4914 bool tracing_is_disabled(void)
4915 {
4916         return (tracing_disabled) ? true : false;
4917 }
4918
4919 /*
4920  * Open and update trace_array ref count.
4921  * Must have the current trace_array passed to it.
4922  */
4923 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4924 {
4925         struct trace_array *tr = inode->i_private;
4926         int ret;
4927
4928         ret = tracing_check_open_get_tr(tr);
4929         if (ret)
4930                 return ret;
4931
4932         filp->private_data = inode->i_private;
4933
4934         return 0;
4935 }
4936
4937 static int tracing_mark_open(struct inode *inode, struct file *filp)
4938 {
4939         stream_open(inode, filp);
4940         return tracing_open_generic_tr(inode, filp);
4941 }
4942
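/*
 * Release callback for the "trace" file: tear down the per-CPU ring
 * buffer iterators, run the tracer's ->close() hook, restart tracing
 * if __tracing_open() stopped it, and free the iterator.
 */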
4943 static int tracing_release(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946         struct seq_file *m = file->private_data;
4947         struct trace_iterator *iter;
4948         int cpu;
4949
4950         if (!(file->f_mode & FMODE_READ)) {
4951                 trace_array_put(tr);
4952                 return 0;
4953         }
4954
4955         /* Writes do not use seq_file */
4956         iter = m->private;
4957         mutex_lock(&trace_types_lock);
4958
4959         for_each_tracing_cpu(cpu) {
4960                 if (iter->buffer_iter[cpu])
4961                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4962         }
4963
4964         if (iter->trace && iter->trace->close)
4965                 iter->trace->close(iter);
4966
4967         if (!iter->snapshot && tr->stop_count)
4968                 /* reenable tracing if it was previously enabled */
4969                 tracing_start_tr(tr);
4970
4971         __trace_array_put(tr);
4972
4973         mutex_unlock(&trace_types_lock);
4974
4975         mutex_destroy(&iter->mutex);
4976         free_cpumask_var(iter->started);
4977         kfree(iter->fmt);
4978         kfree(iter->temp);
4979         kfree(iter->trace);
4980         kfree(iter->buffer_iter);
4981         seq_release_private(inode, file);
4982
4983         return 0;
4984 }
4985
4986 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4987 {
4988         struct trace_array *tr = inode->i_private;
4989
4990         trace_array_put(tr);
4991         return 0;
4992 }
4993
4994 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4995 {
4996         struct trace_array *tr = inode->i_private;
4997
4998         trace_array_put(tr);
4999
5000         return single_release(inode, file);
5001 }
5002
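/*
 * Open callback for the "trace" file.  Opening with O_TRUNC (e.g.
 * "echo > trace") resets the buffer for the selected CPU (or all CPUs);
 * opening for read builds the iterator via __tracing_open().
 */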
5003 static int tracing_open(struct inode *inode, struct file *file)
5004 {
5005         struct trace_array *tr = inode->i_private;
5006         struct trace_iterator *iter;
5007         int ret;
5008
5009         ret = tracing_check_open_get_tr(tr);
5010         if (ret)
5011                 return ret;
5012
5013         /* If this file was opened for write, then erase its contents */
5014         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5015                 int cpu = tracing_get_cpu(inode);
5016                 struct array_buffer *trace_buf = &tr->array_buffer;
5017
5018 #ifdef CONFIG_TRACER_MAX_TRACE
5019                 if (tr->current_trace->print_max)
5020                         trace_buf = &tr->max_buffer;
5021 #endif
5022
5023                 if (cpu == RING_BUFFER_ALL_CPUS)
5024                         tracing_reset_online_cpus(trace_buf);
5025                 else
5026                         tracing_reset_cpu(trace_buf, cpu);
5027         }
5028
5029         if (file->f_mode & FMODE_READ) {
5030                 iter = __tracing_open(inode, file, false);
5031                 if (IS_ERR(iter))
5032                         ret = PTR_ERR(iter);
5033                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5034                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5035         }
5036
5037         if (ret < 0)
5038                 trace_array_put(tr);
5039
5040         return ret;
5041 }
5042
5043 /*
5044  * Some tracers are not suitable for instance buffers.
5045  * A tracer is always available for the global array (toplevel)
5046  * or if it explicitly states that it is.
5047  */
5048 static bool
5049 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5050 {
5051         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5052 }
5053
5054 /* Find the next tracer that this trace array may use */
5055 static struct tracer *
5056 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5057 {
5058         while (t && !trace_ok_for_array(t, tr))
5059                 t = t->next;
5060
5061         return t;
5062 }
5063
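/*
 * seq_file iteration over the registered tracers (the available_tracers
 * file), skipping tracers that are not usable by this trace array.
 */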
5064 static void *
5065 t_next(struct seq_file *m, void *v, loff_t *pos)
5066 {
5067         struct trace_array *tr = m->private;
5068         struct tracer *t = v;
5069
5070         (*pos)++;
5071
5072         if (t)
5073                 t = get_tracer_for_array(tr, t->next);
5074
5075         return t;
5076 }
5077
5078 static void *t_start(struct seq_file *m, loff_t *pos)
5079 {
5080         struct trace_array *tr = m->private;
5081         struct tracer *t;
5082         loff_t l = 0;
5083
5084         mutex_lock(&trace_types_lock);
5085
5086         t = get_tracer_for_array(tr, trace_types);
5087         for (; t && l < *pos; t = t_next(m, t, &l))
5088                 ;
5089
5090         return t;
5091 }
5092
5093 static void t_stop(struct seq_file *m, void *p)
5094 {
5095         mutex_unlock(&trace_types_lock);
5096 }
5097
5098 static int t_show(struct seq_file *m, void *v)
5099 {
5100         struct tracer *t = v;
5101
5102         if (!t)
5103                 return 0;
5104
5105         seq_puts(m, t->name);
5106         if (t->next)
5107                 seq_putc(m, ' ');
5108         else
5109                 seq_putc(m, '\n');
5110
5111         return 0;
5112 }
5113
5114 static const struct seq_operations show_traces_seq_ops = {
5115         .start          = t_start,
5116         .next           = t_next,
5117         .stop           = t_stop,
5118         .show           = t_show,
5119 };
5120
5121 static int show_traces_open(struct inode *inode, struct file *file)
5122 {
5123         struct trace_array *tr = inode->i_private;
5124         struct seq_file *m;
5125         int ret;
5126
5127         ret = tracing_check_open_get_tr(tr);
5128         if (ret)
5129                 return ret;
5130
5131         ret = seq_open(file, &show_traces_seq_ops);
5132         if (ret) {
5133                 trace_array_put(tr);
5134                 return ret;
5135         }
5136
5137         m = file->private_data;
5138         m->private = tr;
5139
5140         return 0;
5141 }
5142
5143 static int show_traces_release(struct inode *inode, struct file *file)
5144 {
5145         struct trace_array *tr = inode->i_private;
5146
5147         trace_array_put(tr);
5148         return seq_release(inode, file);
5149 }
5150
5151 static ssize_t
5152 tracing_write_stub(struct file *filp, const char __user *ubuf,
5153                    size_t count, loff_t *ppos)
5154 {
5155         return count;
5156 }
5157
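/*
 * lseek is only meaningful for readers going through seq_file; a
 * write-only open has no seq_file state, so its position is pinned
 * to zero.
 */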
5158 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5159 {
5160         int ret;
5161
5162         if (file->f_mode & FMODE_READ)
5163                 ret = seq_lseek(file, offset, whence);
5164         else
5165                 file->f_pos = ret = 0;
5166
5167         return ret;
5168 }
5169
5170 static const struct file_operations tracing_fops = {
5171         .open           = tracing_open,
5172         .read           = seq_read,
5173         .read_iter      = seq_read_iter,
5174         .splice_read    = generic_file_splice_read,
5175         .write          = tracing_write_stub,
5176         .llseek         = tracing_lseek,
5177         .release        = tracing_release,
5178 };
5179
5180 static const struct file_operations show_traces_fops = {
5181         .open           = show_traces_open,
5182         .read           = seq_read,
5183         .llseek         = seq_lseek,
5184         .release        = show_traces_release,
5185 };
5186
5187 static ssize_t
5188 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5189                      size_t count, loff_t *ppos)
5190 {
5191         struct trace_array *tr = file_inode(filp)->i_private;
5192         char *mask_str;
5193         int len;
5194
5195         len = snprintf(NULL, 0, "%*pb\n",
5196                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5197         mask_str = kmalloc(len, GFP_KERNEL);
5198         if (!mask_str)
5199                 return -ENOMEM;
5200
5201         len = snprintf(mask_str, len, "%*pb\n",
5202                        cpumask_pr_args(tr->tracing_cpumask));
5203         if (len >= count) {
5204                 count = -EINVAL;
5205                 goto out_err;
5206         }
5207         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5208
5209 out_err:
5210         kfree(mask_str);
5211
5212         return count;
5213 }
5214
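/*
 * Apply a new tracing cpumask: CPUs leaving the mask have ring buffer
 * recording disabled (and their disabled counter bumped), CPUs entering
 * it are re-enabled.  The update is done under tr->max_lock with
 * interrupts disabled.
 */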
5215 int tracing_set_cpumask(struct trace_array *tr,
5216                         cpumask_var_t tracing_cpumask_new)
5217 {
5218         int cpu;
5219
5220         if (!tr)
5221                 return -EINVAL;
5222
5223         local_irq_disable();
5224         arch_spin_lock(&tr->max_lock);
5225         for_each_tracing_cpu(cpu) {
5226                 /*
5227                  * Increase/decrease the disabled counter if we are
5228                  * about to flip a bit in the cpumask:
5229                  */
5230                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5231                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5232                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5233                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5234                 }
5235                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5236                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5237                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5238                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5239                 }
5240         }
5241         arch_spin_unlock(&tr->max_lock);
5242         local_irq_enable();
5243
5244         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5245
5246         return 0;
5247 }
5248
5249 static ssize_t
5250 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5251                       size_t count, loff_t *ppos)
5252 {
5253         struct trace_array *tr = file_inode(filp)->i_private;
5254         cpumask_var_t tracing_cpumask_new;
5255         int err;
5256
5257         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5258                 return -ENOMEM;
5259
5260         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5261         if (err)
5262                 goto err_free;
5263
5264         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5265         if (err)
5266                 goto err_free;
5267
5268         free_cpumask_var(tracing_cpumask_new);
5269
5270         return count;
5271
5272 err_free:
5273         free_cpumask_var(tracing_cpumask_new);
5274
5275         return err;
5276 }
5277
5278 static const struct file_operations tracing_cpumask_fops = {
5279         .open           = tracing_open_generic_tr,
5280         .read           = tracing_cpumask_read,
5281         .write          = tracing_cpumask_write,
5282         .release        = tracing_release_generic_tr,
5283         .llseek         = generic_file_llseek,
5284 };
5285
5286 static int tracing_trace_options_show(struct seq_file *m, void *v)
5287 {
5288         struct tracer_opt *trace_opts;
5289         struct trace_array *tr = m->private;
5290         u32 tracer_flags;
5291         int i;
5292
5293         mutex_lock(&trace_types_lock);
5294         tracer_flags = tr->current_trace->flags->val;
5295         trace_opts = tr->current_trace->flags->opts;
5296
5297         for (i = 0; trace_options[i]; i++) {
5298                 if (tr->trace_flags & (1 << i))
5299                         seq_printf(m, "%s\n", trace_options[i]);
5300                 else
5301                         seq_printf(m, "no%s\n", trace_options[i]);
5302         }
5303
5304         for (i = 0; trace_opts[i].name; i++) {
5305                 if (tracer_flags & trace_opts[i].bit)
5306                         seq_printf(m, "%s\n", trace_opts[i].name);
5307                 else
5308                         seq_printf(m, "no%s\n", trace_opts[i].name);
5309         }
5310         mutex_unlock(&trace_types_lock);
5311
5312         return 0;
5313 }
5314
5315 static int __set_tracer_option(struct trace_array *tr,
5316                                struct tracer_flags *tracer_flags,
5317                                struct tracer_opt *opts, int neg)
5318 {
5319         struct tracer *trace = tracer_flags->trace;
5320         int ret;
5321
5322         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5323         if (ret)
5324                 return ret;
5325
5326         if (neg)
5327                 tracer_flags->val &= ~opts->bit;
5328         else
5329                 tracer_flags->val |= opts->bit;
5330         return 0;
5331 }
5332
5333 /* Try to assign a tracer specific option */
5334 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5335 {
5336         struct tracer *trace = tr->current_trace;
5337         struct tracer_flags *tracer_flags = trace->flags;
5338         struct tracer_opt *opts = NULL;
5339         int i;
5340
5341         for (i = 0; tracer_flags->opts[i].name; i++) {
5342                 opts = &tracer_flags->opts[i];
5343
5344                 if (strcmp(cmp, opts->name) == 0)
5345                         return __set_tracer_option(tr, trace->flags, opts, neg);
5346         }
5347
5348         return -EINVAL;
5349 }
5350
5351 /* Some tracers require overwrite to stay enabled */
5352 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5353 {
5354         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5355                 return -1;
5356
5357         return 0;
5358 }
5359
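/*
 * Central place to change a global trace option flag.  Besides flipping
 * the bit in tr->trace_flags, several flags carry side effects handled
 * here: TRACE_ITER_RECORD_CMD/RECORD_TGID toggle comm/tgid saving (the
 * tgid map is allocated on first use), EVENT_FORK and FUNC_FORK follow
 * forked children, OVERWRITE is pushed down to the ring buffer(s), and
 * PRINTK starts or stops trace_printk output.
 */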
5360 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5361 {
5362         int *map;
5363
5364         if ((mask == TRACE_ITER_RECORD_TGID) ||
5365             (mask == TRACE_ITER_RECORD_CMD))
5366                 lockdep_assert_held(&event_mutex);
5367
5368         /* do nothing if the flag already matches the requested state */
5369         if (!!(tr->trace_flags & mask) == !!enabled)
5370                 return 0;
5371
5372         /* Give the tracer a chance to approve the change */
5373         if (tr->current_trace->flag_changed)
5374                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5375                         return -EINVAL;
5376
5377         if (enabled)
5378                 tr->trace_flags |= mask;
5379         else
5380                 tr->trace_flags &= ~mask;
5381
5382         if (mask == TRACE_ITER_RECORD_CMD)
5383                 trace_event_enable_cmd_record(enabled);
5384
5385         if (mask == TRACE_ITER_RECORD_TGID) {
5386                 if (!tgid_map) {
5387                         tgid_map_max = pid_max;
5388                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5389                                        GFP_KERNEL);
5390
5391                         /*
5392                          * Pairs with smp_load_acquire() in
5393                          * trace_find_tgid_ptr() to ensure that if it observes
5394                          * the tgid_map we just allocated then it also observes
5395                          * the corresponding tgid_map_max value.
5396                          */
5397                         smp_store_release(&tgid_map, map);
5398                 }
5399                 if (!tgid_map) {
5400                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5401                         return -ENOMEM;
5402                 }
5403
5404                 trace_event_enable_tgid_record(enabled);
5405         }
5406
5407         if (mask == TRACE_ITER_EVENT_FORK)
5408                 trace_event_follow_fork(tr, enabled);
5409
5410         if (mask == TRACE_ITER_FUNC_FORK)
5411                 ftrace_pid_follow_fork(tr, enabled);
5412
5413         if (mask == TRACE_ITER_OVERWRITE) {
5414                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5415 #ifdef CONFIG_TRACER_MAX_TRACE
5416                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5417 #endif
5418         }
5419
5420         if (mask == TRACE_ITER_PRINTK) {
5421                 trace_printk_start_stop_comm(enabled);
5422                 trace_printk_control(enabled);
5423         }
5424
5425         return 0;
5426 }
5427
5428 int trace_set_options(struct trace_array *tr, char *option)
5429 {
5430         char *cmp;
5431         int neg = 0;
5432         int ret;
5433         size_t orig_len = strlen(option);
5434         int len;
5435
5436         cmp = strstrip(option);
5437
5438         len = str_has_prefix(cmp, "no");
5439         if (len)
5440                 neg = 1;
5441
5442         cmp += len;
5443
5444         mutex_lock(&event_mutex);
5445         mutex_lock(&trace_types_lock);
5446
5447         ret = match_string(trace_options, -1, cmp);
5448         /* If no option could be set, test the specific tracer options */
5449         if (ret < 0)
5450                 ret = set_tracer_option(tr, cmp, neg);
5451         else
5452                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5453
5454         mutex_unlock(&trace_types_lock);
5455         mutex_unlock(&event_mutex);
5456
5457         /*
5458          * If the first trailing whitespace is replaced with '\0' by strstrip,
5459          * turn it back into a space.
5460          */
5461         if (orig_len > strlen(option))
5462                 option[strlen(option)] = ' ';
5463
5464         return ret;
5465 }
5466
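/*
 * Applied once at boot: trace_boot_options_buf holds the comma-separated
 * option list from the trace_options= kernel parameter, and each entry
 * is handed to trace_set_options() for the global trace array.  The
 * separating commas are restored afterwards so the buffer can be parsed
 * again later.
 */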
5467 static void __init apply_trace_boot_options(void)
5468 {
5469         char *buf = trace_boot_options_buf;
5470         char *option;
5471
5472         while (true) {
5473                 option = strsep(&buf, ",");
5474
5475                 if (!option)
5476                         break;
5477
5478                 if (*option)
5479                         trace_set_options(&global_trace, option);
5480
5481                 /* Put back the comma to allow this to be called again */
5482                 if (buf)
5483                         *(buf - 1) = ',';
5484         }
5485 }
5486
5487 static ssize_t
5488 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5489                         size_t cnt, loff_t *ppos)
5490 {
5491         struct seq_file *m = filp->private_data;
5492         struct trace_array *tr = m->private;
5493         char buf[64];
5494         int ret;
5495
5496         if (cnt >= sizeof(buf))
5497                 return -EINVAL;
5498
5499         if (copy_from_user(buf, ubuf, cnt))
5500                 return -EFAULT;
5501
5502         buf[cnt] = 0;
5503
5504         ret = trace_set_options(tr, buf);
5505         if (ret < 0)
5506                 return ret;
5507
5508         *ppos += cnt;
5509
5510         return cnt;
5511 }
5512
5513 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5514 {
5515         struct trace_array *tr = inode->i_private;
5516         int ret;
5517
5518         ret = tracing_check_open_get_tr(tr);
5519         if (ret)
5520                 return ret;
5521
5522         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5523         if (ret < 0)
5524                 trace_array_put(tr);
5525
5526         return ret;
5527 }
5528
5529 static const struct file_operations tracing_iter_fops = {
5530         .open           = tracing_trace_options_open,
5531         .read           = seq_read,
5532         .llseek         = seq_lseek,
5533         .release        = tracing_single_release_tr,
5534         .write          = tracing_trace_options_write,
5535 };
5536
5537 static const char readme_msg[] =
5538         "tracing mini-HOWTO:\n\n"
5539         "# echo 0 > tracing_on : quick way to disable tracing\n"
5540         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5541         " Important files:\n"
5542         "  trace\t\t\t- The static contents of the buffer\n"
5543         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5544         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5545         "  current_tracer\t- function and latency tracers\n"
5546         "  available_tracers\t- list of configured tracers for current_tracer\n"
5547         "  error_log\t- error log for failed commands (that support it)\n"
5548         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5549         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5550         "  trace_clock\t\t- change the clock used to order events\n"
5551         "       local:   Per cpu clock but may not be synced across CPUs\n"
5552         "      global:   Synced across CPUs but slows tracing down.\n"
5553         "     counter:   Not a clock, but just an increment\n"
5554         "      uptime:   Jiffy counter from time of boot\n"
5555         "        perf:   Same clock that perf events use\n"
5556 #ifdef CONFIG_X86_64
5557         "     x86-tsc:   TSC cycle counter\n"
5558 #endif
5559         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5560         "       delta:   Delta difference against a buffer-wide timestamp\n"
5561         "    absolute:   Absolute (standalone) timestamp\n"
5562         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5563         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5564         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5565         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5566         "\t\t\t  Remove sub-buffer with rmdir\n"
5567         "  trace_options\t\t- Set format or modify how tracing happens\n"
5568         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5569         "\t\t\t  option name\n"
5570         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5571 #ifdef CONFIG_DYNAMIC_FTRACE
5572         "\n  available_filter_functions - list of functions that can be filtered on\n"
5573         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5574         "\t\t\t  functions\n"
5575         "\t     accepts: func_full_name or glob-matching-pattern\n"
5576         "\t     modules: Can select a group via module\n"
5577         "\t      Format: :mod:<module-name>\n"
5578         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5579         "\t    triggers: a command to perform when function is hit\n"
5580         "\t      Format: <function>:<trigger>[:count]\n"
5581         "\t     trigger: traceon, traceoff\n"
5582         "\t\t      enable_event:<system>:<event>\n"
5583         "\t\t      disable_event:<system>:<event>\n"
5584 #ifdef CONFIG_STACKTRACE
5585         "\t\t      stacktrace\n"
5586 #endif
5587 #ifdef CONFIG_TRACER_SNAPSHOT
5588         "\t\t      snapshot\n"
5589 #endif
5590         "\t\t      dump\n"
5591         "\t\t      cpudump\n"
5592         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5593         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5594         "\t     The first one will disable tracing every time do_fault is hit\n"
5595         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5596         "\t       The first time do_trap is hit and it disables tracing, the\n"
5597         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5598         "\t       the counter will not decrement. It only decrements when the\n"
5599         "\t       trigger did work\n"
5600         "\t     To remove trigger without count:\n"
5601         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5602         "\t     To remove trigger with a count:\n"
5603         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5604         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5605         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5606         "\t    modules: Can select a group via module command :mod:\n"
5607         "\t    Does not accept triggers\n"
5608 #endif /* CONFIG_DYNAMIC_FTRACE */
5609 #ifdef CONFIG_FUNCTION_TRACER
5610         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5611         "\t\t    (function)\n"
5612         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5613         "\t\t    (function)\n"
5614 #endif
5615 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5616         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5617         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5618         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5619 #endif
5620 #ifdef CONFIG_TRACER_SNAPSHOT
5621         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5622         "\t\t\t  snapshot buffer. Read the contents for more\n"
5623         "\t\t\t  information\n"
5624 #endif
5625 #ifdef CONFIG_STACK_TRACER
5626         "  stack_trace\t\t- Shows the max stack trace when active\n"
5627         "  stack_max_size\t- Shows current max stack size that was traced\n"
5628         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5629         "\t\t\t  new trace)\n"
5630 #ifdef CONFIG_DYNAMIC_FTRACE
5631         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5632         "\t\t\t  traces\n"
5633 #endif
5634 #endif /* CONFIG_STACK_TRACER */
5635 #ifdef CONFIG_DYNAMIC_EVENTS
5636         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5637         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5638 #endif
5639 #ifdef CONFIG_KPROBE_EVENTS
5640         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5641         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5642 #endif
5643 #ifdef CONFIG_UPROBE_EVENTS
5644         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5645         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5646 #endif
5647 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5648         "\t  accepts: event-definitions (one definition per line)\n"
5649         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5650         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5651 #ifdef CONFIG_HIST_TRIGGERS
5652         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5653 #endif
5654         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5655         "\t           -:[<group>/][<event>]\n"
5656 #ifdef CONFIG_KPROBE_EVENTS
5657         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5658   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5659 #endif
5660 #ifdef CONFIG_UPROBE_EVENTS
5661   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5662 #endif
5663         "\t     args: <name>=fetcharg[:type]\n"
5664         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5665 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5666         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5667 #else
5668         "\t           $stack<index>, $stack, $retval, $comm,\n"
5669 #endif
5670         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5671         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5672         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5673         "\t           symstr, <type>\\[<array-size>\\]\n"
5674 #ifdef CONFIG_HIST_TRIGGERS
5675         "\t    field: <stype> <name>;\n"
5676         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5677         "\t           [unsigned] char/int/long\n"
5678 #endif
5679         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5680         "\t            of the <attached-group>/<attached-event>.\n"
5681 #endif
5682         "  events/\t\t- Directory containing all trace event subsystems:\n"
5683         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5684         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5685         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5686         "\t\t\t  events\n"
5687         "      filter\t\t- If set, only events passing filter are traced\n"
5688         "  events/<system>/<event>/\t- Directory containing control files for\n"
5689         "\t\t\t  <event>:\n"
5690         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5691         "      filter\t\t- If set, only events passing filter are traced\n"
5692         "      trigger\t\t- If set, a command to perform when event is hit\n"
5693         "\t    Format: <trigger>[:count][if <filter>]\n"
5694         "\t   trigger: traceon, traceoff\n"
5695         "\t            enable_event:<system>:<event>\n"
5696         "\t            disable_event:<system>:<event>\n"
5697 #ifdef CONFIG_HIST_TRIGGERS
5698         "\t            enable_hist:<system>:<event>\n"
5699         "\t            disable_hist:<system>:<event>\n"
5700 #endif
5701 #ifdef CONFIG_STACKTRACE
5702         "\t\t    stacktrace\n"
5703 #endif
5704 #ifdef CONFIG_TRACER_SNAPSHOT
5705         "\t\t    snapshot\n"
5706 #endif
5707 #ifdef CONFIG_HIST_TRIGGERS
5708         "\t\t    hist (see below)\n"
5709 #endif
5710         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5711         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5712         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5713         "\t                  events/block/block_unplug/trigger\n"
5714         "\t   The first disables tracing every time block_unplug is hit.\n"
5715         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5716         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5717         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5718         "\t   Like function triggers, the counter is only decremented if it\n"
5719         "\t    enabled or disabled tracing.\n"
5720         "\t   To remove a trigger without a count:\n"
5721         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5722         "\t   To remove a trigger with a count:\n"
5723         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5724         "\t   Filters can be ignored when removing a trigger.\n"
5725 #ifdef CONFIG_HIST_TRIGGERS
5726         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5727         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5728         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5729         "\t            [:values=<field1[,field2,...]>]\n"
5730         "\t            [:sort=<field1[,field2,...]>]\n"
5731         "\t            [:size=#entries]\n"
5732         "\t            [:pause][:continue][:clear]\n"
5733         "\t            [:name=histname1]\n"
5734         "\t            [:nohitcount]\n"
5735         "\t            [:<handler>.<action>]\n"
5736         "\t            [if <filter>]\n\n"
5737         "\t    Note, special fields can be used as well:\n"
5738         "\t            common_timestamp - to record current timestamp\n"
5739         "\t            common_cpu - to record the CPU the event happened on\n"
5740         "\n"
5741         "\t    A hist trigger variable can be:\n"
5742         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5743         "\t        - a reference to another variable e.g. y=$x,\n"
5744         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5745         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5746         "\n"
5747         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5748         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5749         "\t    variable reference, field or numeric literal.\n"
5750         "\n"
5751         "\t    When a matching event is hit, an entry is added to a hash\n"
5752         "\t    table using the key(s) and value(s) named, and the value of a\n"
5753         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5754         "\t    correspond to fields in the event's format description.  Keys\n"
5755         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5756         "\t    Compound keys consisting of up to two fields can be specified\n"
5757         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5758         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5759         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5760         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5761         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5762         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5763         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5764         "\t    its histogram data will be shared with other triggers of the\n"
5765         "\t    same name, and trigger hits will update this common data.\n\n"
5766         "\t    Reading the 'hist' file for the event will dump the hash\n"
5767         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5768         "\t    triggers attached to an event, there will be a table for each\n"
5769         "\t    trigger in the output.  The table displayed for a named\n"
5770         "\t    trigger will be the same as any other instance having the\n"
5771         "\t    same name.  The default format used to display a given field\n"
5772         "\t    can be modified by appending any of the following modifiers\n"
5773         "\t    to the field name, as applicable:\n\n"
5774         "\t            .hex        display a number as a hex value\n"
5775         "\t            .sym        display an address as a symbol\n"
5776         "\t            .sym-offset display an address as a symbol and offset\n"
5777         "\t            .execname   display a common_pid as a program name\n"
5778         "\t            .syscall    display a syscall id as a syscall name\n"
5779         "\t            .log2       display log2 value rather than raw number\n"
5780         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5781         "\t            .usecs      display a common_timestamp in microseconds\n"
5782         "\t            .percent    display a number as a percentage value\n"
5783         "\t            .graph      display a bar-graph of a value\n\n"
5784         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5785         "\t    trigger or to start a hist trigger but not log any events\n"
5786         "\t    until told to do so.  'continue' can be used to start or\n"
5787         "\t    restart a paused hist trigger.\n\n"
5788         "\t    The 'clear' parameter will clear the contents of a running\n"
5789         "\t    hist trigger and leave its current paused/active state\n"
5790         "\t    unchanged.\n\n"
5791         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5792         "\t    raw hitcount in the histogram.\n\n"
5793         "\t    The enable_hist and disable_hist triggers can be used to\n"
5794         "\t    have one event conditionally start and stop another event's\n"
5795         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5796         "\t    the enable_event and disable_event triggers.\n\n"
5797         "\t    Hist trigger handlers and actions are executed whenever a\n"
5798         "\t    histogram entry is added or updated.  They take the form:\n\n"
5799         "\t        <handler>.<action>\n\n"
5800         "\t    The available handlers are:\n\n"
5801         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5802         "\t        onmax(var)               - invoke if var exceeds current max\n"
5803         "\t        onchange(var)            - invoke action if var changes\n\n"
5804         "\t    The available actions are:\n\n"
5805         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5806         "\t        save(field,...)                      - save current event fields\n"
5807 #ifdef CONFIG_TRACER_SNAPSHOT
5808         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5809 #endif
5810 #ifdef CONFIG_SYNTH_EVENTS
5811         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5812         "\t  Write into this file to define/undefine new synthetic events.\n"
5813         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5814 #endif
5815 #endif
5816 ;
5817
5818 static ssize_t
5819 tracing_readme_read(struct file *filp, char __user *ubuf,
5820                        size_t cnt, loff_t *ppos)
5821 {
5822         return simple_read_from_buffer(ubuf, cnt, ppos,
5823                                         readme_msg, strlen(readme_msg));
5824 }
5825
5826 static const struct file_operations tracing_readme_fops = {
5827         .open           = tracing_open_generic,
5828         .read           = tracing_readme_read,
5829         .llseek         = generic_file_llseek,
5830 };
5831
5832 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5833 {
5834         int pid = ++(*pos);
5835
5836         return trace_find_tgid_ptr(pid);
5837 }
5838
5839 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5840 {
5841         int pid = *pos;
5842
5843         return trace_find_tgid_ptr(pid);
5844 }
5845
5846 static void saved_tgids_stop(struct seq_file *m, void *v)
5847 {
5848 }
5849
5850 static int saved_tgids_show(struct seq_file *m, void *v)
5851 {
5852         int *entry = (int *)v;
5853         int pid = entry - tgid_map;
5854         int tgid = *entry;
5855
5856         if (tgid == 0)
5857                 return SEQ_SKIP;
5858
5859         seq_printf(m, "%d %d\n", pid, tgid);
5860         return 0;
5861 }
5862
5863 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5864         .start          = saved_tgids_start,
5865         .stop           = saved_tgids_stop,
5866         .next           = saved_tgids_next,
5867         .show           = saved_tgids_show,
5868 };
5869
5870 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5871 {
5872         int ret;
5873
5874         ret = tracing_check_open_get_tr(NULL);
5875         if (ret)
5876                 return ret;
5877
5878         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5879 }
5880
5881
5882 static const struct file_operations tracing_saved_tgids_fops = {
5883         .open           = tracing_saved_tgids_open,
5884         .read           = seq_read,
5885         .llseek         = seq_lseek,
5886         .release        = seq_release,
5887 };
5888
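/*
 * seq_file iteration for the saved_cmdlines file: walk the
 * map_cmdline_to_pid[] array under trace_cmdline_lock and print a
 * "<pid> <comm>" line for every slot that holds a recorded pid.
 */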
5889 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5890 {
5891         unsigned int *ptr = v;
5892
5893         if (*pos || m->count)
5894                 ptr++;
5895
5896         (*pos)++;
5897
5898         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5899              ptr++) {
5900                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5901                         continue;
5902
5903                 return ptr;
5904         }
5905
5906         return NULL;
5907 }
5908
5909 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5910 {
5911         void *v;
5912         loff_t l = 0;
5913
5914         preempt_disable();
5915         arch_spin_lock(&trace_cmdline_lock);
5916
5917         v = &savedcmd->map_cmdline_to_pid[0];
5918         while (l <= *pos) {
5919                 v = saved_cmdlines_next(m, v, &l);
5920                 if (!v)
5921                         return NULL;
5922         }
5923
5924         return v;
5925 }
5926
5927 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5928 {
5929         arch_spin_unlock(&trace_cmdline_lock);
5930         preempt_enable();
5931 }
5932
5933 static int saved_cmdlines_show(struct seq_file *m, void *v)
5934 {
5935         char buf[TASK_COMM_LEN];
5936         unsigned int *pid = v;
5937
5938         __trace_find_cmdline(*pid, buf);
5939         seq_printf(m, "%d %s\n", *pid, buf);
5940         return 0;
5941 }
5942
5943 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5944         .start          = saved_cmdlines_start,
5945         .next           = saved_cmdlines_next,
5946         .stop           = saved_cmdlines_stop,
5947         .show           = saved_cmdlines_show,
5948 };
5949
5950 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5951 {
5952         int ret;
5953
5954         ret = tracing_check_open_get_tr(NULL);
5955         if (ret)
5956                 return ret;
5957
5958         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5959 }
5960
5961 static const struct file_operations tracing_saved_cmdlines_fops = {
5962         .open           = tracing_saved_cmdlines_open,
5963         .read           = seq_read,
5964         .llseek         = seq_lseek,
5965         .release        = seq_release,
5966 };
5967
5968 static ssize_t
5969 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5970                                  size_t cnt, loff_t *ppos)
5971 {
5972         char buf[64];
5973         int r;
5974
5975         preempt_disable();
5976         arch_spin_lock(&trace_cmdline_lock);
5977         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5978         arch_spin_unlock(&trace_cmdline_lock);
5979         preempt_enable();
5980
5981         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5982 }
5983
5984 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5985 {
5986         kfree(s->saved_cmdlines);
5987         kfree(s->map_cmdline_to_pid);
5988         kfree(s);
5989 }
5990
5991 static int tracing_resize_saved_cmdlines(unsigned int val)
5992 {
5993         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5994
5995         s = kmalloc(sizeof(*s), GFP_KERNEL);
5996         if (!s)
5997                 return -ENOMEM;
5998
5999         if (allocate_cmdlines_buffer(val, s) < 0) {
6000                 kfree(s);
6001                 return -ENOMEM;
6002         }
6003
6004         preempt_disable();
6005         arch_spin_lock(&trace_cmdline_lock);
6006         savedcmd_temp = savedcmd;
6007         savedcmd = s;
6008         arch_spin_unlock(&trace_cmdline_lock);
6009         preempt_enable();
6010         free_saved_cmdlines_buffer(savedcmd_temp);
6011
6012         return 0;
6013 }
6014
6015 static ssize_t
6016 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6017                                   size_t cnt, loff_t *ppos)
6018 {
6019         unsigned long val;
6020         int ret;
6021
6022         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6023         if (ret)
6024                 return ret;
6025
6026         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6027         if (!val || val > PID_MAX_DEFAULT)
6028                 return -EINVAL;
6029
6030         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6031         if (ret < 0)
6032                 return ret;
6033
6034         *ppos += cnt;
6035
6036         return cnt;
6037 }
6038
6039 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6040         .open           = tracing_open_generic,
6041         .read           = tracing_saved_cmdlines_size_read,
6042         .write          = tracing_saved_cmdlines_size_write,
6043 };
6044
6045 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6046 static union trace_eval_map_item *
6047 update_eval_map(union trace_eval_map_item *ptr)
6048 {
6049         if (!ptr->map.eval_string) {
6050                 if (ptr->tail.next) {
6051                         ptr = ptr->tail.next;
6052                         /* Set ptr to the next real item (skip head) */
6053                         ptr++;
6054                 } else
6055                         return NULL;
6056         }
6057         return ptr;
6058 }
6059
6060 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6061 {
6062         union trace_eval_map_item *ptr = v;
6063
6064         /*
6065          * Paranoid! If ptr points to end, we don't want to increment past it.
6066          * This really should never happen.
6067          */
6068         (*pos)++;
6069         ptr = update_eval_map(ptr);
6070         if (WARN_ON_ONCE(!ptr))
6071                 return NULL;
6072
6073         ptr++;
6074         ptr = update_eval_map(ptr);
6075
6076         return ptr;
6077 }
6078
6079 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6080 {
6081         union trace_eval_map_item *v;
6082         loff_t l = 0;
6083
6084         mutex_lock(&trace_eval_mutex);
6085
6086         v = trace_eval_maps;
6087         if (v)
6088                 v++;
6089
6090         while (v && l < *pos) {
6091                 v = eval_map_next(m, v, &l);
6092         }
6093
6094         return v;
6095 }
6096
6097 static void eval_map_stop(struct seq_file *m, void *v)
6098 {
6099         mutex_unlock(&trace_eval_mutex);
6100 }
6101
6102 static int eval_map_show(struct seq_file *m, void *v)
6103 {
6104         union trace_eval_map_item *ptr = v;
6105
6106         seq_printf(m, "%s %ld (%s)\n",
6107                    ptr->map.eval_string, ptr->map.eval_value,
6108                    ptr->map.system);
6109
6110         return 0;
6111 }
6112
6113 static const struct seq_operations tracing_eval_map_seq_ops = {
6114         .start          = eval_map_start,
6115         .next           = eval_map_next,
6116         .stop           = eval_map_stop,
6117         .show           = eval_map_show,
6118 };
6119
6120 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6121 {
6122         int ret;
6123
6124         ret = tracing_check_open_get_tr(NULL);
6125         if (ret)
6126                 return ret;
6127
6128         return seq_open(filp, &tracing_eval_map_seq_ops);
6129 }
6130
6131 static const struct file_operations tracing_eval_map_fops = {
6132         .open           = tracing_eval_map_open,
6133         .read           = seq_read,
6134         .llseek         = seq_lseek,
6135         .release        = seq_release,
6136 };
6137
6138 static inline union trace_eval_map_item *
6139 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6140 {
6141         /* Return tail of array given the head */
6142         return ptr + ptr->head.length + 1;
6143 }
6144
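/*
 * Append a module's eval maps to the trace_eval_maps list.  Each saved
 * chunk is laid out as:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * so trace_eval_jmp_to_tail() can hop from a head to its tail, and the
 * tail's ->next links to the head of the following chunk (NULL at the
 * end of the list).
 */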
6145 static void
6146 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6147                            int len)
6148 {
6149         struct trace_eval_map **stop;
6150         struct trace_eval_map **map;
6151         union trace_eval_map_item *map_array;
6152         union trace_eval_map_item *ptr;
6153
6154         stop = start + len;
6155
6156         /*
6157          * The trace_eval_maps contains the map plus a head and tail item,
6158          * where the head holds the module and length of array, and the
6159          * tail holds a pointer to the next list.
6160          */
6161         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6162         if (!map_array) {
6163                 pr_warn("Unable to allocate trace eval mapping\n");
6164                 return;
6165         }
6166
6167         mutex_lock(&trace_eval_mutex);
6168
6169         if (!trace_eval_maps)
6170                 trace_eval_maps = map_array;
6171         else {
6172                 ptr = trace_eval_maps;
6173                 for (;;) {
6174                         ptr = trace_eval_jmp_to_tail(ptr);
6175                         if (!ptr->tail.next)
6176                                 break;
6177                         ptr = ptr->tail.next;
6178
6179                 }
6180                 ptr->tail.next = map_array;
6181         }
6182         map_array->head.mod = mod;
6183         map_array->head.length = len;
6184         map_array++;
6185
6186         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6187                 map_array->map = **map;
6188                 map_array++;
6189         }
6190         memset(map_array, 0, sizeof(*map_array));
6191
6192         mutex_unlock(&trace_eval_mutex);
6193 }
6194
6195 static void trace_create_eval_file(struct dentry *d_tracer)
6196 {
6197         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6198                           NULL, &tracing_eval_map_fops);
6199 }
6200
6201 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6202 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6203 static inline void trace_insert_eval_map_file(struct module *mod,
6204                               struct trace_eval_map **start, int len) { }
6205 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6206
6207 static void trace_insert_eval_map(struct module *mod,
6208                                   struct trace_eval_map **start, int len)
6209 {
6210         struct trace_eval_map **map;
6211
6212         if (len <= 0)
6213                 return;
6214
6215         map = start;
6216
6217         trace_event_eval_update(map, len);
6218
6219         trace_insert_eval_map_file(mod, start, len);
6220 }
6221
6222 static ssize_t
6223 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6224                        size_t cnt, loff_t *ppos)
6225 {
6226         struct trace_array *tr = filp->private_data;
6227         char buf[MAX_TRACER_SIZE+2];
6228         int r;
6229
6230         mutex_lock(&trace_types_lock);
6231         r = sprintf(buf, "%s\n", tr->current_trace->name);
6232         mutex_unlock(&trace_types_lock);
6233
6234         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6235 }
6236
6237 int tracer_init(struct tracer *t, struct trace_array *tr)
6238 {
6239         tracing_reset_online_cpus(&tr->array_buffer);
6240         return t->init(tr);
6241 }
6242
6243 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6244 {
6245         int cpu;
6246
6247         for_each_tracing_cpu(cpu)
6248                 per_cpu_ptr(buf->data, cpu)->entries = val;
6249 }
6250
6251 #ifdef CONFIG_TRACER_MAX_TRACE
6252 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6253 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6254                                         struct array_buffer *size_buf, int cpu_id)
6255 {
6256         int cpu, ret = 0;
6257
6258         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6259                 for_each_tracing_cpu(cpu) {
6260                         ret = ring_buffer_resize(trace_buf->buffer,
6261                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6262                         if (ret < 0)
6263                                 break;
6264                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6265                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6266                 }
6267         } else {
6268                 ret = ring_buffer_resize(trace_buf->buffer,
6269                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6270                 if (ret == 0)
6271                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6272                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6273         }
6274
6275         return ret;
6276 }
6277 #endif /* CONFIG_TRACER_MAX_TRACE */
6278
6279 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6280                                         unsigned long size, int cpu)
6281 {
6282         int ret;
6283
6284         /*
6285          * If kernel or user changes the size of the ring buffer
6286          * we use the size that was given, and we can forget about
6287          * expanding it later.
6288          */
6289         ring_buffer_expanded = true;
6290
6291         /* May be called before buffers are initialized */
6292         if (!tr->array_buffer.buffer)
6293                 return 0;
6294
6295         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6296         if (ret < 0)
6297                 return ret;
6298
6299 #ifdef CONFIG_TRACER_MAX_TRACE
6300         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6301             !tr->current_trace->use_max_tr)
6302                 goto out;
6303
6304         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6305         if (ret < 0) {
6306                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6307                                                      &tr->array_buffer, cpu);
6308                 if (r < 0) {
6309                         /*
6310                          * AARGH! We are left with a max buffer of a
6311                          * different size!
6312                          * The max buffer is our "snapshot" buffer.
6313                          * When a tracer needs a snapshot (one of the
6314                          * latency tracers), it swaps the max buffer
6315                          * with the saved snapshot. We succeeded in
6316                          * updating the size of the main buffer, but failed
6317                          * to update the size of the max buffer. And when we
6318                          * tried to reset the main buffer to the original
6319                          * size, we failed there too. This is very unlikely
6320                          * to happen, but if it does, warn and kill all
6321                          * tracing.
6322                          */
6323                         WARN_ON(1);
6324                         tracing_disabled = 1;
6325                 }
6326                 return ret;
6327         }
6328
6329         if (cpu == RING_BUFFER_ALL_CPUS)
6330                 set_buffer_entries(&tr->max_buffer, size);
6331         else
6332                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6333
6334  out:
6335 #endif /* CONFIG_TRACER_MAX_TRACE */
6336
6337         if (cpu == RING_BUFFER_ALL_CPUS)
6338                 set_buffer_entries(&tr->array_buffer, size);
6339         else
6340                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6341
6342         return ret;
6343 }
6344
6345 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6346                                   unsigned long size, int cpu_id)
6347 {
6348         int ret;
6349
6350         mutex_lock(&trace_types_lock);
6351
6352         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6353                 /* make sure this CPU is enabled in the tracing cpumask */
6354                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6355                         ret = -EINVAL;
6356                         goto out;
6357                 }
6358         }
6359
6360         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6361         if (ret < 0)
6362                 ret = -ENOMEM;
6363
6364 out:
6365         mutex_unlock(&trace_types_lock);
6366
6367         return ret;
6368 }
6369
6370
6371 /**
6372  * tracing_update_buffers - used by tracing facility to expand ring buffers
6373  *
6374  * To save memory when tracing is configured in but never used, the
6375  * ring buffers are initially set to a minimum size. Once a user
6376  * starts to use the tracing facility, they need to grow to their
6377  * default size.
6378  *
6379  * This function must be called when a tracer is about to be used.
6380  */
6381 int tracing_update_buffers(void)
6382 {
6383         int ret = 0;
6384
6385         mutex_lock(&trace_types_lock);
6386         if (!ring_buffer_expanded)
6387                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6388                                                 RING_BUFFER_ALL_CPUS);
6389         mutex_unlock(&trace_types_lock);
6390
6391         return ret;
6392 }
6393
6394 struct trace_option_dentry;
6395
6396 static void
6397 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6398
6399 /*
6400  * Used to clear out the tracer before deletion of an instance.
6401  * Must have trace_types_lock held.
6402  */
6403 static void tracing_set_nop(struct trace_array *tr)
6404 {
6405         if (tr->current_trace == &nop_trace)
6406                 return;
6407
6408         tr->current_trace->enabled--;
6409
6410         if (tr->current_trace->reset)
6411                 tr->current_trace->reset(tr);
6412
6413         tr->current_trace = &nop_trace;
6414 }
6415
6416 static bool tracer_options_updated;
6417
6418 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6419 {
6420         /* Only enable if the directory has been created already. */
6421         if (!tr->dir)
6422                 return;
6423
6424         /* Only create trace option files after update_tracer_options() finishes */
6425         if (!tracer_options_updated)
6426                 return;
6427
6428         create_trace_option_files(tr, t);
6429 }
6430
6431 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6432 {
6433         struct tracer *t;
6434 #ifdef CONFIG_TRACER_MAX_TRACE
6435         bool had_max_tr;
6436 #endif
6437         int ret = 0;
6438
6439         mutex_lock(&trace_types_lock);
6440
6441         if (!ring_buffer_expanded) {
6442                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6443                                                 RING_BUFFER_ALL_CPUS);
6444                 if (ret < 0)
6445                         goto out;
6446                 ret = 0;
6447         }
6448
6449         for (t = trace_types; t; t = t->next) {
6450                 if (strcmp(t->name, buf) == 0)
6451                         break;
6452         }
6453         if (!t) {
6454                 ret = -EINVAL;
6455                 goto out;
6456         }
6457         if (t == tr->current_trace)
6458                 goto out;
6459
6460 #ifdef CONFIG_TRACER_SNAPSHOT
6461         if (t->use_max_tr) {
6462                 local_irq_disable();
6463                 arch_spin_lock(&tr->max_lock);
6464                 if (tr->cond_snapshot)
6465                         ret = -EBUSY;
6466                 arch_spin_unlock(&tr->max_lock);
6467                 local_irq_enable();
6468                 if (ret)
6469                         goto out;
6470         }
6471 #endif
6472         /* Some tracers won't work on kernel command line */
6473         if (system_state < SYSTEM_RUNNING && t->noboot) {
6474                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6475                         t->name);
6476                 goto out;
6477         }
6478
6479         /* Some tracers are only allowed for the top level buffer */
6480         if (!trace_ok_for_array(t, tr)) {
6481                 ret = -EINVAL;
6482                 goto out;
6483         }
6484
6485         /* If trace pipe files are being read, we can't change the tracer */
6486         if (tr->trace_ref) {
6487                 ret = -EBUSY;
6488                 goto out;
6489         }
6490
6491         trace_branch_disable();
6492
6493         tr->current_trace->enabled--;
6494
6495         if (tr->current_trace->reset)
6496                 tr->current_trace->reset(tr);
6497
6498 #ifdef CONFIG_TRACER_MAX_TRACE
6499         had_max_tr = tr->current_trace->use_max_tr;
6500
6501         /* Current trace needs to be nop_trace before synchronize_rcu */
6502         tr->current_trace = &nop_trace;
6503
6504         if (had_max_tr && !t->use_max_tr) {
6505                 /*
6506                  * We need to make sure that update_max_tr() sees that
6507                  * current_trace changed to nop_trace, to keep it from
6508                  * swapping the buffers after we resize them.
6509                  * update_max_tr() is called with interrupts disabled,
6510                  * so a synchronize_rcu() is sufficient.
6511                  */
6512                 synchronize_rcu();
6513                 free_snapshot(tr);
6514         }
6515
6516         if (t->use_max_tr && !tr->allocated_snapshot) {
6517                 ret = tracing_alloc_snapshot_instance(tr);
6518                 if (ret < 0)
6519                         goto out;
6520         }
6521 #else
6522         tr->current_trace = &nop_trace;
6523 #endif
6524
6525         if (t->init) {
6526                 ret = tracer_init(t, tr);
6527                 if (ret)
6528                         goto out;
6529         }
6530
6531         tr->current_trace = t;
6532         tr->current_trace->enabled++;
6533         trace_branch_enable(tr);
6534  out:
6535         mutex_unlock(&trace_types_lock);
6536
6537         return ret;
6538 }
6539
6540 static ssize_t
6541 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6542                         size_t cnt, loff_t *ppos)
6543 {
6544         struct trace_array *tr = filp->private_data;
6545         char buf[MAX_TRACER_SIZE+1];
6546         char *name;
6547         size_t ret;
6548         int err;
6549
6550         ret = cnt;
6551
6552         if (cnt > MAX_TRACER_SIZE)
6553                 cnt = MAX_TRACER_SIZE;
6554
6555         if (copy_from_user(buf, ubuf, cnt))
6556                 return -EFAULT;
6557
6558         buf[cnt] = 0;
6559
6560         name = strim(buf);
6561
6562         err = tracing_set_tracer(tr, name);
6563         if (err)
6564                 return err;
6565
6566         *ppos += ret;
6567
6568         return ret;
6569 }
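/*
 * Illustrative example (not from the original source): this handler runs
 * when a tracer name is written to the "current_trace" file (the file
 * name is an assumption here; the file is created elsewhere), e.g.:
 *
 *      # echo nop > /sys/kernel/tracing/current_trace
 *
 * Trailing whitespace, including the newline added by echo, is removed
 * by strim() before tracing_set_tracer() looks up the name.
 */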
6570
6571 static ssize_t
6572 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6573                    size_t cnt, loff_t *ppos)
6574 {
6575         char buf[64];
6576         int r;
6577
6578         r = snprintf(buf, sizeof(buf), "%ld\n",
6579                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6580         if (r > sizeof(buf))
6581                 r = sizeof(buf);
6582         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6583 }
6584
6585 static ssize_t
6586 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6587                     size_t cnt, loff_t *ppos)
6588 {
6589         unsigned long val;
6590         int ret;
6591
6592         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6593         if (ret)
6594                 return ret;
6595
6596         *ptr = val * 1000;
6597
6598         return cnt;
6599 }
6600
6601 static ssize_t
6602 tracing_thresh_read(struct file *filp, char __user *ubuf,
6603                     size_t cnt, loff_t *ppos)
6604 {
6605         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6606 }
6607
6608 static ssize_t
6609 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6610                      size_t cnt, loff_t *ppos)
6611 {
6612         struct trace_array *tr = filp->private_data;
6613         int ret;
6614
6615         mutex_lock(&trace_types_lock);
6616         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6617         if (ret < 0)
6618                 goto out;
6619
6620         if (tr->current_trace->update_thresh) {
6621                 ret = tr->current_trace->update_thresh(tr);
6622                 if (ret < 0)
6623                         goto out;
6624         }
6625
6626         ret = cnt;
6627 out:
6628         mutex_unlock(&trace_types_lock);
6629
6630         return ret;
6631 }
6632
6633 #ifdef CONFIG_TRACER_MAX_TRACE
6634
6635 static ssize_t
6636 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6637                      size_t cnt, loff_t *ppos)
6638 {
6639         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6640 }
6641
6642 static ssize_t
6643 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6644                       size_t cnt, loff_t *ppos)
6645 {
6646         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6647 }
6648
6649 #endif
6650
6651 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6652 {
6653         struct trace_array *tr = inode->i_private;
6654         struct trace_iterator *iter;
6655         int ret;
6656
6657         ret = tracing_check_open_get_tr(tr);
6658         if (ret)
6659                 return ret;
6660
6661         mutex_lock(&trace_types_lock);
6662
6663         /* create a buffer to store the information to pass to userspace */
6664         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6665         if (!iter) {
6666                 ret = -ENOMEM;
6667                 __trace_array_put(tr);
6668                 goto out;
6669         }
6670
6671         trace_seq_init(&iter->seq);
6672         iter->trace = tr->current_trace;
6673
6674         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6675                 ret = -ENOMEM;
6676                 goto fail;
6677         }
6678
6679         /* trace pipe does not show start of buffer */
6680         cpumask_setall(iter->started);
6681
6682         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6683                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6684
6685         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6686         if (trace_clocks[tr->clock_id].in_ns)
6687                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6688
6689         iter->tr = tr;
6690         iter->array_buffer = &tr->array_buffer;
6691         iter->cpu_file = tracing_get_cpu(inode);
6692         mutex_init(&iter->mutex);
6693         filp->private_data = iter;
6694
6695         if (iter->trace->pipe_open)
6696                 iter->trace->pipe_open(iter);
6697
6698         nonseekable_open(inode, filp);
6699
6700         tr->trace_ref++;
6701 out:
6702         mutex_unlock(&trace_types_lock);
6703         return ret;
6704
6705 fail:
6706         kfree(iter);
6707         __trace_array_put(tr);
6708         mutex_unlock(&trace_types_lock);
6709         return ret;
6710 }
6711
6712 static int tracing_release_pipe(struct inode *inode, struct file *file)
6713 {
6714         struct trace_iterator *iter = file->private_data;
6715         struct trace_array *tr = inode->i_private;
6716
6717         mutex_lock(&trace_types_lock);
6718
6719         tr->trace_ref--;
6720
6721         if (iter->trace->pipe_close)
6722                 iter->trace->pipe_close(iter);
6723
6724         mutex_unlock(&trace_types_lock);
6725
6726         free_cpumask_var(iter->started);
6727         kfree(iter->fmt);
6728         mutex_destroy(&iter->mutex);
6729         kfree(iter);
6730
6731         trace_array_put(tr);
6732
6733         return 0;
6734 }
6735
6736 static __poll_t
6737 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6738 {
6739         struct trace_array *tr = iter->tr;
6740
6741         /* Iterators are static; they should be either filled or empty */
6742         if (trace_buffer_iter(iter, iter->cpu_file))
6743                 return EPOLLIN | EPOLLRDNORM;
6744
6745         if (tr->trace_flags & TRACE_ITER_BLOCK)
6746                 /*
6747                  * Always select as readable when in blocking mode
6748                  */
6749                 return EPOLLIN | EPOLLRDNORM;
6750         else
6751                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6752                                              filp, poll_table, iter->tr->buffer_percent);
6753 }
6754
6755 static __poll_t
6756 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6757 {
6758         struct trace_iterator *iter = filp->private_data;
6759
6760         return trace_poll(iter, filp, poll_table);
6761 }
6762
6763 /* Must be called with iter->mutex held. */
6764 static int tracing_wait_pipe(struct file *filp)
6765 {
6766         struct trace_iterator *iter = filp->private_data;
6767         int ret;
6768
6769         while (trace_empty(iter)) {
6770
6771                 if (filp->f_flags & O_NONBLOCK) {
6772                         return -EAGAIN;
6773                 }
6774
6775                 /*
6776                  * We block until we read something and tracing is disabled.
6777                  * We still block if tracing is disabled, but we have never
6778                  * read anything. This allows a user to cat this file, and
6779                  * then enable tracing. But after we have read something,
6780                  * we give an EOF when tracing is again disabled.
6781                  *
6782                  * iter->pos will be 0 if we haven't read anything.
6783                  */
6784                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6785                         break;
6786
6787                 mutex_unlock(&iter->mutex);
6788
6789                 ret = wait_on_pipe(iter, 0);
6790
6791                 mutex_lock(&iter->mutex);
6792
6793                 if (ret)
6794                         return ret;
6795         }
6796
6797         return 1;
6798 }
6799
6800 /*
6801  * Consumer reader.
6802  */
6803 static ssize_t
6804 tracing_read_pipe(struct file *filp, char __user *ubuf,
6805                   size_t cnt, loff_t *ppos)
6806 {
6807         struct trace_iterator *iter = filp->private_data;
6808         ssize_t sret;
6809
6810         /*
6811          * Avoid more than one consumer on a single file descriptor.
6812          * This is just a matter of trace coherency; the ring buffer
6813          * itself is protected.
6814          */
6815         mutex_lock(&iter->mutex);
6816
6817         /* return any leftover data */
6818         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6819         if (sret != -EBUSY)
6820                 goto out;
6821
6822         trace_seq_init(&iter->seq);
6823
6824         if (iter->trace->read) {
6825                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6826                 if (sret)
6827                         goto out;
6828         }
6829
6830 waitagain:
6831         sret = tracing_wait_pipe(filp);
6832         if (sret <= 0)
6833                 goto out;
6834
6835         /* stop when tracing is finished */
6836         if (trace_empty(iter)) {
6837                 sret = 0;
6838                 goto out;
6839         }
6840
6841         if (cnt >= PAGE_SIZE)
6842                 cnt = PAGE_SIZE - 1;
6843
6844         /* reset all but tr, trace, and overruns */
6845         trace_iterator_reset(iter);
6846         cpumask_clear(iter->started);
6847         trace_seq_init(&iter->seq);
6848
6849         trace_event_read_lock();
6850         trace_access_lock(iter->cpu_file);
6851         while (trace_find_next_entry_inc(iter) != NULL) {
6852                 enum print_line_t ret;
6853                 int save_len = iter->seq.seq.len;
6854
6855                 ret = print_trace_line(iter);
6856                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6857                         /*
6858                          * If one print_trace_line() fills the entire trace_seq in one
6859                          * shot, trace_seq_to_user() will return -EBUSY because
6860                          * save_len == 0. In this case we need to consume the event,
6861                          * otherwise the loop will peek it again, resulting in an infinite loop.
6862                          */
6863                         if (save_len == 0) {
6864                                 iter->seq.full = 0;
6865                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6866                                 trace_consume(iter);
6867                                 break;
6868                         }
6869
6870                         /* In other cases, don't print partial lines */
6871                         iter->seq.seq.len = save_len;
6872                         break;
6873                 }
6874                 if (ret != TRACE_TYPE_NO_CONSUME)
6875                         trace_consume(iter);
6876
6877                 if (trace_seq_used(&iter->seq) >= cnt)
6878                         break;
6879
6880                 /*
6881                  * The full flag being set means we reached the trace_seq buffer
6882                  * size and should have left via the partial output condition
6883                  * above; one of the trace_seq_* functions is not used properly.
6884                  */
6885                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6886                           iter->ent->type);
6887         }
6888         trace_access_unlock(iter->cpu_file);
6889         trace_event_read_unlock();
6890
6891         /* Now copy what we have to the user */
6892         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6893         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6894                 trace_seq_init(&iter->seq);
6895
6896         /*
6897          * If there was nothing to send to the user, despite having consumed
6898          * trace entries, go back and wait for more entries.
6899          */
6900         if (sret == -EBUSY)
6901                 goto waitagain;
6902
6903 out:
6904         mutex_unlock(&iter->mutex);
6905
6906         return sret;
6907 }
6908
6909 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6910                                      unsigned int idx)
6911 {
6912         __free_page(spd->pages[idx]);
6913 }
6914
6915 static size_t
6916 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6917 {
6918         size_t count;
6919         int save_len;
6920         int ret;
6921
6922         /* Seq buffer is page-sized, exactly what we need. */
6923         for (;;) {
6924                 save_len = iter->seq.seq.len;
6925                 ret = print_trace_line(iter);
6926
6927                 if (trace_seq_has_overflowed(&iter->seq)) {
6928                         iter->seq.seq.len = save_len;
6929                         break;
6930                 }
6931
6932                 /*
6933                  * This should not be hit, because PARTIAL_LINE should only
6934                  * be returned if iter->seq overflowed, which is handled
6935                  * above. But check it anyway to be safe.
6936                  */
6937                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6938                         iter->seq.seq.len = save_len;
6939                         break;
6940                 }
6941
6942                 count = trace_seq_used(&iter->seq) - save_len;
6943                 if (rem < count) {
6944                         rem = 0;
6945                         iter->seq.seq.len = save_len;
6946                         break;
6947                 }
6948
6949                 if (ret != TRACE_TYPE_NO_CONSUME)
6950                         trace_consume(iter);
6951                 rem -= count;
6952                 if (!trace_find_next_entry_inc(iter))   {
6953                         rem = 0;
6954                         iter->ent = NULL;
6955                         break;
6956                 }
6957         }
6958
6959         return rem;
6960 }
6961
6962 static ssize_t tracing_splice_read_pipe(struct file *filp,
6963                                         loff_t *ppos,
6964                                         struct pipe_inode_info *pipe,
6965                                         size_t len,
6966                                         unsigned int flags)
6967 {
6968         struct page *pages_def[PIPE_DEF_BUFFERS];
6969         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6970         struct trace_iterator *iter = filp->private_data;
6971         struct splice_pipe_desc spd = {
6972                 .pages          = pages_def,
6973                 .partial        = partial_def,
6974                 .nr_pages       = 0, /* This gets updated below. */
6975                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6976                 .ops            = &default_pipe_buf_ops,
6977                 .spd_release    = tracing_spd_release_pipe,
6978         };
6979         ssize_t ret;
6980         size_t rem;
6981         unsigned int i;
6982
6983         if (splice_grow_spd(pipe, &spd))
6984                 return -ENOMEM;
6985
6986         mutex_lock(&iter->mutex);
6987
6988         if (iter->trace->splice_read) {
6989                 ret = iter->trace->splice_read(iter, filp,
6990                                                ppos, pipe, len, flags);
6991                 if (ret)
6992                         goto out_err;
6993         }
6994
6995         ret = tracing_wait_pipe(filp);
6996         if (ret <= 0)
6997                 goto out_err;
6998
6999         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7000                 ret = -EFAULT;
7001                 goto out_err;
7002         }
7003
7004         trace_event_read_lock();
7005         trace_access_lock(iter->cpu_file);
7006
7007         /* Fill as many pages as possible. */
7008         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7009                 spd.pages[i] = alloc_page(GFP_KERNEL);
7010                 if (!spd.pages[i])
7011                         break;
7012
7013                 rem = tracing_fill_pipe_page(rem, iter);
7014
7015                 /* Copy the data into the page, so we can start over. */
7016                 ret = trace_seq_to_buffer(&iter->seq,
7017                                           page_address(spd.pages[i]),
7018                                           trace_seq_used(&iter->seq));
7019                 if (ret < 0) {
7020                         __free_page(spd.pages[i]);
7021                         break;
7022                 }
7023                 spd.partial[i].offset = 0;
7024                 spd.partial[i].len = trace_seq_used(&iter->seq);
7025
7026                 trace_seq_init(&iter->seq);
7027         }
7028
7029         trace_access_unlock(iter->cpu_file);
7030         trace_event_read_unlock();
7031         mutex_unlock(&iter->mutex);
7032
7033         spd.nr_pages = i;
7034
7035         if (i)
7036                 ret = splice_to_pipe(pipe, &spd);
7037         else
7038                 ret = 0;
7039 out:
7040         splice_shrink_spd(&spd);
7041         return ret;
7042
7043 out_err:
7044         mutex_unlock(&iter->mutex);
7045         goto out;
7046 }
7047
7048 static ssize_t
7049 tracing_entries_read(struct file *filp, char __user *ubuf,
7050                      size_t cnt, loff_t *ppos)
7051 {
7052         struct inode *inode = file_inode(filp);
7053         struct trace_array *tr = inode->i_private;
7054         int cpu = tracing_get_cpu(inode);
7055         char buf[64];
7056         int r = 0;
7057         ssize_t ret;
7058
7059         mutex_lock(&trace_types_lock);
7060
7061         if (cpu == RING_BUFFER_ALL_CPUS) {
7062                 int cpu, buf_size_same;
7063                 unsigned long size;
7064
7065                 size = 0;
7066                 buf_size_same = 1;
7067                 /* check if all per-CPU buffer sizes are the same */
7068                 for_each_tracing_cpu(cpu) {
7069                         /* fill in the size from first enabled cpu */
7070                         if (size == 0)
7071                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7072                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7073                                 buf_size_same = 0;
7074                                 break;
7075                         }
7076                 }
7077
7078                 if (buf_size_same) {
7079                         if (!ring_buffer_expanded)
7080                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7081                                             size >> 10,
7082                                             trace_buf_size >> 10);
7083                         else
7084                                 r = sprintf(buf, "%lu\n", size >> 10);
7085                 } else
7086                         r = sprintf(buf, "X\n");
7087         } else
7088                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7089
7090         mutex_unlock(&trace_types_lock);
7091
7092         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7093         return ret;
7094 }
7095
7096 static ssize_t
7097 tracing_entries_write(struct file *filp, const char __user *ubuf,
7098                       size_t cnt, loff_t *ppos)
7099 {
7100         struct inode *inode = file_inode(filp);
7101         struct trace_array *tr = inode->i_private;
7102         unsigned long val;
7103         int ret;
7104
7105         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7106         if (ret)
7107                 return ret;
7108
7109         /* must have at least 1 entry */
7110         if (!val)
7111                 return -EINVAL;
7112
7113         /* value is in KB */
7114         val <<= 10;
7115         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7116         if (ret < 0)
7117                 return ret;
7118
7119         *ppos += cnt;
7120
7121         return cnt;
7122 }
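/*
 * Illustrative example (not from the original source): the value written
 * is interpreted as KiB per CPU (val <<= 10 above). Assuming this handler
 * backs the usual "buffer_size_kb" tracefs file, something like:
 *
 *      # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * asks for a 4 MiB ring buffer on every CPU, while writing to a
 * per_cpu/cpuN/buffer_size_kb file resizes only that CPU's buffer.
 */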
7123
7124 static ssize_t
7125 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7126                                 size_t cnt, loff_t *ppos)
7127 {
7128         struct trace_array *tr = filp->private_data;
7129         char buf[64];
7130         int r, cpu;
7131         unsigned long size = 0, expanded_size = 0;
7132
7133         mutex_lock(&trace_types_lock);
7134         for_each_tracing_cpu(cpu) {
7135                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7136                 if (!ring_buffer_expanded)
7137                         expanded_size += trace_buf_size >> 10;
7138         }
7139         if (ring_buffer_expanded)
7140                 r = sprintf(buf, "%lu\n", size);
7141         else
7142                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7143         mutex_unlock(&trace_types_lock);
7144
7145         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7146 }
7147
7148 static ssize_t
7149 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7150                           size_t cnt, loff_t *ppos)
7151 {
7152         /*
7153          * There is no need to read what the user has written; this function
7154          * only exists to make sure that "echo" does not return an error.
7155          */
7156
7157         *ppos += cnt;
7158
7159         return cnt;
7160 }
7161
7162 static int
7163 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7164 {
7165         struct trace_array *tr = inode->i_private;
7166
7167         /* disable tracing if the stop-on-free option is set */
7168         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7169                 tracer_tracing_off(tr);
7170         /* resize the ring buffer to 0 */
7171         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7172
7173         trace_array_put(tr);
7174
7175         return 0;
7176 }
7177
7178 static ssize_t
7179 tracing_mark_write(struct file *filp, const char __user *ubuf,
7180                                         size_t cnt, loff_t *fpos)
7181 {
7182         struct trace_array *tr = filp->private_data;
7183         struct ring_buffer_event *event;
7184         enum event_trigger_type tt = ETT_NONE;
7185         struct trace_buffer *buffer;
7186         struct print_entry *entry;
7187         ssize_t written;
7188         int size;
7189         int len;
7190
7191 /* Used in tracing_mark_raw_write() as well */
7192 #define FAULTED_STR "<faulted>"
7193 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7194
7195         if (tracing_disabled)
7196                 return -EINVAL;
7197
7198         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7199                 return -EINVAL;
7200
7201         if (cnt > TRACE_BUF_SIZE)
7202                 cnt = TRACE_BUF_SIZE;
7203
7204         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7205
7206         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7207
7208         /* If less than "<faulted>", then make sure we can still add that */
7209         if (cnt < FAULTED_SIZE)
7210                 size += FAULTED_SIZE - cnt;
7211
7212         buffer = tr->array_buffer.buffer;
7213         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7214                                             tracing_gen_ctx());
7215         if (unlikely(!event))
7216                 /* Ring buffer disabled, return as if not open for write */
7217                 return -EBADF;
7218
7219         entry = ring_buffer_event_data(event);
7220         entry->ip = _THIS_IP_;
7221
7222         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7223         if (len) {
7224                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7225                 cnt = FAULTED_SIZE;
7226                 written = -EFAULT;
7227         } else
7228                 written = cnt;
7229
7230         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7231                 /* do not add \n before testing triggers, but add \0 */
7232                 entry->buf[cnt] = '\0';
7233                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7234         }
7235
7236         if (entry->buf[cnt - 1] != '\n') {
7237                 entry->buf[cnt] = '\n';
7238                 entry->buf[cnt + 1] = '\0';
7239         } else
7240                 entry->buf[cnt] = '\0';
7241
7242         if (static_branch_unlikely(&trace_marker_exports_enabled))
7243                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7244         __buffer_unlock_commit(buffer, event);
7245
7246         if (tt)
7247                 event_triggers_post_call(tr->trace_marker_file, tt);
7248
7249         return written;
7250 }
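/*
 * Illustrative example (not from the original source): user space writes
 * free-form text into the ring buffer through the "trace_marker" file
 * (file name assumed here), e.g.:
 *
 *      # echo hello > /sys/kernel/tracing/trace_marker
 *
 * which records a TRACE_PRINT event containing "hello". If the copy from
 * user space faults, the event text becomes "<faulted>" instead.
 */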
7251
7252 /* Limit it for now to 3K (including tag) */
7253 #define RAW_DATA_MAX_SIZE (1024*3)
7254
7255 static ssize_t
7256 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7257                                         size_t cnt, loff_t *fpos)
7258 {
7259         struct trace_array *tr = filp->private_data;
7260         struct ring_buffer_event *event;
7261         struct trace_buffer *buffer;
7262         struct raw_data_entry *entry;
7263         ssize_t written;
7264         int size;
7265         int len;
7266
7267 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7268
7269         if (tracing_disabled)
7270                 return -EINVAL;
7271
7272         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7273                 return -EINVAL;
7274
7275         /* The marker must at least have a tag id */
7276         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7277                 return -EINVAL;
7278
7279         if (cnt > TRACE_BUF_SIZE)
7280                 cnt = TRACE_BUF_SIZE;
7281
7282         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7283
7284         size = sizeof(*entry) + cnt;
7285         if (cnt < FAULT_SIZE_ID)
7286                 size += FAULT_SIZE_ID - cnt;
7287
7288         buffer = tr->array_buffer.buffer;
7289         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7290                                             tracing_gen_ctx());
7291         if (!event)
7292                 /* Ring buffer disabled, return as if not open for write */
7293                 return -EBADF;
7294
7295         entry = ring_buffer_event_data(event);
7296
7297         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7298         if (len) {
7299                 entry->id = -1;
7300                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7301                 written = -EFAULT;
7302         } else
7303                 written = cnt;
7304
7305         __buffer_unlock_commit(buffer, event);
7306
7307         return written;
7308 }
7309
7310 static int tracing_clock_show(struct seq_file *m, void *v)
7311 {
7312         struct trace_array *tr = m->private;
7313         int i;
7314
7315         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7316                 seq_printf(m,
7317                         "%s%s%s%s", i ? " " : "",
7318                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7319                         i == tr->clock_id ? "]" : "");
7320         seq_putc(m, '\n');
7321
7322         return 0;
7323 }
7324
7325 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7326 {
7327         int i;
7328
7329         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7330                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7331                         break;
7332         }
7333         if (i == ARRAY_SIZE(trace_clocks))
7334                 return -EINVAL;
7335
7336         mutex_lock(&trace_types_lock);
7337
7338         tr->clock_id = i;
7339
7340         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7341
7342         /*
7343          * New clock may not be consistent with the previous clock.
7344          * Reset the buffer so that it doesn't have incomparable timestamps.
7345          */
7346         tracing_reset_online_cpus(&tr->array_buffer);
7347
7348 #ifdef CONFIG_TRACER_MAX_TRACE
7349         if (tr->max_buffer.buffer)
7350                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7351         tracing_reset_online_cpus(&tr->max_buffer);
7352 #endif
7353
7354         mutex_unlock(&trace_types_lock);
7355
7356         return 0;
7357 }
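/*
 * Illustrative example (not from the original source): the clock used to
 * timestamp events can be inspected and changed through the "trace_clock"
 * file (file name assumed here), backed by the show/write handlers around
 * this function:
 *
 *      # cat /sys/kernel/tracing/trace_clock
 *      [local] global counter uptime ...
 *      # echo global > /sys/kernel/tracing/trace_clock
 *
 * Note that changing the clock resets the ring buffer, as done above,
 * so previously recorded events are lost.
 */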
7358
7359 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7360                                    size_t cnt, loff_t *fpos)
7361 {
7362         struct seq_file *m = filp->private_data;
7363         struct trace_array *tr = m->private;
7364         char buf[64];
7365         const char *clockstr;
7366         int ret;
7367
7368         if (cnt >= sizeof(buf))
7369                 return -EINVAL;
7370
7371         if (copy_from_user(buf, ubuf, cnt))
7372                 return -EFAULT;
7373
7374         buf[cnt] = 0;
7375
7376         clockstr = strstrip(buf);
7377
7378         ret = tracing_set_clock(tr, clockstr);
7379         if (ret)
7380                 return ret;
7381
7382         *fpos += cnt;
7383
7384         return cnt;
7385 }
7386
7387 static int tracing_clock_open(struct inode *inode, struct file *file)
7388 {
7389         struct trace_array *tr = inode->i_private;
7390         int ret;
7391
7392         ret = tracing_check_open_get_tr(tr);
7393         if (ret)
7394                 return ret;
7395
7396         ret = single_open(file, tracing_clock_show, inode->i_private);
7397         if (ret < 0)
7398                 trace_array_put(tr);
7399
7400         return ret;
7401 }
7402
7403 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7404 {
7405         struct trace_array *tr = m->private;
7406
7407         mutex_lock(&trace_types_lock);
7408
7409         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7410                 seq_puts(m, "delta [absolute]\n");
7411         else
7412                 seq_puts(m, "[delta] absolute\n");
7413
7414         mutex_unlock(&trace_types_lock);
7415
7416         return 0;
7417 }
7418
7419 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7420 {
7421         struct trace_array *tr = inode->i_private;
7422         int ret;
7423
7424         ret = tracing_check_open_get_tr(tr);
7425         if (ret)
7426                 return ret;
7427
7428         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7429         if (ret < 0)
7430                 trace_array_put(tr);
7431
7432         return ret;
7433 }
7434
7435 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7436 {
7437         if (rbe == this_cpu_read(trace_buffered_event))
7438                 return ring_buffer_time_stamp(buffer);
7439
7440         return ring_buffer_event_time_stamp(buffer, rbe);
7441 }
7442
7443 /*
7444  * Enable or disable using the per CPU trace_buffered_event when possible.
7445  */
7446 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7447 {
7448         int ret = 0;
7449
7450         mutex_lock(&trace_types_lock);
7451
7452         if (set && tr->no_filter_buffering_ref++)
7453                 goto out;
7454
7455         if (!set) {
7456                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7457                         ret = -EINVAL;
7458                         goto out;
7459                 }
7460
7461                 --tr->no_filter_buffering_ref;
7462         }
7463  out:
7464         mutex_unlock(&trace_types_lock);
7465
7466         return ret;
7467 }
7468
7469 struct ftrace_buffer_info {
7470         struct trace_iterator   iter;
7471         void                    *spare;
7472         unsigned int            spare_cpu;
7473         unsigned int            read;
7474 };
7475
7476 #ifdef CONFIG_TRACER_SNAPSHOT
7477 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7478 {
7479         struct trace_array *tr = inode->i_private;
7480         struct trace_iterator *iter;
7481         struct seq_file *m;
7482         int ret;
7483
7484         ret = tracing_check_open_get_tr(tr);
7485         if (ret)
7486                 return ret;
7487
7488         if (file->f_mode & FMODE_READ) {
7489                 iter = __tracing_open(inode, file, true);
7490                 if (IS_ERR(iter))
7491                         ret = PTR_ERR(iter);
7492         } else {
7493                 /* Writes still need the seq_file to hold the private data */
7494                 ret = -ENOMEM;
7495                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7496                 if (!m)
7497                         goto out;
7498                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7499                 if (!iter) {
7500                         kfree(m);
7501                         goto out;
7502                 }
7503                 ret = 0;
7504
7505                 iter->tr = tr;
7506                 iter->array_buffer = &tr->max_buffer;
7507                 iter->cpu_file = tracing_get_cpu(inode);
7508                 m->private = iter;
7509                 file->private_data = m;
7510         }
7511 out:
7512         if (ret < 0)
7513                 trace_array_put(tr);
7514
7515         return ret;
7516 }
7517
7518 static ssize_t
7519 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7520                        loff_t *ppos)
7521 {
7522         struct seq_file *m = filp->private_data;
7523         struct trace_iterator *iter = m->private;
7524         struct trace_array *tr = iter->tr;
7525         unsigned long val;
7526         int ret;
7527
7528         ret = tracing_update_buffers();
7529         if (ret < 0)
7530                 return ret;
7531
7532         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7533         if (ret)
7534                 return ret;
7535
7536         mutex_lock(&trace_types_lock);
7537
7538         if (tr->current_trace->use_max_tr) {
7539                 ret = -EBUSY;
7540                 goto out;
7541         }
7542
7543         local_irq_disable();
7544         arch_spin_lock(&tr->max_lock);
7545         if (tr->cond_snapshot)
7546                 ret = -EBUSY;
7547         arch_spin_unlock(&tr->max_lock);
7548         local_irq_enable();
7549         if (ret)
7550                 goto out;
7551
7552         switch (val) {
7553         case 0:
7554                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7555                         ret = -EINVAL;
7556                         break;
7557                 }
7558                 if (tr->allocated_snapshot)
7559                         free_snapshot(tr);
7560                 break;
7561         case 1:
7562 /* Only allow per-cpu swap if the ring buffer supports it */
7563 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7564                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7565                         ret = -EINVAL;
7566                         break;
7567                 }
7568 #endif
7569                 if (tr->allocated_snapshot)
7570                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7571                                         &tr->array_buffer, iter->cpu_file);
7572                 else
7573                         ret = tracing_alloc_snapshot_instance(tr);
7574                 if (ret < 0)
7575                         break;
7576                 local_irq_disable();
7577                 /* Now, we're going to swap */
7578                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7579                         update_max_tr(tr, current, smp_processor_id(), NULL);
7580                 else
7581                         update_max_tr_single(tr, current, iter->cpu_file);
7582                 local_irq_enable();
7583                 break;
7584         default:
7585                 if (tr->allocated_snapshot) {
7586                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7587                                 tracing_reset_online_cpus(&tr->max_buffer);
7588                         else
7589                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7590                 }
7591                 break;
7592         }
7593
7594         if (ret >= 0) {
7595                 *ppos += cnt;
7596                 ret = cnt;
7597         }
7598 out:
7599         mutex_unlock(&trace_types_lock);
7600         return ret;
7601 }
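/*
 * Summary of the value semantics handled above (added for clarity):
 *   0  - free the snapshot buffer if it is allocated (all-CPUs file only)
 *   1  - allocate the snapshot buffer if needed and swap it with the
 *        live buffer (per-CPU swap only with CONFIG_RING_BUFFER_ALLOW_SWAP)
 *   >1 - clear the contents of the snapshot buffer without freeing it
 */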
7602
7603 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7604 {
7605         struct seq_file *m = file->private_data;
7606         int ret;
7607
7608         ret = tracing_release(inode, file);
7609
7610         if (file->f_mode & FMODE_READ)
7611                 return ret;
7612
7613         /* If write only, the seq_file is just a stub */
7614         if (m)
7615                 kfree(m->private);
7616         kfree(m);
7617
7618         return 0;
7619 }
7620
7621 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7622 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7623                                     size_t count, loff_t *ppos);
7624 static int tracing_buffers_release(struct inode *inode, struct file *file);
7625 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7626                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7627
7628 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7629 {
7630         struct ftrace_buffer_info *info;
7631         int ret;
7632
7633         /* The following checks for tracefs lockdown */
7634         ret = tracing_buffers_open(inode, filp);
7635         if (ret < 0)
7636                 return ret;
7637
7638         info = filp->private_data;
7639
7640         if (info->iter.trace->use_max_tr) {
7641                 tracing_buffers_release(inode, filp);
7642                 return -EBUSY;
7643         }
7644
7645         info->iter.snapshot = true;
7646         info->iter.array_buffer = &info->iter.tr->max_buffer;
7647
7648         return ret;
7649 }
7650
7651 #endif /* CONFIG_TRACER_SNAPSHOT */
7652
7653
7654 static const struct file_operations tracing_thresh_fops = {
7655         .open           = tracing_open_generic,
7656         .read           = tracing_thresh_read,
7657         .write          = tracing_thresh_write,
7658         .llseek         = generic_file_llseek,
7659 };
7660
7661 #ifdef CONFIG_TRACER_MAX_TRACE
7662 static const struct file_operations tracing_max_lat_fops = {
7663         .open           = tracing_open_generic,
7664         .read           = tracing_max_lat_read,
7665         .write          = tracing_max_lat_write,
7666         .llseek         = generic_file_llseek,
7667 };
7668 #endif
7669
7670 static const struct file_operations set_tracer_fops = {
7671         .open           = tracing_open_generic,
7672         .read           = tracing_set_trace_read,
7673         .write          = tracing_set_trace_write,
7674         .llseek         = generic_file_llseek,
7675 };
7676
7677 static const struct file_operations tracing_pipe_fops = {
7678         .open           = tracing_open_pipe,
7679         .poll           = tracing_poll_pipe,
7680         .read           = tracing_read_pipe,
7681         .splice_read    = tracing_splice_read_pipe,
7682         .release        = tracing_release_pipe,
7683         .llseek         = no_llseek,
7684 };
7685
7686 static const struct file_operations tracing_entries_fops = {
7687         .open           = tracing_open_generic_tr,
7688         .read           = tracing_entries_read,
7689         .write          = tracing_entries_write,
7690         .llseek         = generic_file_llseek,
7691         .release        = tracing_release_generic_tr,
7692 };
7693
7694 static const struct file_operations tracing_total_entries_fops = {
7695         .open           = tracing_open_generic_tr,
7696         .read           = tracing_total_entries_read,
7697         .llseek         = generic_file_llseek,
7698         .release        = tracing_release_generic_tr,
7699 };
7700
7701 static const struct file_operations tracing_free_buffer_fops = {
7702         .open           = tracing_open_generic_tr,
7703         .write          = tracing_free_buffer_write,
7704         .release        = tracing_free_buffer_release,
7705 };
7706
7707 static const struct file_operations tracing_mark_fops = {
7708         .open           = tracing_mark_open,
7709         .write          = tracing_mark_write,
7710         .release        = tracing_release_generic_tr,
7711 };
7712
7713 static const struct file_operations tracing_mark_raw_fops = {
7714         .open           = tracing_mark_open,
7715         .write          = tracing_mark_raw_write,
7716         .release        = tracing_release_generic_tr,
7717 };
7718
7719 static const struct file_operations trace_clock_fops = {
7720         .open           = tracing_clock_open,
7721         .read           = seq_read,
7722         .llseek         = seq_lseek,
7723         .release        = tracing_single_release_tr,
7724         .write          = tracing_clock_write,
7725 };
7726
7727 static const struct file_operations trace_time_stamp_mode_fops = {
7728         .open           = tracing_time_stamp_mode_open,
7729         .read           = seq_read,
7730         .llseek         = seq_lseek,
7731         .release        = tracing_single_release_tr,
7732 };
7733
7734 #ifdef CONFIG_TRACER_SNAPSHOT
7735 static const struct file_operations snapshot_fops = {
7736         .open           = tracing_snapshot_open,
7737         .read           = seq_read,
7738         .write          = tracing_snapshot_write,
7739         .llseek         = tracing_lseek,
7740         .release        = tracing_snapshot_release,
7741 };
7742
7743 static const struct file_operations snapshot_raw_fops = {
7744         .open           = snapshot_raw_open,
7745         .read           = tracing_buffers_read,
7746         .release        = tracing_buffers_release,
7747         .splice_read    = tracing_buffers_splice_read,
7748         .llseek         = no_llseek,
7749 };
7750
7751 #endif /* CONFIG_TRACER_SNAPSHOT */
7752
7753 /*
7754  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7755  * @filp: The active open file structure
7756  * @ubuf: The user space buffer holding the value to be written
7757  * @cnt: The number of bytes in the user buffer
7758  * @ppos: The current "file" position
7759  *
7760  * This function implements the write interface for a struct trace_min_max_param.
7761  * The filp->private_data must point to a trace_min_max_param structure that
7762  * defines where to write the value, the min and the max acceptable values,
7763  * and a lock to protect the write.
7764  */
7765 static ssize_t
7766 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7767 {
7768         struct trace_min_max_param *param = filp->private_data;
7769         u64 val;
7770         int err;
7771
7772         if (!param)
7773                 return -EFAULT;
7774
7775         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7776         if (err)
7777                 return err;
7778
7779         if (param->lock)
7780                 mutex_lock(param->lock);
7781
7782         if (param->min && val < *param->min)
7783                 err = -EINVAL;
7784
7785         if (param->max && val > *param->max)
7786                 err = -EINVAL;
7787
7788         if (!err)
7789                 *param->val = val;
7790
7791         if (param->lock)
7792                 mutex_unlock(param->lock);
7793
7794         if (err)
7795                 return err;
7796
7797         return cnt;
7798 }
7799
7800 /*
7801  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7802  * @filp: The active open file structure
7803  * @ubuf: The userspace provided buffer to read value into
7804  * @cnt: The maximum number of bytes to read
7805  * @ppos: The current "file" position
7806  *
7807  * This function implements the read interface for a struct trace_min_max_param.
7808  * The filp->private_data must point to a trace_min_max_param struct with valid
7809  * data.
7810  */
7811 static ssize_t
7812 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7813 {
7814         struct trace_min_max_param *param = filp->private_data;
7815         char buf[U64_STR_SIZE];
7816         int len;
7817         u64 val;
7818
7819         if (!param)
7820                 return -EFAULT;
7821
7822         val = *param->val;
7823
7824         if (cnt > sizeof(buf))
7825                 cnt = sizeof(buf);
7826
7827         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7828
7829         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7830 }
7831
7832 const struct file_operations trace_min_max_fops = {
7833         .open           = tracing_open_generic,
7834         .read           = trace_min_max_read,
7835         .write          = trace_min_max_write,
7836 };
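
/*
 * Illustrative sketch only (not used in this file): a caller could wire a
 * u64 tunable to tracefs through the interface above roughly like this,
 * where "my_tunable", the my_* variables and the parent dentry are
 * hypothetical names:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_tunable", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */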
7837
7838 #define TRACING_LOG_ERRS_MAX    8
7839 #define TRACING_LOG_LOC_MAX     128
7840
7841 #define CMD_PREFIX "  Command: "
7842
7843 struct err_info {
7844         const char      **errs; /* ptr to loc-specific array of err strings */
7845         u8              type;   /* index into errs -> specific err string */
7846         u16             pos;    /* caret position */
7847         u64             ts;
7848 };
7849
7850 struct tracing_log_err {
7851         struct list_head        list;
7852         struct err_info         info;
7853         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7854         char                    *cmd;                     /* what caused err */
7855 };
7856
7857 static DEFINE_MUTEX(tracing_err_log_lock);
7858
7859 static struct tracing_log_err *alloc_tracing_log_err(int len)
7860 {
7861         struct tracing_log_err *err;
7862
7863         err = kzalloc(sizeof(*err), GFP_KERNEL);
7864         if (!err)
7865                 return ERR_PTR(-ENOMEM);
7866
7867         err->cmd = kzalloc(len, GFP_KERNEL);
7868         if (!err->cmd) {
7869                 kfree(err);
7870                 return ERR_PTR(-ENOMEM);
7871         }
7872
7873         return err;
7874 }
7875
7876 static void free_tracing_log_err(struct tracing_log_err *err)
7877 {
7878         kfree(err->cmd);
7879         kfree(err);
7880 }
7881
7882 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7883                                                    int len)
7884 {
7885         struct tracing_log_err *err;
7886         char *cmd;
7887
7888         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7889                 err = alloc_tracing_log_err(len);
7890                 if (PTR_ERR(err) != -ENOMEM)
7891                         tr->n_err_log_entries++;
7892
7893                 return err;
7894         }
7895         cmd = kzalloc(len, GFP_KERNEL);
7896         if (!cmd)
7897                 return ERR_PTR(-ENOMEM);
7898         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7899         kfree(err->cmd);
7900         err->cmd = cmd;
7901         list_del(&err->list);
7902
7903         return err;
7904 }
7905
7906 /**
7907  * err_pos - find the position of a string within a command for error careting
7908  * @cmd: The tracing command that caused the error
7909  * @str: The string to position the caret at within @cmd
7910  *
7911  * Finds the position of the first occurrence of @str within @cmd.  The
7912  * return value can be passed to tracing_log_err() for caret placement
7913  * within @cmd.
7914  *
7915  * Returns the index within @cmd of the first occurrence of @str or 0
7916  * if @str was not found.
7917  */
7918 unsigned int err_pos(char *cmd, const char *str)
7919 {
7920         char *found;
7921
7922         if (WARN_ON(!strlen(cmd)))
7923                 return 0;
7924
7925         found = strstr(cmd, str);
7926         if (found)
7927                 return found - cmd;
7928
7929         return 0;
7930 }
7931
7932 /**
7933  * tracing_log_err - write an error to the tracing error log
7934  * @tr: The associated trace array for the error (NULL for top level array)
7935  * @loc: A string describing where the error occurred
7936  * @cmd: The tracing command that caused the error
7937  * @errs: The array of loc-specific static error strings
7938  * @type: The index into errs[], which produces the specific static err string
7939  * @pos: The position the caret should be placed in the cmd
7940  *
7941  * Writes an error into tracing/error_log of the form:
7942  *
7943  * <loc>: error: <text>
7944  *   Command: <cmd>
7945  *              ^
7946  *
7947  * tracing/error_log is a small log file containing the last
7948  * TRACING_LOG_ERRS_MAX (8) errors.  Memory for errors isn't allocated
7949  * unless there has been a tracing error; the error log can be cleared,
7950  * and its memory freed, by writing the empty string to it in truncation
7951  * mode, i.e. echo > tracing/error_log.
7952  *
7953  * NOTE: the @errs array along with the @type param are used to
7954  * produce a static error string - this string is not copied and saved
7955  * when the error is logged - only a pointer to it is saved.  See
7956  * existing callers for examples of how static strings are typically
7957  * defined for use with tracing_log_err().
7958  */
7959 void tracing_log_err(struct trace_array *tr,
7960                      const char *loc, const char *cmd,
7961                      const char **errs, u8 type, u16 pos)
7962 {
7963         struct tracing_log_err *err;
7964         int len = 0;
7965
7966         if (!tr)
7967                 tr = &global_trace;
7968
7969         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7970
7971         mutex_lock(&tracing_err_log_lock);
7972         err = get_tracing_log_err(tr, len);
7973         if (PTR_ERR(err) == -ENOMEM) {
7974                 mutex_unlock(&tracing_err_log_lock);
7975                 return;
7976         }
7977
7978         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7979         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7980
7981         err->info.errs = errs;
7982         err->info.type = type;
7983         err->info.pos = pos;
7984         err->info.ts = local_clock();
7985
7986         list_add_tail(&err->list, &tr->err_log);
7987         mutex_unlock(&tracing_err_log_lock);
7988 }
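
/*
 * Illustrative sketch only: callers typically keep a static array of error
 * strings indexed by an enum and use err_pos() to place the caret within
 * the offending command.  The names below are hypothetical:
 *
 *	static const char *my_errs[] = {
 *		"Duplicate field name",
 *		"Unknown keyword",
 *	};
 *	enum { MY_ERR_DUP_FIELD, MY_ERR_UNKNOWN_KEYWORD };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			MY_ERR_UNKNOWN_KEYWORD, err_pos(cmd, "badword"));
 */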
7989
7990 static void clear_tracing_err_log(struct trace_array *tr)
7991 {
7992         struct tracing_log_err *err, *next;
7993
7994         mutex_lock(&tracing_err_log_lock);
7995         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7996                 list_del(&err->list);
7997                 free_tracing_log_err(err);
7998         }
7999
8000         tr->n_err_log_entries = 0;
8001         mutex_unlock(&tracing_err_log_lock);
8002 }
8003
8004 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8005 {
8006         struct trace_array *tr = m->private;
8007
8008         mutex_lock(&tracing_err_log_lock);
8009
8010         return seq_list_start(&tr->err_log, *pos);
8011 }
8012
8013 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8014 {
8015         struct trace_array *tr = m->private;
8016
8017         return seq_list_next(v, &tr->err_log, pos);
8018 }
8019
8020 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8021 {
8022         mutex_unlock(&tracing_err_log_lock);
8023 }
8024
8025 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8026 {
8027         u16 i;
8028
8029         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8030                 seq_putc(m, ' ');
8031         for (i = 0; i < pos; i++)
8032                 seq_putc(m, ' ');
8033         seq_puts(m, "^\n");
8034 }
8035
8036 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8037 {
8038         struct tracing_log_err *err = v;
8039
8040         if (err) {
8041                 const char *err_text = err->info.errs[err->info.type];
8042                 u64 sec = err->info.ts;
8043                 u32 nsec;
8044
8045                 nsec = do_div(sec, NSEC_PER_SEC);
8046                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8047                            err->loc, err_text);
8048                 seq_printf(m, "%s", err->cmd);
8049                 tracing_err_log_show_pos(m, err->info.pos);
8050         }
8051
8052         return 0;
8053 }
8054
8055 static const struct seq_operations tracing_err_log_seq_ops = {
8056         .start  = tracing_err_log_seq_start,
8057         .next   = tracing_err_log_seq_next,
8058         .stop   = tracing_err_log_seq_stop,
8059         .show   = tracing_err_log_seq_show
8060 };
8061
8062 static int tracing_err_log_open(struct inode *inode, struct file *file)
8063 {
8064         struct trace_array *tr = inode->i_private;
8065         int ret = 0;
8066
8067         ret = tracing_check_open_get_tr(tr);
8068         if (ret)
8069                 return ret;
8070
8071         /* If this file was opened for write, then erase contents */
8072         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8073                 clear_tracing_err_log(tr);
8074
8075         if (file->f_mode & FMODE_READ) {
8076                 ret = seq_open(file, &tracing_err_log_seq_ops);
8077                 if (!ret) {
8078                         struct seq_file *m = file->private_data;
8079                         m->private = tr;
8080                 } else {
8081                         trace_array_put(tr);
8082                 }
8083         }
8084         return ret;
8085 }
8086
8087 static ssize_t tracing_err_log_write(struct file *file,
8088                                      const char __user *buffer,
8089                                      size_t count, loff_t *ppos)
8090 {
8091         return count;
8092 }
8093
8094 static int tracing_err_log_release(struct inode *inode, struct file *file)
8095 {
8096         struct trace_array *tr = inode->i_private;
8097
8098         trace_array_put(tr);
8099
8100         if (file->f_mode & FMODE_READ)
8101                 seq_release(inode, file);
8102
8103         return 0;
8104 }
8105
8106 static const struct file_operations tracing_err_log_fops = {
8107         .open           = tracing_err_log_open,
8108         .write          = tracing_err_log_write,
8109         .read           = seq_read,
8110         .llseek         = seq_lseek,
8111         .release        = tracing_err_log_release,
8112 };
8113
8114 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8115 {
8116         struct trace_array *tr = inode->i_private;
8117         struct ftrace_buffer_info *info;
8118         int ret;
8119
8120         ret = tracing_check_open_get_tr(tr);
8121         if (ret)
8122                 return ret;
8123
8124         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8125         if (!info) {
8126                 trace_array_put(tr);
8127                 return -ENOMEM;
8128         }
8129
8130         mutex_lock(&trace_types_lock);
8131
8132         info->iter.tr           = tr;
8133         info->iter.cpu_file     = tracing_get_cpu(inode);
8134         info->iter.trace        = tr->current_trace;
8135         info->iter.array_buffer = &tr->array_buffer;
8136         info->spare             = NULL;
8137         /* Force reading ring buffer for first read */
8138         info->read              = (unsigned int)-1;
8139
8140         filp->private_data = info;
8141
8142         tr->trace_ref++;
8143
8144         mutex_unlock(&trace_types_lock);
8145
8146         ret = nonseekable_open(inode, filp);
8147         if (ret < 0)
8148                 trace_array_put(tr);
8149
8150         return ret;
8151 }
8152
8153 static __poll_t
8154 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8155 {
8156         struct ftrace_buffer_info *info = filp->private_data;
8157         struct trace_iterator *iter = &info->iter;
8158
8159         return trace_poll(iter, filp, poll_table);
8160 }
8161
8162 static ssize_t
8163 tracing_buffers_read(struct file *filp, char __user *ubuf,
8164                      size_t count, loff_t *ppos)
8165 {
8166         struct ftrace_buffer_info *info = filp->private_data;
8167         struct trace_iterator *iter = &info->iter;
8168         ssize_t ret = 0;
8169         ssize_t size;
8170
8171         if (!count)
8172                 return 0;
8173
8174 #ifdef CONFIG_TRACER_MAX_TRACE
8175         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8176                 return -EBUSY;
8177 #endif
8178
8179         if (!info->spare) {
8180                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8181                                                           iter->cpu_file);
8182                 if (IS_ERR(info->spare)) {
8183                         ret = PTR_ERR(info->spare);
8184                         info->spare = NULL;
8185                 } else {
8186                         info->spare_cpu = iter->cpu_file;
8187                 }
8188         }
8189         if (!info->spare)
8190                 return ret;
8191
8192         /* Do we have previous read data to read? */
8193         if (info->read < PAGE_SIZE)
8194                 goto read;
8195
8196  again:
8197         trace_access_lock(iter->cpu_file);
8198         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8199                                     &info->spare,
8200                                     count,
8201                                     iter->cpu_file, 0);
8202         trace_access_unlock(iter->cpu_file);
8203
8204         if (ret < 0) {
8205                 if (trace_empty(iter)) {
8206                         if ((filp->f_flags & O_NONBLOCK))
8207                                 return -EAGAIN;
8208
8209                         ret = wait_on_pipe(iter, 0);
8210                         if (ret)
8211                                 return ret;
8212
8213                         goto again;
8214                 }
8215                 return 0;
8216         }
8217
8218         info->read = 0;
8219  read:
8220         size = PAGE_SIZE - info->read;
8221         if (size > count)
8222                 size = count;
8223
8224         ret = copy_to_user(ubuf, info->spare + info->read, size);
8225         if (ret == size)
8226                 return -EFAULT;
8227
8228         size -= ret;
8229
8230         *ppos += size;
8231         info->read += size;
8232
8233         return size;
8234 }
8235
8236 static int tracing_buffers_release(struct inode *inode, struct file *file)
8237 {
8238         struct ftrace_buffer_info *info = file->private_data;
8239         struct trace_iterator *iter = &info->iter;
8240
8241         mutex_lock(&trace_types_lock);
8242
8243         iter->tr->trace_ref--;
8244
8245         __trace_array_put(iter->tr);
8246
8247         iter->wait_index++;
8248         /* Make sure the waiters see the new wait_index */
8249         smp_wmb();
8250
8251         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8252
8253         if (info->spare)
8254                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8255                                            info->spare_cpu, info->spare);
8256         kvfree(info);
8257
8258         mutex_unlock(&trace_types_lock);
8259
8260         return 0;
8261 }
8262
8263 struct buffer_ref {
8264         struct trace_buffer     *buffer;
8265         void                    *page;
8266         int                     cpu;
8267         refcount_t              refcount;
8268 };
8269
8270 static void buffer_ref_release(struct buffer_ref *ref)
8271 {
8272         if (!refcount_dec_and_test(&ref->refcount))
8273                 return;
8274         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8275         kfree(ref);
8276 }
8277
8278 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8279                                     struct pipe_buffer *buf)
8280 {
8281         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8282
8283         buffer_ref_release(ref);
8284         buf->private = 0;
8285 }
8286
8287 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8288                                 struct pipe_buffer *buf)
8289 {
8290         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8291
8292         if (refcount_read(&ref->refcount) > INT_MAX/2)
8293                 return false;
8294
8295         refcount_inc(&ref->refcount);
8296         return true;
8297 }
8298
8299 /* Pipe buffer operations for a buffer. */
8300 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8301         .release                = buffer_pipe_buf_release,
8302         .get                    = buffer_pipe_buf_get,
8303 };
8304
8305 /*
8306  * Callback from splice_to_pipe(): release any pages still referenced
8307  * at the end of the spd if we errored out while filling the pipe.
8308  */
8309 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8310 {
8311         struct buffer_ref *ref =
8312                 (struct buffer_ref *)spd->partial[i].private;
8313
8314         buffer_ref_release(ref);
8315         spd->partial[i].private = 0;
8316 }
8317
8318 static ssize_t
8319 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8320                             struct pipe_inode_info *pipe, size_t len,
8321                             unsigned int flags)
8322 {
8323         struct ftrace_buffer_info *info = file->private_data;
8324         struct trace_iterator *iter = &info->iter;
8325         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8326         struct page *pages_def[PIPE_DEF_BUFFERS];
8327         struct splice_pipe_desc spd = {
8328                 .pages          = pages_def,
8329                 .partial        = partial_def,
8330                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8331                 .ops            = &buffer_pipe_buf_ops,
8332                 .spd_release    = buffer_spd_release,
8333         };
8334         struct buffer_ref *ref;
8335         int entries, i;
8336         ssize_t ret = 0;
8337
8338 #ifdef CONFIG_TRACER_MAX_TRACE
8339         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8340                 return -EBUSY;
8341 #endif
8342
8343         if (*ppos & (PAGE_SIZE - 1))
8344                 return -EINVAL;
8345
8346         if (len & (PAGE_SIZE - 1)) {
8347                 if (len < PAGE_SIZE)
8348                         return -EINVAL;
8349                 len &= PAGE_MASK;
8350         }
8351
8352         if (splice_grow_spd(pipe, &spd))
8353                 return -ENOMEM;
8354
8355  again:
8356         trace_access_lock(iter->cpu_file);
8357         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8358
8359         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8360                 struct page *page;
8361                 int r;
8362
8363                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8364                 if (!ref) {
8365                         ret = -ENOMEM;
8366                         break;
8367                 }
8368
8369                 refcount_set(&ref->refcount, 1);
8370                 ref->buffer = iter->array_buffer->buffer;
8371                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8372                 if (IS_ERR(ref->page)) {
8373                         ret = PTR_ERR(ref->page);
8374                         ref->page = NULL;
8375                         kfree(ref);
8376                         break;
8377                 }
8378                 ref->cpu = iter->cpu_file;
8379
8380                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8381                                           len, iter->cpu_file, 1);
8382                 if (r < 0) {
8383                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8384                                                    ref->page);
8385                         kfree(ref);
8386                         break;
8387                 }
8388
8389                 page = virt_to_page(ref->page);
8390
8391                 spd.pages[i] = page;
8392                 spd.partial[i].len = PAGE_SIZE;
8393                 spd.partial[i].offset = 0;
8394                 spd.partial[i].private = (unsigned long)ref;
8395                 spd.nr_pages++;
8396                 *ppos += PAGE_SIZE;
8397
8398                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8399         }
8400
8401         trace_access_unlock(iter->cpu_file);
8402         spd.nr_pages = i;
8403
8404         /* did we read anything? */
8405         if (!spd.nr_pages) {
8406                 long wait_index;
8407
8408                 if (ret)
8409                         goto out;
8410
8411                 ret = -EAGAIN;
8412                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8413                         goto out;
8414
8415                 wait_index = READ_ONCE(iter->wait_index);
8416
8417                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8418                 if (ret)
8419                         goto out;
8420
8421                 /* No need to wait after waking up when tracing is off */
8422                 if (!tracer_tracing_is_on(iter->tr))
8423                         goto out;
8424
8425                 /* Make sure we see the new wait_index */
8426                 smp_rmb();
8427                 if (wait_index != iter->wait_index)
8428                         goto out;
8429
8430                 goto again;
8431         }
8432
8433         ret = splice_to_pipe(pipe, &spd);
8434 out:
8435         splice_shrink_spd(&spd);
8436
8437         return ret;
8438 }
8439
8440 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8441 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8442 {
8443         struct ftrace_buffer_info *info = file->private_data;
8444         struct trace_iterator *iter = &info->iter;
8445
8446         if (cmd)
8447                 return -ENOIOCTLCMD;
8448
8449         mutex_lock(&trace_types_lock);
8450
8451         iter->wait_index++;
8452         /* Make sure the waiters see the new wait_index */
8453         smp_wmb();
8454
8455         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8456
8457         mutex_unlock(&trace_types_lock);
8458         return 0;
8459 }
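
/*
 * Illustrative sketch only (user-space side): assuming tracefs is mounted
 * at /sys/kernel/tracing, another task can wake readers blocked on a
 * per-cpu trace_pipe_raw file with:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ioctl(fd, 0);	// cmd 0: wake up all waiters on this buffer
 */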
8460
8461 static const struct file_operations tracing_buffers_fops = {
8462         .open           = tracing_buffers_open,
8463         .read           = tracing_buffers_read,
8464         .poll           = tracing_buffers_poll,
8465         .release        = tracing_buffers_release,
8466         .splice_read    = tracing_buffers_splice_read,
8467         .unlocked_ioctl = tracing_buffers_ioctl,
8468         .llseek         = no_llseek,
8469 };
8470
8471 static ssize_t
8472 tracing_stats_read(struct file *filp, char __user *ubuf,
8473                    size_t count, loff_t *ppos)
8474 {
8475         struct inode *inode = file_inode(filp);
8476         struct trace_array *tr = inode->i_private;
8477         struct array_buffer *trace_buf = &tr->array_buffer;
8478         int cpu = tracing_get_cpu(inode);
8479         struct trace_seq *s;
8480         unsigned long cnt;
8481         unsigned long long t;
8482         unsigned long usec_rem;
8483
8484         s = kmalloc(sizeof(*s), GFP_KERNEL);
8485         if (!s)
8486                 return -ENOMEM;
8487
8488         trace_seq_init(s);
8489
8490         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8491         trace_seq_printf(s, "entries: %ld\n", cnt);
8492
8493         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8494         trace_seq_printf(s, "overrun: %ld\n", cnt);
8495
8496         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8497         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8498
8499         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8500         trace_seq_printf(s, "bytes: %ld\n", cnt);
8501
8502         if (trace_clocks[tr->clock_id].in_ns) {
8503                 /* local or global for trace_clock */
8504                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8505                 usec_rem = do_div(t, USEC_PER_SEC);
8506                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8507                                                                 t, usec_rem);
8508
8509                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8510                 usec_rem = do_div(t, USEC_PER_SEC);
8511                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8512         } else {
8513                 /* counter or tsc mode for trace_clock */
8514                 trace_seq_printf(s, "oldest event ts: %llu\n",
8515                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8516
8517                 trace_seq_printf(s, "now ts: %llu\n",
8518                                 ring_buffer_time_stamp(trace_buf->buffer));
8519         }
8520
8521         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8522         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8523
8524         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8525         trace_seq_printf(s, "read events: %ld\n", cnt);
8526
8527         count = simple_read_from_buffer(ubuf, count, ppos,
8528                                         s->buffer, trace_seq_used(s));
8529
8530         kfree(s);
8531
8532         return count;
8533 }
8534
8535 static const struct file_operations tracing_stats_fops = {
8536         .open           = tracing_open_generic_tr,
8537         .read           = tracing_stats_read,
8538         .llseek         = generic_file_llseek,
8539         .release        = tracing_release_generic_tr,
8540 };
8541
8542 #ifdef CONFIG_DYNAMIC_FTRACE
8543
8544 static ssize_t
8545 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8546                   size_t cnt, loff_t *ppos)
8547 {
8548         ssize_t ret;
8549         char *buf;
8550         int r;
8551
8552         /* 256 should be plenty to hold the amount needed */
8553         buf = kmalloc(256, GFP_KERNEL);
8554         if (!buf)
8555                 return -ENOMEM;
8556
8557         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8558                       ftrace_update_tot_cnt,
8559                       ftrace_number_of_pages,
8560                       ftrace_number_of_groups);
8561
8562         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8563         kfree(buf);
8564         return ret;
8565 }
8566
8567 static const struct file_operations tracing_dyn_info_fops = {
8568         .open           = tracing_open_generic,
8569         .read           = tracing_read_dyn_info,
8570         .llseek         = generic_file_llseek,
8571 };
8572 #endif /* CONFIG_DYNAMIC_FTRACE */
8573
8574 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8575 static void
8576 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8577                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8578                 void *data)
8579 {
8580         tracing_snapshot_instance(tr);
8581 }
8582
8583 static void
8584 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8585                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8586                       void *data)
8587 {
8588         struct ftrace_func_mapper *mapper = data;
8589         long *count = NULL;
8590
8591         if (mapper)
8592                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8593
8594         if (count) {
8595
8596                 if (*count <= 0)
8597                         return;
8598
8599                 (*count)--;
8600         }
8601
8602         tracing_snapshot_instance(tr);
8603 }
8604
8605 static int
8606 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8607                       struct ftrace_probe_ops *ops, void *data)
8608 {
8609         struct ftrace_func_mapper *mapper = data;
8610         long *count = NULL;
8611
8612         seq_printf(m, "%ps:", (void *)ip);
8613
8614         seq_puts(m, "snapshot");
8615
8616         if (mapper)
8617                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8618
8619         if (count)
8620                 seq_printf(m, ":count=%ld\n", *count);
8621         else
8622                 seq_puts(m, ":unlimited\n");
8623
8624         return 0;
8625 }
8626
8627 static int
8628 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8629                      unsigned long ip, void *init_data, void **data)
8630 {
8631         struct ftrace_func_mapper *mapper = *data;
8632
8633         if (!mapper) {
8634                 mapper = allocate_ftrace_func_mapper();
8635                 if (!mapper)
8636                         return -ENOMEM;
8637                 *data = mapper;
8638         }
8639
8640         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8641 }
8642
8643 static void
8644 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8645                      unsigned long ip, void *data)
8646 {
8647         struct ftrace_func_mapper *mapper = data;
8648
8649         if (!ip) {
8650                 if (!mapper)
8651                         return;
8652                 free_ftrace_func_mapper(mapper, NULL);
8653                 return;
8654         }
8655
8656         ftrace_func_mapper_remove_ip(mapper, ip);
8657 }
8658
8659 static struct ftrace_probe_ops snapshot_probe_ops = {
8660         .func                   = ftrace_snapshot,
8661         .print                  = ftrace_snapshot_print,
8662 };
8663
8664 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8665         .func                   = ftrace_count_snapshot,
8666         .print                  = ftrace_snapshot_print,
8667         .init                   = ftrace_snapshot_init,
8668         .free                   = ftrace_snapshot_free,
8669 };
8670
8671 static int
8672 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8673                                char *glob, char *cmd, char *param, int enable)
8674 {
8675         struct ftrace_probe_ops *ops;
8676         void *count = (void *)-1;
8677         char *number;
8678         int ret;
8679
8680         if (!tr)
8681                 return -ENODEV;
8682
8683         /* hash funcs only work with set_ftrace_filter */
8684         if (!enable)
8685                 return -EINVAL;
8686
8687         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8688
8689         if (glob[0] == '!')
8690                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8691
8692         if (!param)
8693                 goto out_reg;
8694
8695         number = strsep(&param, ":");
8696
8697         if (!strlen(number))
8698                 goto out_reg;
8699
8700         /*
8701          * We use the callback data field (which is a pointer)
8702          * as our counter.
8703          */
8704         ret = kstrtoul(number, 0, (unsigned long *)&count);
8705         if (ret)
8706                 return ret;
8707
8708  out_reg:
8709         ret = tracing_alloc_snapshot_instance(tr);
8710         if (ret < 0)
8711                 goto out;
8712
8713         ret = register_ftrace_function_probe(glob, tr, ops, count);
8714
8715  out:
8716         return ret < 0 ? ret : 0;
8717 }
8718
8719 static struct ftrace_func_command ftrace_snapshot_cmd = {
8720         .name                   = "snapshot",
8721         .func                   = ftrace_trace_snapshot_callback,
8722 };
8723
8724 static __init int register_snapshot_cmd(void)
8725 {
8726         return register_ftrace_command(&ftrace_snapshot_cmd);
8727 }
8728 #else
8729 static inline __init int register_snapshot_cmd(void) { return 0; }
8730 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8731
8732 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8733 {
8734         if (WARN_ON(!tr->dir))
8735                 return ERR_PTR(-ENODEV);
8736
8737         /* Top directory uses NULL as the parent */
8738         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8739                 return NULL;
8740
8741         /* All sub buffers have a descriptor */
8742         return tr->dir;
8743 }
8744
8745 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8746 {
8747         struct dentry *d_tracer;
8748
8749         if (tr->percpu_dir)
8750                 return tr->percpu_dir;
8751
8752         d_tracer = tracing_get_dentry(tr);
8753         if (IS_ERR(d_tracer))
8754                 return NULL;
8755
8756         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8757
8758         MEM_FAIL(!tr->percpu_dir,
8759                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8760
8761         return tr->percpu_dir;
8762 }
8763
8764 static struct dentry *
8765 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8766                       void *data, long cpu, const struct file_operations *fops)
8767 {
8768         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8769
8770         if (ret) /* See tracing_get_cpu() */
8771                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8772         return ret;
8773 }
8774
8775 static void
8776 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8777 {
8778         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8779         struct dentry *d_cpu;
8780         char cpu_dir[30]; /* 30 characters should be more than enough */
8781
8782         if (!d_percpu)
8783                 return;
8784
8785         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8786         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8787         if (!d_cpu) {
8788                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8789                 return;
8790         }
8791
8792         /* per cpu trace_pipe */
8793         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8794                                 tr, cpu, &tracing_pipe_fops);
8795
8796         /* per cpu trace */
8797         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8798                                 tr, cpu, &tracing_fops);
8799
8800         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8801                                 tr, cpu, &tracing_buffers_fops);
8802
8803         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8804                                 tr, cpu, &tracing_stats_fops);
8805
8806         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8807                                 tr, cpu, &tracing_entries_fops);
8808
8809 #ifdef CONFIG_TRACER_SNAPSHOT
8810         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8811                                 tr, cpu, &snapshot_fops);
8812
8813         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8814                                 tr, cpu, &snapshot_raw_fops);
8815 #endif
8816 }
8817
8818 #ifdef CONFIG_FTRACE_SELFTEST
8819 /* Let selftest have access to static functions in this file */
8820 #include "trace_selftest.c"
8821 #endif
8822
8823 static ssize_t
8824 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8825                         loff_t *ppos)
8826 {
8827         struct trace_option_dentry *topt = filp->private_data;
8828         char *buf;
8829
8830         if (topt->flags->val & topt->opt->bit)
8831                 buf = "1\n";
8832         else
8833                 buf = "0\n";
8834
8835         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8836 }
8837
8838 static ssize_t
8839 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8840                          loff_t *ppos)
8841 {
8842         struct trace_option_dentry *topt = filp->private_data;
8843         unsigned long val;
8844         int ret;
8845
8846         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8847         if (ret)
8848                 return ret;
8849
8850         if (val != 0 && val != 1)
8851                 return -EINVAL;
8852
8853         if (!!(topt->flags->val & topt->opt->bit) != val) {
8854                 mutex_lock(&trace_types_lock);
8855                 ret = __set_tracer_option(topt->tr, topt->flags,
8856                                           topt->opt, !val);
8857                 mutex_unlock(&trace_types_lock);
8858                 if (ret)
8859                         return ret;
8860         }
8861
8862         *ppos += cnt;
8863
8864         return cnt;
8865 }
8866
8867
8868 static const struct file_operations trace_options_fops = {
8869         .open = tracing_open_generic,
8870         .read = trace_options_read,
8871         .write = trace_options_write,
8872         .llseek = generic_file_llseek,
8873 };
8874
8875 /*
8876  * In order to pass in both the trace_array descriptor as well as the index
8877  * to the flag that the trace option file represents, the trace_array
8878  * has a character array of trace_flags_index[], which holds the index
8879  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8880  * The address of this character array is passed to the flag option file
8881  * read/write callbacks.
8882  *
8883  * In order to extract both the index and the trace_array descriptor,
8884  * get_tr_index() uses the following algorithm.
8885  *
8886  *   idx = *ptr;
8887  *
8888  * This works because the pointer is the address of one element of the
8889  * index array, and index[i] == i (remember, index[1] == 1).
8890  *
8891  * To get the trace_array descriptor, subtract that index from the
8892  * pointer to reach the start of the index array itself:
8893  *
8894  *   ptr - idx == &index[0]
8895  *
8896  * Then a simple container_of() from that pointer gets us to the
8897  * trace_array descriptor.
8898  */
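/*
 * Illustrative walk-through (hypothetical values): if @data points at
 * tr->trace_flags_index[3], then *(unsigned char *)data == 3, so
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address recovers tr; the option file then controls the (1 << 3) flag.
 */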
8899 static void get_tr_index(void *data, struct trace_array **ptr,
8900                          unsigned int *pindex)
8901 {
8902         *pindex = *(unsigned char *)data;
8903
8904         *ptr = container_of(data - *pindex, struct trace_array,
8905                             trace_flags_index);
8906 }
8907
8908 static ssize_t
8909 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8910                         loff_t *ppos)
8911 {
8912         void *tr_index = filp->private_data;
8913         struct trace_array *tr;
8914         unsigned int index;
8915         char *buf;
8916
8917         get_tr_index(tr_index, &tr, &index);
8918
8919         if (tr->trace_flags & (1 << index))
8920                 buf = "1\n";
8921         else
8922                 buf = "0\n";
8923
8924         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8925 }
8926
8927 static ssize_t
8928 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8929                          loff_t *ppos)
8930 {
8931         void *tr_index = filp->private_data;
8932         struct trace_array *tr;
8933         unsigned int index;
8934         unsigned long val;
8935         int ret;
8936
8937         get_tr_index(tr_index, &tr, &index);
8938
8939         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8940         if (ret)
8941                 return ret;
8942
8943         if (val != 0 && val != 1)
8944                 return -EINVAL;
8945
8946         mutex_lock(&event_mutex);
8947         mutex_lock(&trace_types_lock);
8948         ret = set_tracer_flag(tr, 1 << index, val);
8949         mutex_unlock(&trace_types_lock);
8950         mutex_unlock(&event_mutex);
8951
8952         if (ret < 0)
8953                 return ret;
8954
8955         *ppos += cnt;
8956
8957         return cnt;
8958 }
8959
8960 static const struct file_operations trace_options_core_fops = {
8961         .open = tracing_open_generic,
8962         .read = trace_options_core_read,
8963         .write = trace_options_core_write,
8964         .llseek = generic_file_llseek,
8965 };
8966
8967 struct dentry *trace_create_file(const char *name,
8968                                  umode_t mode,
8969                                  struct dentry *parent,
8970                                  void *data,
8971                                  const struct file_operations *fops)
8972 {
8973         struct dentry *ret;
8974
8975         ret = tracefs_create_file(name, mode, parent, data, fops);
8976         if (!ret)
8977                 pr_warn("Could not create tracefs '%s' entry\n", name);
8978
8979         return ret;
8980 }
8981
8982
8983 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8984 {
8985         struct dentry *d_tracer;
8986
8987         if (tr->options)
8988                 return tr->options;
8989
8990         d_tracer = tracing_get_dentry(tr);
8991         if (IS_ERR(d_tracer))
8992                 return NULL;
8993
8994         tr->options = tracefs_create_dir("options", d_tracer);
8995         if (!tr->options) {
8996                 pr_warn("Could not create tracefs directory 'options'\n");
8997                 return NULL;
8998         }
8999
9000         return tr->options;
9001 }
9002
9003 static void
9004 create_trace_option_file(struct trace_array *tr,
9005                          struct trace_option_dentry *topt,
9006                          struct tracer_flags *flags,
9007                          struct tracer_opt *opt)
9008 {
9009         struct dentry *t_options;
9010
9011         t_options = trace_options_init_dentry(tr);
9012         if (!t_options)
9013                 return;
9014
9015         topt->flags = flags;
9016         topt->opt = opt;
9017         topt->tr = tr;
9018
9019         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9020                                         t_options, topt, &trace_options_fops);
9021
9022 }
9023
9024 static void
9025 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9026 {
9027         struct trace_option_dentry *topts;
9028         struct trace_options *tr_topts;
9029         struct tracer_flags *flags;
9030         struct tracer_opt *opts;
9031         int cnt;
9032         int i;
9033
9034         if (!tracer)
9035                 return;
9036
9037         flags = tracer->flags;
9038
9039         if (!flags || !flags->opts)
9040                 return;
9041
9042         /*
9043          * If this is an instance, only create flags for tracers
9044          * the instance may have.
9045          */
9046         if (!trace_ok_for_array(tracer, tr))
9047                 return;
9048
9049         for (i = 0; i < tr->nr_topts; i++) {
9050                 /* Make sure there are no duplicate flags. */
9051                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9052                         return;
9053         }
9054
9055         opts = flags->opts;
9056
9057         for (cnt = 0; opts[cnt].name; cnt++)
9058                 ;
9059
9060         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9061         if (!topts)
9062                 return;
9063
9064         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9065                             GFP_KERNEL);
9066         if (!tr_topts) {
9067                 kfree(topts);
9068                 return;
9069         }
9070
9071         tr->topts = tr_topts;
9072         tr->topts[tr->nr_topts].tracer = tracer;
9073         tr->topts[tr->nr_topts].topts = topts;
9074         tr->nr_topts++;
9075
9076         for (cnt = 0; opts[cnt].name; cnt++) {
9077                 create_trace_option_file(tr, &topts[cnt], flags,
9078                                          &opts[cnt]);
9079                 MEM_FAIL(topts[cnt].entry == NULL,
9080                           "Failed to create trace option: %s",
9081                           opts[cnt].name);
9082         }
9083 }
9084
9085 static struct dentry *
9086 create_trace_option_core_file(struct trace_array *tr,
9087                               const char *option, long index)
9088 {
9089         struct dentry *t_options;
9090
9091         t_options = trace_options_init_dentry(tr);
9092         if (!t_options)
9093                 return NULL;
9094
9095         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9096                                  (void *)&tr->trace_flags_index[index],
9097                                  &trace_options_core_fops);
9098 }
9099
9100 static void create_trace_options_dir(struct trace_array *tr)
9101 {
9102         struct dentry *t_options;
9103         bool top_level = tr == &global_trace;
9104         int i;
9105
9106         t_options = trace_options_init_dentry(tr);
9107         if (!t_options)
9108                 return;
9109
9110         for (i = 0; trace_options[i]; i++) {
9111                 if (top_level ||
9112                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9113                         create_trace_option_core_file(tr, trace_options[i], i);
9114         }
9115 }
9116
9117 static ssize_t
9118 rb_simple_read(struct file *filp, char __user *ubuf,
9119                size_t cnt, loff_t *ppos)
9120 {
9121         struct trace_array *tr = filp->private_data;
9122         char buf[64];
9123         int r;
9124
9125         r = tracer_tracing_is_on(tr);
9126         r = sprintf(buf, "%d\n", r);
9127
9128         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9129 }
9130
9131 static ssize_t
9132 rb_simple_write(struct file *filp, const char __user *ubuf,
9133                 size_t cnt, loff_t *ppos)
9134 {
9135         struct trace_array *tr = filp->private_data;
9136         struct trace_buffer *buffer = tr->array_buffer.buffer;
9137         unsigned long val;
9138         int ret;
9139
9140         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9141         if (ret)
9142                 return ret;
9143
9144         if (buffer) {
9145                 mutex_lock(&trace_types_lock);
9146                 if (!!val == tracer_tracing_is_on(tr)) {
9147                         val = 0; /* do nothing */
9148                 } else if (val) {
9149                         tracer_tracing_on(tr);
9150                         if (tr->current_trace->start)
9151                                 tr->current_trace->start(tr);
9152                 } else {
9153                         tracer_tracing_off(tr);
9154                         if (tr->current_trace->stop)
9155                                 tr->current_trace->stop(tr);
9156                         /* Wake up any waiters */
9157                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9158                 }
9159                 mutex_unlock(&trace_types_lock);
9160         }
9161
9162         (*ppos)++;
9163
9164         return cnt;
9165 }
9166
9167 static const struct file_operations rb_simple_fops = {
9168         .open           = tracing_open_generic_tr,
9169         .read           = rb_simple_read,
9170         .write          = rb_simple_write,
9171         .release        = tracing_release_generic_tr,
9172         .llseek         = default_llseek,
9173 };
9174
9175 static ssize_t
9176 buffer_percent_read(struct file *filp, char __user *ubuf,
9177                     size_t cnt, loff_t *ppos)
9178 {
9179         struct trace_array *tr = filp->private_data;
9180         char buf[64];
9181         int r;
9182
9183         r = tr->buffer_percent;
9184         r = sprintf(buf, "%d\n", r);
9185
9186         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9187 }
9188
9189 static ssize_t
9190 buffer_percent_write(struct file *filp, const char __user *ubuf,
9191                      size_t cnt, loff_t *ppos)
9192 {
9193         struct trace_array *tr = filp->private_data;
9194         unsigned long val;
9195         int ret;
9196
9197         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9198         if (ret)
9199                 return ret;
9200
9201         if (val > 100)
9202                 return -EINVAL;
9203
9204         tr->buffer_percent = val;
9205
9206         (*ppos)++;
9207
9208         return cnt;
9209 }
9210
9211 static const struct file_operations buffer_percent_fops = {
9212         .open           = tracing_open_generic_tr,
9213         .read           = buffer_percent_read,
9214         .write          = buffer_percent_write,
9215         .release        = tracing_release_generic_tr,
9216         .llseek         = default_llseek,
9217 };
9218
9219 static struct dentry *trace_instance_dir;
9220
9221 static void
9222 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9223
9224 static int
9225 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9226 {
9227         enum ring_buffer_flags rb_flags;
9228
9229         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9230
9231         buf->tr = tr;
9232
9233         buf->buffer = ring_buffer_alloc(size, rb_flags);
9234         if (!buf->buffer)
9235                 return -ENOMEM;
9236
9237         buf->data = alloc_percpu(struct trace_array_cpu);
9238         if (!buf->data) {
9239                 ring_buffer_free(buf->buffer);
9240                 buf->buffer = NULL;
9241                 return -ENOMEM;
9242         }
9243
9244         /* Allocate the first page for all buffers */
9245         set_buffer_entries(&tr->array_buffer,
9246                            ring_buffer_size(tr->array_buffer.buffer, 0));
9247
9248         return 0;
9249 }
9250
9251 static void free_trace_buffer(struct array_buffer *buf)
9252 {
9253         if (buf->buffer) {
9254                 ring_buffer_free(buf->buffer);
9255                 buf->buffer = NULL;
9256                 free_percpu(buf->data);
9257                 buf->data = NULL;
9258         }
9259 }
9260
9261 static int allocate_trace_buffers(struct trace_array *tr, int size)
9262 {
9263         int ret;
9264
9265         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9266         if (ret)
9267                 return ret;
9268
9269 #ifdef CONFIG_TRACER_MAX_TRACE
9270         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9271                                     allocate_snapshot ? size : 1);
9272         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9273                 free_trace_buffer(&tr->array_buffer);
9274                 return -ENOMEM;
9275         }
9276         tr->allocated_snapshot = allocate_snapshot;
9277
9278         allocate_snapshot = false;
9279 #endif
9280
9281         return 0;
9282 }
9283
9284 static void free_trace_buffers(struct trace_array *tr)
9285 {
9286         if (!tr)
9287                 return;
9288
9289         free_trace_buffer(&tr->array_buffer);
9290
9291 #ifdef CONFIG_TRACER_MAX_TRACE
9292         free_trace_buffer(&tr->max_buffer);
9293 #endif
9294 }
9295
9296 static void init_trace_flags_index(struct trace_array *tr)
9297 {
9298         int i;
9299
9300         /* Used by the trace options files */
9301         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9302                 tr->trace_flags_index[i] = i;
9303 }
9304
9305 static void __update_tracer_options(struct trace_array *tr)
9306 {
9307         struct tracer *t;
9308
9309         for (t = trace_types; t; t = t->next)
9310                 add_tracer_options(tr, t);
9311 }
9312
9313 static void update_tracer_options(struct trace_array *tr)
9314 {
9315         mutex_lock(&trace_types_lock);
9316         tracer_options_updated = true;
9317         __update_tracer_options(tr);
9318         mutex_unlock(&trace_types_lock);
9319 }
9320
9321 /* Must have trace_types_lock held */
9322 struct trace_array *trace_array_find(const char *instance)
9323 {
9324         struct trace_array *tr, *found = NULL;
9325
9326         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9327                 if (tr->name && strcmp(tr->name, instance) == 0) {
9328                         found = tr;
9329                         break;
9330                 }
9331         }
9332
9333         return found;
9334 }
9335
9336 struct trace_array *trace_array_find_get(const char *instance)
9337 {
9338         struct trace_array *tr;
9339
9340         mutex_lock(&trace_types_lock);
9341         tr = trace_array_find(instance);
9342         if (tr)
9343                 tr->ref++;
9344         mutex_unlock(&trace_types_lock);
9345
9346         return tr;
9347 }
9348
9349 static int trace_array_create_dir(struct trace_array *tr)
9350 {
9351         int ret;
9352
9353         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9354         if (!tr->dir)
9355                 return -EINVAL;
9356
9357         ret = event_trace_add_tracer(tr->dir, tr);
9358         if (ret) {
9359                 tracefs_remove(tr->dir);
9360                 return ret;
9361         }
9362
9363         init_tracer_tracefs(tr, tr->dir);
9364         __update_tracer_options(tr);
9365
9366         return ret;
9367 }
9368
9369 static struct trace_array *trace_array_create(const char *name)
9370 {
9371         struct trace_array *tr;
9372         int ret;
9373
9374         ret = -ENOMEM;
9375         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9376         if (!tr)
9377                 return ERR_PTR(ret);
9378
9379         tr->name = kstrdup(name, GFP_KERNEL);
9380         if (!tr->name)
9381                 goto out_free_tr;
9382
9383         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9384                 goto out_free_tr;
9385
9386         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9387
9388         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9389
9390         raw_spin_lock_init(&tr->start_lock);
9391
9392         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9393
9394         tr->current_trace = &nop_trace;
9395
9396         INIT_LIST_HEAD(&tr->systems);
9397         INIT_LIST_HEAD(&tr->events);
9398         INIT_LIST_HEAD(&tr->hist_vars);
9399         INIT_LIST_HEAD(&tr->err_log);
9400
9401         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9402                 goto out_free_tr;
9403
9404         if (ftrace_allocate_ftrace_ops(tr) < 0)
9405                 goto out_free_tr;
9406
9407         ftrace_init_trace_array(tr);
9408
9409         init_trace_flags_index(tr);
9410
9411         if (trace_instance_dir) {
9412                 ret = trace_array_create_dir(tr);
9413                 if (ret)
9414                         goto out_free_tr;
9415         } else
9416                 __trace_early_add_events(tr);
9417
9418         list_add(&tr->list, &ftrace_trace_arrays);
9419
9420         tr->ref++;
9421
9422         return tr;
9423
9424  out_free_tr:
9425         ftrace_free_ftrace_ops(tr);
9426         free_trace_buffers(tr);
9427         free_cpumask_var(tr->tracing_cpumask);
9428         kfree(tr->name);
9429         kfree(tr);
9430
9431         return ERR_PTR(ret);
9432 }
9433
9434 static int instance_mkdir(const char *name)
9435 {
9436         struct trace_array *tr;
9437         int ret;
9438
9439         mutex_lock(&event_mutex);
9440         mutex_lock(&trace_types_lock);
9441
9442         ret = -EEXIST;
9443         if (trace_array_find(name))
9444                 goto out_unlock;
9445
9446         tr = trace_array_create(name);
9447
9448         ret = PTR_ERR_OR_ZERO(tr);
9449
9450 out_unlock:
9451         mutex_unlock(&trace_types_lock);
9452         mutex_unlock(&event_mutex);
9453         return ret;
9454 }
9455
9456 /**
9457  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9458  * @name: The name of the trace array to be looked up/created.
9459  *
9460  * Returns a pointer to the trace array with the given name, creating it
9461  * if it does not already exist, or NULL if it cannot be created.
9462  *
9463  * NOTE: This function increments the reference counter associated with the
9464  * trace array returned. This makes sure it cannot be freed while in use.
9465  * Use trace_array_put() once the trace array is no longer needed.
9466  * If the trace_array is to be freed, trace_array_destroy() needs to
9467  * be called after the trace_array_put(), or simply let user space delete
9468  * it from the tracefs instances directory. But until the
9469  * trace_array_put() is called, user space cannot delete it.
9470  *
9471  */
9472 struct trace_array *trace_array_get_by_name(const char *name)
9473 {
9474         struct trace_array *tr;
9475
9476         mutex_lock(&event_mutex);
9477         mutex_lock(&trace_types_lock);
9478
9479         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9480                 if (tr->name && strcmp(tr->name, name) == 0)
9481                         goto out_unlock;
9482         }
9483
9484         tr = trace_array_create(name);
9485
9486         if (IS_ERR(tr))
9487                 tr = NULL;
9488 out_unlock:
9489         if (tr)
9490                 tr->ref++;
9491
9492         mutex_unlock(&trace_types_lock);
9493         mutex_unlock(&event_mutex);
9494         return tr;
9495 }
9496 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9497
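/*
 * Illustrative sketch, not part of this file: a minimal module-style user of
 * the instance API above.  The instance name "example", the chosen event and
 * the init/exit hooks are hypothetical; trace_array_get_by_name(),
 * trace_array_set_clr_event() and trace_array_put() are the exported
 * interfaces (see also samples/ftrace/sample-trace-array.c).  The instance
 * also appears under the tracefs "instances" directory, but user space
 * cannot delete it until the reference is dropped.
 */
#include <linux/module.h>
#include <linux/trace.h>
#include <linux/trace_events.h>

static struct trace_array *example_tr;

static int __init example_instance_init(void)
{
        /* Look up "example", creating it if necessary; takes a reference */
        example_tr = trace_array_get_by_name("example");
        if (!example_tr)
                return -ENOMEM;

        /* Optionally enable an event in this instance */
        trace_array_set_clr_event(example_tr, "sched", "sched_switch", true);

        return 0;
}

static void __exit example_instance_exit(void)
{
        /* Drop the reference so the instance can be removed later */
        trace_array_put(example_tr);
}

module_init(example_instance_init);
module_exit(example_instance_exit);
MODULE_LICENSE("GPL");
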
9498 static int __remove_instance(struct trace_array *tr)
9499 {
9500         int i;
9501
9502         /* Reference counter for a newly created trace array = 1. */
9503         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9504                 return -EBUSY;
9505
9506         list_del(&tr->list);
9507
9508         /* Disable all the flags that were enabled coming in */
9509         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9510                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9511                         set_tracer_flag(tr, 1 << i, 0);
9512         }
9513
9514         tracing_set_nop(tr);
9515         clear_ftrace_function_probes(tr);
9516         event_trace_del_tracer(tr);
9517         ftrace_clear_pids(tr);
9518         ftrace_destroy_function_files(tr);
9519         tracefs_remove(tr->dir);
9520         free_percpu(tr->last_func_repeats);
9521         free_trace_buffers(tr);
9522         clear_tracing_err_log(tr);
9523
9524         for (i = 0; i < tr->nr_topts; i++) {
9525                 kfree(tr->topts[i].topts);
9526         }
9527         kfree(tr->topts);
9528
9529         free_cpumask_var(tr->tracing_cpumask);
9530         kfree(tr->name);
9531         kfree(tr);
9532
9533         return 0;
9534 }
9535
9536 int trace_array_destroy(struct trace_array *this_tr)
9537 {
9538         struct trace_array *tr;
9539         int ret;
9540
9541         if (!this_tr)
9542                 return -EINVAL;
9543
9544         mutex_lock(&event_mutex);
9545         mutex_lock(&trace_types_lock);
9546
9547         ret = -ENODEV;
9548
9549         /* Make sure the trace array exists before destroying it. */
9550         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9551                 if (tr == this_tr) {
9552                         ret = __remove_instance(tr);
9553                         break;
9554                 }
9555         }
9556
9557         mutex_unlock(&trace_types_lock);
9558         mutex_unlock(&event_mutex);
9559
9560         return ret;
9561 }
9562 EXPORT_SYMBOL_GPL(trace_array_destroy);
9563
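/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * teardown order described above trace_array_get_by_name().  The reference
 * returned by trace_array_get_by_name() must be dropped with
 * trace_array_put() first; otherwise __remove_instance() sees tr->ref > 1
 * and trace_array_destroy() fails with -EBUSY.
 */
static void example_instance_teardown(struct trace_array *tr)
{
        int err;

        /* Drop the reference taken by trace_array_get_by_name() */
        trace_array_put(tr);

        /* Now the instance itself can be removed */
        err = trace_array_destroy(tr);
        if (err)
                pr_warn("example: trace_array_destroy() failed: %d\n", err);
}
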
9564 static int instance_rmdir(const char *name)
9565 {
9566         struct trace_array *tr;
9567         int ret;
9568
9569         mutex_lock(&event_mutex);
9570         mutex_lock(&trace_types_lock);
9571
9572         ret = -ENODEV;
9573         tr = trace_array_find(name);
9574         if (tr)
9575                 ret = __remove_instance(tr);
9576
9577         mutex_unlock(&trace_types_lock);
9578         mutex_unlock(&event_mutex);
9579
9580         return ret;
9581 }
9582
9583 static __init void create_trace_instances(struct dentry *d_tracer)
9584 {
9585         struct trace_array *tr;
9586
9587         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9588                                                          instance_mkdir,
9589                                                          instance_rmdir);
9590         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9591                 return;
9592
9593         mutex_lock(&event_mutex);
9594         mutex_lock(&trace_types_lock);
9595
9596         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9597                 if (!tr->name)
9598                         continue;
9599                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9600                              "Failed to create instance directory\n"))
9601                         break;
9602         }
9603
9604         mutex_unlock(&trace_types_lock);
9605         mutex_unlock(&event_mutex);
9606 }
9607
9608 static void
9609 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9610 {
9611         struct trace_event_file *file;
9612         int cpu;
9613
9614         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9615                         tr, &show_traces_fops);
9616
9617         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9618                         tr, &set_tracer_fops);
9619
9620         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9621                           tr, &tracing_cpumask_fops);
9622
9623         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9624                           tr, &tracing_iter_fops);
9625
9626         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9627                           tr, &tracing_fops);
9628
9629         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9630                           tr, &tracing_pipe_fops);
9631
9632         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9633                           tr, &tracing_entries_fops);
9634
9635         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9636                           tr, &tracing_total_entries_fops);
9637
9638         trace_create_file("free_buffer", 0200, d_tracer,
9639                           tr, &tracing_free_buffer_fops);
9640
9641         trace_create_file("trace_marker", 0220, d_tracer,
9642                           tr, &tracing_mark_fops);
9643
9644         file = __find_event_file(tr, "ftrace", "print");
9645         if (file && file->dir)
9646                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9647                                   file, &event_trigger_fops);
9648         tr->trace_marker_file = file;
9649
9650         trace_create_file("trace_marker_raw", 0220, d_tracer,
9651                           tr, &tracing_mark_raw_fops);
9652
9653         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9654                           &trace_clock_fops);
9655
9656         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9657                           tr, &rb_simple_fops);
9658
9659         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9660                           &trace_time_stamp_mode_fops);
9661
9662         tr->buffer_percent = 50;
9663
9664         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9665                         tr, &buffer_percent_fops);
9666
9667         create_trace_options_dir(tr);
9668
9669 #ifdef CONFIG_TRACER_MAX_TRACE
9670         trace_create_maxlat_file(tr, d_tracer);
9671 #endif
9672
9673         if (ftrace_create_function_files(tr, d_tracer))
9674                 MEM_FAIL(1, "Could not allocate function filter files");
9675
9676 #ifdef CONFIG_TRACER_SNAPSHOT
9677         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9678                           tr, &snapshot_fops);
9679 #endif
9680
9681         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9682                           tr, &tracing_err_log_fops);
9683
9684         for_each_tracing_cpu(cpu)
9685                 tracing_init_tracefs_percpu(tr, cpu);
9686
9687         ftrace_init_tracefs(tr, d_tracer);
9688 }
9689
9690 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9691 {
9692         struct vfsmount *mnt;
9693         struct file_system_type *type;
9694
9695         /*
9696          * To maintain backward compatibility for tools that mount
9697          * debugfs to get to the tracing facility, tracefs is automatically
9698          * mounted to the debugfs/tracing directory.
9699          */
9700         type = get_fs_type("tracefs");
9701         if (!type)
9702                 return NULL;
9703         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9704         put_filesystem(type);
9705         if (IS_ERR(mnt))
9706                 return NULL;
9707         mntget(mnt);
9708
9709         return mnt;
9710 }
9711
9712 /**
9713  * tracing_init_dentry - initialize top level trace array
9714  *
9715  * This is called when creating files or directories in the tracing
9716  * directory. It is called via fs_initcall() by any of the boot up code.
9717  * Returns 0 on success, or a negative error code on failure.
9718  */
9719 int tracing_init_dentry(void)
9720 {
9721         struct trace_array *tr = &global_trace;
9722
9723         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9724                 pr_warn("Tracing disabled due to lockdown\n");
9725                 return -EPERM;
9726         }
9727
9728         /* The top level trace array uses NULL as parent */
9729         if (tr->dir)
9730                 return 0;
9731
9732         if (WARN_ON(!tracefs_initialized()))
9733                 return -ENODEV;
9734
9735         /*
9736          * As there may still be users that expect the tracing
9737          * files to exist in debugfs/tracing, we must automount
9738          * the tracefs file system there, so older tools still
9739          * work with the newer kernel.
9740          */
9741         tr->dir = debugfs_create_automount("tracing", NULL,
9742                                            trace_automount, NULL);
9743
9744         return 0;
9745 }
9746
9747 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9748 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9749
9750 static struct workqueue_struct *eval_map_wq __initdata;
9751 static struct work_struct eval_map_work __initdata;
9752 static struct work_struct tracerfs_init_work __initdata;
9753
9754 static void __init eval_map_work_func(struct work_struct *work)
9755 {
9756         int len;
9757
9758         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9759         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9760 }
9761
9762 static int __init trace_eval_init(void)
9763 {
9764         INIT_WORK(&eval_map_work, eval_map_work_func);
9765
9766         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9767         if (!eval_map_wq) {
9768                 pr_err("Unable to allocate eval_map_wq\n");
9769                 /* Could not defer; do the eval map update synchronously */
9770                 eval_map_work_func(&eval_map_work);
9771                 return -ENOMEM;
9772         }
9773
9774         queue_work(eval_map_wq, &eval_map_work);
9775         return 0;
9776 }
9777
9778 subsys_initcall(trace_eval_init);
9779
9780 static int __init trace_eval_sync(void)
9781 {
9782         /* Make sure the eval map updates are finished */
9783         if (eval_map_wq)
9784                 destroy_workqueue(eval_map_wq);
9785         return 0;
9786 }
9787
9788 late_initcall_sync(trace_eval_sync);
9789
9790
9791 #ifdef CONFIG_MODULES
9792 static void trace_module_add_evals(struct module *mod)
9793 {
9794         if (!mod->num_trace_evals)
9795                 return;
9796
9797         /*
9798          * Modules with bad taint do not have events created, do
9799          * not bother with enums either.
9800          */
9801         if (trace_module_has_bad_taint(mod))
9802                 return;
9803
9804         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9805 }
9806
9807 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9808 static void trace_module_remove_evals(struct module *mod)
9809 {
9810         union trace_eval_map_item *map;
9811         union trace_eval_map_item **last = &trace_eval_maps;
9812
9813         if (!mod->num_trace_evals)
9814                 return;
9815
9816         mutex_lock(&trace_eval_mutex);
9817
9818         map = trace_eval_maps;
9819
9820         while (map) {
9821                 if (map->head.mod == mod)
9822                         break;
9823                 map = trace_eval_jmp_to_tail(map);
9824                 last = &map->tail.next;
9825                 map = map->tail.next;
9826         }
9827         if (!map)
9828                 goto out;
9829
9830         *last = trace_eval_jmp_to_tail(map)->tail.next;
9831         kfree(map);
9832  out:
9833         mutex_unlock(&trace_eval_mutex);
9834 }
9835 #else
9836 static inline void trace_module_remove_evals(struct module *mod) { }
9837 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9838
9839 static int trace_module_notify(struct notifier_block *self,
9840                                unsigned long val, void *data)
9841 {
9842         struct module *mod = data;
9843
9844         switch (val) {
9845         case MODULE_STATE_COMING:
9846                 trace_module_add_evals(mod);
9847                 break;
9848         case MODULE_STATE_GOING:
9849                 trace_module_remove_evals(mod);
9850                 break;
9851         }
9852
9853         return NOTIFY_OK;
9854 }
9855
9856 static struct notifier_block trace_module_nb = {
9857         .notifier_call = trace_module_notify,
9858         .priority = 0,
9859 };
9860 #endif /* CONFIG_MODULES */
9861
9862 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9863 {
9864
9865         event_trace_init();
9866
9867         init_tracer_tracefs(&global_trace, NULL);
9868         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9869
9870         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9871                         &global_trace, &tracing_thresh_fops);
9872
9873         trace_create_file("README", TRACE_MODE_READ, NULL,
9874                         NULL, &tracing_readme_fops);
9875
9876         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9877                         NULL, &tracing_saved_cmdlines_fops);
9878
9879         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9880                           NULL, &tracing_saved_cmdlines_size_fops);
9881
9882         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9883                         NULL, &tracing_saved_tgids_fops);
9884
9885         trace_create_eval_file(NULL);
9886
9887 #ifdef CONFIG_MODULES
9888         register_module_notifier(&trace_module_nb);
9889 #endif
9890
9891 #ifdef CONFIG_DYNAMIC_FTRACE
9892         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9893                         NULL, &tracing_dyn_info_fops);
9894 #endif
9895
9896         create_trace_instances(NULL);
9897
9898         update_tracer_options(&global_trace);
9899 }
9900
9901 static __init int tracer_init_tracefs(void)
9902 {
9903         int ret;
9904
9905         trace_access_lock_init();
9906
9907         ret = tracing_init_dentry();
9908         if (ret)
9909                 return 0;
9910
9911         if (eval_map_wq) {
9912                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9913                 queue_work(eval_map_wq, &tracerfs_init_work);
9914         } else {
9915                 tracer_init_tracefs_work_func(NULL);
9916         }
9917
9918         rv_init_interface();
9919
9920         return 0;
9921 }
9922
9923 fs_initcall(tracer_init_tracefs);
9924
9925 static int trace_die_panic_handler(struct notifier_block *self,
9926                                 unsigned long ev, void *unused);
9927
9928 static struct notifier_block trace_panic_notifier = {
9929         .notifier_call = trace_die_panic_handler,
9930         .priority = INT_MAX - 1,
9931 };
9932
9933 static struct notifier_block trace_die_notifier = {
9934         .notifier_call = trace_die_panic_handler,
9935         .priority = INT_MAX - 1,
9936 };
9937
9938 /*
9939  * The idea is to execute the following die/panic callback early, in order
9940  * to avoid showing irrelevant information in the trace (like other panic
9941  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9942  * warnings get disabled (to prevent potential log flooding).
9943  */
9944 static int trace_die_panic_handler(struct notifier_block *self,
9945                                 unsigned long ev, void *unused)
9946 {
9947         if (!ftrace_dump_on_oops)
9948                 return NOTIFY_DONE;
9949
9950         /* The die notifier requires DIE_OOPS to trigger */
9951         if (self == &trace_die_notifier && ev != DIE_OOPS)
9952                 return NOTIFY_DONE;
9953
9954         ftrace_dump(ftrace_dump_on_oops);
9955
9956         return NOTIFY_DONE;
9957 }
9958
9959 /*
9960  * printk is limited to a maximum of 1024 characters; we really don't
9961  * need it that big. Nothing should be printing 1000 characters anyway.
9962  */
9963 #define TRACE_MAX_PRINT         1000
9964
9965 /*
9966  * Define here KERN_TRACE so that we have one place to modify
9967  * it if we decide to change what log level the ftrace dump
9968  * should be at.
9969  */
9970 #define KERN_TRACE              KERN_EMERG
9971
9972 void
9973 trace_printk_seq(struct trace_seq *s)
9974 {
9975         /* Probably should print a warning here. */
9976         if (s->seq.len >= TRACE_MAX_PRINT)
9977                 s->seq.len = TRACE_MAX_PRINT;
9978
9979         /*
9980          * More paranoid code. Although the buffer size is set to
9981          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9982          * an extra layer of protection.
9983          */
9984         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9985                 s->seq.len = s->seq.size - 1;
9986
9987         /* Should be NUL terminated already, but be paranoid anyway. */
9988         s->buffer[s->seq.len] = 0;
9989
9990         printk(KERN_TRACE "%s", s->buffer);
9991
9992         trace_seq_init(s);
9993 }
9994
9995 void trace_init_global_iter(struct trace_iterator *iter)
9996 {
9997         iter->tr = &global_trace;
9998         iter->trace = iter->tr->current_trace;
9999         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10000         iter->array_buffer = &global_trace.array_buffer;
10001
10002         if (iter->trace && iter->trace->open)
10003                 iter->trace->open(iter);
10004
10005         /* Annotate start of buffers if we had overruns */
10006         if (ring_buffer_overruns(iter->array_buffer->buffer))
10007                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10008
10009         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10010         if (trace_clocks[iter->tr->clock_id].in_ns)
10011                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10012
10013         /* Cannot use kmalloc for iter.temp and iter.fmt */
10014         iter->temp = static_temp_buf;
10015         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10016         iter->fmt = static_fmt_buf;
10017         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10018 }
10019
10020 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10021 {
10022         /* use static because iter can be a bit big for the stack */
10023         static struct trace_iterator iter;
10024         static atomic_t dump_running;
10025         struct trace_array *tr = &global_trace;
10026         unsigned int old_userobj;
10027         unsigned long flags;
10028         int cnt = 0, cpu;
10029
10030         /* Only allow one dump user at a time. */
10031         if (atomic_inc_return(&dump_running) != 1) {
10032                 atomic_dec(&dump_running);
10033                 return;
10034         }
10035
10036         /*
10037          * Always turn off tracing when we dump.
10038          * We don't need to show trace output of what happens
10039          * between multiple crashes.
10040          *
10041          * If the user does a sysrq-z, then they can re-enable
10042          * tracing with echo 1 > tracing_on.
10043          */
10044         tracing_off();
10045
10046         local_irq_save(flags);
10047
10048         /* Simulate the iterator */
10049         trace_init_global_iter(&iter);
10050
10051         for_each_tracing_cpu(cpu) {
10052                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10053         }
10054
10055         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10056
10057         /* don't look at user memory in panic mode */
10058         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10059
10060         switch (oops_dump_mode) {
10061         case DUMP_ALL:
10062                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10063                 break;
10064         case DUMP_ORIG:
10065                 iter.cpu_file = raw_smp_processor_id();
10066                 break;
10067         case DUMP_NONE:
10068                 goto out_enable;
10069         default:
10070                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10071                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10072         }
10073
10074         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10075
10076         /* Did function tracer already get disabled? */
10077         if (ftrace_is_dead()) {
10078                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10079                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10080         }
10081
10082         /*
10083          * We need to stop all tracing on all CPUs to read
10084          * the next buffer. This is a bit expensive, but is
10085          * not done often. We read everything we can,
10086          * and then release the locks again.
10087          */
10088
10089         while (!trace_empty(&iter)) {
10090
10091                 if (!cnt)
10092                         printk(KERN_TRACE "---------------------------------\n");
10093
10094                 cnt++;
10095
10096                 trace_iterator_reset(&iter);
10097                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10098
10099                 if (trace_find_next_entry_inc(&iter) != NULL) {
10100                         int ret;
10101
10102                         ret = print_trace_line(&iter);
10103                         if (ret != TRACE_TYPE_NO_CONSUME)
10104                                 trace_consume(&iter);
10105                 }
10106                 touch_nmi_watchdog();
10107
10108                 trace_printk_seq(&iter.seq);
10109         }
10110
10111         if (!cnt)
10112                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10113         else
10114                 printk(KERN_TRACE "---------------------------------\n");
10115
10116  out_enable:
10117         tr->trace_flags |= old_userobj;
10118
10119         for_each_tracing_cpu(cpu) {
10120                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10121         }
10122         atomic_dec(&dump_running);
10123         local_irq_restore(flags);
10124 }
10125 EXPORT_SYMBOL_GPL(ftrace_dump);
10126
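/*
 * Illustrative sketch (hypothetical caller, not part of this file): code
 * that detects an unrecoverable condition can put the trace buffers into
 * the kernel log by calling ftrace_dump() directly.  DUMP_ORIG limits the
 * dump to the CPU making the call, which keeps the output smaller than
 * DUMP_ALL.
 */
static void example_fatal_condition(void)
{
        pr_emerg("example: unrecoverable condition, dumping ftrace buffer\n");
        ftrace_dump(DUMP_ORIG);
}
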
10127 #define WRITE_BUFSIZE  4096
10128
10129 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10130                                 size_t count, loff_t *ppos,
10131                                 int (*createfn)(const char *))
10132 {
10133         char *kbuf, *buf, *tmp;
10134         int ret = 0;
10135         size_t done = 0;
10136         size_t size;
10137
10138         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10139         if (!kbuf)
10140                 return -ENOMEM;
10141
10142         while (done < count) {
10143                 size = count - done;
10144
10145                 if (size >= WRITE_BUFSIZE)
10146                         size = WRITE_BUFSIZE - 1;
10147
10148                 if (copy_from_user(kbuf, buffer + done, size)) {
10149                         ret = -EFAULT;
10150                         goto out;
10151                 }
10152                 kbuf[size] = '\0';
10153                 buf = kbuf;
10154                 do {
10155                         tmp = strchr(buf, '\n');
10156                         if (tmp) {
10157                                 *tmp = '\0';
10158                                 size = tmp - buf + 1;
10159                         } else {
10160                                 size = strlen(buf);
10161                                 if (done + size < count) {
10162                                         if (buf != kbuf)
10163                                                 break;
10164                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10165                                         pr_warn("Line length is too long: Should be less than %d\n",
10166                                                 WRITE_BUFSIZE - 2);
10167                                         ret = -EINVAL;
10168                                         goto out;
10169                                 }
10170                         }
10171                         done += size;
10172
10173                         /* Remove comments */
10174                         tmp = strchr(buf, '#');
10175
10176                         if (tmp)
10177                                 *tmp = '\0';
10178
10179                         ret = createfn(buf);
10180                         if (ret)
10181                                 goto out;
10182                         buf += size;
10183
10184                 } while (done < count);
10185         }
10186         ret = done;
10187
10188 out:
10189         kfree(kbuf);
10190
10191         return ret;
10192 }
10193
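/*
 * Illustrative sketch (hypothetical names, not part of this file): a tracefs
 * control file wiring its write handler through trace_parse_run_command().
 * The helper above copies the user buffer in chunks, splits it into
 * newline-terminated lines, strips '#' comments, and hands each remaining
 * line to the callback, similar to how the dynamic event files use it.
 */
static int example_create_cmd(const char *raw_command)
{
        pr_debug("example: parsed command '%s'\n", raw_command);
        return 0;
}

static ssize_t example_cmd_write(struct file *filp, const char __user *ubuf,
                                 size_t cnt, loff_t *ppos)
{
        return trace_parse_run_command(filp, ubuf, cnt, ppos,
                                       example_create_cmd);
}
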
10194 #ifdef CONFIG_TRACER_MAX_TRACE
10195 __init static bool tr_needs_alloc_snapshot(const char *name)
10196 {
10197         char *test;
10198         int len = strlen(name);
10199         bool ret;
10200
10201         if (!boot_snapshot_index)
10202                 return false;
10203
10204         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10205             boot_snapshot_info[len] == '\t')
10206                 return true;
10207
10208         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10209         if (!test)
10210                 return false;
10211
10212         sprintf(test, "\t%s\t", name);
10213         ret = strstr(boot_snapshot_info, test) != NULL;
10214         kfree(test);
10215         return ret;
10216 }
10217
10218 __init static void do_allocate_snapshot(const char *name)
10219 {
10220         if (!tr_needs_alloc_snapshot(name))
10221                 return;
10222
10223         /*
10224          * When allocate_snapshot is set, the next call to
10225          * allocate_trace_buffers() (called by trace_array_get_by_name())
10226          * will allocate the snapshot buffer. That will also clear
10227          * this flag.
10228          */
10229         allocate_snapshot = true;
10230 }
10231 #else
10232 static inline void do_allocate_snapshot(const char *name) { }
10233 #endif
10234
10235 __init static void enable_instances(void)
10236 {
10237         struct trace_array *tr;
10238         char *curr_str;
10239         char *str;
10240         char *tok;
10241
10242         /* A tab is always appended */
10243         boot_instance_info[boot_instance_index - 1] = '\0';
10244         str = boot_instance_info;
10245
10246         while ((curr_str = strsep(&str, "\t"))) {
10247
10248                 tok = strsep(&curr_str, ",");
10249
10250                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10251                         do_allocate_snapshot(tok);
10252
10253                 tr = trace_array_get_by_name(tok);
10254                 if (!tr) {
10255                         pr_warn("Failed to create instance buffer %s\n", tok);
10256                         continue;
10257                 }
10258                 /* Allow user space to delete it */
10259                 trace_array_put(tr);
10260
10261                 while ((tok = strsep(&curr_str, ","))) {
10262                         early_enable_events(tr, tok, true);
10263                 }
10264         }
10265 }
10266
10267 __init static int tracer_alloc_buffers(void)
10268 {
10269         int ring_buf_size;
10270         int ret = -ENOMEM;
10271
10272
10273         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10274                 pr_warn("Tracing disabled due to lockdown\n");
10275                 return -EPERM;
10276         }
10277
10278         /*
10279          * Make sure we don't accidentally add more trace options
10280          * than we have bits for.
10281          */
10282         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10283
10284         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10285                 goto out;
10286
10287         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10288                 goto out_free_buffer_mask;
10289
10290         /* Only allocate trace_printk buffers if a trace_printk exists */
10291         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10292                 /* Must be called before global_trace.buffer is allocated */
10293                 trace_printk_init_buffers();
10294
10295         /* To save memory, keep the ring buffer size to its minimum */
10296         if (ring_buffer_expanded)
10297                 ring_buf_size = trace_buf_size;
10298         else
10299                 ring_buf_size = 1;
10300
10301         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10302         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10303
10304         raw_spin_lock_init(&global_trace.start_lock);
10305
10306         /*
10307          * The prepare callback allocates some memory for the ring buffer. We
10308          * don't free the buffer if the CPU goes down. If we were to free
10309          * the buffer, then the user would lose any trace that was in the
10310          * buffer. The memory will be removed once the "instance" is removed.
10311          */
10312         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10313                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10314                                       NULL);
10315         if (ret < 0)
10316                 goto out_free_cpumask;
10317         /* Used for event triggers */
10318         ret = -ENOMEM;
10319         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10320         if (!temp_buffer)
10321                 goto out_rm_hp_state;
10322
10323         if (trace_create_savedcmd() < 0)
10324                 goto out_free_temp_buffer;
10325
10326         /* TODO: make the number of buffers hot pluggable with CPUS */
10327         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10328                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10329                 goto out_free_savedcmd;
10330         }
10331
10332         if (global_trace.buffer_disabled)
10333                 tracing_off();
10334
10335         if (trace_boot_clock) {
10336                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10337                 if (ret < 0)
10338                         pr_warn("Trace clock %s not defined, going back to default\n",
10339                                 trace_boot_clock);
10340         }
10341
10342         /*
10343          * register_tracer() might reference current_trace, so it
10344          * needs to be set before we register anything. This is
10345          * just a bootstrap of current_trace anyway.
10346          */
10347         global_trace.current_trace = &nop_trace;
10348
10349         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10350
10351         ftrace_init_global_array_ops(&global_trace);
10352
10353         init_trace_flags_index(&global_trace);
10354
10355         register_tracer(&nop_trace);
10356
10357         /* Function tracing may start here (via kernel command line) */
10358         init_function_trace();
10359
10360         /* All seems OK, enable tracing */
10361         tracing_disabled = 0;
10362
10363         atomic_notifier_chain_register(&panic_notifier_list,
10364                                        &trace_panic_notifier);
10365
10366         register_die_notifier(&trace_die_notifier);
10367
10368         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10369
10370         INIT_LIST_HEAD(&global_trace.systems);
10371         INIT_LIST_HEAD(&global_trace.events);
10372         INIT_LIST_HEAD(&global_trace.hist_vars);
10373         INIT_LIST_HEAD(&global_trace.err_log);
10374         list_add(&global_trace.list, &ftrace_trace_arrays);
10375
10376         apply_trace_boot_options();
10377
10378         register_snapshot_cmd();
10379
10380         test_can_verify();
10381
10382         return 0;
10383
10384 out_free_savedcmd:
10385         free_saved_cmdlines_buffer(savedcmd);
10386 out_free_temp_buffer:
10387         ring_buffer_free(temp_buffer);
10388 out_rm_hp_state:
10389         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10390 out_free_cpumask:
10391         free_cpumask_var(global_trace.tracing_cpumask);
10392 out_free_buffer_mask:
10393         free_cpumask_var(tracing_buffer_mask);
10394 out:
10395         return ret;
10396 }
10397
10398 void __init ftrace_boot_snapshot(void)
10399 {
10400 #ifdef CONFIG_TRACER_MAX_TRACE
10401         struct trace_array *tr;
10402
10403         if (!snapshot_at_boot)
10404                 return;
10405
10406         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10407                 if (!tr->allocated_snapshot)
10408                         continue;
10409
10410                 tracing_snapshot_instance(tr);
10411                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10412         }
10413 #endif
10414 }
10415
10416 void __init early_trace_init(void)
10417 {
10418         if (tracepoint_printk) {
10419                 tracepoint_print_iter =
10420                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10421                 if (MEM_FAIL(!tracepoint_print_iter,
10422                              "Failed to allocate trace iterator\n"))
10423                         tracepoint_printk = 0;
10424                 else
10425                         static_key_enable(&tracepoint_printk_key.key);
10426         }
10427         tracer_alloc_buffers();
10428
10429         init_events();
10430 }
10431
10432 void __init trace_init(void)
10433 {
10434         trace_event_init();
10435
10436         if (boot_instance_index)
10437                 enable_instances();
10438 }
10439
10440 __init static void clear_boot_tracer(void)
10441 {
10442         /*
10443          * The buffer holding the default bootup tracer name lives in
10444          * an init section. This function runs at late initcall time;
10445          * if the boot tracer was never registered, clear the pointer so
10446          * that a later registration does not access the buffer that
10447          * is about to be freed.
10448          */
10449         if (!default_bootup_tracer)
10450                 return;
10451
10452         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10453                default_bootup_tracer);
10454         default_bootup_tracer = NULL;
10455 }
10456
10457 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10458 __init static void tracing_set_default_clock(void)
10459 {
10460         /* sched_clock_stable() is determined in late_initcall */
10461         if (!trace_boot_clock && !sched_clock_stable()) {
10462                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10463                         pr_warn("Can not set tracing clock due to lockdown\n");
10464                         return;
10465                 }
10466
10467                 printk(KERN_WARNING
10468                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10469                        "If you want to keep using the local clock, then add:\n"
10470                        "  \"trace_clock=local\"\n"
10471                        "on the kernel command line\n");
10472                 tracing_set_clock(&global_trace, "global");
10473         }
10474 }
10475 #else
10476 static inline void tracing_set_default_clock(void) { }
10477 #endif
10478
10479 __init static int late_trace_init(void)
10480 {
10481         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10482                 static_key_disable(&tracepoint_printk_key.key);
10483                 tracepoint_printk = 0;
10484         }
10485
10486         tracing_set_default_clock();
10487         clear_boot_tracer();
10488         return 0;
10489 }
10490
10491 late_initcall_sync(late_trace_init);