Merge remote-tracking branch 'torvalds/master' into perf/core
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are entering export into the list but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included into the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
667
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always recreate a new array. The write is an all or nothing
689          * operation. Always create a new array when adding new pids by
690          * the user. If the operation fails, then the current list is
691          * not modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" to be used in fast paths such as for
790  * the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If the dump on oops happens, it will be much appreciated
812  * to not have to wait for all that output. Anyway this can be
813  * boot time and run time configurable.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * serialize the access of the ring buffer
829  *
830  * ring buffer serializes readers, but it is low level protection.
831  * The validity of the events (which returns by ring_buffer_peek() ..etc)
832  * are not protected by ring buffer.
833  *
834  * The content of events may become garbage if we allow other process consumes
835  * these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not reader page) in ring buffer, and this page will be rewritten
838  *      by events producer.
839  *   B) The page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to system.
841  *
842  * These primitives allow multi process access to different cpu ring buffer
843  * concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multi read-only access are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races of where it gets disabled but we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, snapshot can not be used when the tracer uses it */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer. instead, resize it because
1230          * The max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races of where it gets disabled but we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by  space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * The parser is not finished with the last write,
1607          * continue reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got finished input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* record this tasks comm */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
1836
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record, but we reset
1868                  * the max trace buffer (no one writes directly to it)
1869                  * and flag that it failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static, they should be filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftests yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * lets make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /*allocate a dummy tracer_flags*/
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* disable other selftests, since this will break it. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 static int *tgid_map;
2188
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194         unsigned *map_cmdline_to_pid;
2195         unsigned cmdline_num;
2196         int cmdline_idx;
2197         char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200
2201 static inline char *get_saved_cmdlines(int idx)
2202 {
2203         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2204 }
2205
2206 static inline void set_cmdline(int idx, const char *cmdline)
2207 {
2208         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2209 }
2210
2211 static int allocate_cmdlines_buffer(unsigned int val,
2212                                     struct saved_cmdlines_buffer *s)
2213 {
2214         s->map_cmdline_to_pid = kmalloc_array(val,
2215                                               sizeof(*s->map_cmdline_to_pid),
2216                                               GFP_KERNEL);
2217         if (!s->map_cmdline_to_pid)
2218                 return -ENOMEM;
2219
2220         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2221         if (!s->saved_cmdlines) {
2222                 kfree(s->map_cmdline_to_pid);
2223                 return -ENOMEM;
2224         }
2225
2226         s->cmdline_idx = 0;
2227         s->cmdline_num = val;
2228         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2229                sizeof(s->map_pid_to_cmdline));
2230         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2231                val * sizeof(*s->map_cmdline_to_pid));
2232
2233         return 0;
2234 }
2235
2236 static int trace_create_savedcmd(void)
2237 {
2238         int ret;
2239
2240         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2241         if (!savedcmd)
2242                 return -ENOMEM;
2243
2244         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2245         if (ret < 0) {
2246                 kfree(savedcmd);
2247                 savedcmd = NULL;
2248                 return -ENOMEM;
2249         }
2250
2251         return 0;
2252 }
2253
2254 int is_tracing_stopped(void)
2255 {
2256         return global_trace.stop_count;
2257 }
2258
2259 /**
2260  * tracing_start - quick start of the tracer
2261  *
2262  * If tracing is enabled but was stopped by tracing_stop,
2263  * this will start the tracer back up.
2264  */
2265 void tracing_start(void)
2266 {
2267         struct trace_buffer *buffer;
2268         unsigned long flags;
2269
2270         if (tracing_disabled)
2271                 return;
2272
2273         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2274         if (--global_trace.stop_count) {
2275                 if (global_trace.stop_count < 0) {
2276                         /* Someone screwed up their debugging */
2277                         WARN_ON_ONCE(1);
2278                         global_trace.stop_count = 0;
2279                 }
2280                 goto out;
2281         }
2282
2283         /* Prevent the buffers from switching */
2284         arch_spin_lock(&global_trace.max_lock);
2285
2286         buffer = global_trace.array_buffer.buffer;
2287         if (buffer)
2288                 ring_buffer_record_enable(buffer);
2289
2290 #ifdef CONFIG_TRACER_MAX_TRACE
2291         buffer = global_trace.max_buffer.buffer;
2292         if (buffer)
2293                 ring_buffer_record_enable(buffer);
2294 #endif
2295
2296         arch_spin_unlock(&global_trace.max_lock);
2297
2298  out:
2299         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2300 }
2301
2302 static void tracing_start_tr(struct trace_array *tr)
2303 {
2304         struct trace_buffer *buffer;
2305         unsigned long flags;
2306
2307         if (tracing_disabled)
2308                 return;
2309
2310         /* If global, we need to also start the max tracer */
2311         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2312                 return tracing_start();
2313
2314         raw_spin_lock_irqsave(&tr->start_lock, flags);
2315
2316         if (--tr->stop_count) {
2317                 if (tr->stop_count < 0) {
2318                         /* Someone screwed up their debugging */
2319                         WARN_ON_ONCE(1);
2320                         tr->stop_count = 0;
2321                 }
2322                 goto out;
2323         }
2324
2325         buffer = tr->array_buffer.buffer;
2326         if (buffer)
2327                 ring_buffer_record_enable(buffer);
2328
2329  out:
2330         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2331 }
2332
2333 /**
2334  * tracing_stop - quick stop of the tracer
2335  *
2336  * Light weight way to stop tracing. Use in conjunction with
2337  * tracing_start.
2338  */
2339 void tracing_stop(void)
2340 {
2341         struct trace_buffer *buffer;
2342         unsigned long flags;
2343
2344         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2345         if (global_trace.stop_count++)
2346                 goto out;
2347
2348         /* Prevent the buffers from switching */
2349         arch_spin_lock(&global_trace.max_lock);
2350
2351         buffer = global_trace.array_buffer.buffer;
2352         if (buffer)
2353                 ring_buffer_record_disable(buffer);
2354
2355 #ifdef CONFIG_TRACER_MAX_TRACE
2356         buffer = global_trace.max_buffer.buffer;
2357         if (buffer)
2358                 ring_buffer_record_disable(buffer);
2359 #endif
2360
2361         arch_spin_unlock(&global_trace.max_lock);
2362
2363  out:
2364         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2365 }
2366
2367 static void tracing_stop_tr(struct trace_array *tr)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         /* If global, we need to also stop the max tracer */
2373         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2374                 return tracing_stop();
2375
2376         raw_spin_lock_irqsave(&tr->start_lock, flags);
2377         if (tr->stop_count++)
2378                 goto out;
2379
2380         buffer = tr->array_buffer.buffer;
2381         if (buffer)
2382                 ring_buffer_record_disable(buffer);
2383
2384  out:
2385         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2386 }
2387
2388 static int trace_save_cmdline(struct task_struct *tsk)
2389 {
2390         unsigned tpid, idx;
2391
2392         /* treat recording of idle task as a success */
2393         if (!tsk->pid)
2394                 return 1;
2395
2396         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2397
2398         /*
2399          * It's not the end of the world if we don't get
2400          * the lock, but we also don't want to spin
2401          * nor do we want to disable interrupts,
2402          * so if we miss here, then better luck next time.
2403          */
2404         if (!arch_spin_trylock(&trace_cmdline_lock))
2405                 return 0;
2406
2407         idx = savedcmd->map_pid_to_cmdline[tpid];
2408         if (idx == NO_CMDLINE_MAP) {
2409                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2410
2411                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2412                 savedcmd->cmdline_idx = idx;
2413         }
2414
2415         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416         set_cmdline(idx, tsk->comm);
2417
2418         arch_spin_unlock(&trace_cmdline_lock);
2419
2420         return 1;
2421 }
2422
2423 static void __trace_find_cmdline(int pid, char comm[])
2424 {
2425         unsigned map;
2426         int tpid;
2427
2428         if (!pid) {
2429                 strcpy(comm, "<idle>");
2430                 return;
2431         }
2432
2433         if (WARN_ON_ONCE(pid < 0)) {
2434                 strcpy(comm, "<XXX>");
2435                 return;
2436         }
2437
2438         tpid = pid & (PID_MAX_DEFAULT - 1);
2439         map = savedcmd->map_pid_to_cmdline[tpid];
2440         if (map != NO_CMDLINE_MAP) {
2441                 tpid = savedcmd->map_cmdline_to_pid[map];
2442                 if (tpid == pid) {
2443                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2444                         return;
2445                 }
2446         }
2447         strcpy(comm, "<...>");
2448 }
2449
2450 void trace_find_cmdline(int pid, char comm[])
2451 {
2452         preempt_disable();
2453         arch_spin_lock(&trace_cmdline_lock);
2454
2455         __trace_find_cmdline(pid, comm);
2456
2457         arch_spin_unlock(&trace_cmdline_lock);
2458         preempt_enable();
2459 }
2460
2461 int trace_find_tgid(int pid)
2462 {
2463         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2464                 return 0;
2465
2466         return tgid_map[pid];
2467 }
2468
2469 static int trace_save_tgid(struct task_struct *tsk)
2470 {
2471         /* treat recording of idle task as a success */
2472         if (!tsk->pid)
2473                 return 1;
2474
2475         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2476                 return 0;
2477
2478         tgid_map[tsk->pid] = tsk->tgid;
2479         return 1;
2480 }
2481
2482 static bool tracing_record_taskinfo_skip(int flags)
2483 {
2484         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2485                 return true;
2486         if (!__this_cpu_read(trace_taskinfo_save))
2487                 return true;
2488         return false;
2489 }
2490
2491 /**
2492  * tracing_record_taskinfo - record the task info of a task
2493  *
2494  * @task:  task to record
2495  * @flags: TRACE_RECORD_CMDLINE for recording comm
2496  *         TRACE_RECORD_TGID for recording tgid
2497  */
2498 void tracing_record_taskinfo(struct task_struct *task, int flags)
2499 {
2500         bool done;
2501
2502         if (tracing_record_taskinfo_skip(flags))
2503                 return;
2504
2505         /*
2506          * Record as much task information as possible. If some fail, continue
2507          * to try to record the others.
2508          */
2509         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2510         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2511
2512         /* If recording any information failed, retry again soon. */
2513         if (!done)
2514                 return;
2515
2516         __this_cpu_write(trace_taskinfo_save, false);
2517 }
2518
2519 /**
2520  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2521  *
2522  * @prev: previous task during sched_switch
2523  * @next: next task during sched_switch
2524  * @flags: TRACE_RECORD_CMDLINE for recording comm
2525  *         TRACE_RECORD_TGID for recording tgid
2526  */
2527 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2528                                           struct task_struct *next, int flags)
2529 {
2530         bool done;
2531
2532         if (tracing_record_taskinfo_skip(flags))
2533                 return;
2534
2535         /*
2536          * Record as much task information as possible. If some fail, continue
2537          * to try to record the others.
2538          */
2539         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2540         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2541         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2543
2544         /* If recording any information failed, retry again soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /* Helpers to record a specific task information */
2552 void tracing_record_cmdline(struct task_struct *task)
2553 {
2554         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2555 }
2556
2557 void tracing_record_tgid(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2560 }
2561
2562 /*
2563  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2564  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2565  * simplifies those functions and keeps them in sync.
2566  */
2567 enum print_line_t trace_handle_return(struct trace_seq *s)
2568 {
2569         return trace_seq_has_overflowed(s) ?
2570                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2571 }
2572 EXPORT_SYMBOL_GPL(trace_handle_return);
2573
2574 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2575 {
2576         unsigned int trace_flags = irqs_status;
2577         unsigned int pc;
2578
2579         pc = preempt_count();
2580
2581         if (pc & NMI_MASK)
2582                 trace_flags |= TRACE_FLAG_NMI;
2583         if (pc & HARDIRQ_MASK)
2584                 trace_flags |= TRACE_FLAG_HARDIRQ;
2585         if (in_serving_softirq())
2586                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2587
2588         if (tif_need_resched())
2589                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2590         if (test_preempt_need_resched())
2591                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2592         return (trace_flags << 16) | (pc & 0xff);
2593 }
2594
2595 struct ring_buffer_event *
2596 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2597                           int type,
2598                           unsigned long len,
2599                           unsigned int trace_ctx)
2600 {
2601         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2602 }
2603
2604 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2605 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2606 static int trace_buffered_event_ref;
2607
2608 /**
2609  * trace_buffered_event_enable - enable buffering events
2610  *
2611  * When events are being filtered, it is quicker to use a temporary
2612  * buffer to write the event data into if there's a likely chance
2613  * that it will not be committed. The discard of the ring buffer
2614  * is not as fast as committing, and is much slower than copying
2615  * a commit.
2616  *
2617  * When an event is to be filtered, allocate per cpu buffers to
2618  * write the event data into, and if the event is filtered and discarded
2619  * it is simply dropped, otherwise, the entire data is to be committed
2620  * in one shot.
2621  */
2622 void trace_buffered_event_enable(void)
2623 {
2624         struct ring_buffer_event *event;
2625         struct page *page;
2626         int cpu;
2627
2628         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2629
2630         if (trace_buffered_event_ref++)
2631                 return;
2632
2633         for_each_tracing_cpu(cpu) {
2634                 page = alloc_pages_node(cpu_to_node(cpu),
2635                                         GFP_KERNEL | __GFP_NORETRY, 0);
2636                 if (!page)
2637                         goto failed;
2638
2639                 event = page_address(page);
2640                 memset(event, 0, sizeof(*event));
2641
2642                 per_cpu(trace_buffered_event, cpu) = event;
2643
2644                 preempt_disable();
2645                 if (cpu == smp_processor_id() &&
2646                     __this_cpu_read(trace_buffered_event) !=
2647                     per_cpu(trace_buffered_event, cpu))
2648                         WARN_ON_ONCE(1);
2649                 preempt_enable();
2650         }
2651
2652         return;
2653  failed:
2654         trace_buffered_event_disable();
2655 }
2656
2657 static void enable_trace_buffered_event(void *data)
2658 {
2659         /* Probably not needed, but do it anyway */
2660         smp_rmb();
2661         this_cpu_dec(trace_buffered_event_cnt);
2662 }
2663
2664 static void disable_trace_buffered_event(void *data)
2665 {
2666         this_cpu_inc(trace_buffered_event_cnt);
2667 }
2668
2669 /**
2670  * trace_buffered_event_disable - disable buffering events
2671  *
2672  * When a filter is removed, it is faster to not use the buffered
2673  * events, and to commit directly into the ring buffer. Free up
2674  * the temp buffers when there are no more users. This requires
2675  * special synchronization with current events.
2676  */
2677 void trace_buffered_event_disable(void)
2678 {
2679         int cpu;
2680
2681         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2682
2683         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2684                 return;
2685
2686         if (--trace_buffered_event_ref)
2687                 return;
2688
2689         preempt_disable();
2690         /* For each CPU, set the buffer as used. */
2691         smp_call_function_many(tracing_buffer_mask,
2692                                disable_trace_buffered_event, NULL, 1);
2693         preempt_enable();
2694
2695         /* Wait for all current users to finish */
2696         synchronize_rcu();
2697
2698         for_each_tracing_cpu(cpu) {
2699                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2700                 per_cpu(trace_buffered_event, cpu) = NULL;
2701         }
2702         /*
2703          * Make sure trace_buffered_event is NULL before clearing
2704          * trace_buffered_event_cnt.
2705          */
2706         smp_wmb();
2707
2708         preempt_disable();
2709         /* Do the work on each cpu */
2710         smp_call_function_many(tracing_buffer_mask,
2711                                enable_trace_buffered_event, NULL, 1);
2712         preempt_enable();
2713 }
2714
2715 static struct trace_buffer *temp_buffer;
2716
2717 struct ring_buffer_event *
2718 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2719                           struct trace_event_file *trace_file,
2720                           int type, unsigned long len,
2721                           unsigned int trace_ctx)
2722 {
2723         struct ring_buffer_event *entry;
2724         struct trace_array *tr = trace_file->tr;
2725         int val;
2726
2727         *current_rb = tr->array_buffer.buffer;
2728
2729         if (!tr->no_filter_buffering_ref &&
2730             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2731             (entry = this_cpu_read(trace_buffered_event))) {
2732                 /* Try to use the per cpu buffer first */
2733                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2734                 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2735                         trace_event_setup(entry, type, trace_ctx);
2736                         entry->array[0] = len;
2737                         return entry;
2738                 }
2739                 this_cpu_dec(trace_buffered_event_cnt);
2740         }
2741
2742         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2743                                             trace_ctx);
2744         /*
2745          * If tracing is off, but we have triggers enabled
2746          * we still need to look at the event data. Use the temp_buffer
2747          * to store the trace event for the trigger to use. It's recursive
2748          * safe and will not be recorded anywhere.
2749          */
2750         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2751                 *current_rb = temp_buffer;
2752                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2753                                                     trace_ctx);
2754         }
2755         return entry;
2756 }
2757 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2758
2759 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2760 static DEFINE_MUTEX(tracepoint_printk_mutex);
2761
2762 static void output_printk(struct trace_event_buffer *fbuffer)
2763 {
2764         struct trace_event_call *event_call;
2765         struct trace_event_file *file;
2766         struct trace_event *event;
2767         unsigned long flags;
2768         struct trace_iterator *iter = tracepoint_print_iter;
2769
2770         /* We should never get here if iter is NULL */
2771         if (WARN_ON_ONCE(!iter))
2772                 return;
2773
2774         event_call = fbuffer->trace_file->event_call;
2775         if (!event_call || !event_call->event.funcs ||
2776             !event_call->event.funcs->trace)
2777                 return;
2778
2779         file = fbuffer->trace_file;
2780         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2781             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2782              !filter_match_preds(file->filter, fbuffer->entry)))
2783                 return;
2784
2785         event = &fbuffer->trace_file->event_call->event;
2786
2787         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2788         trace_seq_init(&iter->seq);
2789         iter->ent = fbuffer->entry;
2790         event_call->event.funcs->trace(iter, 0, event);
2791         trace_seq_putc(&iter->seq, 0);
2792         printk("%s", iter->seq.buffer);
2793
2794         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2795 }
2796
2797 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2798                              void *buffer, size_t *lenp,
2799                              loff_t *ppos)
2800 {
2801         int save_tracepoint_printk;
2802         int ret;
2803
2804         mutex_lock(&tracepoint_printk_mutex);
2805         save_tracepoint_printk = tracepoint_printk;
2806
2807         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2808
2809         /*
2810          * This will force exiting early, as tracepoint_printk
2811          * is always zero when tracepoint_printk_iter is not allocated
2812          */
2813         if (!tracepoint_print_iter)
2814                 tracepoint_printk = 0;
2815
2816         if (save_tracepoint_printk == tracepoint_printk)
2817                 goto out;
2818
2819         if (tracepoint_printk)
2820                 static_key_enable(&tracepoint_printk_key.key);
2821         else
2822                 static_key_disable(&tracepoint_printk_key.key);
2823
2824  out:
2825         mutex_unlock(&tracepoint_printk_mutex);
2826
2827         return ret;
2828 }
2829
2830 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2831 {
2832         if (static_key_false(&tracepoint_printk_key.key))
2833                 output_printk(fbuffer);
2834
2835         if (static_branch_unlikely(&trace_event_exports_enabled))
2836                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2837         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2838                                     fbuffer->event, fbuffer->entry,
2839                                     fbuffer->trace_ctx, fbuffer->regs);
2840 }
2841 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2842
2843 /*
2844  * Skip 3:
2845  *
2846  *   trace_buffer_unlock_commit_regs()
2847  *   trace_event_buffer_commit()
2848  *   trace_event_raw_event_xxx()
2849  */
2850 # define STACK_SKIP 3
2851
2852 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2853                                      struct trace_buffer *buffer,
2854                                      struct ring_buffer_event *event,
2855                                      unsigned int trace_ctx,
2856                                      struct pt_regs *regs)
2857 {
2858         __buffer_unlock_commit(buffer, event);
2859
2860         /*
2861          * If regs is not set, then skip the necessary functions.
2862          * Note, we can still get here via blktrace, wakeup tracer
2863          * and mmiotrace, but that's ok if they lose a function or
2864          * two. They are not that meaningful.
2865          */
2866         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2867         ftrace_trace_userstack(tr, buffer, trace_ctx);
2868 }
2869
2870 /*
2871  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2872  */
2873 void
2874 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2875                                    struct ring_buffer_event *event)
2876 {
2877         __buffer_unlock_commit(buffer, event);
2878 }
2879
2880 void
2881 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2882                parent_ip, unsigned int trace_ctx)
2883 {
2884         struct trace_event_call *call = &event_function;
2885         struct trace_buffer *buffer = tr->array_buffer.buffer;
2886         struct ring_buffer_event *event;
2887         struct ftrace_entry *entry;
2888
2889         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2890                                             trace_ctx);
2891         if (!event)
2892                 return;
2893         entry   = ring_buffer_event_data(event);
2894         entry->ip                       = ip;
2895         entry->parent_ip                = parent_ip;
2896
2897         if (!call_filter_check_discard(call, entry, buffer, event)) {
2898                 if (static_branch_unlikely(&trace_function_exports_enabled))
2899                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2900                 __buffer_unlock_commit(buffer, event);
2901         }
2902 }
2903
2904 #ifdef CONFIG_STACKTRACE
2905
2906 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2907 #define FTRACE_KSTACK_NESTING   4
2908
2909 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2910
2911 struct ftrace_stack {
2912         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2913 };
2914
2915
2916 struct ftrace_stacks {
2917         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2918 };
2919
2920 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2921 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2922
2923 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2924                                  unsigned int trace_ctx,
2925                                  int skip, struct pt_regs *regs)
2926 {
2927         struct trace_event_call *call = &event_kernel_stack;
2928         struct ring_buffer_event *event;
2929         unsigned int size, nr_entries;
2930         struct ftrace_stack *fstack;
2931         struct stack_entry *entry;
2932         int stackidx;
2933
2934         /*
2935          * Add one, for this function and the call to save_stack_trace()
2936          * If regs is set, then these functions will not be in the way.
2937          */
2938 #ifndef CONFIG_UNWINDER_ORC
2939         if (!regs)
2940                 skip++;
2941 #endif
2942
2943         preempt_disable_notrace();
2944
2945         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2946
2947         /* This should never happen. If it does, yell once and skip */
2948         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2949                 goto out;
2950
2951         /*
2952          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2953          * interrupt will either see the value pre increment or post
2954          * increment. If the interrupt happens pre increment it will have
2955          * restored the counter when it returns.  We just need a barrier to
2956          * keep gcc from moving things around.
2957          */
2958         barrier();
2959
2960         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2961         size = ARRAY_SIZE(fstack->calls);
2962
2963         if (regs) {
2964                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2965                                                    size, skip);
2966         } else {
2967                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2968         }
2969
2970         size = nr_entries * sizeof(unsigned long);
2971         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2972                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2973                                     trace_ctx);
2974         if (!event)
2975                 goto out;
2976         entry = ring_buffer_event_data(event);
2977
2978         memcpy(&entry->caller, fstack->calls, size);
2979         entry->size = nr_entries;
2980
2981         if (!call_filter_check_discard(call, entry, buffer, event))
2982                 __buffer_unlock_commit(buffer, event);
2983
2984  out:
2985         /* Again, don't let gcc optimize things here */
2986         barrier();
2987         __this_cpu_dec(ftrace_stack_reserve);
2988         preempt_enable_notrace();
2989
2990 }
2991
2992 static inline void ftrace_trace_stack(struct trace_array *tr,
2993                                       struct trace_buffer *buffer,
2994                                       unsigned int trace_ctx,
2995                                       int skip, struct pt_regs *regs)
2996 {
2997         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2998                 return;
2999
3000         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3001 }
3002
3003 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3004                    int skip)
3005 {
3006         struct trace_buffer *buffer = tr->array_buffer.buffer;
3007
3008         if (rcu_is_watching()) {
3009                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3010                 return;
3011         }
3012
3013         /*
3014          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3015          * but if the above rcu_is_watching() failed, then the NMI
3016          * triggered someplace critical, and rcu_irq_enter() should
3017          * not be called from NMI.
3018          */
3019         if (unlikely(in_nmi()))
3020                 return;
3021
3022         rcu_irq_enter_irqson();
3023         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3024         rcu_irq_exit_irqson();
3025 }
3026
3027 /**
3028  * trace_dump_stack - record a stack back trace in the trace buffer
3029  * @skip: Number of functions to skip (helper handlers)
3030  */
3031 void trace_dump_stack(int skip)
3032 {
3033         if (tracing_disabled || tracing_selftest_running)
3034                 return;
3035
3036 #ifndef CONFIG_UNWINDER_ORC
3037         /* Skip 1 to skip this function. */
3038         skip++;
3039 #endif
3040         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3041                              tracing_gen_ctx(), skip, NULL);
3042 }
3043 EXPORT_SYMBOL_GPL(trace_dump_stack);
3044
3045 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3046 static DEFINE_PER_CPU(int, user_stack_count);
3047
3048 static void
3049 ftrace_trace_userstack(struct trace_array *tr,
3050                        struct trace_buffer *buffer, unsigned int trace_ctx)
3051 {
3052         struct trace_event_call *call = &event_user_stack;
3053         struct ring_buffer_event *event;
3054         struct userstack_entry *entry;
3055
3056         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3057                 return;
3058
3059         /*
3060          * NMIs can not handle page faults, even with fix ups.
3061          * The save user stack can (and often does) fault.
3062          */
3063         if (unlikely(in_nmi()))
3064                 return;
3065
3066         /*
3067          * prevent recursion, since the user stack tracing may
3068          * trigger other kernel events.
3069          */
3070         preempt_disable();
3071         if (__this_cpu_read(user_stack_count))
3072                 goto out;
3073
3074         __this_cpu_inc(user_stack_count);
3075
3076         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077                                             sizeof(*entry), trace_ctx);
3078         if (!event)
3079                 goto out_drop_count;
3080         entry   = ring_buffer_event_data(event);
3081
3082         entry->tgid             = current->tgid;
3083         memset(&entry->caller, 0, sizeof(entry->caller));
3084
3085         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086         if (!call_filter_check_discard(call, entry, buffer, event))
3087                 __buffer_unlock_commit(buffer, event);
3088
3089  out_drop_count:
3090         __this_cpu_dec(user_stack_count);
3091  out:
3092         preempt_enable();
3093 }
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_array *tr,
3096                                    struct trace_buffer *buffer,
3097                                    unsigned int trace_ctx)
3098 {
3099 }
3100 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3101
3102 #endif /* CONFIG_STACKTRACE */
3103
3104 static inline void
3105 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3106                           unsigned long long delta)
3107 {
3108         entry->bottom_delta_ts = delta & U32_MAX;
3109         entry->top_delta_ts = (delta >> 32);
3110 }
3111
3112 void trace_last_func_repeats(struct trace_array *tr,
3113                              struct trace_func_repeats *last_info,
3114                              unsigned int trace_ctx)
3115 {
3116         struct trace_buffer *buffer = tr->array_buffer.buffer;
3117         struct func_repeats_entry *entry;
3118         struct ring_buffer_event *event;
3119         u64 delta;
3120
3121         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3122                                             sizeof(*entry), trace_ctx);
3123         if (!event)
3124                 return;
3125
3126         delta = ring_buffer_event_time_stamp(buffer, event) -
3127                 last_info->ts_last_call;
3128
3129         entry = ring_buffer_event_data(event);
3130         entry->ip = last_info->ip;
3131         entry->parent_ip = last_info->parent_ip;
3132         entry->count = last_info->count;
3133         func_repeats_set_delta_ts(entry, delta);
3134
3135         __buffer_unlock_commit(buffer, event);
3136 }
3137
3138 /* created for use with alloc_percpu */
3139 struct trace_buffer_struct {
3140         int nesting;
3141         char buffer[4][TRACE_BUF_SIZE];
3142 };
3143
3144 static struct trace_buffer_struct *trace_percpu_buffer;
3145
3146 /*
3147  * This allows for lockless recording.  If we're nested too deeply, then
3148  * this returns NULL.
3149  */
3150 static char *get_trace_buf(void)
3151 {
3152         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3153
3154         if (!buffer || buffer->nesting >= 4)
3155                 return NULL;
3156
3157         buffer->nesting++;
3158
3159         /* Interrupts must see nesting incremented before we use the buffer */
3160         barrier();
3161         return &buffer->buffer[buffer->nesting - 1][0];
3162 }
3163
3164 static void put_trace_buf(void)
3165 {
3166         /* Don't let the decrement of nesting leak before this */
3167         barrier();
3168         this_cpu_dec(trace_percpu_buffer->nesting);
3169 }
3170
3171 static int alloc_percpu_trace_buffer(void)
3172 {
3173         struct trace_buffer_struct *buffers;
3174
3175         if (trace_percpu_buffer)
3176                 return 0;
3177
3178         buffers = alloc_percpu(struct trace_buffer_struct);
3179         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3180                 return -ENOMEM;
3181
3182         trace_percpu_buffer = buffers;
3183         return 0;
3184 }
3185
3186 static int buffers_allocated;
3187
3188 void trace_printk_init_buffers(void)
3189 {
3190         if (buffers_allocated)
3191                 return;
3192
3193         if (alloc_percpu_trace_buffer())
3194                 return;
3195
3196         /* trace_printk() is for debug use only. Don't use it in production. */
3197
3198         pr_warn("\n");
3199         pr_warn("**********************************************************\n");
3200         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3201         pr_warn("**                                                      **\n");
3202         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3203         pr_warn("**                                                      **\n");
3204         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3205         pr_warn("** unsafe for production use.                           **\n");
3206         pr_warn("**                                                      **\n");
3207         pr_warn("** If you see this message and you are not debugging    **\n");
3208         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3209         pr_warn("**                                                      **\n");
3210         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3211         pr_warn("**********************************************************\n");
3212
3213         /* Expand the buffers to set size */
3214         tracing_update_buffers();
3215
3216         buffers_allocated = 1;
3217
3218         /*
3219          * trace_printk_init_buffers() can be called by modules.
3220          * If that happens, then we need to start cmdline recording
3221          * directly here. If the global_trace.buffer is already
3222          * allocated here, then this was called by module code.
3223          */
3224         if (global_trace.array_buffer.buffer)
3225                 tracing_start_cmdline_record();
3226 }
3227 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3228
3229 void trace_printk_start_comm(void)
3230 {
3231         /* Start tracing comms if trace printk is set */
3232         if (!buffers_allocated)
3233                 return;
3234         tracing_start_cmdline_record();
3235 }
3236
3237 static void trace_printk_start_stop_comm(int enabled)
3238 {
3239         if (!buffers_allocated)
3240                 return;
3241
3242         if (enabled)
3243                 tracing_start_cmdline_record();
3244         else
3245                 tracing_stop_cmdline_record();
3246 }
3247
3248 /**
3249  * trace_vbprintk - write binary msg to tracing buffer
3250  * @ip:    The address of the caller
3251  * @fmt:   The string format to write to the buffer
3252  * @args:  Arguments for @fmt
3253  */
3254 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3255 {
3256         struct trace_event_call *call = &event_bprint;
3257         struct ring_buffer_event *event;
3258         struct trace_buffer *buffer;
3259         struct trace_array *tr = &global_trace;
3260         struct bprint_entry *entry;
3261         unsigned int trace_ctx;
3262         char *tbuffer;
3263         int len = 0, size;
3264
3265         if (unlikely(tracing_selftest_running || tracing_disabled))
3266                 return 0;
3267
3268         /* Don't pollute graph traces with trace_vprintk internals */
3269         pause_graph_tracing();
3270
3271         trace_ctx = tracing_gen_ctx();
3272         preempt_disable_notrace();
3273
3274         tbuffer = get_trace_buf();
3275         if (!tbuffer) {
3276                 len = 0;
3277                 goto out_nobuffer;
3278         }
3279
3280         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3281
3282         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3283                 goto out_put;
3284
3285         size = sizeof(*entry) + sizeof(u32) * len;
3286         buffer = tr->array_buffer.buffer;
3287         ring_buffer_nest_start(buffer);
3288         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3289                                             trace_ctx);
3290         if (!event)
3291                 goto out;
3292         entry = ring_buffer_event_data(event);
3293         entry->ip                       = ip;
3294         entry->fmt                      = fmt;
3295
3296         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3297         if (!call_filter_check_discard(call, entry, buffer, event)) {
3298                 __buffer_unlock_commit(buffer, event);
3299                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3300         }
3301
3302 out:
3303         ring_buffer_nest_end(buffer);
3304 out_put:
3305         put_trace_buf();
3306
3307 out_nobuffer:
3308         preempt_enable_notrace();
3309         unpause_graph_tracing();
3310
3311         return len;
3312 }
3313 EXPORT_SYMBOL_GPL(trace_vbprintk);
3314
3315 __printf(3, 0)
3316 static int
3317 __trace_array_vprintk(struct trace_buffer *buffer,
3318                       unsigned long ip, const char *fmt, va_list args)
3319 {
3320         struct trace_event_call *call = &event_print;
3321         struct ring_buffer_event *event;
3322         int len = 0, size;
3323         struct print_entry *entry;
3324         unsigned int trace_ctx;
3325         char *tbuffer;
3326
3327         if (tracing_disabled || tracing_selftest_running)
3328                 return 0;
3329
3330         /* Don't pollute graph traces with trace_vprintk internals */
3331         pause_graph_tracing();
3332
3333         trace_ctx = tracing_gen_ctx();
3334         preempt_disable_notrace();
3335
3336
3337         tbuffer = get_trace_buf();
3338         if (!tbuffer) {
3339                 len = 0;
3340                 goto out_nobuffer;
3341         }
3342
3343         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3344
3345         size = sizeof(*entry) + len + 1;
3346         ring_buffer_nest_start(buffer);
3347         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3348                                             trace_ctx);
3349         if (!event)
3350                 goto out;
3351         entry = ring_buffer_event_data(event);
3352         entry->ip = ip;
3353
3354         memcpy(&entry->buf, tbuffer, len + 1);
3355         if (!call_filter_check_discard(call, entry, buffer, event)) {
3356                 __buffer_unlock_commit(buffer, event);
3357                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3358         }
3359
3360 out:
3361         ring_buffer_nest_end(buffer);
3362         put_trace_buf();
3363
3364 out_nobuffer:
3365         preempt_enable_notrace();
3366         unpause_graph_tracing();
3367
3368         return len;
3369 }
3370
3371 __printf(3, 0)
3372 int trace_array_vprintk(struct trace_array *tr,
3373                         unsigned long ip, const char *fmt, va_list args)
3374 {
3375         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3376 }
3377
3378 /**
3379  * trace_array_printk - Print a message to a specific instance
3380  * @tr: The instance trace_array descriptor
3381  * @ip: The instruction pointer that this is called from.
3382  * @fmt: The format to print (printf format)
3383  *
3384  * If a subsystem sets up its own instance, they have the right to
3385  * printk strings into their tracing instance buffer using this
3386  * function. Note, this function will not write into the top level
3387  * buffer (use trace_printk() for that), as writing into the top level
3388  * buffer should only have events that can be individually disabled.
3389  * trace_printk() is only used for debugging a kernel, and should not
3390  * be ever incorporated in normal use.
3391  *
3392  * trace_array_printk() can be used, as it will not add noise to the
3393  * top level tracing buffer.
3394  *
3395  * Note, trace_array_init_printk() must be called on @tr before this
3396  * can be used.
3397  */
3398 __printf(3, 0)
3399 int trace_array_printk(struct trace_array *tr,
3400                        unsigned long ip, const char *fmt, ...)
3401 {
3402         int ret;
3403         va_list ap;
3404
3405         if (!tr)
3406                 return -ENOENT;
3407
3408         /* This is only allowed for created instances */
3409         if (tr == &global_trace)
3410                 return 0;
3411
3412         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3413                 return 0;
3414
3415         va_start(ap, fmt);
3416         ret = trace_array_vprintk(tr, ip, fmt, ap);
3417         va_end(ap);
3418         return ret;
3419 }
3420 EXPORT_SYMBOL_GPL(trace_array_printk);
3421
3422 /**
3423  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3424  * @tr: The trace array to initialize the buffers for
3425  *
3426  * As trace_array_printk() only writes into instances, they are OK to
3427  * have in the kernel (unlike trace_printk()). This needs to be called
3428  * before trace_array_printk() can be used on a trace_array.
3429  */
3430 int trace_array_init_printk(struct trace_array *tr)
3431 {
3432         if (!tr)
3433                 return -ENOENT;
3434
3435         /* This is only allowed for created instances */
3436         if (tr == &global_trace)
3437                 return -EINVAL;
3438
3439         return alloc_percpu_trace_buffer();
3440 }
3441 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3442
3443 __printf(3, 4)
3444 int trace_array_printk_buf(struct trace_buffer *buffer,
3445                            unsigned long ip, const char *fmt, ...)
3446 {
3447         int ret;
3448         va_list ap;
3449
3450         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3451                 return 0;
3452
3453         va_start(ap, fmt);
3454         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3455         va_end(ap);
3456         return ret;
3457 }
3458
3459 __printf(2, 0)
3460 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3461 {
3462         return trace_array_vprintk(&global_trace, ip, fmt, args);
3463 }
3464 EXPORT_SYMBOL_GPL(trace_vprintk);
3465
3466 static void trace_iterator_increment(struct trace_iterator *iter)
3467 {
3468         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3469
3470         iter->idx++;
3471         if (buf_iter)
3472                 ring_buffer_iter_advance(buf_iter);
3473 }
3474
3475 static struct trace_entry *
3476 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3477                 unsigned long *lost_events)
3478 {
3479         struct ring_buffer_event *event;
3480         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3481
3482         if (buf_iter) {
3483                 event = ring_buffer_iter_peek(buf_iter, ts);
3484                 if (lost_events)
3485                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3486                                 (unsigned long)-1 : 0;
3487         } else {
3488                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3489                                          lost_events);
3490         }
3491
3492         if (event) {
3493                 iter->ent_size = ring_buffer_event_length(event);
3494                 return ring_buffer_event_data(event);
3495         }
3496         iter->ent_size = 0;
3497         return NULL;
3498 }
3499
3500 static struct trace_entry *
3501 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3502                   unsigned long *missing_events, u64 *ent_ts)
3503 {
3504         struct trace_buffer *buffer = iter->array_buffer->buffer;
3505         struct trace_entry *ent, *next = NULL;
3506         unsigned long lost_events = 0, next_lost = 0;
3507         int cpu_file = iter->cpu_file;
3508         u64 next_ts = 0, ts;
3509         int next_cpu = -1;
3510         int next_size = 0;
3511         int cpu;
3512
3513         /*
3514          * If we are in a per_cpu trace file, don't bother by iterating over
3515          * all cpu and peek directly.
3516          */
3517         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3518                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3519                         return NULL;
3520                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3521                 if (ent_cpu)
3522                         *ent_cpu = cpu_file;
3523
3524                 return ent;
3525         }
3526
3527         for_each_tracing_cpu(cpu) {
3528
3529                 if (ring_buffer_empty_cpu(buffer, cpu))
3530                         continue;
3531
3532                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3533
3534                 /*
3535                  * Pick the entry with the smallest timestamp:
3536                  */
3537                 if (ent && (!next || ts < next_ts)) {
3538                         next = ent;
3539                         next_cpu = cpu;
3540                         next_ts = ts;
3541                         next_lost = lost_events;
3542                         next_size = iter->ent_size;
3543                 }
3544         }
3545
3546         iter->ent_size = next_size;
3547
3548         if (ent_cpu)
3549                 *ent_cpu = next_cpu;
3550
3551         if (ent_ts)
3552                 *ent_ts = next_ts;
3553
3554         if (missing_events)
3555                 *missing_events = next_lost;
3556
3557         return next;
3558 }
3559
3560 #define STATIC_FMT_BUF_SIZE     128
3561 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3562
3563 static char *trace_iter_expand_format(struct trace_iterator *iter)
3564 {
3565         char *tmp;
3566
3567         /*
3568          * iter->tr is NULL when used with tp_printk, which makes
3569          * this get called where it is not safe to call krealloc().
3570          */
3571         if (!iter->tr || iter->fmt == static_fmt_buf)
3572                 return NULL;
3573
3574         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3575                        GFP_KERNEL);
3576         if (tmp) {
3577                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3578                 iter->fmt = tmp;
3579         }
3580
3581         return tmp;
3582 }
3583
3584 /* Returns true if the string is safe to dereference from an event */
3585 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3586 {
3587         unsigned long addr = (unsigned long)str;
3588         struct trace_event *trace_event;
3589         struct trace_event_call *event;
3590
3591         /* OK if part of the event data */
3592         if ((addr >= (unsigned long)iter->ent) &&
3593             (addr < (unsigned long)iter->ent + iter->ent_size))
3594                 return true;
3595
3596         /* OK if part of the temp seq buffer */
3597         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3598             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3599                 return true;
3600
3601         /* Core rodata can not be freed */
3602         if (is_kernel_rodata(addr))
3603                 return true;
3604
3605         if (trace_is_tracepoint_string(str))
3606                 return true;
3607
3608         /*
3609          * Now this could be a module event, referencing core module
3610          * data, which is OK.
3611          */
3612         if (!iter->ent)
3613                 return false;
3614
3615         trace_event = ftrace_find_event(iter->ent->type);
3616         if (!trace_event)
3617                 return false;
3618
3619         event = container_of(trace_event, struct trace_event_call, event);
3620         if (!event->mod)
3621                 return false;
3622
3623         /* Would rather have rodata, but this will suffice */
3624         if (within_module_core(addr, event->mod))
3625                 return true;
3626
3627         return false;
3628 }
3629
3630 static const char *show_buffer(struct trace_seq *s)
3631 {
3632         struct seq_buf *seq = &s->seq;
3633
3634         seq_buf_terminate(seq);
3635
3636         return seq->buffer;
3637 }
3638
3639 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3640
3641 static int test_can_verify_check(const char *fmt, ...)
3642 {
3643         char buf[16];
3644         va_list ap;
3645         int ret;
3646
3647         /*
3648          * The verifier is dependent on vsnprintf() modifies the va_list
3649          * passed to it, where it is sent as a reference. Some architectures
3650          * (like x86_32) passes it by value, which means that vsnprintf()
3651          * does not modify the va_list passed to it, and the verifier
3652          * would then need to be able to understand all the values that
3653          * vsnprintf can use. If it is passed by value, then the verifier
3654          * is disabled.
3655          */
3656         va_start(ap, fmt);
3657         vsnprintf(buf, 16, "%d", ap);
3658         ret = va_arg(ap, int);
3659         va_end(ap);
3660
3661         return ret;
3662 }
3663
3664 static void test_can_verify(void)
3665 {
3666         if (!test_can_verify_check("%d %d", 0, 1)) {
3667                 pr_info("trace event string verifier disabled\n");
3668                 static_branch_inc(&trace_no_verify);
3669         }
3670 }
3671
3672 /**
3673  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3674  * @iter: The iterator that holds the seq buffer and the event being printed
3675  * @fmt: The format used to print the event
3676  * @ap: The va_list holding the data to print from @fmt.
3677  *
3678  * This writes the data into the @iter->seq buffer using the data from
3679  * @fmt and @ap. If the format has a %s, then the source of the string
3680  * is examined to make sure it is safe to print, otherwise it will
3681  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3682  * pointer.
3683  */
3684 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3685                          va_list ap)
3686 {
3687         const char *p = fmt;
3688         const char *str;
3689         int i, j;
3690
3691         if (WARN_ON_ONCE(!fmt))
3692                 return;
3693
3694         if (static_branch_unlikely(&trace_no_verify))
3695                 goto print;
3696
3697         /* Don't bother checking when doing a ftrace_dump() */
3698         if (iter->fmt == static_fmt_buf)
3699                 goto print;
3700
3701         while (*p) {
3702                 bool star = false;
3703                 int len = 0;
3704
3705                 j = 0;
3706
3707                 /* We only care about %s and variants */
3708                 for (i = 0; p[i]; i++) {
3709                         if (i + 1 >= iter->fmt_size) {
3710                                 /*
3711                                  * If we can't expand the copy buffer,
3712                                  * just print it.
3713                                  */
3714                                 if (!trace_iter_expand_format(iter))
3715                                         goto print;
3716                         }
3717
3718                         if (p[i] == '\\' && p[i+1]) {
3719                                 i++;
3720                                 continue;
3721                         }
3722                         if (p[i] == '%') {
3723                                 /* Need to test cases like %08.*s */
3724                                 for (j = 1; p[i+j]; j++) {
3725                                         if (isdigit(p[i+j]) ||
3726                                             p[i+j] == '.')
3727                                                 continue;
3728                                         if (p[i+j] == '*') {
3729                                                 star = true;
3730                                                 continue;
3731                                         }
3732                                         break;
3733                                 }
3734                                 if (p[i+j] == 's')
3735                                         break;
3736                                 star = false;
3737                         }
3738                         j = 0;
3739                 }
3740                 /* If no %s found then just print normally */
3741                 if (!p[i])
3742                         break;
3743
3744                 /* Copy up to the %s, and print that */
3745                 strncpy(iter->fmt, p, i);
3746                 iter->fmt[i] = '\0';
3747                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3748
3749                 if (star)
3750                         len = va_arg(ap, int);
3751
3752                 /* The ap now points to the string data of the %s */
3753                 str = va_arg(ap, const char *);
3754
3755                 /*
3756                  * If you hit this warning, it is likely that the
3757                  * trace event in question used %s on a string that
3758                  * was saved at the time of the event, but may not be
3759                  * around when the trace is read. Use __string(),
3760                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3761                  * instead. See samples/trace_events/trace-events-sample.h
3762                  * for reference.
3763                  */
3764                 if (WARN_ONCE(!trace_safe_str(iter, str),
3765                               "fmt: '%s' current_buffer: '%s'",
3766                               fmt, show_buffer(&iter->seq))) {
3767                         int ret;
3768
3769                         /* Try to safely read the string */
3770                         if (star) {
3771                                 if (len + 1 > iter->fmt_size)
3772                                         len = iter->fmt_size - 1;
3773                                 if (len < 0)
3774                                         len = 0;
3775                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3776                                 iter->fmt[len] = 0;
3777                                 star = false;
3778                         } else {
3779                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3780                                                                   iter->fmt_size);
3781                         }
3782                         if (ret < 0)
3783                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3784                         else
3785                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3786                                                  str, iter->fmt);
3787                         str = "[UNSAFE-MEMORY]";
3788                         strcpy(iter->fmt, "%s");
3789                 } else {
3790                         strncpy(iter->fmt, p + i, j + 1);
3791                         iter->fmt[j+1] = '\0';
3792                 }
3793                 if (star)
3794                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3795                 else
3796                         trace_seq_printf(&iter->seq, iter->fmt, str);
3797
3798                 p += i + j + 1;
3799         }
3800  print:
3801         if (*p)
3802                 trace_seq_vprintf(&iter->seq, p, ap);
3803 }
3804
3805 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3806 {
3807         const char *p, *new_fmt;
3808         char *q;
3809
3810         if (WARN_ON_ONCE(!fmt))
3811                 return fmt;
3812
3813         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3814                 return fmt;
3815
3816         p = fmt;
3817         new_fmt = q = iter->fmt;
3818         while (*p) {
3819                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3820                         if (!trace_iter_expand_format(iter))
3821                                 return fmt;
3822
3823                         q += iter->fmt - new_fmt;
3824                         new_fmt = iter->fmt;
3825                 }
3826
3827                 *q++ = *p++;
3828
3829                 /* Replace %p with %px */
3830                 if (p[-1] == '%') {
3831                         if (p[0] == '%') {
3832                                 *q++ = *p++;
3833                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3834                                 *q++ = *p++;
3835                                 *q++ = 'x';
3836                         }
3837                 }
3838         }
3839         *q = '\0';
3840
3841         return new_fmt;
3842 }
3843
3844 #define STATIC_TEMP_BUF_SIZE    128
3845 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3846
3847 /* Find the next real entry, without updating the iterator itself */
3848 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3849                                           int *ent_cpu, u64 *ent_ts)
3850 {
3851         /* __find_next_entry will reset ent_size */
3852         int ent_size = iter->ent_size;
3853         struct trace_entry *entry;
3854
3855         /*
3856          * If called from ftrace_dump(), then the iter->temp buffer
3857          * will be the static_temp_buf and not created from kmalloc.
3858          * If the entry size is greater than the buffer, we can
3859          * not save it. Just return NULL in that case. This is only
3860          * used to add markers when two consecutive events' time
3861          * stamps have a large delta. See trace_print_lat_context()
3862          */
3863         if (iter->temp == static_temp_buf &&
3864             STATIC_TEMP_BUF_SIZE < ent_size)
3865                 return NULL;
3866
3867         /*
3868          * The __find_next_entry() may call peek_next_entry(), which may
3869          * call ring_buffer_peek() that may make the contents of iter->ent
3870          * undefined. Need to copy iter->ent now.
3871          */
3872         if (iter->ent && iter->ent != iter->temp) {
3873                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3874                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3875                         void *temp;
3876                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3877                         if (!temp)
3878                                 return NULL;
3879                         kfree(iter->temp);
3880                         iter->temp = temp;
3881                         iter->temp_size = iter->ent_size;
3882                 }
3883                 memcpy(iter->temp, iter->ent, iter->ent_size);
3884                 iter->ent = iter->temp;
3885         }
3886         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3887         /* Put back the original ent_size */
3888         iter->ent_size = ent_size;
3889
3890         return entry;
3891 }
3892
3893 /* Find the next real entry, and increment the iterator to the next entry */
3894 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3895 {
3896         iter->ent = __find_next_entry(iter, &iter->cpu,
3897                                       &iter->lost_events, &iter->ts);
3898
3899         if (iter->ent)
3900                 trace_iterator_increment(iter);
3901
3902         return iter->ent ? iter : NULL;
3903 }
3904
3905 static void trace_consume(struct trace_iterator *iter)
3906 {
3907         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3908                             &iter->lost_events);
3909 }
3910
3911 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3912 {
3913         struct trace_iterator *iter = m->private;
3914         int i = (int)*pos;
3915         void *ent;
3916
3917         WARN_ON_ONCE(iter->leftover);
3918
3919         (*pos)++;
3920
3921         /* can't go backwards */
3922         if (iter->idx > i)
3923                 return NULL;
3924
3925         if (iter->idx < 0)
3926                 ent = trace_find_next_entry_inc(iter);
3927         else
3928                 ent = iter;
3929
3930         while (ent && iter->idx < i)
3931                 ent = trace_find_next_entry_inc(iter);
3932
3933         iter->pos = *pos;
3934
3935         return ent;
3936 }
3937
3938 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3939 {
3940         struct ring_buffer_iter *buf_iter;
3941         unsigned long entries = 0;
3942         u64 ts;
3943
3944         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3945
3946         buf_iter = trace_buffer_iter(iter, cpu);
3947         if (!buf_iter)
3948                 return;
3949
3950         ring_buffer_iter_reset(buf_iter);
3951
3952         /*
3953          * We could have the case with the max latency tracers
3954          * that a reset never took place on a cpu. This is evident
3955          * by the timestamp being before the start of the buffer.
3956          */
3957         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3958                 if (ts >= iter->array_buffer->time_start)
3959                         break;
3960                 entries++;
3961                 ring_buffer_iter_advance(buf_iter);
3962         }
3963
3964         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3965 }
3966
3967 /*
3968  * The current tracer is copied to avoid a global locking
3969  * all around.
3970  */
3971 static void *s_start(struct seq_file *m, loff_t *pos)
3972 {
3973         struct trace_iterator *iter = m->private;
3974         struct trace_array *tr = iter->tr;
3975         int cpu_file = iter->cpu_file;
3976         void *p = NULL;
3977         loff_t l = 0;
3978         int cpu;
3979
3980         /*
3981          * copy the tracer to avoid using a global lock all around.
3982          * iter->trace is a copy of current_trace, the pointer to the
3983          * name may be used instead of a strcmp(), as iter->trace->name
3984          * will point to the same string as current_trace->name.
3985          */
3986         mutex_lock(&trace_types_lock);
3987         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3988                 *iter->trace = *tr->current_trace;
3989         mutex_unlock(&trace_types_lock);
3990
3991 #ifdef CONFIG_TRACER_MAX_TRACE
3992         if (iter->snapshot && iter->trace->use_max_tr)
3993                 return ERR_PTR(-EBUSY);
3994 #endif
3995
3996         if (*pos != iter->pos) {
3997                 iter->ent = NULL;
3998                 iter->cpu = 0;
3999                 iter->idx = -1;
4000
4001                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4002                         for_each_tracing_cpu(cpu)
4003                                 tracing_iter_reset(iter, cpu);
4004                 } else
4005                         tracing_iter_reset(iter, cpu_file);
4006
4007                 iter->leftover = 0;
4008                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4009                         ;
4010
4011         } else {
4012                 /*
4013                  * If we overflowed the seq_file before, then we want
4014                  * to just reuse the trace_seq buffer again.
4015                  */
4016                 if (iter->leftover)
4017                         p = iter;
4018                 else {
4019                         l = *pos - 1;
4020                         p = s_next(m, p, &l);
4021                 }
4022         }
4023
4024         trace_event_read_lock();
4025         trace_access_lock(cpu_file);
4026         return p;
4027 }
4028
4029 static void s_stop(struct seq_file *m, void *p)
4030 {
4031         struct trace_iterator *iter = m->private;
4032
4033 #ifdef CONFIG_TRACER_MAX_TRACE
4034         if (iter->snapshot && iter->trace->use_max_tr)
4035                 return;
4036 #endif
4037
4038         trace_access_unlock(iter->cpu_file);
4039         trace_event_read_unlock();
4040 }
4041
4042 static void
4043 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4044                       unsigned long *entries, int cpu)
4045 {
4046         unsigned long count;
4047
4048         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4049         /*
4050          * If this buffer has skipped entries, then we hold all
4051          * entries for the trace and we need to ignore the
4052          * ones before the time stamp.
4053          */
4054         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4055                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4056                 /* total is the same as the entries */
4057                 *total = count;
4058         } else
4059                 *total = count +
4060                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4061         *entries = count;
4062 }
4063
4064 static void
4065 get_total_entries(struct array_buffer *buf,
4066                   unsigned long *total, unsigned long *entries)
4067 {
4068         unsigned long t, e;
4069         int cpu;
4070
4071         *total = 0;
4072         *entries = 0;
4073
4074         for_each_tracing_cpu(cpu) {
4075                 get_total_entries_cpu(buf, &t, &e, cpu);
4076                 *total += t;
4077                 *entries += e;
4078         }
4079 }
4080
4081 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4082 {
4083         unsigned long total, entries;
4084
4085         if (!tr)
4086                 tr = &global_trace;
4087
4088         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4089
4090         return entries;
4091 }
4092
4093 unsigned long trace_total_entries(struct trace_array *tr)
4094 {
4095         unsigned long total, entries;
4096
4097         if (!tr)
4098                 tr = &global_trace;
4099
4100         get_total_entries(&tr->array_buffer, &total, &entries);
4101
4102         return entries;
4103 }
4104
4105 static void print_lat_help_header(struct seq_file *m)
4106 {
4107         seq_puts(m, "#                    _------=> CPU#            \n"
4108                     "#                   / _-----=> irqs-off        \n"
4109                     "#                  | / _----=> need-resched    \n"
4110                     "#                  || / _---=> hardirq/softirq \n"
4111                     "#                  ||| / _--=> preempt-depth   \n"
4112                     "#                  |||| /     delay            \n"
4113                     "#  cmd     pid     ||||| time  |   caller      \n"
4114                     "#     \\   /        |||||  \\    |   /         \n");
4115 }
4116
4117 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4118 {
4119         unsigned long total;
4120         unsigned long entries;
4121
4122         get_total_entries(buf, &total, &entries);
4123         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4124                    entries, total, num_online_cpus());
4125         seq_puts(m, "#\n");
4126 }
4127
4128 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4129                                    unsigned int flags)
4130 {
4131         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4132
4133         print_event_info(buf, m);
4134
4135         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4136         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4137 }
4138
4139 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4140                                        unsigned int flags)
4141 {
4142         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4143         const char *space = "            ";
4144         int prec = tgid ? 12 : 2;
4145
4146         print_event_info(buf, m);
4147
4148         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4149         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4150         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4151         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4152         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4153         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4154         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4155 }
4156
4157 void
4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4161         struct array_buffer *buf = iter->array_buffer;
4162         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4163         struct tracer *type = iter->trace;
4164         unsigned long entries;
4165         unsigned long total;
4166         const char *name = "preemption";
4167
4168         name = type->name;
4169
4170         get_total_entries(buf, &total, &entries);
4171
4172         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4173                    name, UTS_RELEASE);
4174         seq_puts(m, "# -----------------------------------"
4175                  "---------------------------------\n");
4176         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4177                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4178                    nsecs_to_usecs(data->saved_latency),
4179                    entries,
4180                    total,
4181                    buf->cpu,
4182 #if defined(CONFIG_PREEMPT_NONE)
4183                    "server",
4184 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4185                    "desktop",
4186 #elif defined(CONFIG_PREEMPT)
4187                    "preempt",
4188 #elif defined(CONFIG_PREEMPT_RT)
4189                    "preempt_rt",
4190 #else
4191                    "unknown",
4192 #endif
4193                    /* These are reserved for later use */
4194                    0, 0, 0, 0);
4195 #ifdef CONFIG_SMP
4196         seq_printf(m, " #P:%d)\n", num_online_cpus());
4197 #else
4198         seq_puts(m, ")\n");
4199 #endif
4200         seq_puts(m, "#    -----------------\n");
4201         seq_printf(m, "#    | task: %.16s-%d "
4202                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4203                    data->comm, data->pid,
4204                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4205                    data->policy, data->rt_priority);
4206         seq_puts(m, "#    -----------------\n");
4207
4208         if (data->critical_start) {
4209                 seq_puts(m, "#  => started at: ");
4210                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4211                 trace_print_seq(m, &iter->seq);
4212                 seq_puts(m, "\n#  => ended at:   ");
4213                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4214                 trace_print_seq(m, &iter->seq);
4215                 seq_puts(m, "\n#\n");
4216         }
4217
4218         seq_puts(m, "#\n");
4219 }
4220
4221 static void test_cpu_buff_start(struct trace_iterator *iter)
4222 {
4223         struct trace_seq *s = &iter->seq;
4224         struct trace_array *tr = iter->tr;
4225
4226         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4227                 return;
4228
4229         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4230                 return;
4231
4232         if (cpumask_available(iter->started) &&
4233             cpumask_test_cpu(iter->cpu, iter->started))
4234                 return;
4235
4236         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4237                 return;
4238
4239         if (cpumask_available(iter->started))
4240                 cpumask_set_cpu(iter->cpu, iter->started);
4241
4242         /* Don't print started cpu buffer for the first entry of the trace */
4243         if (iter->idx > 1)
4244                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4245                                 iter->cpu);
4246 }
4247
4248 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4249 {
4250         struct trace_array *tr = iter->tr;
4251         struct trace_seq *s = &iter->seq;
4252         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4253         struct trace_entry *entry;
4254         struct trace_event *event;
4255
4256         entry = iter->ent;
4257
4258         test_cpu_buff_start(iter);
4259
4260         event = ftrace_find_event(entry->type);
4261
4262         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4263                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4264                         trace_print_lat_context(iter);
4265                 else
4266                         trace_print_context(iter);
4267         }
4268
4269         if (trace_seq_has_overflowed(s))
4270                 return TRACE_TYPE_PARTIAL_LINE;
4271
4272         if (event)
4273                 return event->funcs->trace(iter, sym_flags, event);
4274
4275         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4276
4277         return trace_handle_return(s);
4278 }
4279
4280 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4281 {
4282         struct trace_array *tr = iter->tr;
4283         struct trace_seq *s = &iter->seq;
4284         struct trace_entry *entry;
4285         struct trace_event *event;
4286
4287         entry = iter->ent;
4288
4289         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4290                 trace_seq_printf(s, "%d %d %llu ",
4291                                  entry->pid, iter->cpu, iter->ts);
4292
4293         if (trace_seq_has_overflowed(s))
4294                 return TRACE_TYPE_PARTIAL_LINE;
4295
4296         event = ftrace_find_event(entry->type);
4297         if (event)
4298                 return event->funcs->raw(iter, 0, event);
4299
4300         trace_seq_printf(s, "%d ?\n", entry->type);
4301
4302         return trace_handle_return(s);
4303 }
4304
4305 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4306 {
4307         struct trace_array *tr = iter->tr;
4308         struct trace_seq *s = &iter->seq;
4309         unsigned char newline = '\n';
4310         struct trace_entry *entry;
4311         struct trace_event *event;
4312
4313         entry = iter->ent;
4314
4315         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4316                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4317                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4318                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4319                 if (trace_seq_has_overflowed(s))
4320                         return TRACE_TYPE_PARTIAL_LINE;
4321         }
4322
4323         event = ftrace_find_event(entry->type);
4324         if (event) {
4325                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4326                 if (ret != TRACE_TYPE_HANDLED)
4327                         return ret;
4328         }
4329
4330         SEQ_PUT_FIELD(s, newline);
4331
4332         return trace_handle_return(s);
4333 }
4334
4335 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4336 {
4337         struct trace_array *tr = iter->tr;
4338         struct trace_seq *s = &iter->seq;
4339         struct trace_entry *entry;
4340         struct trace_event *event;
4341
4342         entry = iter->ent;
4343
4344         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4345                 SEQ_PUT_FIELD(s, entry->pid);
4346                 SEQ_PUT_FIELD(s, iter->cpu);
4347                 SEQ_PUT_FIELD(s, iter->ts);
4348                 if (trace_seq_has_overflowed(s))
4349                         return TRACE_TYPE_PARTIAL_LINE;
4350         }
4351
4352         event = ftrace_find_event(entry->type);
4353         return event ? event->funcs->binary(iter, 0, event) :
4354                 TRACE_TYPE_HANDLED;
4355 }
4356
4357 int trace_empty(struct trace_iterator *iter)
4358 {
4359         struct ring_buffer_iter *buf_iter;
4360         int cpu;
4361
4362         /* If we are looking at one CPU buffer, only check that one */
4363         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4364                 cpu = iter->cpu_file;
4365                 buf_iter = trace_buffer_iter(iter, cpu);
4366                 if (buf_iter) {
4367                         if (!ring_buffer_iter_empty(buf_iter))
4368                                 return 0;
4369                 } else {
4370                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4371                                 return 0;
4372                 }
4373                 return 1;
4374         }
4375
4376         for_each_tracing_cpu(cpu) {
4377                 buf_iter = trace_buffer_iter(iter, cpu);
4378                 if (buf_iter) {
4379                         if (!ring_buffer_iter_empty(buf_iter))
4380                                 return 0;
4381                 } else {
4382                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4383                                 return 0;
4384                 }
4385         }
4386
4387         return 1;
4388 }
4389
4390 /*  Called with trace_event_read_lock() held. */
4391 enum print_line_t print_trace_line(struct trace_iterator *iter)
4392 {
4393         struct trace_array *tr = iter->tr;
4394         unsigned long trace_flags = tr->trace_flags;
4395         enum print_line_t ret;
4396
4397         if (iter->lost_events) {
4398                 if (iter->lost_events == (unsigned long)-1)
4399                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4400                                          iter->cpu);
4401                 else
4402                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4403                                          iter->cpu, iter->lost_events);
4404                 if (trace_seq_has_overflowed(&iter->seq))
4405                         return TRACE_TYPE_PARTIAL_LINE;
4406         }
4407
4408         if (iter->trace && iter->trace->print_line) {
4409                 ret = iter->trace->print_line(iter);
4410                 if (ret != TRACE_TYPE_UNHANDLED)
4411                         return ret;
4412         }
4413
4414         if (iter->ent->type == TRACE_BPUTS &&
4415                         trace_flags & TRACE_ITER_PRINTK &&
4416                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4417                 return trace_print_bputs_msg_only(iter);
4418
4419         if (iter->ent->type == TRACE_BPRINT &&
4420                         trace_flags & TRACE_ITER_PRINTK &&
4421                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4422                 return trace_print_bprintk_msg_only(iter);
4423
4424         if (iter->ent->type == TRACE_PRINT &&
4425                         trace_flags & TRACE_ITER_PRINTK &&
4426                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4427                 return trace_print_printk_msg_only(iter);
4428
4429         if (trace_flags & TRACE_ITER_BIN)
4430                 return print_bin_fmt(iter);
4431
4432         if (trace_flags & TRACE_ITER_HEX)
4433                 return print_hex_fmt(iter);
4434
4435         if (trace_flags & TRACE_ITER_RAW)
4436                 return print_raw_fmt(iter);
4437
4438         return print_trace_fmt(iter);
4439 }
4440
4441 void trace_latency_header(struct seq_file *m)
4442 {
4443         struct trace_iterator *iter = m->private;
4444         struct trace_array *tr = iter->tr;
4445
4446         /* print nothing if the buffers are empty */
4447         if (trace_empty(iter))
4448                 return;
4449
4450         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4451                 print_trace_header(m, iter);
4452
4453         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4454                 print_lat_help_header(m);
4455 }
4456
4457 void trace_default_header(struct seq_file *m)
4458 {
4459         struct trace_iterator *iter = m->private;
4460         struct trace_array *tr = iter->tr;
4461         unsigned long trace_flags = tr->trace_flags;
4462
4463         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4464                 return;
4465
4466         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4467                 /* print nothing if the buffers are empty */
4468                 if (trace_empty(iter))
4469                         return;
4470                 print_trace_header(m, iter);
4471                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4472                         print_lat_help_header(m);
4473         } else {
4474                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4475                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4476                                 print_func_help_header_irq(iter->array_buffer,
4477                                                            m, trace_flags);
4478                         else
4479                                 print_func_help_header(iter->array_buffer, m,
4480                                                        trace_flags);
4481                 }
4482         }
4483 }
4484
4485 static void test_ftrace_alive(struct seq_file *m)
4486 {
4487         if (!ftrace_is_dead())
4488                 return;
4489         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4490                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4491 }
4492
4493 #ifdef CONFIG_TRACER_MAX_TRACE
4494 static void show_snapshot_main_help(struct seq_file *m)
4495 {
4496         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4497                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4498                     "#                      Takes a snapshot of the main buffer.\n"
4499                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4500                     "#                      (Doesn't have to be '2' works with any number that\n"
4501                     "#                       is not a '0' or '1')\n");
4502 }
4503
4504 static void show_snapshot_percpu_help(struct seq_file *m)
4505 {
4506         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4507 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4508         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4509                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4510 #else
4511         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4512                     "#                     Must use main snapshot file to allocate.\n");
4513 #endif
4514         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4515                     "#                      (Doesn't have to be '2' works with any number that\n"
4516                     "#                       is not a '0' or '1')\n");
4517 }
4518
4519 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4520 {
4521         if (iter->tr->allocated_snapshot)
4522                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4523         else
4524                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4525
4526         seq_puts(m, "# Snapshot commands:\n");
4527         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4528                 show_snapshot_main_help(m);
4529         else
4530                 show_snapshot_percpu_help(m);
4531 }
4532 #else
4533 /* Should never be called */
4534 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4535 #endif
4536
4537 static int s_show(struct seq_file *m, void *v)
4538 {
4539         struct trace_iterator *iter = v;
4540         int ret;
4541
4542         if (iter->ent == NULL) {
4543                 if (iter->tr) {
4544                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4545                         seq_puts(m, "#\n");
4546                         test_ftrace_alive(m);
4547                 }
4548                 if (iter->snapshot && trace_empty(iter))
4549                         print_snapshot_help(m, iter);
4550                 else if (iter->trace && iter->trace->print_header)
4551                         iter->trace->print_header(m);
4552                 else
4553                         trace_default_header(m);
4554
4555         } else if (iter->leftover) {
4556                 /*
4557                  * If we filled the seq_file buffer earlier, we
4558                  * want to just show it now.
4559                  */
4560                 ret = trace_print_seq(m, &iter->seq);
4561
4562                 /* ret should this time be zero, but you never know */
4563                 iter->leftover = ret;
4564
4565         } else {
4566                 print_trace_line(iter);
4567                 ret = trace_print_seq(m, &iter->seq);
4568                 /*
4569                  * If we overflow the seq_file buffer, then it will
4570                  * ask us for this data again at start up.
4571                  * Use that instead.
4572                  *  ret is 0 if seq_file write succeeded.
4573                  *        -1 otherwise.
4574                  */
4575                 iter->leftover = ret;
4576         }
4577
4578         return 0;
4579 }
4580
4581 /*
4582  * Should be used after trace_array_get(), trace_types_lock
4583  * ensures that i_cdev was already initialized.
4584  */
4585 static inline int tracing_get_cpu(struct inode *inode)
4586 {
4587         if (inode->i_cdev) /* See trace_create_cpu_file() */
4588                 return (long)inode->i_cdev - 1;
4589         return RING_BUFFER_ALL_CPUS;
4590 }
4591
4592 static const struct seq_operations tracer_seq_ops = {
4593         .start          = s_start,
4594         .next           = s_next,
4595         .stop           = s_stop,
4596         .show           = s_show,
4597 };
4598
4599 static struct trace_iterator *
4600 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4601 {
4602         struct trace_array *tr = inode->i_private;
4603         struct trace_iterator *iter;
4604         int cpu;
4605
4606         if (tracing_disabled)
4607                 return ERR_PTR(-ENODEV);
4608
4609         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4610         if (!iter)
4611                 return ERR_PTR(-ENOMEM);
4612
4613         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4614                                     GFP_KERNEL);
4615         if (!iter->buffer_iter)
4616                 goto release;
4617
4618         /*
4619          * trace_find_next_entry() may need to save off iter->ent.
4620          * It will place it into the iter->temp buffer. As most
4621          * events are less than 128, allocate a buffer of that size.
4622          * If one is greater, then trace_find_next_entry() will
4623          * allocate a new buffer to adjust for the bigger iter->ent.
4624          * It's not critical if it fails to get allocated here.
4625          */
4626         iter->temp = kmalloc(128, GFP_KERNEL);
4627         if (iter->temp)
4628                 iter->temp_size = 128;
4629
4630         /*
4631          * trace_event_printf() may need to modify given format
4632          * string to replace %p with %px so that it shows real address
4633          * instead of hash value. However, that is only for the event
4634          * tracing, other tracer may not need. Defer the allocation
4635          * until it is needed.
4636          */
4637         iter->fmt = NULL;
4638         iter->fmt_size = 0;
4639
4640         /*
4641          * We make a copy of the current tracer to avoid concurrent
4642          * changes on it while we are reading.
4643          */
4644         mutex_lock(&trace_types_lock);
4645         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4646         if (!iter->trace)
4647                 goto fail;
4648
4649         *iter->trace = *tr->current_trace;
4650
4651         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4652                 goto fail;
4653
4654         iter->tr = tr;
4655
4656 #ifdef CONFIG_TRACER_MAX_TRACE
4657         /* Currently only the top directory has a snapshot */
4658         if (tr->current_trace->print_max || snapshot)
4659                 iter->array_buffer = &tr->max_buffer;
4660         else
4661 #endif
4662                 iter->array_buffer = &tr->array_buffer;
4663         iter->snapshot = snapshot;
4664         iter->pos = -1;
4665         iter->cpu_file = tracing_get_cpu(inode);
4666         mutex_init(&iter->mutex);
4667
4668         /* Notify the tracer early; before we stop tracing. */
4669         if (iter->trace->open)
4670                 iter->trace->open(iter);
4671
4672         /* Annotate start of buffers if we had overruns */
4673         if (ring_buffer_overruns(iter->array_buffer->buffer))
4674                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4675
4676         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4677         if (trace_clocks[tr->clock_id].in_ns)
4678                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4679
4680         /*
4681          * If pause-on-trace is enabled, then stop the trace while
4682          * dumping, unless this is the "snapshot" file
4683          */
4684         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4685                 tracing_stop_tr(tr);
4686
4687         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4688                 for_each_tracing_cpu(cpu) {
4689                         iter->buffer_iter[cpu] =
4690                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4691                                                          cpu, GFP_KERNEL);
4692                 }
4693                 ring_buffer_read_prepare_sync();
4694                 for_each_tracing_cpu(cpu) {
4695                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4696                         tracing_iter_reset(iter, cpu);
4697                 }
4698         } else {
4699                 cpu = iter->cpu_file;
4700                 iter->buffer_iter[cpu] =
4701                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4702                                                  cpu, GFP_KERNEL);
4703                 ring_buffer_read_prepare_sync();
4704                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4705                 tracing_iter_reset(iter, cpu);
4706         }
4707
4708         mutex_unlock(&trace_types_lock);
4709
4710         return iter;
4711
4712  fail:
4713         mutex_unlock(&trace_types_lock);
4714         kfree(iter->trace);
4715         kfree(iter->temp);
4716         kfree(iter->buffer_iter);
4717 release:
4718         seq_release_private(inode, file);
4719         return ERR_PTR(-ENOMEM);
4720 }
4721
4722 int tracing_open_generic(struct inode *inode, struct file *filp)
4723 {
4724         int ret;
4725
4726         ret = tracing_check_open_get_tr(NULL);
4727         if (ret)
4728                 return ret;
4729
4730         filp->private_data = inode->i_private;
4731         return 0;
4732 }
4733
4734 bool tracing_is_disabled(void)
4735 {
4736         return (tracing_disabled) ? true: false;
4737 }
4738
4739 /*
4740  * Open and update trace_array ref count.
4741  * Must have the current trace_array passed to it.
4742  */
4743 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4744 {
4745         struct trace_array *tr = inode->i_private;
4746         int ret;
4747
4748         ret = tracing_check_open_get_tr(tr);
4749         if (ret)
4750                 return ret;
4751
4752         filp->private_data = inode->i_private;
4753
4754         return 0;
4755 }
4756
4757 static int tracing_release(struct inode *inode, struct file *file)
4758 {
4759         struct trace_array *tr = inode->i_private;
4760         struct seq_file *m = file->private_data;
4761         struct trace_iterator *iter;
4762         int cpu;
4763
4764         if (!(file->f_mode & FMODE_READ)) {
4765                 trace_array_put(tr);
4766                 return 0;
4767         }
4768
4769         /* Writes do not use seq_file */
4770         iter = m->private;
4771         mutex_lock(&trace_types_lock);
4772
4773         for_each_tracing_cpu(cpu) {
4774                 if (iter->buffer_iter[cpu])
4775                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4776         }
4777
4778         if (iter->trace && iter->trace->close)
4779                 iter->trace->close(iter);
4780
4781         if (!iter->snapshot && tr->stop_count)
4782                 /* reenable tracing if it was previously enabled */
4783                 tracing_start_tr(tr);
4784
4785         __trace_array_put(tr);
4786
4787         mutex_unlock(&trace_types_lock);
4788
4789         mutex_destroy(&iter->mutex);
4790         free_cpumask_var(iter->started);
4791         kfree(iter->fmt);
4792         kfree(iter->temp);
4793         kfree(iter->trace);
4794         kfree(iter->buffer_iter);
4795         seq_release_private(inode, file);
4796
4797         return 0;
4798 }
4799
4800 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4801 {
4802         struct trace_array *tr = inode->i_private;
4803
4804         trace_array_put(tr);
4805         return 0;
4806 }
4807
4808 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4809 {
4810         struct trace_array *tr = inode->i_private;
4811
4812         trace_array_put(tr);
4813
4814         return single_release(inode, file);
4815 }
4816
4817 static int tracing_open(struct inode *inode, struct file *file)
4818 {
4819         struct trace_array *tr = inode->i_private;
4820         struct trace_iterator *iter;
4821         int ret;
4822
4823         ret = tracing_check_open_get_tr(tr);
4824         if (ret)
4825                 return ret;
4826
4827         /* If this file was open for write, then erase contents */
4828         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4829                 int cpu = tracing_get_cpu(inode);
4830                 struct array_buffer *trace_buf = &tr->array_buffer;
4831
4832 #ifdef CONFIG_TRACER_MAX_TRACE
4833                 if (tr->current_trace->print_max)
4834                         trace_buf = &tr->max_buffer;
4835 #endif
4836
4837                 if (cpu == RING_BUFFER_ALL_CPUS)
4838                         tracing_reset_online_cpus(trace_buf);
4839                 else
4840                         tracing_reset_cpu(trace_buf, cpu);
4841         }
4842
4843         if (file->f_mode & FMODE_READ) {
4844                 iter = __tracing_open(inode, file, false);
4845                 if (IS_ERR(iter))
4846                         ret = PTR_ERR(iter);
4847                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4848                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4849         }
4850
4851         if (ret < 0)
4852                 trace_array_put(tr);
4853
4854         return ret;
4855 }
4856
4857 /*
4858  * Some tracers are not suitable for instance buffers.
4859  * A tracer is always available for the global array (toplevel)
4860  * or if it explicitly states that it is.
4861  */
4862 static bool
4863 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4864 {
4865         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4866 }
4867
4868 /* Find the next tracer that this trace array may use */
4869 static struct tracer *
4870 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4871 {
4872         while (t && !trace_ok_for_array(t, tr))
4873                 t = t->next;
4874
4875         return t;
4876 }
4877
4878 static void *
4879 t_next(struct seq_file *m, void *v, loff_t *pos)
4880 {
4881         struct trace_array *tr = m->private;
4882         struct tracer *t = v;
4883
4884         (*pos)++;
4885
4886         if (t)
4887                 t = get_tracer_for_array(tr, t->next);
4888
4889         return t;
4890 }
4891
4892 static void *t_start(struct seq_file *m, loff_t *pos)
4893 {
4894         struct trace_array *tr = m->private;
4895         struct tracer *t;
4896         loff_t l = 0;
4897
4898         mutex_lock(&trace_types_lock);
4899
4900         t = get_tracer_for_array(tr, trace_types);
4901         for (; t && l < *pos; t = t_next(m, t, &l))
4902                         ;
4903
4904         return t;
4905 }
4906
4907 static void t_stop(struct seq_file *m, void *p)
4908 {
4909         mutex_unlock(&trace_types_lock);
4910 }
4911
4912 static int t_show(struct seq_file *m, void *v)
4913 {
4914         struct tracer *t = v;
4915
4916         if (!t)
4917                 return 0;
4918
4919         seq_puts(m, t->name);
4920         if (t->next)
4921                 seq_putc(m, ' ');
4922         else
4923                 seq_putc(m, '\n');
4924
4925         return 0;
4926 }
4927
4928 static const struct seq_operations show_traces_seq_ops = {
4929         .start          = t_start,
4930         .next           = t_next,
4931         .stop           = t_stop,
4932         .show           = t_show,
4933 };
4934
4935 static int show_traces_open(struct inode *inode, struct file *file)
4936 {
4937         struct trace_array *tr = inode->i_private;
4938         struct seq_file *m;
4939         int ret;
4940
4941         ret = tracing_check_open_get_tr(tr);
4942         if (ret)
4943                 return ret;
4944
4945         ret = seq_open(file, &show_traces_seq_ops);
4946         if (ret) {
4947                 trace_array_put(tr);
4948                 return ret;
4949         }
4950
4951         m = file->private_data;
4952         m->private = tr;
4953
4954         return 0;
4955 }
4956
4957 static int show_traces_release(struct inode *inode, struct file *file)
4958 {
4959         struct trace_array *tr = inode->i_private;
4960
4961         trace_array_put(tr);
4962         return seq_release(inode, file);
4963 }
4964
4965 static ssize_t
4966 tracing_write_stub(struct file *filp, const char __user *ubuf,
4967                    size_t count, loff_t *ppos)
4968 {
4969         return count;
4970 }
4971
4972 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4973 {
4974         int ret;
4975
4976         if (file->f_mode & FMODE_READ)
4977                 ret = seq_lseek(file, offset, whence);
4978         else
4979                 file->f_pos = ret = 0;
4980
4981         return ret;
4982 }
4983
4984 static const struct file_operations tracing_fops = {
4985         .open           = tracing_open,
4986         .read           = seq_read,
4987         .write          = tracing_write_stub,
4988         .llseek         = tracing_lseek,
4989         .release        = tracing_release,
4990 };
4991
4992 static const struct file_operations show_traces_fops = {
4993         .open           = show_traces_open,
4994         .read           = seq_read,
4995         .llseek         = seq_lseek,
4996         .release        = show_traces_release,
4997 };
4998
4999 static ssize_t
5000 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5001                      size_t count, loff_t *ppos)
5002 {
5003         struct trace_array *tr = file_inode(filp)->i_private;
5004         char *mask_str;
5005         int len;
5006
5007         len = snprintf(NULL, 0, "%*pb\n",
5008                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5009         mask_str = kmalloc(len, GFP_KERNEL);
5010         if (!mask_str)
5011                 return -ENOMEM;
5012
5013         len = snprintf(mask_str, len, "%*pb\n",
5014                        cpumask_pr_args(tr->tracing_cpumask));
5015         if (len >= count) {
5016                 count = -EINVAL;
5017                 goto out_err;
5018         }
5019         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5020
5021 out_err:
5022         kfree(mask_str);
5023
5024         return count;
5025 }
5026
5027 int tracing_set_cpumask(struct trace_array *tr,
5028                         cpumask_var_t tracing_cpumask_new)
5029 {
5030         int cpu;
5031
5032         if (!tr)
5033                 return -EINVAL;
5034
5035         local_irq_disable();
5036         arch_spin_lock(&tr->max_lock);
5037         for_each_tracing_cpu(cpu) {
5038                 /*
5039                  * Increase/decrease the disabled counter if we are
5040                  * about to flip a bit in the cpumask:
5041                  */
5042                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5043                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5044                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5045                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5046                 }
5047                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5048                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5049                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5050                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5051                 }
5052         }
5053         arch_spin_unlock(&tr->max_lock);
5054         local_irq_enable();
5055
5056         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5057
5058         return 0;
5059 }
5060
5061 static ssize_t
5062 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5063                       size_t count, loff_t *ppos)
5064 {
5065         struct trace_array *tr = file_inode(filp)->i_private;
5066         cpumask_var_t tracing_cpumask_new;
5067         int err;
5068
5069         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5070                 return -ENOMEM;
5071
5072         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5073         if (err)
5074                 goto err_free;
5075
5076         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5077         if (err)
5078                 goto err_free;
5079
5080         free_cpumask_var(tracing_cpumask_new);
5081
5082         return count;
5083
5084 err_free:
5085         free_cpumask_var(tracing_cpumask_new);
5086
5087         return err;
5088 }
5089
5090 static const struct file_operations tracing_cpumask_fops = {
5091         .open           = tracing_open_generic_tr,
5092         .read           = tracing_cpumask_read,
5093         .write          = tracing_cpumask_write,
5094         .release        = tracing_release_generic_tr,
5095         .llseek         = generic_file_llseek,
5096 };
5097
5098 static int tracing_trace_options_show(struct seq_file *m, void *v)
5099 {
5100         struct tracer_opt *trace_opts;
5101         struct trace_array *tr = m->private;
5102         u32 tracer_flags;
5103         int i;
5104
5105         mutex_lock(&trace_types_lock);
5106         tracer_flags = tr->current_trace->flags->val;
5107         trace_opts = tr->current_trace->flags->opts;
5108
5109         for (i = 0; trace_options[i]; i++) {
5110                 if (tr->trace_flags & (1 << i))
5111                         seq_printf(m, "%s\n", trace_options[i]);
5112                 else
5113                         seq_printf(m, "no%s\n", trace_options[i]);
5114         }
5115
5116         for (i = 0; trace_opts[i].name; i++) {
5117                 if (tracer_flags & trace_opts[i].bit)
5118                         seq_printf(m, "%s\n", trace_opts[i].name);
5119                 else
5120                         seq_printf(m, "no%s\n", trace_opts[i].name);
5121         }
5122         mutex_unlock(&trace_types_lock);
5123
5124         return 0;
5125 }
5126
5127 static int __set_tracer_option(struct trace_array *tr,
5128                                struct tracer_flags *tracer_flags,
5129                                struct tracer_opt *opts, int neg)
5130 {
5131         struct tracer *trace = tracer_flags->trace;
5132         int ret;
5133
5134         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5135         if (ret)
5136                 return ret;
5137
5138         if (neg)
5139                 tracer_flags->val &= ~opts->bit;
5140         else
5141                 tracer_flags->val |= opts->bit;
5142         return 0;
5143 }
5144
5145 /* Try to assign a tracer specific option */
5146 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5147 {
5148         struct tracer *trace = tr->current_trace;
5149         struct tracer_flags *tracer_flags = trace->flags;
5150         struct tracer_opt *opts = NULL;
5151         int i;
5152
5153         for (i = 0; tracer_flags->opts[i].name; i++) {
5154                 opts = &tracer_flags->opts[i];
5155
5156                 if (strcmp(cmp, opts->name) == 0)
5157                         return __set_tracer_option(tr, trace->flags, opts, neg);
5158         }
5159
5160         return -EINVAL;
5161 }
5162
5163 /* Some tracers require overwrite to stay enabled */
5164 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5165 {
5166         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5167                 return -1;
5168
5169         return 0;
5170 }
5171
5172 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5173 {
5174         if ((mask == TRACE_ITER_RECORD_TGID) ||
5175             (mask == TRACE_ITER_RECORD_CMD))
5176                 lockdep_assert_held(&event_mutex);
5177
5178         /* do nothing if flag is already set */
5179         if (!!(tr->trace_flags & mask) == !!enabled)
5180                 return 0;
5181
5182         /* Give the tracer a chance to approve the change */
5183         if (tr->current_trace->flag_changed)
5184                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5185                         return -EINVAL;
5186
5187         if (enabled)
5188                 tr->trace_flags |= mask;
5189         else
5190                 tr->trace_flags &= ~mask;
5191
5192         if (mask == TRACE_ITER_RECORD_CMD)
5193                 trace_event_enable_cmd_record(enabled);
5194
5195         if (mask == TRACE_ITER_RECORD_TGID) {
5196                 if (!tgid_map)
5197                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5198                                            sizeof(*tgid_map),
5199                                            GFP_KERNEL);
5200                 if (!tgid_map) {
5201                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5202                         return -ENOMEM;
5203                 }
5204
5205                 trace_event_enable_tgid_record(enabled);
5206         }
5207
5208         if (mask == TRACE_ITER_EVENT_FORK)
5209                 trace_event_follow_fork(tr, enabled);
5210
5211         if (mask == TRACE_ITER_FUNC_FORK)
5212                 ftrace_pid_follow_fork(tr, enabled);
5213
5214         if (mask == TRACE_ITER_OVERWRITE) {
5215                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5216 #ifdef CONFIG_TRACER_MAX_TRACE
5217                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5218 #endif
5219         }
5220
5221         if (mask == TRACE_ITER_PRINTK) {
5222                 trace_printk_start_stop_comm(enabled);
5223                 trace_printk_control(enabled);
5224         }
5225
5226         return 0;
5227 }
5228
5229 int trace_set_options(struct trace_array *tr, char *option)
5230 {
5231         char *cmp;
5232         int neg = 0;
5233         int ret;
5234         size_t orig_len = strlen(option);
5235         int len;
5236
5237         cmp = strstrip(option);
5238
5239         len = str_has_prefix(cmp, "no");
5240         if (len)
5241                 neg = 1;
5242
5243         cmp += len;
5244
5245         mutex_lock(&event_mutex);
5246         mutex_lock(&trace_types_lock);
5247
5248         ret = match_string(trace_options, -1, cmp);
5249         /* If no option could be set, test the specific tracer options */
5250         if (ret < 0)
5251                 ret = set_tracer_option(tr, cmp, neg);
5252         else
5253                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5254
5255         mutex_unlock(&trace_types_lock);
5256         mutex_unlock(&event_mutex);
5257
5258         /*
5259          * If the first trailing whitespace is replaced with '\0' by strstrip,
5260          * turn it back into a space.
5261          */
5262         if (orig_len > strlen(option))
5263                 option[strlen(option)] = ' ';
5264
5265         return ret;
5266 }
5267
5268 static void __init apply_trace_boot_options(void)
5269 {
5270         char *buf = trace_boot_options_buf;
5271         char *option;
5272
5273         while (true) {
5274                 option = strsep(&buf, ",");
5275
5276                 if (!option)
5277                         break;
5278
5279                 if (*option)
5280                         trace_set_options(&global_trace, option);
5281
5282                 /* Put back the comma to allow this to be called again */
5283                 if (buf)
5284                         *(buf - 1) = ',';
5285         }
5286 }
5287
5288 static ssize_t
5289 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5290                         size_t cnt, loff_t *ppos)
5291 {
5292         struct seq_file *m = filp->private_data;
5293         struct trace_array *tr = m->private;
5294         char buf[64];
5295         int ret;
5296
5297         if (cnt >= sizeof(buf))
5298                 return -EINVAL;
5299
5300         if (copy_from_user(buf, ubuf, cnt))
5301                 return -EFAULT;
5302
5303         buf[cnt] = 0;
5304
5305         ret = trace_set_options(tr, buf);
5306         if (ret < 0)
5307                 return ret;
5308
5309         *ppos += cnt;
5310
5311         return cnt;
5312 }
5313
5314 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5315 {
5316         struct trace_array *tr = inode->i_private;
5317         int ret;
5318
5319         ret = tracing_check_open_get_tr(tr);
5320         if (ret)
5321                 return ret;
5322
5323         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5324         if (ret < 0)
5325                 trace_array_put(tr);
5326
5327         return ret;
5328 }
5329
5330 static const struct file_operations tracing_iter_fops = {
5331         .open           = tracing_trace_options_open,
5332         .read           = seq_read,
5333         .llseek         = seq_lseek,
5334         .release        = tracing_single_release_tr,
5335         .write          = tracing_trace_options_write,
5336 };
5337
5338 static const char readme_msg[] =
5339         "tracing mini-HOWTO:\n\n"
5340         "# echo 0 > tracing_on : quick way to disable tracing\n"
5341         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5342         " Important files:\n"
5343         "  trace\t\t\t- The static contents of the buffer\n"
5344         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5345         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5346         "  current_tracer\t- function and latency tracers\n"
5347         "  available_tracers\t- list of configured tracers for current_tracer\n"
5348         "  error_log\t- error log for failed commands (that support it)\n"
5349         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5350         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5351         "  trace_clock\t\t-change the clock used to order events\n"
5352         "       local:   Per cpu clock but may not be synced across CPUs\n"
5353         "      global:   Synced across CPUs but slows tracing down.\n"
5354         "     counter:   Not a clock, but just an increment\n"
5355         "      uptime:   Jiffy counter from time of boot\n"
5356         "        perf:   Same clock that perf events use\n"
5357 #ifdef CONFIG_X86_64
5358         "     x86-tsc:   TSC cycle counter\n"
5359 #endif
5360         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5361         "       delta:   Delta difference against a buffer-wide timestamp\n"
5362         "    absolute:   Absolute (standalone) timestamp\n"
5363         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5364         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5365         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5366         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5367         "\t\t\t  Remove sub-buffer with rmdir\n"
5368         "  trace_options\t\t- Set format or modify how tracing happens\n"
5369         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5370         "\t\t\t  option name\n"
5371         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5372 #ifdef CONFIG_DYNAMIC_FTRACE
5373         "\n  available_filter_functions - list of functions that can be filtered on\n"
5374         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5375         "\t\t\t  functions\n"
5376         "\t     accepts: func_full_name or glob-matching-pattern\n"
5377         "\t     modules: Can select a group via module\n"
5378         "\t      Format: :mod:<module-name>\n"
5379         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5380         "\t    triggers: a command to perform when function is hit\n"
5381         "\t      Format: <function>:<trigger>[:count]\n"
5382         "\t     trigger: traceon, traceoff\n"
5383         "\t\t      enable_event:<system>:<event>\n"
5384         "\t\t      disable_event:<system>:<event>\n"
5385 #ifdef CONFIG_STACKTRACE
5386         "\t\t      stacktrace\n"
5387 #endif
5388 #ifdef CONFIG_TRACER_SNAPSHOT
5389         "\t\t      snapshot\n"
5390 #endif
5391         "\t\t      dump\n"
5392         "\t\t      cpudump\n"
5393         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5394         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5395         "\t     The first one will disable tracing every time do_fault is hit\n"
5396         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5397         "\t       The first time do trap is hit and it disables tracing, the\n"
5398         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5399         "\t       the counter will not decrement. It only decrements when the\n"
5400         "\t       trigger did work\n"
5401         "\t     To remove trigger without count:\n"
5402         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5403         "\t     To remove trigger with a count:\n"
5404         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5405         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5406         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5407         "\t    modules: Can select a group via module command :mod:\n"
5408         "\t    Does not accept triggers\n"
5409 #endif /* CONFIG_DYNAMIC_FTRACE */
5410 #ifdef CONFIG_FUNCTION_TRACER
5411         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5412         "\t\t    (function)\n"
5413         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5414         "\t\t    (function)\n"
5415 #endif
5416 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5417         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5418         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5419         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5420 #endif
5421 #ifdef CONFIG_TRACER_SNAPSHOT
5422         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5423         "\t\t\t  snapshot buffer. Read the contents for more\n"
5424         "\t\t\t  information\n"
5425 #endif
5426 #ifdef CONFIG_STACK_TRACER
5427         "  stack_trace\t\t- Shows the max stack trace when active\n"
5428         "  stack_max_size\t- Shows current max stack size that was traced\n"
5429         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5430         "\t\t\t  new trace)\n"
5431 #ifdef CONFIG_DYNAMIC_FTRACE
5432         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5433         "\t\t\t  traces\n"
5434 #endif
5435 #endif /* CONFIG_STACK_TRACER */
5436 #ifdef CONFIG_DYNAMIC_EVENTS
5437         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5438         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5439 #endif
5440 #ifdef CONFIG_KPROBE_EVENTS
5441         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5442         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5443 #endif
5444 #ifdef CONFIG_UPROBE_EVENTS
5445         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5446         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5447 #endif
5448 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5449         "\t  accepts: event-definitions (one definition per line)\n"
5450         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5451         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5452 #ifdef CONFIG_HIST_TRIGGERS
5453         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5454 #endif
5455         "\t           -:[<group>/]<event>\n"
5456 #ifdef CONFIG_KPROBE_EVENTS
5457         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5458   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5459 #endif
5460 #ifdef CONFIG_UPROBE_EVENTS
5461   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5462 #endif
5463         "\t     args: <name>=fetcharg[:type]\n"
5464         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5465 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5466         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5467 #else
5468         "\t           $stack<index>, $stack, $retval, $comm,\n"
5469 #endif
5470         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5471         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5472         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5473         "\t           <type>\\[<array-size>\\]\n"
5474 #ifdef CONFIG_HIST_TRIGGERS
5475         "\t    field: <stype> <name>;\n"
5476         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5477         "\t           [unsigned] char/int/long\n"
5478 #endif
5479 #endif
5480         "  events/\t\t- Directory containing all trace event subsystems:\n"
5481         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5482         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5483         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5484         "\t\t\t  events\n"
5485         "      filter\t\t- If set, only events passing filter are traced\n"
5486         "  events/<system>/<event>/\t- Directory containing control files for\n"
5487         "\t\t\t  <event>:\n"
5488         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5489         "      filter\t\t- If set, only events passing filter are traced\n"
5490         "      trigger\t\t- If set, a command to perform when event is hit\n"
5491         "\t    Format: <trigger>[:count][if <filter>]\n"
5492         "\t   trigger: traceon, traceoff\n"
5493         "\t            enable_event:<system>:<event>\n"
5494         "\t            disable_event:<system>:<event>\n"
5495 #ifdef CONFIG_HIST_TRIGGERS
5496         "\t            enable_hist:<system>:<event>\n"
5497         "\t            disable_hist:<system>:<event>\n"
5498 #endif
5499 #ifdef CONFIG_STACKTRACE
5500         "\t\t    stacktrace\n"
5501 #endif
5502 #ifdef CONFIG_TRACER_SNAPSHOT
5503         "\t\t    snapshot\n"
5504 #endif
5505 #ifdef CONFIG_HIST_TRIGGERS
5506         "\t\t    hist (see below)\n"
5507 #endif
5508         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5509         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5510         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5511         "\t                  events/block/block_unplug/trigger\n"
5512         "\t   The first disables tracing every time block_unplug is hit.\n"
5513         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5514         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5515         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5516         "\t   Like function triggers, the counter is only decremented if it\n"
5517         "\t    enabled or disabled tracing.\n"
5518         "\t   To remove a trigger without a count:\n"
5519         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5520         "\t   To remove a trigger with a count:\n"
5521         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5522         "\t   Filters can be ignored when removing a trigger.\n"
5523 #ifdef CONFIG_HIST_TRIGGERS
5524         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5525         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5526         "\t            [:values=<field1[,field2,...]>]\n"
5527         "\t            [:sort=<field1[,field2,...]>]\n"
5528         "\t            [:size=#entries]\n"
5529         "\t            [:pause][:continue][:clear]\n"
5530         "\t            [:name=histname1]\n"
5531         "\t            [:<handler>.<action>]\n"
5532         "\t            [if <filter>]\n\n"
5533         "\t    When a matching event is hit, an entry is added to a hash\n"
5534         "\t    table using the key(s) and value(s) named, and the value of a\n"
5535         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5536         "\t    correspond to fields in the event's format description.  Keys\n"
5537         "\t    can be any field, or the special string 'stacktrace'.\n"
5538         "\t    Compound keys consisting of up to two fields can be specified\n"
5539         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5540         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5541         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5542         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5543         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5544         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5545         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5546         "\t    its histogram data will be shared with other triggers of the\n"
5547         "\t    same name, and trigger hits will update this common data.\n\n"
5548         "\t    Reading the 'hist' file for the event will dump the hash\n"
5549         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5550         "\t    triggers attached to an event, there will be a table for each\n"
5551         "\t    trigger in the output.  The table displayed for a named\n"
5552         "\t    trigger will be the same as any other instance having the\n"
5553         "\t    same name.  The default format used to display a given field\n"
5554         "\t    can be modified by appending any of the following modifiers\n"
5555         "\t    to the field name, as applicable:\n\n"
5556         "\t            .hex        display a number as a hex value\n"
5557         "\t            .sym        display an address as a symbol\n"
5558         "\t            .sym-offset display an address as a symbol and offset\n"
5559         "\t            .execname   display a common_pid as a program name\n"
5560         "\t            .syscall    display a syscall id as a syscall name\n"
5561         "\t            .log2       display log2 value rather than raw number\n"
5562         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5563         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5564         "\t    trigger or to start a hist trigger but not log any events\n"
5565         "\t    until told to do so.  'continue' can be used to start or\n"
5566         "\t    restart a paused hist trigger.\n\n"
5567         "\t    The 'clear' parameter will clear the contents of a running\n"
5568         "\t    hist trigger and leave its current paused/active state\n"
5569         "\t    unchanged.\n\n"
5570         "\t    The enable_hist and disable_hist triggers can be used to\n"
5571         "\t    have one event conditionally start and stop another event's\n"
5572         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5573         "\t    the enable_event and disable_event triggers.\n\n"
5574         "\t    Hist trigger handlers and actions are executed whenever a\n"
5575         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5576         "\t        <handler>.<action>\n\n"
5577         "\t    The available handlers are:\n\n"
5578         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5579         "\t        onmax(var)               - invoke if var exceeds current max\n"
5580         "\t        onchange(var)            - invoke action if var changes\n\n"
5581         "\t    The available actions are:\n\n"
5582         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5583         "\t        save(field,...)                      - save current event fields\n"
5584 #ifdef CONFIG_TRACER_SNAPSHOT
5585         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5586 #endif
5587 #ifdef CONFIG_SYNTH_EVENTS
5588         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5589         "\t  Write into this file to define/undefine new synthetic events.\n"
5590         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5591 #endif
5592 #endif
5593 ;
5594
5595 static ssize_t
5596 tracing_readme_read(struct file *filp, char __user *ubuf,
5597                        size_t cnt, loff_t *ppos)
5598 {
5599         return simple_read_from_buffer(ubuf, cnt, ppos,
5600                                         readme_msg, strlen(readme_msg));
5601 }
5602
5603 static const struct file_operations tracing_readme_fops = {
5604         .open           = tracing_open_generic,
5605         .read           = tracing_readme_read,
5606         .llseek         = generic_file_llseek,
5607 };
5608
5609 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5610 {
5611         int *ptr = v;
5612
5613         if (*pos || m->count)
5614                 ptr++;
5615
5616         (*pos)++;
5617
5618         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5619                 if (trace_find_tgid(*ptr))
5620                         return ptr;
5621         }
5622
5623         return NULL;
5624 }
5625
5626 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5627 {
5628         void *v;
5629         loff_t l = 0;
5630
5631         if (!tgid_map)
5632                 return NULL;
5633
5634         v = &tgid_map[0];
5635         while (l <= *pos) {
5636                 v = saved_tgids_next(m, v, &l);
5637                 if (!v)
5638                         return NULL;
5639         }
5640
5641         return v;
5642 }
5643
5644 static void saved_tgids_stop(struct seq_file *m, void *v)
5645 {
5646 }
5647
5648 static int saved_tgids_show(struct seq_file *m, void *v)
5649 {
5650         int pid = (int *)v - tgid_map;
5651
5652         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5653         return 0;
5654 }
5655
5656 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5657         .start          = saved_tgids_start,
5658         .stop           = saved_tgids_stop,
5659         .next           = saved_tgids_next,
5660         .show           = saved_tgids_show,
5661 };
5662
5663 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5664 {
5665         int ret;
5666
5667         ret = tracing_check_open_get_tr(NULL);
5668         if (ret)
5669                 return ret;
5670
5671         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5672 }
5673
5674
5675 static const struct file_operations tracing_saved_tgids_fops = {
5676         .open           = tracing_saved_tgids_open,
5677         .read           = seq_read,
5678         .llseek         = seq_lseek,
5679         .release        = seq_release,
5680 };
5681
5682 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5683 {
5684         unsigned int *ptr = v;
5685
5686         if (*pos || m->count)
5687                 ptr++;
5688
5689         (*pos)++;
5690
5691         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5692              ptr++) {
5693                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5694                         continue;
5695
5696                 return ptr;
5697         }
5698
5699         return NULL;
5700 }
5701
5702 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5703 {
5704         void *v;
5705         loff_t l = 0;
5706
5707         preempt_disable();
5708         arch_spin_lock(&trace_cmdline_lock);
5709
5710         v = &savedcmd->map_cmdline_to_pid[0];
5711         while (l <= *pos) {
5712                 v = saved_cmdlines_next(m, v, &l);
5713                 if (!v)
5714                         return NULL;
5715         }
5716
5717         return v;
5718 }
5719
5720 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5721 {
5722         arch_spin_unlock(&trace_cmdline_lock);
5723         preempt_enable();
5724 }
5725
5726 static int saved_cmdlines_show(struct seq_file *m, void *v)
5727 {
5728         char buf[TASK_COMM_LEN];
5729         unsigned int *pid = v;
5730
5731         __trace_find_cmdline(*pid, buf);
5732         seq_printf(m, "%d %s\n", *pid, buf);
5733         return 0;
5734 }
5735
5736 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5737         .start          = saved_cmdlines_start,
5738         .next           = saved_cmdlines_next,
5739         .stop           = saved_cmdlines_stop,
5740         .show           = saved_cmdlines_show,
5741 };
5742
5743 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5744 {
5745         int ret;
5746
5747         ret = tracing_check_open_get_tr(NULL);
5748         if (ret)
5749                 return ret;
5750
5751         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5752 }
5753
5754 static const struct file_operations tracing_saved_cmdlines_fops = {
5755         .open           = tracing_saved_cmdlines_open,
5756         .read           = seq_read,
5757         .llseek         = seq_lseek,
5758         .release        = seq_release,
5759 };
5760
5761 static ssize_t
5762 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5763                                  size_t cnt, loff_t *ppos)
5764 {
5765         char buf[64];
5766         int r;
5767
5768         arch_spin_lock(&trace_cmdline_lock);
5769         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5770         arch_spin_unlock(&trace_cmdline_lock);
5771
5772         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5773 }
5774
5775 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5776 {
5777         kfree(s->saved_cmdlines);
5778         kfree(s->map_cmdline_to_pid);
5779         kfree(s);
5780 }
5781
5782 static int tracing_resize_saved_cmdlines(unsigned int val)
5783 {
5784         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5785
5786         s = kmalloc(sizeof(*s), GFP_KERNEL);
5787         if (!s)
5788                 return -ENOMEM;
5789
5790         if (allocate_cmdlines_buffer(val, s) < 0) {
5791                 kfree(s);
5792                 return -ENOMEM;
5793         }
5794
5795         arch_spin_lock(&trace_cmdline_lock);
5796         savedcmd_temp = savedcmd;
5797         savedcmd = s;
5798         arch_spin_unlock(&trace_cmdline_lock);
5799         free_saved_cmdlines_buffer(savedcmd_temp);
5800
5801         return 0;
5802 }
5803
5804 static ssize_t
5805 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5806                                   size_t cnt, loff_t *ppos)
5807 {
5808         unsigned long val;
5809         int ret;
5810
5811         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5812         if (ret)
5813                 return ret;
5814
5815         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5816         if (!val || val > PID_MAX_DEFAULT)
5817                 return -EINVAL;
5818
5819         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5820         if (ret < 0)
5821                 return ret;
5822
5823         *ppos += cnt;
5824
5825         return cnt;
5826 }
5827
5828 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5829         .open           = tracing_open_generic,
5830         .read           = tracing_saved_cmdlines_size_read,
5831         .write          = tracing_saved_cmdlines_size_write,
5832 };
5833
5834 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5835 static union trace_eval_map_item *
5836 update_eval_map(union trace_eval_map_item *ptr)
5837 {
5838         if (!ptr->map.eval_string) {
5839                 if (ptr->tail.next) {
5840                         ptr = ptr->tail.next;
5841                         /* Set ptr to the next real item (skip head) */
5842                         ptr++;
5843                 } else
5844                         return NULL;
5845         }
5846         return ptr;
5847 }
5848
5849 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5850 {
5851         union trace_eval_map_item *ptr = v;
5852
5853         /*
5854          * Paranoid! If ptr points to end, we don't want to increment past it.
5855          * This really should never happen.
5856          */
5857         (*pos)++;
5858         ptr = update_eval_map(ptr);
5859         if (WARN_ON_ONCE(!ptr))
5860                 return NULL;
5861
5862         ptr++;
5863         ptr = update_eval_map(ptr);
5864
5865         return ptr;
5866 }
5867
5868 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5869 {
5870         union trace_eval_map_item *v;
5871         loff_t l = 0;
5872
5873         mutex_lock(&trace_eval_mutex);
5874
5875         v = trace_eval_maps;
5876         if (v)
5877                 v++;
5878
5879         while (v && l < *pos) {
5880                 v = eval_map_next(m, v, &l);
5881         }
5882
5883         return v;
5884 }
5885
5886 static void eval_map_stop(struct seq_file *m, void *v)
5887 {
5888         mutex_unlock(&trace_eval_mutex);
5889 }
5890
5891 static int eval_map_show(struct seq_file *m, void *v)
5892 {
5893         union trace_eval_map_item *ptr = v;
5894
5895         seq_printf(m, "%s %ld (%s)\n",
5896                    ptr->map.eval_string, ptr->map.eval_value,
5897                    ptr->map.system);
5898
5899         return 0;
5900 }
5901
5902 static const struct seq_operations tracing_eval_map_seq_ops = {
5903         .start          = eval_map_start,
5904         .next           = eval_map_next,
5905         .stop           = eval_map_stop,
5906         .show           = eval_map_show,
5907 };
5908
5909 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5910 {
5911         int ret;
5912
5913         ret = tracing_check_open_get_tr(NULL);
5914         if (ret)
5915                 return ret;
5916
5917         return seq_open(filp, &tracing_eval_map_seq_ops);
5918 }
5919
5920 static const struct file_operations tracing_eval_map_fops = {
5921         .open           = tracing_eval_map_open,
5922         .read           = seq_read,
5923         .llseek         = seq_lseek,
5924         .release        = seq_release,
5925 };
5926
5927 static inline union trace_eval_map_item *
5928 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5929 {
5930         /* Return tail of array given the head */
5931         return ptr + ptr->head.length + 1;
5932 }
5933
5934 static void
5935 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5936                            int len)
5937 {
5938         struct trace_eval_map **stop;
5939         struct trace_eval_map **map;
5940         union trace_eval_map_item *map_array;
5941         union trace_eval_map_item *ptr;
5942
5943         stop = start + len;
5944
5945         /*
5946          * The trace_eval_maps contains the map plus a head and tail item,
5947          * where the head holds the module and length of array, and the
5948          * tail holds a pointer to the next list.
5949          */
5950         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5951         if (!map_array) {
5952                 pr_warn("Unable to allocate trace eval mapping\n");
5953                 return;
5954         }
5955
5956         mutex_lock(&trace_eval_mutex);
5957
5958         if (!trace_eval_maps)
5959                 trace_eval_maps = map_array;
5960         else {
5961                 ptr = trace_eval_maps;
5962                 for (;;) {
5963                         ptr = trace_eval_jmp_to_tail(ptr);
5964                         if (!ptr->tail.next)
5965                                 break;
5966                         ptr = ptr->tail.next;
5967
5968                 }
5969                 ptr->tail.next = map_array;
5970         }
5971         map_array->head.mod = mod;
5972         map_array->head.length = len;
5973         map_array++;
5974
5975         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5976                 map_array->map = **map;
5977                 map_array++;
5978         }
5979         memset(map_array, 0, sizeof(*map_array));
5980
5981         mutex_unlock(&trace_eval_mutex);
5982 }
5983
5984 static void trace_create_eval_file(struct dentry *d_tracer)
5985 {
5986         trace_create_file("eval_map", 0444, d_tracer,
5987                           NULL, &tracing_eval_map_fops);
5988 }
5989
5990 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5991 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5992 static inline void trace_insert_eval_map_file(struct module *mod,
5993                               struct trace_eval_map **start, int len) { }
5994 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5995
5996 static void trace_insert_eval_map(struct module *mod,
5997                                   struct trace_eval_map **start, int len)
5998 {
5999         struct trace_eval_map **map;
6000
6001         if (len <= 0)
6002                 return;
6003
6004         map = start;
6005
6006         trace_event_eval_update(map, len);
6007
6008         trace_insert_eval_map_file(mod, start, len);
6009 }
6010
6011 static ssize_t
6012 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6013                        size_t cnt, loff_t *ppos)
6014 {
6015         struct trace_array *tr = filp->private_data;
6016         char buf[MAX_TRACER_SIZE+2];
6017         int r;
6018
6019         mutex_lock(&trace_types_lock);
6020         r = sprintf(buf, "%s\n", tr->current_trace->name);
6021         mutex_unlock(&trace_types_lock);
6022
6023         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6024 }
6025
6026 int tracer_init(struct tracer *t, struct trace_array *tr)
6027 {
6028         tracing_reset_online_cpus(&tr->array_buffer);
6029         return t->init(tr);
6030 }
6031
6032 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6033 {
6034         int cpu;
6035
6036         for_each_tracing_cpu(cpu)
6037                 per_cpu_ptr(buf->data, cpu)->entries = val;
6038 }
6039
6040 #ifdef CONFIG_TRACER_MAX_TRACE
6041 /* resize @tr's buffer to the size of @size_tr's entries */
6042 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6043                                         struct array_buffer *size_buf, int cpu_id)
6044 {
6045         int cpu, ret = 0;
6046
6047         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6048                 for_each_tracing_cpu(cpu) {
6049                         ret = ring_buffer_resize(trace_buf->buffer,
6050                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6051                         if (ret < 0)
6052                                 break;
6053                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6054                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6055                 }
6056         } else {
6057                 ret = ring_buffer_resize(trace_buf->buffer,
6058                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6059                 if (ret == 0)
6060                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6061                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6062         }
6063
6064         return ret;
6065 }
6066 #endif /* CONFIG_TRACER_MAX_TRACE */
6067
6068 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6069                                         unsigned long size, int cpu)
6070 {
6071         int ret;
6072
6073         /*
6074          * If kernel or user changes the size of the ring buffer
6075          * we use the size that was given, and we can forget about
6076          * expanding it later.
6077          */
6078         ring_buffer_expanded = true;
6079
6080         /* May be called before buffers are initialized */
6081         if (!tr->array_buffer.buffer)
6082                 return 0;
6083
6084         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6085         if (ret < 0)
6086                 return ret;
6087
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6090             !tr->current_trace->use_max_tr)
6091                 goto out;
6092
6093         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6094         if (ret < 0) {
6095                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6096                                                      &tr->array_buffer, cpu);
6097                 if (r < 0) {
6098                         /*
6099                          * AARGH! We are left with different
6100                          * size max buffer!!!!
6101                          * The max buffer is our "snapshot" buffer.
6102                          * When a tracer needs a snapshot (one of the
6103                          * latency tracers), it swaps the max buffer
6104                          * with the saved snap shot. We succeeded to
6105                          * update the size of the main buffer, but failed to
6106                          * update the size of the max buffer. But when we tried
6107                          * to reset the main buffer to the original size, we
6108                          * failed there too. This is very unlikely to
6109                          * happen, but if it does, warn and kill all
6110                          * tracing.
6111                          */
6112                         WARN_ON(1);
6113                         tracing_disabled = 1;
6114                 }
6115                 return ret;
6116         }
6117
6118         if (cpu == RING_BUFFER_ALL_CPUS)
6119                 set_buffer_entries(&tr->max_buffer, size);
6120         else
6121                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6122
6123  out:
6124 #endif /* CONFIG_TRACER_MAX_TRACE */
6125
6126         if (cpu == RING_BUFFER_ALL_CPUS)
6127                 set_buffer_entries(&tr->array_buffer, size);
6128         else
6129                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6130
6131         return ret;
6132 }
6133
6134 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6135                                   unsigned long size, int cpu_id)
6136 {
6137         int ret = size;
6138
6139         mutex_lock(&trace_types_lock);
6140
6141         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6142                 /* make sure, this cpu is enabled in the mask */
6143                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6144                         ret = -EINVAL;
6145                         goto out;
6146                 }
6147         }
6148
6149         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6150         if (ret < 0)
6151                 ret = -ENOMEM;
6152
6153 out:
6154         mutex_unlock(&trace_types_lock);
6155
6156         return ret;
6157 }
6158
6159
6160 /**
6161  * tracing_update_buffers - used by tracing facility to expand ring buffers
6162  *
6163  * To save on memory when the tracing is never used on a system with it
6164  * configured in. The ring buffers are set to a minimum size. But once
6165  * a user starts to use the tracing facility, then they need to grow
6166  * to their default size.
6167  *
6168  * This function is to be called when a tracer is about to be used.
6169  */
6170 int tracing_update_buffers(void)
6171 {
6172         int ret = 0;
6173
6174         mutex_lock(&trace_types_lock);
6175         if (!ring_buffer_expanded)
6176                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6177                                                 RING_BUFFER_ALL_CPUS);
6178         mutex_unlock(&trace_types_lock);
6179
6180         return ret;
6181 }
6182
6183 struct trace_option_dentry;
6184
6185 static void
6186 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6187
6188 /*
6189  * Used to clear out the tracer before deletion of an instance.
6190  * Must have trace_types_lock held.
6191  */
6192 static void tracing_set_nop(struct trace_array *tr)
6193 {
6194         if (tr->current_trace == &nop_trace)
6195                 return;
6196         
6197         tr->current_trace->enabled--;
6198
6199         if (tr->current_trace->reset)
6200                 tr->current_trace->reset(tr);
6201
6202         tr->current_trace = &nop_trace;
6203 }
6204
6205 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6206 {
6207         /* Only enable if the directory has been created already. */
6208         if (!tr->dir)
6209                 return;
6210
6211         create_trace_option_files(tr, t);
6212 }
6213
6214 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6215 {
6216         struct tracer *t;
6217 #ifdef CONFIG_TRACER_MAX_TRACE
6218         bool had_max_tr;
6219 #endif
6220         int ret = 0;
6221
6222         mutex_lock(&trace_types_lock);
6223
6224         if (!ring_buffer_expanded) {
6225                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6226                                                 RING_BUFFER_ALL_CPUS);
6227                 if (ret < 0)
6228                         goto out;
6229                 ret = 0;
6230         }
6231
6232         for (t = trace_types; t; t = t->next) {
6233                 if (strcmp(t->name, buf) == 0)
6234                         break;
6235         }
6236         if (!t) {
6237                 ret = -EINVAL;
6238                 goto out;
6239         }
6240         if (t == tr->current_trace)
6241                 goto out;
6242
6243 #ifdef CONFIG_TRACER_SNAPSHOT
6244         if (t->use_max_tr) {
6245                 arch_spin_lock(&tr->max_lock);
6246                 if (tr->cond_snapshot)
6247                         ret = -EBUSY;
6248                 arch_spin_unlock(&tr->max_lock);
6249                 if (ret)
6250                         goto out;
6251         }
6252 #endif
6253         /* Some tracers won't work on kernel command line */
6254         if (system_state < SYSTEM_RUNNING && t->noboot) {
6255                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6256                         t->name);
6257                 goto out;
6258         }
6259
6260         /* Some tracers are only allowed for the top level buffer */
6261         if (!trace_ok_for_array(t, tr)) {
6262                 ret = -EINVAL;
6263                 goto out;
6264         }
6265
6266         /* If trace pipe files are being read, we can't change the tracer */
6267         if (tr->trace_ref) {
6268                 ret = -EBUSY;
6269                 goto out;
6270         }
6271
6272         trace_branch_disable();
6273
6274         tr->current_trace->enabled--;
6275
6276         if (tr->current_trace->reset)
6277                 tr->current_trace->reset(tr);
6278
6279         /* Current trace needs to be nop_trace before synchronize_rcu */
6280         tr->current_trace = &nop_trace;
6281
6282 #ifdef CONFIG_TRACER_MAX_TRACE
6283         had_max_tr = tr->allocated_snapshot;
6284
6285         if (had_max_tr && !t->use_max_tr) {
6286                 /*
6287                  * We need to make sure that the update_max_tr sees that
6288                  * current_trace changed to nop_trace to keep it from
6289                  * swapping the buffers after we resize it.
6290                  * The update_max_tr is called from interrupts disabled
6291                  * so a synchronized_sched() is sufficient.
6292                  */
6293                 synchronize_rcu();
6294                 free_snapshot(tr);
6295         }
6296 #endif
6297
6298 #ifdef CONFIG_TRACER_MAX_TRACE
6299         if (t->use_max_tr && !had_max_tr) {
6300                 ret = tracing_alloc_snapshot_instance(tr);
6301                 if (ret < 0)
6302                         goto out;
6303         }
6304 #endif
6305
6306         if (t->init) {
6307                 ret = tracer_init(t, tr);
6308                 if (ret)
6309                         goto out;
6310         }
6311
6312         tr->current_trace = t;
6313         tr->current_trace->enabled++;
6314         trace_branch_enable(tr);
6315  out:
6316         mutex_unlock(&trace_types_lock);
6317
6318         return ret;
6319 }
6320
6321 static ssize_t
6322 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6323                         size_t cnt, loff_t *ppos)
6324 {
6325         struct trace_array *tr = filp->private_data;
6326         char buf[MAX_TRACER_SIZE+1];
6327         int i;
6328         size_t ret;
6329         int err;
6330
6331         ret = cnt;
6332
6333         if (cnt > MAX_TRACER_SIZE)
6334                 cnt = MAX_TRACER_SIZE;
6335
6336         if (copy_from_user(buf, ubuf, cnt))
6337                 return -EFAULT;
6338
6339         buf[cnt] = 0;
6340
6341         /* strip ending whitespace. */
6342         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6343                 buf[i] = 0;
6344
6345         err = tracing_set_tracer(tr, buf);
6346         if (err)
6347                 return err;
6348
6349         *ppos += ret;
6350
6351         return ret;
6352 }
6353
6354 static ssize_t
6355 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6356                    size_t cnt, loff_t *ppos)
6357 {
6358         char buf[64];
6359         int r;
6360
6361         r = snprintf(buf, sizeof(buf), "%ld\n",
6362                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6363         if (r > sizeof(buf))
6364                 r = sizeof(buf);
6365         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6366 }
6367
6368 static ssize_t
6369 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6370                     size_t cnt, loff_t *ppos)
6371 {
6372         unsigned long val;
6373         int ret;
6374
6375         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6376         if (ret)
6377                 return ret;
6378
6379         *ptr = val * 1000;
6380
6381         return cnt;
6382 }
6383
6384 static ssize_t
6385 tracing_thresh_read(struct file *filp, char __user *ubuf,
6386                     size_t cnt, loff_t *ppos)
6387 {
6388         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6389 }
6390
6391 static ssize_t
6392 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6393                      size_t cnt, loff_t *ppos)
6394 {
6395         struct trace_array *tr = filp->private_data;
6396         int ret;
6397
6398         mutex_lock(&trace_types_lock);
6399         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6400         if (ret < 0)
6401                 goto out;
6402
6403         if (tr->current_trace->update_thresh) {
6404                 ret = tr->current_trace->update_thresh(tr);
6405                 if (ret < 0)
6406                         goto out;
6407         }
6408
6409         ret = cnt;
6410 out:
6411         mutex_unlock(&trace_types_lock);
6412
6413         return ret;
6414 }
6415
6416 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6417
6418 static ssize_t
6419 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6420                      size_t cnt, loff_t *ppos)
6421 {
6422         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6423 }
6424
6425 static ssize_t
6426 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6427                       size_t cnt, loff_t *ppos)
6428 {
6429         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6430 }
6431
6432 #endif
6433
6434 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6435 {
6436         struct trace_array *tr = inode->i_private;
6437         struct trace_iterator *iter;
6438         int ret;
6439
6440         ret = tracing_check_open_get_tr(tr);
6441         if (ret)
6442                 return ret;
6443
6444         mutex_lock(&trace_types_lock);
6445
6446         /* create a buffer to store the information to pass to userspace */
6447         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6448         if (!iter) {
6449                 ret = -ENOMEM;
6450                 __trace_array_put(tr);
6451                 goto out;
6452         }
6453
6454         trace_seq_init(&iter->seq);
6455         iter->trace = tr->current_trace;
6456
6457         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6458                 ret = -ENOMEM;
6459                 goto fail;
6460         }
6461
6462         /* trace pipe does not show start of buffer */
6463         cpumask_setall(iter->started);
6464
6465         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6466                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6467
6468         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6469         if (trace_clocks[tr->clock_id].in_ns)
6470                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6471
6472         iter->tr = tr;
6473         iter->array_buffer = &tr->array_buffer;
6474         iter->cpu_file = tracing_get_cpu(inode);
6475         mutex_init(&iter->mutex);
6476         filp->private_data = iter;
6477
6478         if (iter->trace->pipe_open)
6479                 iter->trace->pipe_open(iter);
6480
6481         nonseekable_open(inode, filp);
6482
6483         tr->trace_ref++;
6484 out:
6485         mutex_unlock(&trace_types_lock);
6486         return ret;
6487
6488 fail:
6489         kfree(iter);
6490         __trace_array_put(tr);
6491         mutex_unlock(&trace_types_lock);
6492         return ret;
6493 }
6494
6495 static int tracing_release_pipe(struct inode *inode, struct file *file)
6496 {
6497         struct trace_iterator *iter = file->private_data;
6498         struct trace_array *tr = inode->i_private;
6499
6500         mutex_lock(&trace_types_lock);
6501
6502         tr->trace_ref--;
6503
6504         if (iter->trace->pipe_close)
6505                 iter->trace->pipe_close(iter);
6506
6507         mutex_unlock(&trace_types_lock);
6508
6509         free_cpumask_var(iter->started);
6510         mutex_destroy(&iter->mutex);
6511         kfree(iter);
6512
6513         trace_array_put(tr);
6514
6515         return 0;
6516 }
6517
6518 static __poll_t
6519 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6520 {
6521         struct trace_array *tr = iter->tr;
6522
6523         /* Iterators are static, they should be filled or empty */
6524         if (trace_buffer_iter(iter, iter->cpu_file))
6525                 return EPOLLIN | EPOLLRDNORM;
6526
6527         if (tr->trace_flags & TRACE_ITER_BLOCK)
6528                 /*
6529                  * Always select as readable when in blocking mode
6530                  */
6531                 return EPOLLIN | EPOLLRDNORM;
6532         else
6533                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6534                                              filp, poll_table);
6535 }
6536
6537 static __poll_t
6538 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6539 {
6540         struct trace_iterator *iter = filp->private_data;
6541
6542         return trace_poll(iter, filp, poll_table);
6543 }
6544
6545 /* Must be called with iter->mutex held. */
6546 static int tracing_wait_pipe(struct file *filp)
6547 {
6548         struct trace_iterator *iter = filp->private_data;
6549         int ret;
6550
6551         while (trace_empty(iter)) {
6552
6553                 if ((filp->f_flags & O_NONBLOCK)) {
6554                         return -EAGAIN;
6555                 }
6556
6557                 /*
6558                  * We block until we read something and tracing is disabled.
6559                  * We still block if tracing is disabled, but we have never
6560                  * read anything. This allows a user to cat this file, and
6561                  * then enable tracing. But after we have read something,
6562                  * we give an EOF when tracing is again disabled.
6563                  *
6564                  * iter->pos will be 0 if we haven't read anything.
6565                  */
6566                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6567                         break;
6568
6569                 mutex_unlock(&iter->mutex);
6570
6571                 ret = wait_on_pipe(iter, 0);
6572
6573                 mutex_lock(&iter->mutex);
6574
6575                 if (ret)
6576                         return ret;
6577         }
6578
6579         return 1;
6580 }
6581
6582 /*
6583  * Consumer reader.
6584  */
6585 static ssize_t
6586 tracing_read_pipe(struct file *filp, char __user *ubuf,
6587                   size_t cnt, loff_t *ppos)
6588 {
6589         struct trace_iterator *iter = filp->private_data;
6590         ssize_t sret;
6591
6592         /*
6593          * Avoid more than one consumer on a single file descriptor
6594          * This is just a matter of traces coherency, the ring buffer itself
6595          * is protected.
6596          */
6597         mutex_lock(&iter->mutex);
6598
6599         /* return any leftover data */
6600         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6601         if (sret != -EBUSY)
6602                 goto out;
6603
6604         trace_seq_init(&iter->seq);
6605
6606         if (iter->trace->read) {
6607                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6608                 if (sret)
6609                         goto out;
6610         }
6611
6612 waitagain:
6613         sret = tracing_wait_pipe(filp);
6614         if (sret <= 0)
6615                 goto out;
6616
6617         /* stop when tracing is finished */
6618         if (trace_empty(iter)) {
6619                 sret = 0;
6620                 goto out;
6621         }
6622
6623         if (cnt >= PAGE_SIZE)
6624                 cnt = PAGE_SIZE - 1;
6625
6626         /* reset all but tr, trace, and overruns */
6627         memset(&iter->seq, 0,
6628                sizeof(struct trace_iterator) -
6629                offsetof(struct trace_iterator, seq));
6630         cpumask_clear(iter->started);
6631         trace_seq_init(&iter->seq);
6632         iter->pos = -1;
6633
6634         trace_event_read_lock();
6635         trace_access_lock(iter->cpu_file);
6636         while (trace_find_next_entry_inc(iter) != NULL) {
6637                 enum print_line_t ret;
6638                 int save_len = iter->seq.seq.len;
6639
6640                 ret = print_trace_line(iter);
6641                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6642                         /* don't print partial lines */
6643                         iter->seq.seq.len = save_len;
6644                         break;
6645                 }
6646                 if (ret != TRACE_TYPE_NO_CONSUME)
6647                         trace_consume(iter);
6648
6649                 if (trace_seq_used(&iter->seq) >= cnt)
6650                         break;
6651
6652                 /*
6653                  * Setting the full flag means we reached the trace_seq buffer
6654                  * size and we should leave by partial output condition above.
6655                  * One of the trace_seq_* functions is not used properly.
6656                  */
6657                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6658                           iter->ent->type);
6659         }
6660         trace_access_unlock(iter->cpu_file);
6661         trace_event_read_unlock();
6662
6663         /* Now copy what we have to the user */
6664         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6665         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6666                 trace_seq_init(&iter->seq);
6667
6668         /*
6669          * If there was nothing to send to user, in spite of consuming trace
6670          * entries, go back to wait for more entries.
6671          */
6672         if (sret == -EBUSY)
6673                 goto waitagain;
6674
6675 out:
6676         mutex_unlock(&iter->mutex);
6677
6678         return sret;
6679 }
6680
6681 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6682                                      unsigned int idx)
6683 {
6684         __free_page(spd->pages[idx]);
6685 }
6686
6687 static size_t
6688 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6689 {
6690         size_t count;
6691         int save_len;
6692         int ret;
6693
6694         /* Seq buffer is page-sized, exactly what we need. */
6695         for (;;) {
6696                 save_len = iter->seq.seq.len;
6697                 ret = print_trace_line(iter);
6698
6699                 if (trace_seq_has_overflowed(&iter->seq)) {
6700                         iter->seq.seq.len = save_len;
6701                         break;
6702                 }
6703
6704                 /*
6705                  * This should not be hit, because it should only
6706                  * be set if the iter->seq overflowed. But check it
6707                  * anyway to be safe.
6708                  */
6709                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6710                         iter->seq.seq.len = save_len;
6711                         break;
6712                 }
6713
6714                 count = trace_seq_used(&iter->seq) - save_len;
6715                 if (rem < count) {
6716                         rem = 0;
6717                         iter->seq.seq.len = save_len;
6718                         break;
6719                 }
6720
6721                 if (ret != TRACE_TYPE_NO_CONSUME)
6722                         trace_consume(iter);
6723                 rem -= count;
6724                 if (!trace_find_next_entry_inc(iter))   {
6725                         rem = 0;
6726                         iter->ent = NULL;
6727                         break;
6728                 }
6729         }
6730
6731         return rem;
6732 }
6733
6734 static ssize_t tracing_splice_read_pipe(struct file *filp,
6735                                         loff_t *ppos,
6736                                         struct pipe_inode_info *pipe,
6737                                         size_t len,
6738                                         unsigned int flags)
6739 {
6740         struct page *pages_def[PIPE_DEF_BUFFERS];
6741         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6742         struct trace_iterator *iter = filp->private_data;
6743         struct splice_pipe_desc spd = {
6744                 .pages          = pages_def,
6745                 .partial        = partial_def,
6746                 .nr_pages       = 0, /* This gets updated below. */
6747                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6748                 .ops            = &default_pipe_buf_ops,
6749                 .spd_release    = tracing_spd_release_pipe,
6750         };
6751         ssize_t ret;
6752         size_t rem;
6753         unsigned int i;
6754
6755         if (splice_grow_spd(pipe, &spd))
6756                 return -ENOMEM;
6757
6758         mutex_lock(&iter->mutex);
6759
6760         if (iter->trace->splice_read) {
6761                 ret = iter->trace->splice_read(iter, filp,
6762                                                ppos, pipe, len, flags);
6763                 if (ret)
6764                         goto out_err;
6765         }
6766
6767         ret = tracing_wait_pipe(filp);
6768         if (ret <= 0)
6769                 goto out_err;
6770
6771         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6772                 ret = -EFAULT;
6773                 goto out_err;
6774         }
6775
6776         trace_event_read_lock();
6777         trace_access_lock(iter->cpu_file);
6778
6779         /* Fill as many pages as possible. */
6780         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6781                 spd.pages[i] = alloc_page(GFP_KERNEL);
6782                 if (!spd.pages[i])
6783                         break;
6784
6785                 rem = tracing_fill_pipe_page(rem, iter);
6786
6787                 /* Copy the data into the page, so we can start over. */
6788                 ret = trace_seq_to_buffer(&iter->seq,
6789                                           page_address(spd.pages[i]),
6790                                           trace_seq_used(&iter->seq));
6791                 if (ret < 0) {
6792                         __free_page(spd.pages[i]);
6793                         break;
6794                 }
6795                 spd.partial[i].offset = 0;
6796                 spd.partial[i].len = trace_seq_used(&iter->seq);
6797
6798                 trace_seq_init(&iter->seq);
6799         }
6800
6801         trace_access_unlock(iter->cpu_file);
6802         trace_event_read_unlock();
6803         mutex_unlock(&iter->mutex);
6804
6805         spd.nr_pages = i;
6806
6807         if (i)
6808                 ret = splice_to_pipe(pipe, &spd);
6809         else
6810                 ret = 0;
6811 out:
6812         splice_shrink_spd(&spd);
6813         return ret;
6814
6815 out_err:
6816         mutex_unlock(&iter->mutex);
6817         goto out;
6818 }
6819
6820 static ssize_t
6821 tracing_entries_read(struct file *filp, char __user *ubuf,
6822                      size_t cnt, loff_t *ppos)
6823 {
6824         struct inode *inode = file_inode(filp);
6825         struct trace_array *tr = inode->i_private;
6826         int cpu = tracing_get_cpu(inode);
6827         char buf[64];
6828         int r = 0;
6829         ssize_t ret;
6830
6831         mutex_lock(&trace_types_lock);
6832
6833         if (cpu == RING_BUFFER_ALL_CPUS) {
6834                 int cpu, buf_size_same;
6835                 unsigned long size;
6836
6837                 size = 0;
6838                 buf_size_same = 1;
6839                 /* check if all cpu sizes are same */
6840                 for_each_tracing_cpu(cpu) {
6841                         /* fill in the size from first enabled cpu */
6842                         if (size == 0)
6843                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6844                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6845                                 buf_size_same = 0;
6846                                 break;
6847                         }
6848                 }
6849
6850                 if (buf_size_same) {
6851                         if (!ring_buffer_expanded)
6852                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6853                                             size >> 10,
6854                                             trace_buf_size >> 10);
6855                         else
6856                                 r = sprintf(buf, "%lu\n", size >> 10);
6857                 } else
6858                         r = sprintf(buf, "X\n");
6859         } else
6860                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6861
6862         mutex_unlock(&trace_types_lock);
6863
6864         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6865         return ret;
6866 }
6867
6868 static ssize_t
6869 tracing_entries_write(struct file *filp, const char __user *ubuf,
6870                       size_t cnt, loff_t *ppos)
6871 {
6872         struct inode *inode = file_inode(filp);
6873         struct trace_array *tr = inode->i_private;
6874         unsigned long val;
6875         int ret;
6876
6877         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6878         if (ret)
6879                 return ret;
6880
6881         /* must have at least 1 entry */
6882         if (!val)
6883                 return -EINVAL;
6884
6885         /* value is in KB */
6886         val <<= 10;
6887         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6888         if (ret < 0)
6889                 return ret;
6890
6891         *ppos += cnt;
6892
6893         return cnt;
6894 }
6895
6896 static ssize_t
6897 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6898                                 size_t cnt, loff_t *ppos)
6899 {
6900         struct trace_array *tr = filp->private_data;
6901         char buf[64];
6902         int r, cpu;
6903         unsigned long size = 0, expanded_size = 0;
6904
6905         mutex_lock(&trace_types_lock);
6906         for_each_tracing_cpu(cpu) {
6907                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6908                 if (!ring_buffer_expanded)
6909                         expanded_size += trace_buf_size >> 10;
6910         }
6911         if (ring_buffer_expanded)
6912                 r = sprintf(buf, "%lu\n", size);
6913         else
6914                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6915         mutex_unlock(&trace_types_lock);
6916
6917         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6918 }
6919
6920 static ssize_t
6921 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6922                           size_t cnt, loff_t *ppos)
6923 {
6924         /*
6925          * There is no need to read what the user has written, this function
6926          * is just to make sure that there is no error when "echo" is used
6927          */
6928
6929         *ppos += cnt;
6930
6931         return cnt;
6932 }
6933
6934 static int
6935 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6936 {
6937         struct trace_array *tr = inode->i_private;
6938
6939         /* disable tracing ? */
6940         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6941                 tracer_tracing_off(tr);
6942         /* resize the ring buffer to 0 */
6943         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6944
6945         trace_array_put(tr);
6946
6947         return 0;
6948 }
6949
6950 static ssize_t
6951 tracing_mark_write(struct file *filp, const char __user *ubuf,
6952                                         size_t cnt, loff_t *fpos)
6953 {
6954         struct trace_array *tr = filp->private_data;
6955         struct ring_buffer_event *event;
6956         enum event_trigger_type tt = ETT_NONE;
6957         struct trace_buffer *buffer;
6958         struct print_entry *entry;
6959         ssize_t written;
6960         int size;
6961         int len;
6962
6963 /* Used in tracing_mark_raw_write() as well */
6964 #define FAULTED_STR "<faulted>"
6965 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6966
6967         if (tracing_disabled)
6968                 return -EINVAL;
6969
6970         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6971                 return -EINVAL;
6972
6973         if (cnt > TRACE_BUF_SIZE)
6974                 cnt = TRACE_BUF_SIZE;
6975
6976         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6977
6978         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6979
6980         /* If less than "<faulted>", then make sure we can still add that */
6981         if (cnt < FAULTED_SIZE)
6982                 size += FAULTED_SIZE - cnt;
6983
6984         buffer = tr->array_buffer.buffer;
6985         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6986                                             tracing_gen_ctx());
6987         if (unlikely(!event))
6988                 /* Ring buffer disabled, return as if not open for write */
6989                 return -EBADF;
6990
6991         entry = ring_buffer_event_data(event);
6992         entry->ip = _THIS_IP_;
6993
6994         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6995         if (len) {
6996                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6997                 cnt = FAULTED_SIZE;
6998                 written = -EFAULT;
6999         } else
7000                 written = cnt;
7001
7002         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7003                 /* do not add \n before testing triggers, but add \0 */
7004                 entry->buf[cnt] = '\0';
7005                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7006         }
7007
7008         if (entry->buf[cnt - 1] != '\n') {
7009                 entry->buf[cnt] = '\n';
7010                 entry->buf[cnt + 1] = '\0';
7011         } else
7012                 entry->buf[cnt] = '\0';
7013
7014         if (static_branch_unlikely(&trace_marker_exports_enabled))
7015                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7016         __buffer_unlock_commit(buffer, event);
7017
7018         if (tt)
7019                 event_triggers_post_call(tr->trace_marker_file, tt);
7020
7021         if (written > 0)
7022                 *fpos += written;
7023
7024         return written;
7025 }
7026
7027 /* Limit it for now to 3K (including tag) */
7028 #define RAW_DATA_MAX_SIZE (1024*3)
7029
7030 static ssize_t
7031 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7032                                         size_t cnt, loff_t *fpos)
7033 {
7034         struct trace_array *tr = filp->private_data;
7035         struct ring_buffer_event *event;
7036         struct trace_buffer *buffer;
7037         struct raw_data_entry *entry;
7038         ssize_t written;
7039         int size;
7040         int len;
7041
7042 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7043
7044         if (tracing_disabled)
7045                 return -EINVAL;
7046
7047         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7048                 return -EINVAL;
7049
7050         /* The marker must at least have a tag id */
7051         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7052                 return -EINVAL;
7053
7054         if (cnt > TRACE_BUF_SIZE)
7055                 cnt = TRACE_BUF_SIZE;
7056
7057         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7058
7059         size = sizeof(*entry) + cnt;
7060         if (cnt < FAULT_SIZE_ID)
7061                 size += FAULT_SIZE_ID - cnt;
7062
7063         buffer = tr->array_buffer.buffer;
7064         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7065                                             tracing_gen_ctx());
7066         if (!event)
7067                 /* Ring buffer disabled, return as if not open for write */
7068                 return -EBADF;
7069
7070         entry = ring_buffer_event_data(event);
7071
7072         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7073         if (len) {
7074                 entry->id = -1;
7075                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7076                 written = -EFAULT;
7077         } else
7078                 written = cnt;
7079
7080         __buffer_unlock_commit(buffer, event);
7081
7082         if (written > 0)
7083                 *fpos += written;
7084
7085         return written;
7086 }
7087
7088 static int tracing_clock_show(struct seq_file *m, void *v)
7089 {
7090         struct trace_array *tr = m->private;
7091         int i;
7092
7093         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7094                 seq_printf(m,
7095                         "%s%s%s%s", i ? " " : "",
7096                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7097                         i == tr->clock_id ? "]" : "");
7098         seq_putc(m, '\n');
7099
7100         return 0;
7101 }
7102
7103 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7104 {
7105         int i;
7106
7107         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7108                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7109                         break;
7110         }
7111         if (i == ARRAY_SIZE(trace_clocks))
7112                 return -EINVAL;
7113
7114         mutex_lock(&trace_types_lock);
7115
7116         tr->clock_id = i;
7117
7118         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7119
7120         /*
7121          * New clock may not be consistent with the previous clock.
7122          * Reset the buffer so that it doesn't have incomparable timestamps.
7123          */
7124         tracing_reset_online_cpus(&tr->array_buffer);
7125
7126 #ifdef CONFIG_TRACER_MAX_TRACE
7127         if (tr->max_buffer.buffer)
7128                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7129         tracing_reset_online_cpus(&tr->max_buffer);
7130 #endif
7131
7132         mutex_unlock(&trace_types_lock);
7133
7134         return 0;
7135 }
7136
7137 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7138                                    size_t cnt, loff_t *fpos)
7139 {
7140         struct seq_file *m = filp->private_data;
7141         struct trace_array *tr = m->private;
7142         char buf[64];
7143         const char *clockstr;
7144         int ret;
7145
7146         if (cnt >= sizeof(buf))
7147                 return -EINVAL;
7148
7149         if (copy_from_user(buf, ubuf, cnt))
7150                 return -EFAULT;
7151
7152         buf[cnt] = 0;
7153
7154         clockstr = strstrip(buf);
7155
7156         ret = tracing_set_clock(tr, clockstr);
7157         if (ret)
7158                 return ret;
7159
7160         *fpos += cnt;
7161
7162         return cnt;
7163 }
7164
7165 static int tracing_clock_open(struct inode *inode, struct file *file)
7166 {
7167         struct trace_array *tr = inode->i_private;
7168         int ret;
7169
7170         ret = tracing_check_open_get_tr(tr);
7171         if (ret)
7172                 return ret;
7173
7174         ret = single_open(file, tracing_clock_show, inode->i_private);
7175         if (ret < 0)
7176                 trace_array_put(tr);
7177
7178         return ret;
7179 }
7180
7181 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7182 {
7183         struct trace_array *tr = m->private;
7184
7185         mutex_lock(&trace_types_lock);
7186
7187         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7188                 seq_puts(m, "delta [absolute]\n");
7189         else
7190                 seq_puts(m, "[delta] absolute\n");
7191
7192         mutex_unlock(&trace_types_lock);
7193
7194         return 0;
7195 }
7196
7197 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7198 {
7199         struct trace_array *tr = inode->i_private;
7200         int ret;
7201
7202         ret = tracing_check_open_get_tr(tr);
7203         if (ret)
7204                 return ret;
7205
7206         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7207         if (ret < 0)
7208                 trace_array_put(tr);
7209
7210         return ret;
7211 }
7212
7213 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7214 {
7215         if (rbe == this_cpu_read(trace_buffered_event))
7216                 return ring_buffer_time_stamp(buffer);
7217
7218         return ring_buffer_event_time_stamp(buffer, rbe);
7219 }
7220
7221 /*
7222  * Set or disable using the per CPU trace_buffer_event when possible.
7223  */
7224 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7225 {
7226         int ret = 0;
7227
7228         mutex_lock(&trace_types_lock);
7229
7230         if (set && tr->no_filter_buffering_ref++)
7231                 goto out;
7232
7233         if (!set) {
7234                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7235                         ret = -EINVAL;
7236                         goto out;
7237                 }
7238
7239                 --tr->no_filter_buffering_ref;
7240         }
7241  out:
7242         mutex_unlock(&trace_types_lock);
7243
7244         return ret;
7245 }
7246
7247 struct ftrace_buffer_info {
7248         struct trace_iterator   iter;
7249         void                    *spare;
7250         unsigned int            spare_cpu;
7251         unsigned int            read;
7252 };
7253
7254 #ifdef CONFIG_TRACER_SNAPSHOT
7255 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7256 {
7257         struct trace_array *tr = inode->i_private;
7258         struct trace_iterator *iter;
7259         struct seq_file *m;
7260         int ret;
7261
7262         ret = tracing_check_open_get_tr(tr);
7263         if (ret)
7264                 return ret;
7265
7266         if (file->f_mode & FMODE_READ) {
7267                 iter = __tracing_open(inode, file, true);
7268                 if (IS_ERR(iter))
7269                         ret = PTR_ERR(iter);
7270         } else {
7271                 /* Writes still need the seq_file to hold the private data */
7272                 ret = -ENOMEM;
7273                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7274                 if (!m)
7275                         goto out;
7276                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7277                 if (!iter) {
7278                         kfree(m);
7279                         goto out;
7280                 }
7281                 ret = 0;
7282
7283                 iter->tr = tr;
7284                 iter->array_buffer = &tr->max_buffer;
7285                 iter->cpu_file = tracing_get_cpu(inode);
7286                 m->private = iter;
7287                 file->private_data = m;
7288         }
7289 out:
7290         if (ret < 0)
7291                 trace_array_put(tr);
7292
7293         return ret;
7294 }
7295
7296 static ssize_t
7297 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7298                        loff_t *ppos)
7299 {
7300         struct seq_file *m = filp->private_data;
7301         struct trace_iterator *iter = m->private;
7302         struct trace_array *tr = iter->tr;
7303         unsigned long val;
7304         int ret;
7305
7306         ret = tracing_update_buffers();
7307         if (ret < 0)
7308                 return ret;
7309
7310         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7311         if (ret)
7312                 return ret;
7313
7314         mutex_lock(&trace_types_lock);
7315
7316         if (tr->current_trace->use_max_tr) {
7317                 ret = -EBUSY;
7318                 goto out;
7319         }
7320
7321         arch_spin_lock(&tr->max_lock);
7322         if (tr->cond_snapshot)
7323                 ret = -EBUSY;
7324         arch_spin_unlock(&tr->max_lock);
7325         if (ret)
7326                 goto out;
7327
7328         switch (val) {
7329         case 0:
7330                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7331                         ret = -EINVAL;
7332                         break;
7333                 }
7334                 if (tr->allocated_snapshot)
7335                         free_snapshot(tr);
7336                 break;
7337         case 1:
7338 /* Only allow per-cpu swap if the ring buffer supports it */
7339 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7340                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7341                         ret = -EINVAL;
7342                         break;
7343                 }
7344 #endif
7345                 if (tr->allocated_snapshot)
7346                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7347                                         &tr->array_buffer, iter->cpu_file);
7348                 else
7349                         ret = tracing_alloc_snapshot_instance(tr);
7350                 if (ret < 0)
7351                         break;
7352                 local_irq_disable();
7353                 /* Now, we're going to swap */
7354                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7355                         update_max_tr(tr, current, smp_processor_id(), NULL);
7356                 else
7357                         update_max_tr_single(tr, current, iter->cpu_file);
7358                 local_irq_enable();
7359                 break;
7360         default:
7361                 if (tr->allocated_snapshot) {
7362                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7363                                 tracing_reset_online_cpus(&tr->max_buffer);
7364                         else
7365                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7366                 }
7367                 break;
7368         }
7369
7370         if (ret >= 0) {
7371                 *ppos += cnt;
7372                 ret = cnt;
7373         }
7374 out:
7375         mutex_unlock(&trace_types_lock);
7376         return ret;
7377 }
7378
7379 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7380 {
7381         struct seq_file *m = file->private_data;
7382         int ret;
7383
7384         ret = tracing_release(inode, file);
7385
7386         if (file->f_mode & FMODE_READ)
7387                 return ret;
7388
7389         /* If write only, the seq_file is just a stub */
7390         if (m)
7391                 kfree(m->private);
7392         kfree(m);
7393
7394         return 0;
7395 }
7396
7397 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7398 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7399                                     size_t count, loff_t *ppos);
7400 static int tracing_buffers_release(struct inode *inode, struct file *file);
7401 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7402                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7403
7404 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7405 {
7406         struct ftrace_buffer_info *info;
7407         int ret;
7408
7409         /* The following checks for tracefs lockdown */
7410         ret = tracing_buffers_open(inode, filp);
7411         if (ret < 0)
7412                 return ret;
7413
7414         info = filp->private_data;
7415
7416         if (info->iter.trace->use_max_tr) {
7417                 tracing_buffers_release(inode, filp);
7418                 return -EBUSY;
7419         }
7420
7421         info->iter.snapshot = true;
7422         info->iter.array_buffer = &info->iter.tr->max_buffer;
7423
7424         return ret;
7425 }
7426
7427 #endif /* CONFIG_TRACER_SNAPSHOT */
7428
7429
7430 static const struct file_operations tracing_thresh_fops = {
7431         .open           = tracing_open_generic,
7432         .read           = tracing_thresh_read,
7433         .write          = tracing_thresh_write,
7434         .llseek         = generic_file_llseek,
7435 };
7436
7437 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7438 static const struct file_operations tracing_max_lat_fops = {
7439         .open           = tracing_open_generic,
7440         .read           = tracing_max_lat_read,
7441         .write          = tracing_max_lat_write,
7442         .llseek         = generic_file_llseek,
7443 };
7444 #endif
7445
7446 static const struct file_operations set_tracer_fops = {
7447         .open           = tracing_open_generic,
7448         .read           = tracing_set_trace_read,
7449         .write          = tracing_set_trace_write,
7450         .llseek         = generic_file_llseek,
7451 };
7452
7453 static const struct file_operations tracing_pipe_fops = {
7454         .open           = tracing_open_pipe,
7455         .poll           = tracing_poll_pipe,
7456         .read           = tracing_read_pipe,
7457         .splice_read    = tracing_splice_read_pipe,
7458         .release        = tracing_release_pipe,
7459         .llseek         = no_llseek,
7460 };
7461
7462 static const struct file_operations tracing_entries_fops = {
7463         .open           = tracing_open_generic_tr,
7464         .read           = tracing_entries_read,
7465         .write          = tracing_entries_write,
7466         .llseek         = generic_file_llseek,
7467         .release        = tracing_release_generic_tr,
7468 };
7469
7470 static const struct file_operations tracing_total_entries_fops = {
7471         .open           = tracing_open_generic_tr,
7472         .read           = tracing_total_entries_read,
7473         .llseek         = generic_file_llseek,
7474         .release        = tracing_release_generic_tr,
7475 };
7476
7477 static const struct file_operations tracing_free_buffer_fops = {
7478         .open           = tracing_open_generic_tr,
7479         .write          = tracing_free_buffer_write,
7480         .release        = tracing_free_buffer_release,
7481 };
7482
7483 static const struct file_operations tracing_mark_fops = {
7484         .open           = tracing_open_generic_tr,
7485         .write          = tracing_mark_write,
7486         .llseek         = generic_file_llseek,
7487         .release        = tracing_release_generic_tr,
7488 };
7489
7490 static const struct file_operations tracing_mark_raw_fops = {
7491         .open           = tracing_open_generic_tr,
7492         .write          = tracing_mark_raw_write,
7493         .llseek         = generic_file_llseek,
7494         .release        = tracing_release_generic_tr,
7495 };
7496
7497 static const struct file_operations trace_clock_fops = {
7498         .open           = tracing_clock_open,
7499         .read           = seq_read,
7500         .llseek         = seq_lseek,
7501         .release        = tracing_single_release_tr,
7502         .write          = tracing_clock_write,
7503 };
7504
7505 static const struct file_operations trace_time_stamp_mode_fops = {
7506         .open           = tracing_time_stamp_mode_open,
7507         .read           = seq_read,
7508         .llseek         = seq_lseek,
7509         .release        = tracing_single_release_tr,
7510 };
7511
7512 #ifdef CONFIG_TRACER_SNAPSHOT
7513 static const struct file_operations snapshot_fops = {
7514         .open           = tracing_snapshot_open,
7515         .read           = seq_read,
7516         .write          = tracing_snapshot_write,
7517         .llseek         = tracing_lseek,
7518         .release        = tracing_snapshot_release,
7519 };
7520
7521 static const struct file_operations snapshot_raw_fops = {
7522         .open           = snapshot_raw_open,
7523         .read           = tracing_buffers_read,
7524         .release        = tracing_buffers_release,
7525         .splice_read    = tracing_buffers_splice_read,
7526         .llseek         = no_llseek,
7527 };
7528
7529 #endif /* CONFIG_TRACER_SNAPSHOT */
7530
7531 #define TRACING_LOG_ERRS_MAX    8
7532 #define TRACING_LOG_LOC_MAX     128
7533
7534 #define CMD_PREFIX "  Command: "
7535
7536 struct err_info {
7537         const char      **errs; /* ptr to loc-specific array of err strings */
7538         u8              type;   /* index into errs -> specific err string */
7539         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7540         u64             ts;
7541 };
7542
7543 struct tracing_log_err {
7544         struct list_head        list;
7545         struct err_info         info;
7546         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7547         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7548 };
7549
7550 static DEFINE_MUTEX(tracing_err_log_lock);
7551
7552 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7553 {
7554         struct tracing_log_err *err;
7555
7556         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7557                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7558                 if (!err)
7559                         err = ERR_PTR(-ENOMEM);
7560                 tr->n_err_log_entries++;
7561
7562                 return err;
7563         }
7564
7565         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7566         list_del(&err->list);
7567
7568         return err;
7569 }
7570
7571 /**
7572  * err_pos - find the position of a string within a command for error careting
7573  * @cmd: The tracing command that caused the error
7574  * @str: The string to position the caret at within @cmd
7575  *
7576  * Finds the position of the first occurrence of @str within @cmd.  The
7577  * return value can be passed to tracing_log_err() for caret placement
7578  * within @cmd.
7579  *
7580  * Returns the index within @cmd of the first occurrence of @str or 0
7581  * if @str was not found.
7582  */
7583 unsigned int err_pos(char *cmd, const char *str)
7584 {
7585         char *found;
7586
7587         if (WARN_ON(!strlen(cmd)))
7588                 return 0;
7589
7590         found = strstr(cmd, str);
7591         if (found)
7592                 return found - cmd;
7593
7594         return 0;
7595 }
7596
7597 /**
7598  * tracing_log_err - write an error to the tracing error log
7599  * @tr: The associated trace array for the error (NULL for top level array)
7600  * @loc: A string describing where the error occurred
7601  * @cmd: The tracing command that caused the error
7602  * @errs: The array of loc-specific static error strings
7603  * @type: The index into errs[], which produces the specific static err string
7604  * @pos: The position the caret should be placed in the cmd
7605  *
7606  * Writes an error into tracing/error_log of the form:
7607  *
7608  * <loc>: error: <text>
7609  *   Command: <cmd>
7610  *              ^
7611  *
7612  * tracing/error_log is a small log file containing the last
7613  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7614  * unless there has been a tracing error, and the error log can be
7615  * cleared and have its memory freed by writing the empty string in
7616  * truncation mode to it i.e. echo > tracing/error_log.
7617  *
7618  * NOTE: the @errs array along with the @type param are used to
7619  * produce a static error string - this string is not copied and saved
7620  * when the error is logged - only a pointer to it is saved.  See
7621  * existing callers for examples of how static strings are typically
7622  * defined for use with tracing_log_err().
7623  */
7624 void tracing_log_err(struct trace_array *tr,
7625                      const char *loc, const char *cmd,
7626                      const char **errs, u8 type, u8 pos)
7627 {
7628         struct tracing_log_err *err;
7629
7630         if (!tr)
7631                 tr = &global_trace;
7632
7633         mutex_lock(&tracing_err_log_lock);
7634         err = get_tracing_log_err(tr);
7635         if (PTR_ERR(err) == -ENOMEM) {
7636                 mutex_unlock(&tracing_err_log_lock);
7637                 return;
7638         }
7639
7640         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7641         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7642
7643         err->info.errs = errs;
7644         err->info.type = type;
7645         err->info.pos = pos;
7646         err->info.ts = local_clock();
7647
7648         list_add_tail(&err->list, &tr->err_log);
7649         mutex_unlock(&tracing_err_log_lock);
7650 }
7651
7652 static void clear_tracing_err_log(struct trace_array *tr)
7653 {
7654         struct tracing_log_err *err, *next;
7655
7656         mutex_lock(&tracing_err_log_lock);
7657         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7658                 list_del(&err->list);
7659                 kfree(err);
7660         }
7661
7662         tr->n_err_log_entries = 0;
7663         mutex_unlock(&tracing_err_log_lock);
7664 }
7665
7666 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7667 {
7668         struct trace_array *tr = m->private;
7669
7670         mutex_lock(&tracing_err_log_lock);
7671
7672         return seq_list_start(&tr->err_log, *pos);
7673 }
7674
7675 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7676 {
7677         struct trace_array *tr = m->private;
7678
7679         return seq_list_next(v, &tr->err_log, pos);
7680 }
7681
7682 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7683 {
7684         mutex_unlock(&tracing_err_log_lock);
7685 }
7686
7687 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7688 {
7689         u8 i;
7690
7691         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7692                 seq_putc(m, ' ');
7693         for (i = 0; i < pos; i++)
7694                 seq_putc(m, ' ');
7695         seq_puts(m, "^\n");
7696 }
7697
7698 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7699 {
7700         struct tracing_log_err *err = v;
7701
7702         if (err) {
7703                 const char *err_text = err->info.errs[err->info.type];
7704                 u64 sec = err->info.ts;
7705                 u32 nsec;
7706
7707                 nsec = do_div(sec, NSEC_PER_SEC);
7708                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7709                            err->loc, err_text);
7710                 seq_printf(m, "%s", err->cmd);
7711                 tracing_err_log_show_pos(m, err->info.pos);
7712         }
7713
7714         return 0;
7715 }
7716
7717 static const struct seq_operations tracing_err_log_seq_ops = {
7718         .start  = tracing_err_log_seq_start,
7719         .next   = tracing_err_log_seq_next,
7720         .stop   = tracing_err_log_seq_stop,
7721         .show   = tracing_err_log_seq_show
7722 };
7723
7724 static int tracing_err_log_open(struct inode *inode, struct file *file)
7725 {
7726         struct trace_array *tr = inode->i_private;
7727         int ret = 0;
7728
7729         ret = tracing_check_open_get_tr(tr);
7730         if (ret)
7731                 return ret;
7732
7733         /* If this file was opened for write, then erase contents */
7734         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7735                 clear_tracing_err_log(tr);
7736
7737         if (file->f_mode & FMODE_READ) {
7738                 ret = seq_open(file, &tracing_err_log_seq_ops);
7739                 if (!ret) {
7740                         struct seq_file *m = file->private_data;
7741                         m->private = tr;
7742                 } else {
7743                         trace_array_put(tr);
7744                 }
7745         }
7746         return ret;
7747 }
7748
7749 static ssize_t tracing_err_log_write(struct file *file,
7750                                      const char __user *buffer,
7751                                      size_t count, loff_t *ppos)
7752 {
7753         return count;
7754 }
7755
7756 static int tracing_err_log_release(struct inode *inode, struct file *file)
7757 {
7758         struct trace_array *tr = inode->i_private;
7759
7760         trace_array_put(tr);
7761
7762         if (file->f_mode & FMODE_READ)
7763                 seq_release(inode, file);
7764
7765         return 0;
7766 }
7767
7768 static const struct file_operations tracing_err_log_fops = {
7769         .open           = tracing_err_log_open,
7770         .write          = tracing_err_log_write,
7771         .read           = seq_read,
7772         .llseek         = seq_lseek,
7773         .release        = tracing_err_log_release,
7774 };
7775
7776 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7777 {
7778         struct trace_array *tr = inode->i_private;
7779         struct ftrace_buffer_info *info;
7780         int ret;
7781
7782         ret = tracing_check_open_get_tr(tr);
7783         if (ret)
7784                 return ret;
7785
7786         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7787         if (!info) {
7788                 trace_array_put(tr);
7789                 return -ENOMEM;
7790         }
7791
7792         mutex_lock(&trace_types_lock);
7793
7794         info->iter.tr           = tr;
7795         info->iter.cpu_file     = tracing_get_cpu(inode);
7796         info->iter.trace        = tr->current_trace;
7797         info->iter.array_buffer = &tr->array_buffer;
7798         info->spare             = NULL;
7799         /* Force reading ring buffer for first read */
7800         info->read              = (unsigned int)-1;
7801
7802         filp->private_data = info;
7803
7804         tr->trace_ref++;
7805
7806         mutex_unlock(&trace_types_lock);
7807
7808         ret = nonseekable_open(inode, filp);
7809         if (ret < 0)
7810                 trace_array_put(tr);
7811
7812         return ret;
7813 }
7814
7815 static __poll_t
7816 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7817 {
7818         struct ftrace_buffer_info *info = filp->private_data;
7819         struct trace_iterator *iter = &info->iter;
7820
7821         return trace_poll(iter, filp, poll_table);
7822 }
7823
7824 static ssize_t
7825 tracing_buffers_read(struct file *filp, char __user *ubuf,
7826                      size_t count, loff_t *ppos)
7827 {
7828         struct ftrace_buffer_info *info = filp->private_data;
7829         struct trace_iterator *iter = &info->iter;
7830         ssize_t ret = 0;
7831         ssize_t size;
7832
7833         if (!count)
7834                 return 0;
7835
7836 #ifdef CONFIG_TRACER_MAX_TRACE
7837         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7838                 return -EBUSY;
7839 #endif
7840
7841         if (!info->spare) {
7842                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7843                                                           iter->cpu_file);
7844                 if (IS_ERR(info->spare)) {
7845                         ret = PTR_ERR(info->spare);
7846                         info->spare = NULL;
7847                 } else {
7848                         info->spare_cpu = iter->cpu_file;
7849                 }
7850         }
7851         if (!info->spare)
7852                 return ret;
7853
7854         /* Do we have previous read data to read? */
7855         if (info->read < PAGE_SIZE)
7856                 goto read;
7857
7858  again:
7859         trace_access_lock(iter->cpu_file);
7860         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7861                                     &info->spare,
7862                                     count,
7863                                     iter->cpu_file, 0);
7864         trace_access_unlock(iter->cpu_file);
7865
7866         if (ret < 0) {
7867                 if (trace_empty(iter)) {
7868                         if ((filp->f_flags & O_NONBLOCK))
7869                                 return -EAGAIN;
7870
7871                         ret = wait_on_pipe(iter, 0);
7872                         if (ret)
7873                                 return ret;
7874
7875                         goto again;
7876                 }
7877                 return 0;
7878         }
7879
7880         info->read = 0;
7881  read:
7882         size = PAGE_SIZE - info->read;
7883         if (size > count)
7884                 size = count;
7885
7886         ret = copy_to_user(ubuf, info->spare + info->read, size);
7887         if (ret == size)
7888                 return -EFAULT;
7889
7890         size -= ret;
7891
7892         *ppos += size;
7893         info->read += size;
7894
7895         return size;
7896 }
7897
7898 static int tracing_buffers_release(struct inode *inode, struct file *file)
7899 {
7900         struct ftrace_buffer_info *info = file->private_data;
7901         struct trace_iterator *iter = &info->iter;
7902
7903         mutex_lock(&trace_types_lock);
7904
7905         iter->tr->trace_ref--;
7906
7907         __trace_array_put(iter->tr);
7908
7909         if (info->spare)
7910                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7911                                            info->spare_cpu, info->spare);
7912         kvfree(info);
7913
7914         mutex_unlock(&trace_types_lock);
7915
7916         return 0;
7917 }
7918
7919 struct buffer_ref {
7920         struct trace_buffer     *buffer;
7921         void                    *page;
7922         int                     cpu;
7923         refcount_t              refcount;
7924 };
7925
7926 static void buffer_ref_release(struct buffer_ref *ref)
7927 {
7928         if (!refcount_dec_and_test(&ref->refcount))
7929                 return;
7930         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7931         kfree(ref);
7932 }
7933
7934 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7935                                     struct pipe_buffer *buf)
7936 {
7937         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7938
7939         buffer_ref_release(ref);
7940         buf->private = 0;
7941 }
7942
7943 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7944                                 struct pipe_buffer *buf)
7945 {
7946         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7947
7948         if (refcount_read(&ref->refcount) > INT_MAX/2)
7949                 return false;
7950
7951         refcount_inc(&ref->refcount);
7952         return true;
7953 }
7954
7955 /* Pipe buffer operations for a buffer. */
7956 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7957         .release                = buffer_pipe_buf_release,
7958         .get                    = buffer_pipe_buf_get,
7959 };
7960
7961 /*
7962  * Callback from splice_to_pipe(), if we need to release some pages
7963  * at the end of the spd in case we error'ed out in filling the pipe.
7964  */
7965 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7966 {
7967         struct buffer_ref *ref =
7968                 (struct buffer_ref *)spd->partial[i].private;
7969
7970         buffer_ref_release(ref);
7971         spd->partial[i].private = 0;
7972 }
7973
7974 static ssize_t
7975 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7976                             struct pipe_inode_info *pipe, size_t len,
7977                             unsigned int flags)
7978 {
7979         struct ftrace_buffer_info *info = file->private_data;
7980         struct trace_iterator *iter = &info->iter;
7981         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7982         struct page *pages_def[PIPE_DEF_BUFFERS];
7983         struct splice_pipe_desc spd = {
7984                 .pages          = pages_def,
7985                 .partial        = partial_def,
7986                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7987                 .ops            = &buffer_pipe_buf_ops,
7988                 .spd_release    = buffer_spd_release,
7989         };
7990         struct buffer_ref *ref;
7991         int entries, i;
7992         ssize_t ret = 0;
7993
7994 #ifdef CONFIG_TRACER_MAX_TRACE
7995         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7996                 return -EBUSY;
7997 #endif
7998
7999         if (*ppos & (PAGE_SIZE - 1))
8000                 return -EINVAL;
8001
8002         if (len & (PAGE_SIZE - 1)) {
8003                 if (len < PAGE_SIZE)
8004                         return -EINVAL;
8005                 len &= PAGE_MASK;
8006         }
8007
8008         if (splice_grow_spd(pipe, &spd))
8009                 return -ENOMEM;
8010
8011  again:
8012         trace_access_lock(iter->cpu_file);
8013         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8014
8015         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8016                 struct page *page;
8017                 int r;
8018
8019                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8020                 if (!ref) {
8021                         ret = -ENOMEM;
8022                         break;
8023                 }
8024
8025                 refcount_set(&ref->refcount, 1);
8026                 ref->buffer = iter->array_buffer->buffer;
8027                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8028                 if (IS_ERR(ref->page)) {
8029                         ret = PTR_ERR(ref->page);
8030                         ref->page = NULL;
8031                         kfree(ref);
8032                         break;
8033                 }
8034                 ref->cpu = iter->cpu_file;
8035
8036                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8037                                           len, iter->cpu_file, 1);
8038                 if (r < 0) {
8039                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8040                                                    ref->page);
8041                         kfree(ref);
8042                         break;
8043                 }
8044
8045                 page = virt_to_page(ref->page);
8046
8047                 spd.pages[i] = page;
8048                 spd.partial[i].len = PAGE_SIZE;
8049                 spd.partial[i].offset = 0;
8050                 spd.partial[i].private = (unsigned long)ref;
8051                 spd.nr_pages++;
8052                 *ppos += PAGE_SIZE;
8053
8054                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8055         }
8056
8057         trace_access_unlock(iter->cpu_file);
8058         spd.nr_pages = i;
8059
8060         /* did we read anything? */
8061         if (!spd.nr_pages) {
8062                 if (ret)
8063                         goto out;
8064
8065                 ret = -EAGAIN;
8066                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8067                         goto out;
8068
8069                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8070                 if (ret)
8071                         goto out;
8072
8073                 goto again;
8074         }
8075
8076         ret = splice_to_pipe(pipe, &spd);
8077 out:
8078         splice_shrink_spd(&spd);
8079
8080         return ret;
8081 }
8082
8083 static const struct file_operations tracing_buffers_fops = {
8084         .open           = tracing_buffers_open,
8085         .read           = tracing_buffers_read,
8086         .poll           = tracing_buffers_poll,
8087         .release        = tracing_buffers_release,
8088         .splice_read    = tracing_buffers_splice_read,
8089         .llseek         = no_llseek,
8090 };
8091
8092 static ssize_t
8093 tracing_stats_read(struct file *filp, char __user *ubuf,
8094                    size_t count, loff_t *ppos)
8095 {
8096         struct inode *inode = file_inode(filp);
8097         struct trace_array *tr = inode->i_private;
8098         struct array_buffer *trace_buf = &tr->array_buffer;
8099         int cpu = tracing_get_cpu(inode);
8100         struct trace_seq *s;
8101         unsigned long cnt;
8102         unsigned long long t;
8103         unsigned long usec_rem;
8104
8105         s = kmalloc(sizeof(*s), GFP_KERNEL);
8106         if (!s)
8107                 return -ENOMEM;
8108
8109         trace_seq_init(s);
8110
8111         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8112         trace_seq_printf(s, "entries: %ld\n", cnt);
8113
8114         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8115         trace_seq_printf(s, "overrun: %ld\n", cnt);
8116
8117         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8118         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8119
8120         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8121         trace_seq_printf(s, "bytes: %ld\n", cnt);
8122
8123         if (trace_clocks[tr->clock_id].in_ns) {
8124                 /* local or global for trace_clock */
8125                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8126                 usec_rem = do_div(t, USEC_PER_SEC);
8127                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8128                                                                 t, usec_rem);
8129
8130                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8131                 usec_rem = do_div(t, USEC_PER_SEC);
8132                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8133         } else {
8134                 /* counter or tsc mode for trace_clock */
8135                 trace_seq_printf(s, "oldest event ts: %llu\n",
8136                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8137
8138                 trace_seq_printf(s, "now ts: %llu\n",
8139                                 ring_buffer_time_stamp(trace_buf->buffer));
8140         }
8141
8142         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8143         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8144
8145         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8146         trace_seq_printf(s, "read events: %ld\n", cnt);
8147
8148         count = simple_read_from_buffer(ubuf, count, ppos,
8149                                         s->buffer, trace_seq_used(s));
8150
8151         kfree(s);
8152
8153         return count;
8154 }
8155
8156 static const struct file_operations tracing_stats_fops = {
8157         .open           = tracing_open_generic_tr,
8158         .read           = tracing_stats_read,
8159         .llseek         = generic_file_llseek,
8160         .release        = tracing_release_generic_tr,
8161 };
8162
8163 #ifdef CONFIG_DYNAMIC_FTRACE
8164
8165 static ssize_t
8166 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8167                   size_t cnt, loff_t *ppos)
8168 {
8169         ssize_t ret;
8170         char *buf;
8171         int r;
8172
8173         /* 256 should be plenty to hold the amount needed */
8174         buf = kmalloc(256, GFP_KERNEL);
8175         if (!buf)
8176                 return -ENOMEM;
8177
8178         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8179                       ftrace_update_tot_cnt,
8180                       ftrace_number_of_pages,
8181                       ftrace_number_of_groups);
8182
8183         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8184         kfree(buf);
8185         return ret;
8186 }
8187
8188 static const struct file_operations tracing_dyn_info_fops = {
8189         .open           = tracing_open_generic,
8190         .read           = tracing_read_dyn_info,
8191         .llseek         = generic_file_llseek,
8192 };
8193 #endif /* CONFIG_DYNAMIC_FTRACE */
8194
8195 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8196 static void
8197 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8198                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8199                 void *data)
8200 {
8201         tracing_snapshot_instance(tr);
8202 }
8203
8204 static void
8205 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8206                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8207                       void *data)
8208 {
8209         struct ftrace_func_mapper *mapper = data;
8210         long *count = NULL;
8211
8212         if (mapper)
8213                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8214
8215         if (count) {
8216
8217                 if (*count <= 0)
8218                         return;
8219
8220                 (*count)--;
8221         }
8222
8223         tracing_snapshot_instance(tr);
8224 }
8225
8226 static int
8227 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8228                       struct ftrace_probe_ops *ops, void *data)
8229 {
8230         struct ftrace_func_mapper *mapper = data;
8231         long *count = NULL;
8232
8233         seq_printf(m, "%ps:", (void *)ip);
8234
8235         seq_puts(m, "snapshot");
8236
8237         if (mapper)
8238                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8239
8240         if (count)
8241                 seq_printf(m, ":count=%ld\n", *count);
8242         else
8243                 seq_puts(m, ":unlimited\n");
8244
8245         return 0;
8246 }
8247
8248 static int
8249 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8250                      unsigned long ip, void *init_data, void **data)
8251 {
8252         struct ftrace_func_mapper *mapper = *data;
8253
8254         if (!mapper) {
8255                 mapper = allocate_ftrace_func_mapper();
8256                 if (!mapper)
8257                         return -ENOMEM;
8258                 *data = mapper;
8259         }
8260
8261         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8262 }
8263
8264 static void
8265 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8266                      unsigned long ip, void *data)
8267 {
8268         struct ftrace_func_mapper *mapper = data;
8269
8270         if (!ip) {
8271                 if (!mapper)
8272                         return;
8273                 free_ftrace_func_mapper(mapper, NULL);
8274                 return;
8275         }
8276
8277         ftrace_func_mapper_remove_ip(mapper, ip);
8278 }
8279
8280 static struct ftrace_probe_ops snapshot_probe_ops = {
8281         .func                   = ftrace_snapshot,
8282         .print                  = ftrace_snapshot_print,
8283 };
8284
8285 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8286         .func                   = ftrace_count_snapshot,
8287         .print                  = ftrace_snapshot_print,
8288         .init                   = ftrace_snapshot_init,
8289         .free                   = ftrace_snapshot_free,
8290 };
8291
8292 static int
8293 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8294                                char *glob, char *cmd, char *param, int enable)
8295 {
8296         struct ftrace_probe_ops *ops;
8297         void *count = (void *)-1;
8298         char *number;
8299         int ret;
8300
8301         if (!tr)
8302                 return -ENODEV;
8303
8304         /* hash funcs only work with set_ftrace_filter */
8305         if (!enable)
8306                 return -EINVAL;
8307
8308         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8309
8310         if (glob[0] == '!')
8311                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8312
8313         if (!param)
8314                 goto out_reg;
8315
8316         number = strsep(&param, ":");
8317
8318         if (!strlen(number))
8319                 goto out_reg;
8320
8321         /*
8322          * We use the callback data field (which is a pointer)
8323          * as our counter.
8324          */
8325         ret = kstrtoul(number, 0, (unsigned long *)&count);
8326         if (ret)
8327                 return ret;
8328
8329  out_reg:
8330         ret = tracing_alloc_snapshot_instance(tr);
8331         if (ret < 0)
8332                 goto out;
8333
8334         ret = register_ftrace_function_probe(glob, tr, ops, count);
8335
8336  out:
8337         return ret < 0 ? ret : 0;
8338 }
8339
8340 static struct ftrace_func_command ftrace_snapshot_cmd = {
8341         .name                   = "snapshot",
8342         .func                   = ftrace_trace_snapshot_callback,
8343 };
8344
8345 static __init int register_snapshot_cmd(void)
8346 {
8347         return register_ftrace_command(&ftrace_snapshot_cmd);
8348 }
8349 #else
8350 static inline __init int register_snapshot_cmd(void) { return 0; }
8351 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8352
8353 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8354 {
8355         if (WARN_ON(!tr->dir))
8356                 return ERR_PTR(-ENODEV);
8357
8358         /* Top directory uses NULL as the parent */
8359         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8360                 return NULL;
8361
8362         /* All sub buffers have a descriptor */
8363         return tr->dir;
8364 }
8365
8366 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8367 {
8368         struct dentry *d_tracer;
8369
8370         if (tr->percpu_dir)
8371                 return tr->percpu_dir;
8372
8373         d_tracer = tracing_get_dentry(tr);
8374         if (IS_ERR(d_tracer))
8375                 return NULL;
8376
8377         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8378
8379         MEM_FAIL(!tr->percpu_dir,
8380                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8381
8382         return tr->percpu_dir;
8383 }
8384
8385 static struct dentry *
8386 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8387                       void *data, long cpu, const struct file_operations *fops)
8388 {
8389         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8390
8391         if (ret) /* See tracing_get_cpu() */
8392                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8393         return ret;
8394 }
8395
8396 static void
8397 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8398 {
8399         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8400         struct dentry *d_cpu;
8401         char cpu_dir[30]; /* 30 characters should be more than enough */
8402
8403         if (!d_percpu)
8404                 return;
8405
8406         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8407         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8408         if (!d_cpu) {
8409                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8410                 return;
8411         }
8412
8413         /* per cpu trace_pipe */
8414         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8415                                 tr, cpu, &tracing_pipe_fops);
8416
8417         /* per cpu trace */
8418         trace_create_cpu_file("trace", 0644, d_cpu,
8419                                 tr, cpu, &tracing_fops);
8420
8421         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8422                                 tr, cpu, &tracing_buffers_fops);
8423
8424         trace_create_cpu_file("stats", 0444, d_cpu,
8425                                 tr, cpu, &tracing_stats_fops);
8426
8427         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8428                                 tr, cpu, &tracing_entries_fops);
8429
8430 #ifdef CONFIG_TRACER_SNAPSHOT
8431         trace_create_cpu_file("snapshot", 0644, d_cpu,
8432                                 tr, cpu, &snapshot_fops);
8433
8434         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8435                                 tr, cpu, &snapshot_raw_fops);
8436 #endif
8437 }
8438
8439 #ifdef CONFIG_FTRACE_SELFTEST
8440 /* Let selftest have access to static functions in this file */
8441 #include "trace_selftest.c"
8442 #endif
8443
8444 static ssize_t
8445 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8446                         loff_t *ppos)
8447 {
8448         struct trace_option_dentry *topt = filp->private_data;
8449         char *buf;
8450
8451         if (topt->flags->val & topt->opt->bit)
8452                 buf = "1\n";
8453         else
8454                 buf = "0\n";
8455
8456         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8457 }
8458
8459 static ssize_t
8460 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8461                          loff_t *ppos)
8462 {
8463         struct trace_option_dentry *topt = filp->private_data;
8464         unsigned long val;
8465         int ret;
8466
8467         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8468         if (ret)
8469                 return ret;
8470
8471         if (val != 0 && val != 1)
8472                 return -EINVAL;
8473
8474         if (!!(topt->flags->val & topt->opt->bit) != val) {
8475                 mutex_lock(&trace_types_lock);
8476                 ret = __set_tracer_option(topt->tr, topt->flags,
8477                                           topt->opt, !val);
8478                 mutex_unlock(&trace_types_lock);
8479                 if (ret)
8480                         return ret;
8481         }
8482
8483         *ppos += cnt;
8484
8485         return cnt;
8486 }
8487
8488
8489 static const struct file_operations trace_options_fops = {
8490         .open = tracing_open_generic,
8491         .read = trace_options_read,
8492         .write = trace_options_write,
8493         .llseek = generic_file_llseek,
8494 };
8495
8496 /*
8497  * In order to pass in both the trace_array descriptor as well as the index
8498  * to the flag that the trace option file represents, the trace_array
8499  * has a character array of trace_flags_index[], which holds the index
8500  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8501  * The address of this character array is passed to the flag option file
8502  * read/write callbacks.
8503  *
8504  * In order to extract both the index and the trace_array descriptor,
8505  * get_tr_index() uses the following algorithm.
8506  *
8507  *   idx = *ptr;
8508  *
8509  * As the pointer itself contains the address of the index (remember
8510  * index[1] == 1).
8511  *
8512  * Then to get the trace_array descriptor, by subtracting that index
8513  * from the ptr, we get to the start of the index itself.
8514  *
8515  *   ptr - idx == &index[0]
8516  *
8517  * Then a simple container_of() from that pointer gets us to the
8518  * trace_array descriptor.
8519  */
8520 static void get_tr_index(void *data, struct trace_array **ptr,
8521                          unsigned int *pindex)
8522 {
8523         *pindex = *(unsigned char *)data;
8524
8525         *ptr = container_of(data - *pindex, struct trace_array,
8526                             trace_flags_index);
8527 }
8528
8529 static ssize_t
8530 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8531                         loff_t *ppos)
8532 {
8533         void *tr_index = filp->private_data;
8534         struct trace_array *tr;
8535         unsigned int index;
8536         char *buf;
8537
8538         get_tr_index(tr_index, &tr, &index);
8539
8540         if (tr->trace_flags & (1 << index))
8541                 buf = "1\n";
8542         else
8543                 buf = "0\n";
8544
8545         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8546 }
8547
8548 static ssize_t
8549 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8550                          loff_t *ppos)
8551 {
8552         void *tr_index = filp->private_data;
8553         struct trace_array *tr;
8554         unsigned int index;
8555         unsigned long val;
8556         int ret;
8557
8558         get_tr_index(tr_index, &tr, &index);
8559
8560         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8561         if (ret)
8562                 return ret;
8563
8564         if (val != 0 && val != 1)
8565                 return -EINVAL;
8566
8567         mutex_lock(&event_mutex);
8568         mutex_lock(&trace_types_lock);
8569         ret = set_tracer_flag(tr, 1 << index, val);
8570         mutex_unlock(&trace_types_lock);
8571         mutex_unlock(&event_mutex);
8572
8573         if (ret < 0)
8574                 return ret;
8575
8576         *ppos += cnt;
8577
8578         return cnt;
8579 }
8580
8581 static const struct file_operations trace_options_core_fops = {
8582         .open = tracing_open_generic,
8583         .read = trace_options_core_read,
8584         .write = trace_options_core_write,
8585         .llseek = generic_file_llseek,
8586 };
8587
8588 struct dentry *trace_create_file(const char *name,
8589                                  umode_t mode,
8590                                  struct dentry *parent,
8591                                  void *data,
8592                                  const struct file_operations *fops)
8593 {
8594         struct dentry *ret;
8595
8596         ret = tracefs_create_file(name, mode, parent, data, fops);
8597         if (!ret)
8598                 pr_warn("Could not create tracefs '%s' entry\n", name);
8599
8600         return ret;
8601 }
8602
8603
8604 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8605 {
8606         struct dentry *d_tracer;
8607
8608         if (tr->options)
8609                 return tr->options;
8610
8611         d_tracer = tracing_get_dentry(tr);
8612         if (IS_ERR(d_tracer))
8613                 return NULL;
8614
8615         tr->options = tracefs_create_dir("options", d_tracer);
8616         if (!tr->options) {
8617                 pr_warn("Could not create tracefs directory 'options'\n");
8618                 return NULL;
8619         }
8620
8621         return tr->options;
8622 }
8623
8624 static void
8625 create_trace_option_file(struct trace_array *tr,
8626                          struct trace_option_dentry *topt,
8627                          struct tracer_flags *flags,
8628                          struct tracer_opt *opt)
8629 {
8630         struct dentry *t_options;
8631
8632         t_options = trace_options_init_dentry(tr);
8633         if (!t_options)
8634                 return;
8635
8636         topt->flags = flags;
8637         topt->opt = opt;
8638         topt->tr = tr;
8639
8640         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8641                                     &trace_options_fops);
8642
8643 }
8644
8645 static void
8646 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8647 {
8648         struct trace_option_dentry *topts;
8649         struct trace_options *tr_topts;
8650         struct tracer_flags *flags;
8651         struct tracer_opt *opts;
8652         int cnt;
8653         int i;
8654
8655         if (!tracer)
8656                 return;
8657
8658         flags = tracer->flags;
8659
8660         if (!flags || !flags->opts)
8661                 return;
8662
8663         /*
8664          * If this is an instance, only create flags for tracers
8665          * the instance may have.
8666          */
8667         if (!trace_ok_for_array(tracer, tr))
8668                 return;
8669
8670         for (i = 0; i < tr->nr_topts; i++) {
8671                 /* Make sure there's no duplicate flags. */
8672                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8673                         return;
8674         }
8675
8676         opts = flags->opts;
8677
8678         for (cnt = 0; opts[cnt].name; cnt++)
8679                 ;
8680
8681         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8682         if (!topts)
8683                 return;
8684
8685         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8686                             GFP_KERNEL);
8687         if (!tr_topts) {
8688                 kfree(topts);
8689                 return;
8690         }
8691
8692         tr->topts = tr_topts;
8693         tr->topts[tr->nr_topts].tracer = tracer;
8694         tr->topts[tr->nr_topts].topts = topts;
8695         tr->nr_topts++;
8696
8697         for (cnt = 0; opts[cnt].name; cnt++) {
8698                 create_trace_option_file(tr, &topts[cnt], flags,
8699                                          &opts[cnt]);
8700                 MEM_FAIL(topts[cnt].entry == NULL,
8701                           "Failed to create trace option: %s",
8702                           opts[cnt].name);
8703         }
8704 }
8705
8706 static struct dentry *
8707 create_trace_option_core_file(struct trace_array *tr,
8708                               const char *option, long index)
8709 {
8710         struct dentry *t_options;
8711
8712         t_options = trace_options_init_dentry(tr);
8713         if (!t_options)
8714                 return NULL;
8715
8716         return trace_create_file(option, 0644, t_options,
8717                                  (void *)&tr->trace_flags_index[index],
8718                                  &trace_options_core_fops);
8719 }
8720
8721 static void create_trace_options_dir(struct trace_array *tr)
8722 {
8723         struct dentry *t_options;
8724         bool top_level = tr == &global_trace;
8725         int i;
8726
8727         t_options = trace_options_init_dentry(tr);
8728         if (!t_options)
8729                 return;
8730
8731         for (i = 0; trace_options[i]; i++) {
8732                 if (top_level ||
8733                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8734                         create_trace_option_core_file(tr, trace_options[i], i);
8735         }
8736 }
8737
8738 static ssize_t
8739 rb_simple_read(struct file *filp, char __user *ubuf,
8740                size_t cnt, loff_t *ppos)
8741 {
8742         struct trace_array *tr = filp->private_data;
8743         char buf[64];
8744         int r;
8745
8746         r = tracer_tracing_is_on(tr);
8747         r = sprintf(buf, "%d\n", r);
8748
8749         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8750 }
8751
8752 static ssize_t
8753 rb_simple_write(struct file *filp, const char __user *ubuf,
8754                 size_t cnt, loff_t *ppos)
8755 {
8756         struct trace_array *tr = filp->private_data;
8757         struct trace_buffer *buffer = tr->array_buffer.buffer;
8758         unsigned long val;
8759         int ret;
8760
8761         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8762         if (ret)
8763                 return ret;
8764
8765         if (buffer) {
8766                 mutex_lock(&trace_types_lock);
8767                 if (!!val == tracer_tracing_is_on(tr)) {
8768                         val = 0; /* do nothing */
8769                 } else if (val) {
8770                         tracer_tracing_on(tr);
8771                         if (tr->current_trace->start)
8772                                 tr->current_trace->start(tr);
8773                 } else {
8774                         tracer_tracing_off(tr);
8775                         if (tr->current_trace->stop)
8776                                 tr->current_trace->stop(tr);
8777                 }
8778                 mutex_unlock(&trace_types_lock);
8779         }
8780
8781         (*ppos)++;
8782
8783         return cnt;
8784 }
8785
8786 static const struct file_operations rb_simple_fops = {
8787         .open           = tracing_open_generic_tr,
8788         .read           = rb_simple_read,
8789         .write          = rb_simple_write,
8790         .release        = tracing_release_generic_tr,
8791         .llseek         = default_llseek,
8792 };
8793
8794 static ssize_t
8795 buffer_percent_read(struct file *filp, char __user *ubuf,
8796                     size_t cnt, loff_t *ppos)
8797 {
8798         struct trace_array *tr = filp->private_data;
8799         char buf[64];
8800         int r;
8801
8802         r = tr->buffer_percent;
8803         r = sprintf(buf, "%d\n", r);
8804
8805         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8806 }
8807
8808 static ssize_t
8809 buffer_percent_write(struct file *filp, const char __user *ubuf,
8810                      size_t cnt, loff_t *ppos)
8811 {
8812         struct trace_array *tr = filp->private_data;
8813         unsigned long val;
8814         int ret;
8815
8816         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8817         if (ret)
8818                 return ret;
8819
8820         if (val > 100)
8821                 return -EINVAL;
8822
8823         if (!val)
8824                 val = 1;
8825
8826         tr->buffer_percent = val;
8827
8828         (*ppos)++;
8829
8830         return cnt;
8831 }
8832
8833 static const struct file_operations buffer_percent_fops = {
8834         .open           = tracing_open_generic_tr,
8835         .read           = buffer_percent_read,
8836         .write          = buffer_percent_write,
8837         .release        = tracing_release_generic_tr,
8838         .llseek         = default_llseek,
8839 };
8840
8841 static struct dentry *trace_instance_dir;
8842
8843 static void
8844 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8845
8846 static int
8847 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8848 {
8849         enum ring_buffer_flags rb_flags;
8850
8851         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8852
8853         buf->tr = tr;
8854
8855         buf->buffer = ring_buffer_alloc(size, rb_flags);
8856         if (!buf->buffer)
8857                 return -ENOMEM;
8858
8859         buf->data = alloc_percpu(struct trace_array_cpu);
8860         if (!buf->data) {
8861                 ring_buffer_free(buf->buffer);
8862                 buf->buffer = NULL;
8863                 return -ENOMEM;
8864         }
8865
8866         /* Allocate the first page for all buffers */
8867         set_buffer_entries(&tr->array_buffer,
8868                            ring_buffer_size(tr->array_buffer.buffer, 0));
8869
8870         return 0;
8871 }
8872
8873 static int allocate_trace_buffers(struct trace_array *tr, int size)
8874 {
8875         int ret;
8876
8877         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8878         if (ret)
8879                 return ret;
8880
8881 #ifdef CONFIG_TRACER_MAX_TRACE
8882         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8883                                     allocate_snapshot ? size : 1);
8884         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8885                 ring_buffer_free(tr->array_buffer.buffer);
8886                 tr->array_buffer.buffer = NULL;
8887                 free_percpu(tr->array_buffer.data);
8888                 tr->array_buffer.data = NULL;
8889                 return -ENOMEM;
8890         }
8891         tr->allocated_snapshot = allocate_snapshot;
8892
8893         /*
8894          * Only the top level trace array gets its snapshot allocated
8895          * from the kernel command line.
8896          */
8897         allocate_snapshot = false;
8898 #endif
8899
8900         return 0;
8901 }
8902
8903 static void free_trace_buffer(struct array_buffer *buf)
8904 {
8905         if (buf->buffer) {
8906                 ring_buffer_free(buf->buffer);
8907                 buf->buffer = NULL;
8908                 free_percpu(buf->data);
8909                 buf->data = NULL;
8910         }
8911 }
8912
8913 static void free_trace_buffers(struct trace_array *tr)
8914 {
8915         if (!tr)
8916                 return;
8917
8918         free_trace_buffer(&tr->array_buffer);
8919
8920 #ifdef CONFIG_TRACER_MAX_TRACE
8921         free_trace_buffer(&tr->max_buffer);
8922 #endif
8923 }
8924
8925 static void init_trace_flags_index(struct trace_array *tr)
8926 {
8927         int i;
8928
8929         /* Used by the trace options files */
8930         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8931                 tr->trace_flags_index[i] = i;
8932 }
8933
8934 static void __update_tracer_options(struct trace_array *tr)
8935 {
8936         struct tracer *t;
8937
8938         for (t = trace_types; t; t = t->next)
8939                 add_tracer_options(tr, t);
8940 }
8941
8942 static void update_tracer_options(struct trace_array *tr)
8943 {
8944         mutex_lock(&trace_types_lock);
8945         __update_tracer_options(tr);
8946         mutex_unlock(&trace_types_lock);
8947 }
8948
8949 /* Must have trace_types_lock held */
8950 struct trace_array *trace_array_find(const char *instance)
8951 {
8952         struct trace_array *tr, *found = NULL;
8953
8954         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8955                 if (tr->name && strcmp(tr->name, instance) == 0) {
8956                         found = tr;
8957                         break;
8958                 }
8959         }
8960
8961         return found;
8962 }
8963
8964 struct trace_array *trace_array_find_get(const char *instance)
8965 {
8966         struct trace_array *tr;
8967
8968         mutex_lock(&trace_types_lock);
8969         tr = trace_array_find(instance);
8970         if (tr)
8971                 tr->ref++;
8972         mutex_unlock(&trace_types_lock);
8973
8974         return tr;
8975 }
8976
8977 static int trace_array_create_dir(struct trace_array *tr)
8978 {
8979         int ret;
8980
8981         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8982         if (!tr->dir)
8983                 return -EINVAL;
8984
8985         ret = event_trace_add_tracer(tr->dir, tr);
8986         if (ret)
8987                 tracefs_remove(tr->dir);
8988
8989         init_tracer_tracefs(tr, tr->dir);
8990         __update_tracer_options(tr);
8991
8992         return ret;
8993 }
8994
8995 static struct trace_array *trace_array_create(const char *name)
8996 {
8997         struct trace_array *tr;
8998         int ret;
8999
9000         ret = -ENOMEM;
9001         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9002         if (!tr)
9003                 return ERR_PTR(ret);
9004
9005         tr->name = kstrdup(name, GFP_KERNEL);
9006         if (!tr->name)
9007                 goto out_free_tr;
9008
9009         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9010                 goto out_free_tr;
9011
9012         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9013
9014         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9015
9016         raw_spin_lock_init(&tr->start_lock);
9017
9018         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9019
9020         tr->current_trace = &nop_trace;
9021
9022         INIT_LIST_HEAD(&tr->systems);
9023         INIT_LIST_HEAD(&tr->events);
9024         INIT_LIST_HEAD(&tr->hist_vars);
9025         INIT_LIST_HEAD(&tr->err_log);
9026
9027         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9028                 goto out_free_tr;
9029
9030         if (ftrace_allocate_ftrace_ops(tr) < 0)
9031                 goto out_free_tr;
9032
9033         ftrace_init_trace_array(tr);
9034
9035         init_trace_flags_index(tr);
9036
9037         if (trace_instance_dir) {
9038                 ret = trace_array_create_dir(tr);
9039                 if (ret)
9040                         goto out_free_tr;
9041         } else
9042                 __trace_early_add_events(tr);
9043
9044         list_add(&tr->list, &ftrace_trace_arrays);
9045
9046         tr->ref++;
9047
9048         return tr;
9049
9050  out_free_tr:
9051         ftrace_free_ftrace_ops(tr);
9052         free_trace_buffers(tr);
9053         free_cpumask_var(tr->tracing_cpumask);
9054         kfree(tr->name);
9055         kfree(tr);
9056
9057         return ERR_PTR(ret);
9058 }
9059
9060 static int instance_mkdir(const char *name)
9061 {
9062         struct trace_array *tr;
9063         int ret;
9064
9065         mutex_lock(&event_mutex);
9066         mutex_lock(&trace_types_lock);
9067
9068         ret = -EEXIST;
9069         if (trace_array_find(name))
9070                 goto out_unlock;
9071
9072         tr = trace_array_create(name);
9073
9074         ret = PTR_ERR_OR_ZERO(tr);
9075
9076 out_unlock:
9077         mutex_unlock(&trace_types_lock);
9078         mutex_unlock(&event_mutex);
9079         return ret;
9080 }
9081
9082 /**
9083  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9084  * @name: The name of the trace array to be looked up/created.
9085  *
9086  * Returns pointer to trace array with given name.
9087  * NULL, if it cannot be created.
9088  *
9089  * NOTE: This function increments the reference counter associated with the
9090  * trace array returned. This makes sure it cannot be freed while in use.
9091  * Use trace_array_put() once the trace array is no longer needed.
9092  * If the trace_array is to be freed, trace_array_destroy() needs to
9093  * be called after the trace_array_put(), or simply let user space delete
9094  * it from the tracefs instances directory. But until the
9095  * trace_array_put() is called, user space can not delete it.
9096  *
9097  */
9098 struct trace_array *trace_array_get_by_name(const char *name)
9099 {
9100         struct trace_array *tr;
9101
9102         mutex_lock(&event_mutex);
9103         mutex_lock(&trace_types_lock);
9104
9105         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9106                 if (tr->name && strcmp(tr->name, name) == 0)
9107                         goto out_unlock;
9108         }
9109
9110         tr = trace_array_create(name);
9111
9112         if (IS_ERR(tr))
9113                 tr = NULL;
9114 out_unlock:
9115         if (tr)
9116                 tr->ref++;
9117
9118         mutex_unlock(&trace_types_lock);
9119         mutex_unlock(&event_mutex);
9120         return tr;
9121 }
9122 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9123
9124 static int __remove_instance(struct trace_array *tr)
9125 {
9126         int i;
9127
9128         /* Reference counter for a newly created trace array = 1. */
9129         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9130                 return -EBUSY;
9131
9132         list_del(&tr->list);
9133
9134         /* Disable all the flags that were enabled coming in */
9135         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9136                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9137                         set_tracer_flag(tr, 1 << i, 0);
9138         }
9139
9140         tracing_set_nop(tr);
9141         clear_ftrace_function_probes(tr);
9142         event_trace_del_tracer(tr);
9143         ftrace_clear_pids(tr);
9144         ftrace_destroy_function_files(tr);
9145         tracefs_remove(tr->dir);
9146         free_percpu(tr->last_func_repeats);
9147         free_trace_buffers(tr);
9148
9149         for (i = 0; i < tr->nr_topts; i++) {
9150                 kfree(tr->topts[i].topts);
9151         }
9152         kfree(tr->topts);
9153
9154         free_cpumask_var(tr->tracing_cpumask);
9155         kfree(tr->name);
9156         kfree(tr);
9157
9158         return 0;
9159 }
9160
9161 int trace_array_destroy(struct trace_array *this_tr)
9162 {
9163         struct trace_array *tr;
9164         int ret;
9165
9166         if (!this_tr)
9167                 return -EINVAL;
9168
9169         mutex_lock(&event_mutex);
9170         mutex_lock(&trace_types_lock);
9171
9172         ret = -ENODEV;
9173
9174         /* Making sure trace array exists before destroying it. */
9175         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9176                 if (tr == this_tr) {
9177                         ret = __remove_instance(tr);
9178                         break;
9179                 }
9180         }
9181
9182         mutex_unlock(&trace_types_lock);
9183         mutex_unlock(&event_mutex);
9184
9185         return ret;
9186 }
9187 EXPORT_SYMBOL_GPL(trace_array_destroy);
9188
9189 static int instance_rmdir(const char *name)
9190 {
9191         struct trace_array *tr;
9192         int ret;
9193
9194         mutex_lock(&event_mutex);
9195         mutex_lock(&trace_types_lock);
9196
9197         ret = -ENODEV;
9198         tr = trace_array_find(name);
9199         if (tr)
9200                 ret = __remove_instance(tr);
9201
9202         mutex_unlock(&trace_types_lock);
9203         mutex_unlock(&event_mutex);
9204
9205         return ret;
9206 }
9207
9208 static __init void create_trace_instances(struct dentry *d_tracer)
9209 {
9210         struct trace_array *tr;
9211
9212         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9213                                                          instance_mkdir,
9214                                                          instance_rmdir);
9215         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9216                 return;
9217
9218         mutex_lock(&event_mutex);
9219         mutex_lock(&trace_types_lock);
9220
9221         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9222                 if (!tr->name)
9223                         continue;
9224                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9225                              "Failed to create instance directory\n"))
9226                         break;
9227         }
9228
9229         mutex_unlock(&trace_types_lock);
9230         mutex_unlock(&event_mutex);
9231 }
9232
9233 static void
9234 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9235 {
9236         struct trace_event_file *file;
9237         int cpu;
9238
9239         trace_create_file("available_tracers", 0444, d_tracer,
9240                         tr, &show_traces_fops);
9241
9242         trace_create_file("current_tracer", 0644, d_tracer,
9243                         tr, &set_tracer_fops);
9244
9245         trace_create_file("tracing_cpumask", 0644, d_tracer,
9246                           tr, &tracing_cpumask_fops);
9247
9248         trace_create_file("trace_options", 0644, d_tracer,
9249                           tr, &tracing_iter_fops);
9250
9251         trace_create_file("trace", 0644, d_tracer,
9252                           tr, &tracing_fops);
9253
9254         trace_create_file("trace_pipe", 0444, d_tracer,
9255                           tr, &tracing_pipe_fops);
9256
9257         trace_create_file("buffer_size_kb", 0644, d_tracer,
9258                           tr, &tracing_entries_fops);
9259
9260         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9261                           tr, &tracing_total_entries_fops);
9262
9263         trace_create_file("free_buffer", 0200, d_tracer,
9264                           tr, &tracing_free_buffer_fops);
9265
9266         trace_create_file("trace_marker", 0220, d_tracer,
9267                           tr, &tracing_mark_fops);
9268
9269         file = __find_event_file(tr, "ftrace", "print");
9270         if (file && file->dir)
9271                 trace_create_file("trigger", 0644, file->dir, file,
9272                                   &event_trigger_fops);
9273         tr->trace_marker_file = file;
9274
9275         trace_create_file("trace_marker_raw", 0220, d_tracer,
9276                           tr, &tracing_mark_raw_fops);
9277
9278         trace_create_file("trace_clock", 0644, d_tracer, tr,
9279                           &trace_clock_fops);
9280
9281         trace_create_file("tracing_on", 0644, d_tracer,
9282                           tr, &rb_simple_fops);
9283
9284         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9285                           &trace_time_stamp_mode_fops);
9286
9287         tr->buffer_percent = 50;
9288
9289         trace_create_file("buffer_percent", 0444, d_tracer,
9290                         tr, &buffer_percent_fops);
9291
9292         create_trace_options_dir(tr);
9293
9294 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9295         trace_create_maxlat_file(tr, d_tracer);
9296 #endif
9297
9298         if (ftrace_create_function_files(tr, d_tracer))
9299                 MEM_FAIL(1, "Could not allocate function filter files");
9300
9301 #ifdef CONFIG_TRACER_SNAPSHOT
9302         trace_create_file("snapshot", 0644, d_tracer,
9303                           tr, &snapshot_fops);
9304 #endif
9305
9306         trace_create_file("error_log", 0644, d_tracer,
9307                           tr, &tracing_err_log_fops);
9308
9309         for_each_tracing_cpu(cpu)
9310                 tracing_init_tracefs_percpu(tr, cpu);
9311
9312         ftrace_init_tracefs(tr, d_tracer);
9313 }
9314
9315 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9316 {
9317         struct vfsmount *mnt;
9318         struct file_system_type *type;
9319
9320         /*
9321          * To maintain backward compatibility for tools that mount
9322          * debugfs to get to the tracing facility, tracefs is automatically
9323          * mounted to the debugfs/tracing directory.
9324          */
9325         type = get_fs_type("tracefs");
9326         if (!type)
9327                 return NULL;
9328         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9329         put_filesystem(type);
9330         if (IS_ERR(mnt))
9331                 return NULL;
9332         mntget(mnt);
9333
9334         return mnt;
9335 }
9336
9337 /**
9338  * tracing_init_dentry - initialize top level trace array
9339  *
9340  * This is called when creating files or directories in the tracing
9341  * directory. It is called via fs_initcall() by any of the boot up code
9342  * and expects to return the dentry of the top level tracing directory.
9343  */
9344 int tracing_init_dentry(void)
9345 {
9346         struct trace_array *tr = &global_trace;
9347
9348         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9349                 pr_warn("Tracing disabled due to lockdown\n");
9350                 return -EPERM;
9351         }
9352
9353         /* The top level trace array uses  NULL as parent */
9354         if (tr->dir)
9355                 return 0;
9356
9357         if (WARN_ON(!tracefs_initialized()))
9358                 return -ENODEV;
9359
9360         /*
9361          * As there may still be users that expect the tracing
9362          * files to exist in debugfs/tracing, we must automount
9363          * the tracefs file system there, so older tools still
9364          * work with the newer kernel.
9365          */
9366         tr->dir = debugfs_create_automount("tracing", NULL,
9367                                            trace_automount, NULL);
9368
9369         return 0;
9370 }
9371
9372 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9373 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9374
9375 static struct workqueue_struct *eval_map_wq __initdata;
9376 static struct work_struct eval_map_work __initdata;
9377
9378 static void __init eval_map_work_func(struct work_struct *work)
9379 {
9380         int len;
9381
9382         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9383         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9384 }
9385
9386 static int __init trace_eval_init(void)
9387 {
9388         INIT_WORK(&eval_map_work, eval_map_work_func);
9389
9390         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9391         if (!eval_map_wq) {
9392                 pr_err("Unable to allocate eval_map_wq\n");
9393                 /* Do work here */
9394                 eval_map_work_func(&eval_map_work);
9395                 return -ENOMEM;
9396         }
9397
9398         queue_work(eval_map_wq, &eval_map_work);
9399         return 0;
9400 }
9401
9402 static int __init trace_eval_sync(void)
9403 {
9404         /* Make sure the eval map updates are finished */
9405         if (eval_map_wq)
9406                 destroy_workqueue(eval_map_wq);
9407         return 0;
9408 }
9409
9410 late_initcall_sync(trace_eval_sync);
9411
9412
9413 #ifdef CONFIG_MODULES
9414 static void trace_module_add_evals(struct module *mod)
9415 {
9416         if (!mod->num_trace_evals)
9417                 return;
9418
9419         /*
9420          * Modules with bad taint do not have events created, do
9421          * not bother with enums either.
9422          */
9423         if (trace_module_has_bad_taint(mod))
9424                 return;
9425
9426         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9427 }
9428
9429 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9430 static void trace_module_remove_evals(struct module *mod)
9431 {
9432         union trace_eval_map_item *map;
9433         union trace_eval_map_item **last = &trace_eval_maps;
9434
9435         if (!mod->num_trace_evals)
9436                 return;
9437
9438         mutex_lock(&trace_eval_mutex);
9439
9440         map = trace_eval_maps;
9441
9442         while (map) {
9443                 if (map->head.mod == mod)
9444                         break;
9445                 map = trace_eval_jmp_to_tail(map);
9446                 last = &map->tail.next;
9447                 map = map->tail.next;
9448         }
9449         if (!map)
9450                 goto out;
9451
9452         *last = trace_eval_jmp_to_tail(map)->tail.next;
9453         kfree(map);
9454  out:
9455         mutex_unlock(&trace_eval_mutex);
9456 }
9457 #else
9458 static inline void trace_module_remove_evals(struct module *mod) { }
9459 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9460
9461 static int trace_module_notify(struct notifier_block *self,
9462                                unsigned long val, void *data)
9463 {
9464         struct module *mod = data;
9465
9466         switch (val) {
9467         case MODULE_STATE_COMING:
9468                 trace_module_add_evals(mod);
9469                 break;
9470         case MODULE_STATE_GOING:
9471                 trace_module_remove_evals(mod);
9472                 break;
9473         }
9474
9475         return NOTIFY_OK;
9476 }
9477
9478 static struct notifier_block trace_module_nb = {
9479         .notifier_call = trace_module_notify,
9480         .priority = 0,
9481 };
9482 #endif /* CONFIG_MODULES */
9483
9484 static __init int tracer_init_tracefs(void)
9485 {
9486         int ret;
9487
9488         trace_access_lock_init();
9489
9490         ret = tracing_init_dentry();
9491         if (ret)
9492                 return 0;
9493
9494         event_trace_init();
9495
9496         init_tracer_tracefs(&global_trace, NULL);
9497         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9498
9499         trace_create_file("tracing_thresh", 0644, NULL,
9500                         &global_trace, &tracing_thresh_fops);
9501
9502         trace_create_file("README", 0444, NULL,
9503                         NULL, &tracing_readme_fops);
9504
9505         trace_create_file("saved_cmdlines", 0444, NULL,
9506                         NULL, &tracing_saved_cmdlines_fops);
9507
9508         trace_create_file("saved_cmdlines_size", 0644, NULL,
9509                           NULL, &tracing_saved_cmdlines_size_fops);
9510
9511         trace_create_file("saved_tgids", 0444, NULL,
9512                         NULL, &tracing_saved_tgids_fops);
9513
9514         trace_eval_init();
9515
9516         trace_create_eval_file(NULL);
9517
9518 #ifdef CONFIG_MODULES
9519         register_module_notifier(&trace_module_nb);
9520 #endif
9521
9522 #ifdef CONFIG_DYNAMIC_FTRACE
9523         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9524                         NULL, &tracing_dyn_info_fops);
9525 #endif
9526
9527         create_trace_instances(NULL);
9528
9529         update_tracer_options(&global_trace);
9530
9531         return 0;
9532 }
9533
9534 static int trace_panic_handler(struct notifier_block *this,
9535                                unsigned long event, void *unused)
9536 {
9537         if (ftrace_dump_on_oops)
9538                 ftrace_dump(ftrace_dump_on_oops);
9539         return NOTIFY_OK;
9540 }
9541
9542 static struct notifier_block trace_panic_notifier = {
9543         .notifier_call  = trace_panic_handler,
9544         .next           = NULL,
9545         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9546 };
9547
9548 static int trace_die_handler(struct notifier_block *self,
9549                              unsigned long val,
9550                              void *data)
9551 {
9552         switch (val) {
9553         case DIE_OOPS:
9554                 if (ftrace_dump_on_oops)
9555                         ftrace_dump(ftrace_dump_on_oops);
9556                 break;
9557         default:
9558                 break;
9559         }
9560         return NOTIFY_OK;
9561 }
9562
9563 static struct notifier_block trace_die_notifier = {
9564         .notifier_call = trace_die_handler,
9565         .priority = 200
9566 };
9567
9568 /*
9569  * printk is set to max of 1024, we really don't need it that big.
9570  * Nothing should be printing 1000 characters anyway.
9571  */
9572 #define TRACE_MAX_PRINT         1000
9573
9574 /*
9575  * Define here KERN_TRACE so that we have one place to modify
9576  * it if we decide to change what log level the ftrace dump
9577  * should be at.
9578  */
9579 #define KERN_TRACE              KERN_EMERG
9580
9581 void
9582 trace_printk_seq(struct trace_seq *s)
9583 {
9584         /* Probably should print a warning here. */
9585         if (s->seq.len >= TRACE_MAX_PRINT)
9586                 s->seq.len = TRACE_MAX_PRINT;
9587
9588         /*
9589          * More paranoid code. Although the buffer size is set to
9590          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9591          * an extra layer of protection.
9592          */
9593         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9594                 s->seq.len = s->seq.size - 1;
9595
9596         /* should be zero ended, but we are paranoid. */
9597         s->buffer[s->seq.len] = 0;
9598
9599         printk(KERN_TRACE "%s", s->buffer);
9600
9601         trace_seq_init(s);
9602 }
9603
9604 void trace_init_global_iter(struct trace_iterator *iter)
9605 {
9606         iter->tr = &global_trace;
9607         iter->trace = iter->tr->current_trace;
9608         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9609         iter->array_buffer = &global_trace.array_buffer;
9610
9611         if (iter->trace && iter->trace->open)
9612                 iter->trace->open(iter);
9613
9614         /* Annotate start of buffers if we had overruns */
9615         if (ring_buffer_overruns(iter->array_buffer->buffer))
9616                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9617
9618         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9619         if (trace_clocks[iter->tr->clock_id].in_ns)
9620                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9621 }
9622
9623 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9624 {
9625         /* use static because iter can be a bit big for the stack */
9626         static struct trace_iterator iter;
9627         static atomic_t dump_running;
9628         struct trace_array *tr = &global_trace;
9629         unsigned int old_userobj;
9630         unsigned long flags;
9631         int cnt = 0, cpu;
9632
9633         /* Only allow one dump user at a time. */
9634         if (atomic_inc_return(&dump_running) != 1) {
9635                 atomic_dec(&dump_running);
9636                 return;
9637         }
9638
9639         /*
9640          * Always turn off tracing when we dump.
9641          * We don't need to show trace output of what happens
9642          * between multiple crashes.
9643          *
9644          * If the user does a sysrq-z, then they can re-enable
9645          * tracing with echo 1 > tracing_on.
9646          */
9647         tracing_off();
9648
9649         local_irq_save(flags);
9650         printk_nmi_direct_enter();
9651
9652         /* Simulate the iterator */
9653         trace_init_global_iter(&iter);
9654         /* Can not use kmalloc for iter.temp and iter.fmt */
9655         iter.temp = static_temp_buf;
9656         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9657         iter.fmt = static_fmt_buf;
9658         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9659
9660         for_each_tracing_cpu(cpu) {
9661                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9662         }
9663
9664         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9665
9666         /* don't look at user memory in panic mode */
9667         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9668
9669         switch (oops_dump_mode) {
9670         case DUMP_ALL:
9671                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9672                 break;
9673         case DUMP_ORIG:
9674                 iter.cpu_file = raw_smp_processor_id();
9675                 break;
9676         case DUMP_NONE:
9677                 goto out_enable;
9678         default:
9679                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9680                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9681         }
9682
9683         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9684
9685         /* Did function tracer already get disabled? */
9686         if (ftrace_is_dead()) {
9687                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9688                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9689         }
9690
9691         /*
9692          * We need to stop all tracing on all CPUS to read
9693          * the next buffer. This is a bit expensive, but is
9694          * not done often. We fill all what we can read,
9695          * and then release the locks again.
9696          */
9697
9698         while (!trace_empty(&iter)) {
9699
9700                 if (!cnt)
9701                         printk(KERN_TRACE "---------------------------------\n");
9702
9703                 cnt++;
9704
9705                 trace_iterator_reset(&iter);
9706                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9707
9708                 if (trace_find_next_entry_inc(&iter) != NULL) {
9709                         int ret;
9710
9711                         ret = print_trace_line(&iter);
9712                         if (ret != TRACE_TYPE_NO_CONSUME)
9713                                 trace_consume(&iter);
9714                 }
9715                 touch_nmi_watchdog();
9716
9717                 trace_printk_seq(&iter.seq);
9718         }
9719
9720         if (!cnt)
9721                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9722         else
9723                 printk(KERN_TRACE "---------------------------------\n");
9724
9725  out_enable:
9726         tr->trace_flags |= old_userobj;
9727
9728         for_each_tracing_cpu(cpu) {
9729                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9730         }
9731         atomic_dec(&dump_running);
9732         printk_nmi_direct_exit();
9733         local_irq_restore(flags);
9734 }
9735 EXPORT_SYMBOL_GPL(ftrace_dump);
9736
9737 #define WRITE_BUFSIZE  4096
9738
9739 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9740                                 size_t count, loff_t *ppos,
9741                                 int (*createfn)(const char *))
9742 {
9743         char *kbuf, *buf, *tmp;
9744         int ret = 0;
9745         size_t done = 0;
9746         size_t size;
9747
9748         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9749         if (!kbuf)
9750                 return -ENOMEM;
9751
9752         while (done < count) {
9753                 size = count - done;
9754
9755                 if (size >= WRITE_BUFSIZE)
9756                         size = WRITE_BUFSIZE - 1;
9757
9758                 if (copy_from_user(kbuf, buffer + done, size)) {
9759                         ret = -EFAULT;
9760                         goto out;
9761                 }
9762                 kbuf[size] = '\0';
9763                 buf = kbuf;
9764                 do {
9765                         tmp = strchr(buf, '\n');
9766                         if (tmp) {
9767                                 *tmp = '\0';
9768                                 size = tmp - buf + 1;
9769                         } else {
9770                                 size = strlen(buf);
9771                                 if (done + size < count) {
9772                                         if (buf != kbuf)
9773                                                 break;
9774                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9775                                         pr_warn("Line length is too long: Should be less than %d\n",
9776                                                 WRITE_BUFSIZE - 2);
9777                                         ret = -EINVAL;
9778                                         goto out;
9779                                 }
9780                         }
9781                         done += size;
9782
9783                         /* Remove comments */
9784                         tmp = strchr(buf, '#');
9785
9786                         if (tmp)
9787                                 *tmp = '\0';
9788
9789                         ret = createfn(buf);
9790                         if (ret)
9791                                 goto out;
9792                         buf += size;
9793
9794                 } while (done < count);
9795         }
9796         ret = done;
9797
9798 out:
9799         kfree(kbuf);
9800
9801         return ret;
9802 }
9803
9804 __init static int tracer_alloc_buffers(void)
9805 {
9806         int ring_buf_size;
9807         int ret = -ENOMEM;
9808
9809
9810         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9811                 pr_warn("Tracing disabled due to lockdown\n");
9812                 return -EPERM;
9813         }
9814
9815         /*
9816          * Make sure we don't accidentally add more trace options
9817          * than we have bits for.
9818          */
9819         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9820
9821         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9822                 goto out;
9823
9824         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9825                 goto out_free_buffer_mask;
9826
9827         /* Only allocate trace_printk buffers if a trace_printk exists */
9828         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9829                 /* Must be called before global_trace.buffer is allocated */
9830                 trace_printk_init_buffers();
9831
9832         /* To save memory, keep the ring buffer size to its minimum */
9833         if (ring_buffer_expanded)
9834                 ring_buf_size = trace_buf_size;
9835         else
9836                 ring_buf_size = 1;
9837
9838         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9839         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9840
9841         raw_spin_lock_init(&global_trace.start_lock);
9842
9843         /*
9844          * The prepare callbacks allocates some memory for the ring buffer. We
9845          * don't free the buffer if the CPU goes down. If we were to free
9846          * the buffer, then the user would lose any trace that was in the
9847          * buffer. The memory will be removed once the "instance" is removed.
9848          */
9849         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9850                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9851                                       NULL);
9852         if (ret < 0)
9853                 goto out_free_cpumask;
9854         /* Used for event triggers */
9855         ret = -ENOMEM;
9856         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9857         if (!temp_buffer)
9858                 goto out_rm_hp_state;
9859
9860         if (trace_create_savedcmd() < 0)
9861                 goto out_free_temp_buffer;
9862
9863         /* TODO: make the number of buffers hot pluggable with CPUS */
9864         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9865                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9866                 goto out_free_savedcmd;
9867         }
9868
9869         if (global_trace.buffer_disabled)
9870                 tracing_off();
9871
9872         if (trace_boot_clock) {
9873                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9874                 if (ret < 0)
9875                         pr_warn("Trace clock %s not defined, going back to default\n",
9876                                 trace_boot_clock);
9877         }
9878
9879         /*
9880          * register_tracer() might reference current_trace, so it
9881          * needs to be set before we register anything. This is
9882          * just a bootstrap of current_trace anyway.
9883          */
9884         global_trace.current_trace = &nop_trace;
9885
9886         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9887
9888         ftrace_init_global_array_ops(&global_trace);
9889
9890         init_trace_flags_index(&global_trace);
9891
9892         register_tracer(&nop_trace);
9893
9894         /* Function tracing may start here (via kernel command line) */
9895         init_function_trace();
9896
9897         /* All seems OK, enable tracing */
9898         tracing_disabled = 0;
9899
9900         atomic_notifier_chain_register(&panic_notifier_list,
9901                                        &trace_panic_notifier);
9902
9903         register_die_notifier(&trace_die_notifier);
9904
9905         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9906
9907         INIT_LIST_HEAD(&global_trace.systems);
9908         INIT_LIST_HEAD(&global_trace.events);
9909         INIT_LIST_HEAD(&global_trace.hist_vars);
9910         INIT_LIST_HEAD(&global_trace.err_log);
9911         list_add(&global_trace.list, &ftrace_trace_arrays);
9912
9913         apply_trace_boot_options();
9914
9915         register_snapshot_cmd();
9916
9917         test_can_verify();
9918
9919         return 0;
9920
9921 out_free_savedcmd:
9922         free_saved_cmdlines_buffer(savedcmd);
9923 out_free_temp_buffer:
9924         ring_buffer_free(temp_buffer);
9925 out_rm_hp_state:
9926         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9927 out_free_cpumask:
9928         free_cpumask_var(global_trace.tracing_cpumask);
9929 out_free_buffer_mask:
9930         free_cpumask_var(tracing_buffer_mask);
9931 out:
9932         return ret;
9933 }
9934
9935 void __init early_trace_init(void)
9936 {
9937         if (tracepoint_printk) {
9938                 tracepoint_print_iter =
9939                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9940                 if (MEM_FAIL(!tracepoint_print_iter,
9941                              "Failed to allocate trace iterator\n"))
9942                         tracepoint_printk = 0;
9943                 else
9944                         static_key_enable(&tracepoint_printk_key.key);
9945         }
9946         tracer_alloc_buffers();
9947 }
9948
9949 void __init trace_init(void)
9950 {
9951         trace_event_init();
9952 }
9953
9954 __init static int clear_boot_tracer(void)
9955 {
9956         /*
9957          * The default tracer at boot buffer is an init section.
9958          * This function is called in lateinit. If we did not
9959          * find the boot tracer, then clear it out, to prevent
9960          * later registration from accessing the buffer that is
9961          * about to be freed.
9962          */
9963         if (!default_bootup_tracer)
9964                 return 0;
9965
9966         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9967                default_bootup_tracer);
9968         default_bootup_tracer = NULL;
9969
9970         return 0;
9971 }
9972
9973 fs_initcall(tracer_init_tracefs);
9974 late_initcall_sync(clear_boot_tracer);
9975
9976 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9977 __init static int tracing_set_default_clock(void)
9978 {
9979         /* sched_clock_stable() is determined in late_initcall */
9980         if (!trace_boot_clock && !sched_clock_stable()) {
9981                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9982                         pr_warn("Can not set tracing clock due to lockdown\n");
9983                         return -EPERM;
9984                 }
9985
9986                 printk(KERN_WARNING
9987                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9988                        "If you want to keep using the local clock, then add:\n"
9989                        "  \"trace_clock=local\"\n"
9990                        "on the kernel command line\n");
9991                 tracing_set_clock(&global_trace, "global");
9992         }
9993
9994         return 0;
9995 }
9996 late_initcall_sync(tracing_set_default_clock);
9997 #endif