1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing (including tracers/events via the kernel cmdline)
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump the buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
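/*
 * Illustrative layout sketch of one saved array, assuming a module that
 * contributed three eval maps (the count of three is hypothetical):
 *
 *	item[0].head	= { .mod = <module>, .length = 3 }
 *	item[1].map	= first saved trace_eval_map
 *	item[2].map	= second saved trace_eval_map
 *	item[3].map	= third saved trace_eval_map
 *	item[4].tail	= { .next = <next saved array, or NULL> }
 *
 * Walking trace_eval_maps therefore means skipping the head item, stepping
 * through the middle map items, and following tail.next once the end of a
 * chunk is reached.
 */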
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
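/*
 * For example, this rounds to the nearest microsecond:
 *	ns2usecs(1499) == (1499 + 500) / 1000 == 1
 *	ns2usecs(1500) == (1500 + 500) / 1000 == 2
 */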
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are entering export into the list but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included in the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
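/*
 * Usage sketch (illustrative, not part of this file): a module that wants
 * trace data pushed to it registers a struct trace_export.  The write()
 * callback receives the raw entry and its length as passed by
 * trace_process_export() above; see <linux/trace.h> for the exact
 * prototype.  The names below are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		... forward the entry to some transport ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */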
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * When adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
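/*
 * The three helpers above are meant to back a seq_file interface.  A
 * minimal sketch (hypothetical names; the ->stop() callback is whatever
 * unlocking the caller needs, possibly a no-op):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(some_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(some_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */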
667
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always recreate a new array. The write is an all or nothing
689          * operation. Always create a new array when adding new pids by
690          * the user. If the operation fails, then the current list is
691          * not modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" so that it can be used in fast paths,
790  * such as by the irqsoff tracer. But it may be inaccurate due to races.
791  * If you need to know the accurate state, use tracing_is_on(), which is
792  * a little slower but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If a dump on oops happens, it is much appreciated not to have
812  * to wait for all that output. Anyway, this is configurable both
813  * at boot time and at run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * serialize the access of the ring buffer
829  *
830  * The ring buffer serializes readers, but it is low level protection.
831  * The validity of the events (which are returned by ring_buffer_peek() etc.)
832  * is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow another process to
835  * consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not reader page) in ring buffer, and this page will be rewritten
838  *      by events producer.
839  *   B) The page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to system.
841  *
842  * These primitives allow multiple processes to access different per-cpu
843  * ring buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
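/*
 * Typical use of the helpers above, as done by the trace file read paths
 * later in this file: readers of a single per-cpu buffer pass that cpu,
 * while readers of every buffer pass RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from the cpu buffer ...
 *	trace_access_unlock(cpu);
 */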
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races where it gets disabled while we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
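/*
 * Note: callers normally do not use __trace_puts()/__trace_bputs()
 * directly; the trace_puts() macro in the tracing headers picks between
 * them depending on whether the string is a build-time constant, e.g.:
 *
 *	trace_puts("reached the interesting spot\n");
 */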
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, snapshot can not be used when the tracer uses it */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot either with
1142  * tracing_snapshot_alloc(), or manually with:
1143  * echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, this will stop tracing,
1146  * basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already taken.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want to preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
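/*
 * Usage sketch (illustrative; something_went_wrong is a hypothetical
 * condition): a debugging patch could allocate the snapshot buffer once
 * from a context that may sleep, then snapshot the live trace whenever its
 * condition of interest fires (any context except NMI, as checked above):
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (something_went_wrong)
 *		tracing_snapshot();
 */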
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races where it gets disabled while we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show the real state of the ring buffer
1459  * @tr : the trace array whose ring buffer state is queried
1460  *
1461  * Shows the real state of the ring buffer: whether it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
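/*
 * Illustrative use of the pair above (do_the_interesting_thing() is a
 * hypothetical stand-in): code that only wants a short window of activity
 * in the ring buffer can bracket it so later activity does not overwrite
 * the captured events:
 *
 *	tracing_on();
 *	do_the_interesting_thing();
 *	tracing_off();
 */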
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * The parser is not finished with the last write,
1607          * continue reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got finished input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
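
/*
 * Illustrative sketch (hypothetical write handler, not part of this file):
 * the usual way the parser helpers above are combined when a tracefs file
 * accepts space-separated names from user space. The handler name and the
 * buffer size are made up for the example.
 */
static ssize_t example_names_write(const char __user *ubuf, size_t cnt,
				   loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
	}

	trace_parser_put(&parser);

	return read;
}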
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* record this task's comm */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
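
/*
 * Illustrative sketch (hypothetical helper, not part of this file): a
 * latency tracer typically compares a freshly measured latency against
 * tr->max_latency and, on a new maximum, snapshots the buffers with
 * update_max_tr(). Interrupts are assumed to be disabled by the caller.
 */
static void example_report_latency(struct trace_array *tr,
				   unsigned long latency, int cpu)
{
	if (latency <= tr->max_latency)
		return;

	tr->max_latency = latency;
	update_max_tr(tr, current, cpu, NULL);
}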
1836
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record, but we reset
1868                  * the max trace buffer (no one writes directly to it)
1869                  * and flag that it failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static; they should be either filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftests yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * let's make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /* allocate a dummy tracer_flags */
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* disable other selftests, since running this tracer will break them. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
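
/*
 * Illustrative sketch (hypothetical tracer, not part of this file): the
 * minimum a plugin tracer supplies before handing itself to
 * register_tracer() from an __init function. All "example_*" names are
 * made up.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* set up tracer state here; nothing to do in this sketch */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* undo whatever example_tracer_init() set up */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}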
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 static int *tgid_map;
2188
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194         unsigned *map_cmdline_to_pid;
2195         unsigned cmdline_num;
2196         int cmdline_idx;
2197         char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200
2201 /* temporarily disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215                                     struct saved_cmdlines_buffer *s)
2216 {
2217         s->map_cmdline_to_pid = kmalloc_array(val,
2218                                               sizeof(*s->map_cmdline_to_pid),
2219                                               GFP_KERNEL);
2220         if (!s->map_cmdline_to_pid)
2221                 return -ENOMEM;
2222
2223         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224         if (!s->saved_cmdlines) {
2225                 kfree(s->map_cmdline_to_pid);
2226                 return -ENOMEM;
2227         }
2228
2229         s->cmdline_idx = 0;
2230         s->cmdline_num = val;
2231         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232                sizeof(s->map_pid_to_cmdline));
2233         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234                val * sizeof(*s->map_cmdline_to_pid));
2235
2236         return 0;
2237 }
2238
2239 static int trace_create_savedcmd(void)
2240 {
2241         int ret;
2242
2243         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244         if (!savedcmd)
2245                 return -ENOMEM;
2246
2247         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248         if (ret < 0) {
2249                 kfree(savedcmd);
2250                 savedcmd = NULL;
2251                 return -ENOMEM;
2252         }
2253
2254         return 0;
2255 }
2256
2257 int is_tracing_stopped(void)
2258 {
2259         return global_trace.stop_count;
2260 }
2261
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270         struct trace_buffer *buffer;
2271         unsigned long flags;
2272
2273         if (tracing_disabled)
2274                 return;
2275
2276         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277         if (--global_trace.stop_count) {
2278                 if (global_trace.stop_count < 0) {
2279                         /* Someone screwed up their debugging */
2280                         WARN_ON_ONCE(1);
2281                         global_trace.stop_count = 0;
2282                 }
2283                 goto out;
2284         }
2285
2286         /* Prevent the buffers from switching */
2287         arch_spin_lock(&global_trace.max_lock);
2288
2289         buffer = global_trace.array_buffer.buffer;
2290         if (buffer)
2291                 ring_buffer_record_enable(buffer);
2292
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294         buffer = global_trace.max_buffer.buffer;
2295         if (buffer)
2296                 ring_buffer_record_enable(buffer);
2297 #endif
2298
2299         arch_spin_unlock(&global_trace.max_lock);
2300
2301  out:
2302         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307         struct trace_buffer *buffer;
2308         unsigned long flags;
2309
2310         if (tracing_disabled)
2311                 return;
2312
2313         /* If global, we need to also start the max tracer */
2314         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315                 return tracing_start();
2316
2317         raw_spin_lock_irqsave(&tr->start_lock, flags);
2318
2319         if (--tr->stop_count) {
2320                 if (tr->stop_count < 0) {
2321                         /* Someone screwed up their debugging */
2322                         WARN_ON_ONCE(1);
2323                         tr->stop_count = 0;
2324                 }
2325                 goto out;
2326         }
2327
2328         buffer = tr->array_buffer.buffer;
2329         if (buffer)
2330                 ring_buffer_record_enable(buffer);
2331
2332  out:
2333         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348         if (global_trace.stop_count++)
2349                 goto out;
2350
2351         /* Prevent the buffers from switching */
2352         arch_spin_lock(&global_trace.max_lock);
2353
2354         buffer = global_trace.array_buffer.buffer;
2355         if (buffer)
2356                 ring_buffer_record_disable(buffer);
2357
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359         buffer = global_trace.max_buffer.buffer;
2360         if (buffer)
2361                 ring_buffer_record_disable(buffer);
2362 #endif
2363
2364         arch_spin_unlock(&global_trace.max_lock);
2365
2366  out:
2367         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
2369
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372         struct trace_buffer *buffer;
2373         unsigned long flags;
2374
2375         /* If global, we need to also stop the max tracer */
2376         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377                 return tracing_stop();
2378
2379         raw_spin_lock_irqsave(&tr->start_lock, flags);
2380         if (tr->stop_count++)
2381                 goto out;
2382
2383         buffer = tr->array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387  out:
2388         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393         unsigned tpid, idx;
2394
2395         /* treat recording of idle task as a success */
2396         if (!tsk->pid)
2397                 return 1;
2398
2399         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400
2401         /*
2402          * It's not the end of the world if we don't get
2403          * the lock, but we also don't want to spin
2404          * nor do we want to disable interrupts,
2405          * so if we miss here, then better luck next time.
2406          */
2407         if (!arch_spin_trylock(&trace_cmdline_lock))
2408                 return 0;
2409
2410         idx = savedcmd->map_pid_to_cmdline[tpid];
2411         if (idx == NO_CMDLINE_MAP) {
2412                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413
2414                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2415                 savedcmd->cmdline_idx = idx;
2416         }
2417
2418         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419         set_cmdline(idx, tsk->comm);
2420
2421         arch_spin_unlock(&trace_cmdline_lock);
2422
2423         return 1;
2424 }
2425
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428         unsigned map;
2429         int tpid;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         tpid = pid & (PID_MAX_DEFAULT - 1);
2442         map = savedcmd->map_pid_to_cmdline[tpid];
2443         if (map != NO_CMDLINE_MAP) {
2444                 tpid = savedcmd->map_cmdline_to_pid[map];
2445                 if (tpid == pid) {
2446                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447                         return;
2448                 }
2449         }
2450         strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
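
/*
 * Illustrative sketch (hypothetical caller, not part of this file): output
 * code resolves a recorded pid back to a comm with trace_find_cmdline(),
 * getting "<...>" when the cached entry has since been recycled.
 */
static void example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}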
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry again soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry again soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578
2579 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2580 {
2581         unsigned int trace_flags = irqs_status;
2582         unsigned int pc;
2583
2584         pc = preempt_count();
2585
2586         if (pc & NMI_MASK)
2587                 trace_flags |= TRACE_FLAG_NMI;
2588         if (pc & HARDIRQ_MASK)
2589                 trace_flags |= TRACE_FLAG_HARDIRQ;
2590         if (in_serving_softirq())
2591                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2592
2593         if (tif_need_resched())
2594                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2595         if (test_preempt_need_resched())
2596                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2597         return (trace_flags << 16) | (pc & 0xff);
2598 }
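
/*
 * Illustrative sketch (hypothetical helpers, not part of this file): the
 * packed value returned above is consumed by splitting it back into the
 * TRACE_FLAG_* bits (upper 16 bits) and the preempt count snapshot (low
 * byte).
 */
static inline unsigned int example_ctx_flags(unsigned int trace_ctx)
{
	return trace_ctx >> 16;
}

static inline unsigned int example_ctx_preempt_count(unsigned int trace_ctx)
{
	return trace_ctx & 0xff;
}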
2599
2600 struct ring_buffer_event *
2601 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2602                           int type,
2603                           unsigned long len,
2604                           unsigned int trace_ctx)
2605 {
2606         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2607 }
2608
2609 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2610 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2611 static int trace_buffered_event_ref;
2612
2613 /**
2614  * trace_buffered_event_enable - enable buffering events
2615  *
2616  * When events are being filtered, it is quicker to use a temporary
2617  * buffer to write the event data into if there's a likely chance
2618  * that it will not be committed. The discard of the ring buffer
2619  * is not as fast as committing, and is much slower than copying
2620  * a commit.
2621  *
2622  * When an event is to be filtered, allocate per cpu buffers to
2623  * write the event data into, and if the event is filtered and discarded
2624  * it is simply dropped, otherwise, the entire data is to be committed
2625  * in one shot.
2626  */
2627 void trace_buffered_event_enable(void)
2628 {
2629         struct ring_buffer_event *event;
2630         struct page *page;
2631         int cpu;
2632
2633         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2634
2635         if (trace_buffered_event_ref++)
2636                 return;
2637
2638         for_each_tracing_cpu(cpu) {
2639                 page = alloc_pages_node(cpu_to_node(cpu),
2640                                         GFP_KERNEL | __GFP_NORETRY, 0);
2641                 if (!page)
2642                         goto failed;
2643
2644                 event = page_address(page);
2645                 memset(event, 0, sizeof(*event));
2646
2647                 per_cpu(trace_buffered_event, cpu) = event;
2648
2649                 preempt_disable();
2650                 if (cpu == smp_processor_id() &&
2651                     __this_cpu_read(trace_buffered_event) !=
2652                     per_cpu(trace_buffered_event, cpu))
2653                         WARN_ON_ONCE(1);
2654                 preempt_enable();
2655         }
2656
2657         return;
2658  failed:
2659         trace_buffered_event_disable();
2660 }
2661
2662 static void enable_trace_buffered_event(void *data)
2663 {
2664         /* Probably not needed, but do it anyway */
2665         smp_rmb();
2666         this_cpu_dec(trace_buffered_event_cnt);
2667 }
2668
2669 static void disable_trace_buffered_event(void *data)
2670 {
2671         this_cpu_inc(trace_buffered_event_cnt);
2672 }
2673
2674 /**
2675  * trace_buffered_event_disable - disable buffering events
2676  *
2677  * When a filter is removed, it is faster to not use the buffered
2678  * events, and to commit directly into the ring buffer. Free up
2679  * the temp buffers when there are no more users. This requires
2680  * special synchronization with current events.
2681  */
2682 void trace_buffered_event_disable(void)
2683 {
2684         int cpu;
2685
2686         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2687
2688         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2689                 return;
2690
2691         if (--trace_buffered_event_ref)
2692                 return;
2693
2694         preempt_disable();
2695         /* For each CPU, set the buffer as used. */
2696         smp_call_function_many(tracing_buffer_mask,
2697                                disable_trace_buffered_event, NULL, 1);
2698         preempt_enable();
2699
2700         /* Wait for all current users to finish */
2701         synchronize_rcu();
2702
2703         for_each_tracing_cpu(cpu) {
2704                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2705                 per_cpu(trace_buffered_event, cpu) = NULL;
2706         }
2707         /*
2708          * Make sure trace_buffered_event is NULL before clearing
2709          * trace_buffered_event_cnt.
2710          */
2711         smp_wmb();
2712
2713         preempt_disable();
2714         /* Do the work on each cpu */
2715         smp_call_function_many(tracing_buffer_mask,
2716                                enable_trace_buffered_event, NULL, 1);
2717         preempt_enable();
2718 }
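
/*
 * Illustrative sketch (hypothetical caller, not part of this file): filter
 * code brackets its updates with the enable/disable pair above while
 * holding event_mutex, so the per-CPU pages only exist while at least one
 * filter might use them.
 */
static void example_filter_install(void)
{
	mutex_lock(&event_mutex);

	trace_buffered_event_enable();
	/*
	 * ... attach the filter to the event file(s) here; the matching
	 * trace_buffered_event_disable() is called when the filter is
	 * removed again ...
	 */

	mutex_unlock(&event_mutex);
}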
2719
2720 static struct trace_buffer *temp_buffer;
2721
2722 struct ring_buffer_event *
2723 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2724                           struct trace_event_file *trace_file,
2725                           int type, unsigned long len,
2726                           unsigned int trace_ctx)
2727 {
2728         struct ring_buffer_event *entry;
2729         struct trace_array *tr = trace_file->tr;
2730         int val;
2731
2732         *current_rb = tr->array_buffer.buffer;
2733
2734         if (!tr->no_filter_buffering_ref &&
2735             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2736             (entry = this_cpu_read(trace_buffered_event))) {
2737                 /* Try to use the per cpu buffer first */
2738                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2739                 if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2740                         trace_event_setup(entry, type, trace_ctx);
2741                         entry->array[0] = len;
2742                         return entry;
2743                 }
2744                 this_cpu_dec(trace_buffered_event_cnt);
2745         }
2746
2747         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2748                                             trace_ctx);
2749         /*
2750          * If tracing is off, but we have triggers enabled,
2751          * we still need to look at the event data. Use the temp_buffer
2752          * to store the trace event for the trigger to use. It's recursion
2753          * safe and will not be recorded anywhere.
2754          */
2755         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2756                 *current_rb = temp_buffer;
2757                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2758                                                     trace_ctx);
2759         }
2760         return entry;
2761 }
2762 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2763
2764 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2765 static DEFINE_MUTEX(tracepoint_printk_mutex);
2766
2767 static void output_printk(struct trace_event_buffer *fbuffer)
2768 {
2769         struct trace_event_call *event_call;
2770         struct trace_event_file *file;
2771         struct trace_event *event;
2772         unsigned long flags;
2773         struct trace_iterator *iter = tracepoint_print_iter;
2774
2775         /* We should never get here if iter is NULL */
2776         if (WARN_ON_ONCE(!iter))
2777                 return;
2778
2779         event_call = fbuffer->trace_file->event_call;
2780         if (!event_call || !event_call->event.funcs ||
2781             !event_call->event.funcs->trace)
2782                 return;
2783
2784         file = fbuffer->trace_file;
2785         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2786             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2787              !filter_match_preds(file->filter, fbuffer->entry)))
2788                 return;
2789
2790         event = &fbuffer->trace_file->event_call->event;
2791
2792         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2793         trace_seq_init(&iter->seq);
2794         iter->ent = fbuffer->entry;
2795         event_call->event.funcs->trace(iter, 0, event);
2796         trace_seq_putc(&iter->seq, 0);
2797         printk("%s", iter->seq.buffer);
2798
2799         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2800 }
2801
2802 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2803                              void *buffer, size_t *lenp,
2804                              loff_t *ppos)
2805 {
2806         int save_tracepoint_printk;
2807         int ret;
2808
2809         mutex_lock(&tracepoint_printk_mutex);
2810         save_tracepoint_printk = tracepoint_printk;
2811
2812         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2813
2814         /*
2815          * This will force exiting early, as tracepoint_printk
2816          * is always zero when tracepoint_print_iter is not allocated
2817          */
2818         if (!tracepoint_print_iter)
2819                 tracepoint_printk = 0;
2820
2821         if (save_tracepoint_printk == tracepoint_printk)
2822                 goto out;
2823
2824         if (tracepoint_printk)
2825                 static_key_enable(&tracepoint_printk_key.key);
2826         else
2827                 static_key_disable(&tracepoint_printk_key.key);
2828
2829  out:
2830         mutex_unlock(&tracepoint_printk_mutex);
2831
2832         return ret;
2833 }
2834
2835 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2836 {
2837         if (static_key_false(&tracepoint_printk_key.key))
2838                 output_printk(fbuffer);
2839
2840         if (static_branch_unlikely(&trace_event_exports_enabled))
2841                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2842         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2843                                     fbuffer->event, fbuffer->entry,
2844                                     fbuffer->trace_ctx, fbuffer->regs);
2845 }
2846 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
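
/*
 * Illustrative sketch (simplified from what the TRACE_EVENT() macros
 * generate; the entry layout and names are hypothetical): reserve space,
 * fill in the entry, then commit it through the helper above.
 */
static void example_emit_event(struct trace_event_file *trace_file, u64 value)
{
	struct trace_event_buffer fbuffer;
	struct {
		struct trace_entry	ent;
		u64			value;
	} *entry;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
					   sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;
	trace_event_buffer_commit(&fbuffer);
}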
2847
2848 /*
2849  * Skip 3:
2850  *
2851  *   trace_buffer_unlock_commit_regs()
2852  *   trace_event_buffer_commit()
2853  *   trace_event_raw_event_xxx()
2854  */
2855 # define STACK_SKIP 3
2856
2857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2858                                      struct trace_buffer *buffer,
2859                                      struct ring_buffer_event *event,
2860                                      unsigned int trace_ctx,
2861                                      struct pt_regs *regs)
2862 {
2863         __buffer_unlock_commit(buffer, event);
2864
2865         /*
2866          * If regs is not set, then skip the necessary functions.
2867          * Note, we can still get here via blktrace, wakeup tracer
2868          * and mmiotrace, but that's ok if they lose a function or
2869          * two. They are not that meaningful.
2870          */
2871         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2872         ftrace_trace_userstack(tr, buffer, trace_ctx);
2873 }
2874
2875 /*
2876  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2877  */
2878 void
2879 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2880                                    struct ring_buffer_event *event)
2881 {
2882         __buffer_unlock_commit(buffer, event);
2883 }
2884
2885 void
2886 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2887                parent_ip, unsigned int trace_ctx)
2888 {
2889         struct trace_event_call *call = &event_function;
2890         struct trace_buffer *buffer = tr->array_buffer.buffer;
2891         struct ring_buffer_event *event;
2892         struct ftrace_entry *entry;
2893
2894         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2895                                             trace_ctx);
2896         if (!event)
2897                 return;
2898         entry   = ring_buffer_event_data(event);
2899         entry->ip                       = ip;
2900         entry->parent_ip                = parent_ip;
2901
2902         if (!call_filter_check_discard(call, entry, buffer, event)) {
2903                 if (static_branch_unlikely(&trace_function_exports_enabled))
2904                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2905                 __buffer_unlock_commit(buffer, event);
2906         }
2907 }
2908
2909 #ifdef CONFIG_STACKTRACE
2910
2911 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2912 #define FTRACE_KSTACK_NESTING   4
2913
2914 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2915
2916 struct ftrace_stack {
2917         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2918 };
2919
2920
2921 struct ftrace_stacks {
2922         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2923 };
2924
2925 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2926 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2927
2928 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2929                                  unsigned int trace_ctx,
2930                                  int skip, struct pt_regs *regs)
2931 {
2932         struct trace_event_call *call = &event_kernel_stack;
2933         struct ring_buffer_event *event;
2934         unsigned int size, nr_entries;
2935         struct ftrace_stack *fstack;
2936         struct stack_entry *entry;
2937         int stackidx;
2938
2939         /*
2940          * Add one, for this function and the call to stack_trace_save().
2941          * If regs is set, then these functions will not be in the way.
2942          */
2943 #ifndef CONFIG_UNWINDER_ORC
2944         if (!regs)
2945                 skip++;
2946 #endif
2947
2948         preempt_disable_notrace();
2949
2950         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2951
2952         /* This should never happen. If it does, yell once and skip */
2953         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2954                 goto out;
2955
2956         /*
2957          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2958          * interrupt will either see the value pre increment or post
2959          * increment. If the interrupt happens pre increment it will have
2960          * restored the counter when it returns.  We just need a barrier to
2961          * keep gcc from moving things around.
2962          */
2963         barrier();
2964
2965         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2966         size = ARRAY_SIZE(fstack->calls);
2967
2968         if (regs) {
2969                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2970                                                    size, skip);
2971         } else {
2972                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2973         }
2974
2975         size = nr_entries * sizeof(unsigned long);
2976         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2977                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2978                                     trace_ctx);
2979         if (!event)
2980                 goto out;
2981         entry = ring_buffer_event_data(event);
2982
2983         memcpy(&entry->caller, fstack->calls, size);
2984         entry->size = nr_entries;
2985
2986         if (!call_filter_check_discard(call, entry, buffer, event))
2987                 __buffer_unlock_commit(buffer, event);
2988
2989  out:
2990         /* Again, don't let gcc optimize things here */
2991         barrier();
2992         __this_cpu_dec(ftrace_stack_reserve);
2993         preempt_enable_notrace();
2994
2995 }
2996
2997 static inline void ftrace_trace_stack(struct trace_array *tr,
2998                                       struct trace_buffer *buffer,
2999                                       unsigned int trace_ctx,
3000                                       int skip, struct pt_regs *regs)
3001 {
3002         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3003                 return;
3004
3005         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3006 }
3007
3008 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3009                    int skip)
3010 {
3011         struct trace_buffer *buffer = tr->array_buffer.buffer;
3012
3013         if (rcu_is_watching()) {
3014                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3015                 return;
3016         }
3017
3018         /*
3019          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3020          * but if the above rcu_is_watching() failed, then the NMI
3021          * triggered someplace critical, and rcu_irq_enter() should
3022          * not be called from NMI.
3023          */
3024         if (unlikely(in_nmi()))
3025                 return;
3026
3027         rcu_irq_enter_irqson();
3028         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3029         rcu_irq_exit_irqson();
3030 }
3031
3032 /**
3033  * trace_dump_stack - record a stack back trace in the trace buffer
3034  * @skip: Number of functions to skip (helper handlers)
3035  */
3036 void trace_dump_stack(int skip)
3037 {
3038         if (tracing_disabled || tracing_selftest_running)
3039                 return;
3040
3041 #ifndef CONFIG_UNWINDER_ORC
3042         /* Skip 1 to skip this function. */
3043         skip++;
3044 #endif
3045         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3046                              tracing_gen_ctx(), skip, NULL);
3047 }
3048 EXPORT_SYMBOL_GPL(trace_dump_stack);
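
/*
 * Illustrative sketch (hypothetical call site, not part of this file):
 * dropping trace_dump_stack(0) into a code path records who reached it
 * in the ring buffer without stopping the machine.
 */
static void example_debug_hook(void)
{
	trace_dump_stack(0);
}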
3049
3050 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3051 static DEFINE_PER_CPU(int, user_stack_count);
3052
3053 static void
3054 ftrace_trace_userstack(struct trace_array *tr,
3055                        struct trace_buffer *buffer, unsigned int trace_ctx)
3056 {
3057         struct trace_event_call *call = &event_user_stack;
3058         struct ring_buffer_event *event;
3059         struct userstack_entry *entry;
3060
3061         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3062                 return;
3063
3064         /*
3065          * NMIs cannot handle page faults, even with fixups.
3066          * Saving the user stack can (and often does) fault.
3067          */
3068         if (unlikely(in_nmi()))
3069                 return;
3070
3071         /*
3072          * prevent recursion, since the user stack tracing may
3073          * trigger other kernel events.
3074          */
3075         preempt_disable();
3076         if (__this_cpu_read(user_stack_count))
3077                 goto out;
3078
3079         __this_cpu_inc(user_stack_count);
3080
3081         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3082                                             sizeof(*entry), trace_ctx);
3083         if (!event)
3084                 goto out_drop_count;
3085         entry   = ring_buffer_event_data(event);
3086
3087         entry->tgid             = current->tgid;
3088         memset(&entry->caller, 0, sizeof(entry->caller));
3089
3090         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3091         if (!call_filter_check_discard(call, entry, buffer, event))
3092                 __buffer_unlock_commit(buffer, event);
3093
3094  out_drop_count:
3095         __this_cpu_dec(user_stack_count);
3096  out:
3097         preempt_enable();
3098 }
3099 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3100 static void ftrace_trace_userstack(struct trace_array *tr,
3101                                    struct trace_buffer *buffer,
3102                                    unsigned int trace_ctx)
3103 {
3104 }
3105 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3106
3107 #endif /* CONFIG_STACKTRACE */
3108
3109 static inline void
3110 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3111                           unsigned long long delta)
3112 {
3113         entry->bottom_delta_ts = delta & U32_MAX;
3114         entry->top_delta_ts = (delta >> 32);
3115 }
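
/*
 * Minimal sketch of the inverse operation (the helper name is made up;
 * the output side performs the equivalent reconstruction): reassemble
 * the 64-bit delta from the two fields stored above.
 */
static inline u64 example_func_repeats_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}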
3116
3117 void trace_last_func_repeats(struct trace_array *tr,
3118                              struct trace_func_repeats *last_info,
3119                              unsigned int trace_ctx)
3120 {
3121         struct trace_buffer *buffer = tr->array_buffer.buffer;
3122         struct func_repeats_entry *entry;
3123         struct ring_buffer_event *event;
3124         u64 delta;
3125
3126         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3127                                             sizeof(*entry), trace_ctx);
3128         if (!event)
3129                 return;
3130
3131         delta = ring_buffer_event_time_stamp(buffer, event) -
3132                 last_info->ts_last_call;
3133
3134         entry = ring_buffer_event_data(event);
3135         entry->ip = last_info->ip;
3136         entry->parent_ip = last_info->parent_ip;
3137         entry->count = last_info->count;
3138         func_repeats_set_delta_ts(entry, delta);
3139
3140         __buffer_unlock_commit(buffer, event);
3141 }
3142
3143 /* created for use with alloc_percpu */
3144 struct trace_buffer_struct {
3145         int nesting;
3146         char buffer[4][TRACE_BUF_SIZE];
3147 };
3148
3149 static struct trace_buffer_struct *trace_percpu_buffer;
3150
3151 /*
3152  * This allows for lockless recording.  If we're nested too deeply, then
3153  * this returns NULL.
3154  */
3155 static char *get_trace_buf(void)
3156 {
3157         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3158
3159         if (!buffer || buffer->nesting >= 4)
3160                 return NULL;
3161
3162         buffer->nesting++;
3163
3164         /* Interrupts must see nesting incremented before we use the buffer */
3165         barrier();
3166         return &buffer->buffer[buffer->nesting - 1][0];
3167 }
3168
3169 static void put_trace_buf(void)
3170 {
3171         /* Don't let the decrement of nesting leak before this */
3172         barrier();
3173         this_cpu_dec(trace_percpu_buffer->nesting);
3174 }
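
/*
 * Illustrative sketch (hypothetical caller, not part of this file): users
 * of the per-CPU printk buffers always pair get_trace_buf() with
 * put_trace_buf() and keep preemption disabled in between.
 */
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* format at most TRACE_BUF_SIZE bytes into tbuffer here */
		put_trace_buf();
	}

	preempt_enable_notrace();
}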
3175
3176 static int alloc_percpu_trace_buffer(void)
3177 {
3178         struct trace_buffer_struct *buffers;
3179
3180         if (trace_percpu_buffer)
3181                 return 0;
3182
3183         buffers = alloc_percpu(struct trace_buffer_struct);
3184         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3185                 return -ENOMEM;
3186
3187         trace_percpu_buffer = buffers;
3188         return 0;
3189 }
3190
3191 static int buffers_allocated;
3192
3193 void trace_printk_init_buffers(void)
3194 {
3195         if (buffers_allocated)
3196                 return;
3197
3198         if (alloc_percpu_trace_buffer())
3199                 return;
3200
3201         /* trace_printk() is for debug use only. Don't use it in production. */
3202
3203         pr_warn("\n");
3204         pr_warn("**********************************************************\n");
3205         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3206         pr_warn("**                                                      **\n");
3207         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3208         pr_warn("**                                                      **\n");
3209         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3210         pr_warn("** unsafe for production use.                           **\n");
3211         pr_warn("**                                                      **\n");
3212         pr_warn("** If you see this message and you are not debugging    **\n");
3213         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3214         pr_warn("**                                                      **\n");
3215         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3216         pr_warn("**********************************************************\n");
3217
3218         /* Expand the buffers to set size */
3219         tracing_update_buffers();
3220
3221         buffers_allocated = 1;
3222
3223         /*
3224          * trace_printk_init_buffers() can be called by modules.
3225          * If that happens, then we need to start cmdline recording
3226          * directly here. If the global_trace.buffer is already
3227          * allocated here, then this was called by module code.
3228          */
3229         if (global_trace.array_buffer.buffer)
3230                 tracing_start_cmdline_record();
3231 }
3232 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3233
3234 void trace_printk_start_comm(void)
3235 {
3236         /* Start tracing comms if trace printk is set */
3237         if (!buffers_allocated)
3238                 return;
3239         tracing_start_cmdline_record();
3240 }
3241
3242 static void trace_printk_start_stop_comm(int enabled)
3243 {
3244         if (!buffers_allocated)
3245                 return;
3246
3247         if (enabled)
3248                 tracing_start_cmdline_record();
3249         else
3250                 tracing_stop_cmdline_record();
3251 }
3252
3253 /**
3254  * trace_vbprintk - write binary msg to tracing buffer
3255  * @ip:    The address of the caller
3256  * @fmt:   The string format to write to the buffer
3257  * @args:  Arguments for @fmt
3258  */
3259 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3260 {
3261         struct trace_event_call *call = &event_bprint;
3262         struct ring_buffer_event *event;
3263         struct trace_buffer *buffer;
3264         struct trace_array *tr = &global_trace;
3265         struct bprint_entry *entry;
3266         unsigned int trace_ctx;
3267         char *tbuffer;
3268         int len = 0, size;
3269
3270         if (unlikely(tracing_selftest_running || tracing_disabled))
3271                 return 0;
3272
3273         /* Don't pollute graph traces with trace_vprintk internals */
3274         pause_graph_tracing();
3275
3276         trace_ctx = tracing_gen_ctx();
3277         preempt_disable_notrace();
3278
3279         tbuffer = get_trace_buf();
3280         if (!tbuffer) {
3281                 len = 0;
3282                 goto out_nobuffer;
3283         }
3284
3285         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3286
3287         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3288                 goto out_put;
3289
3290         size = sizeof(*entry) + sizeof(u32) * len;
3291         buffer = tr->array_buffer.buffer;
3292         ring_buffer_nest_start(buffer);
3293         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3294                                             trace_ctx);
3295         if (!event)
3296                 goto out;
3297         entry = ring_buffer_event_data(event);
3298         entry->ip                       = ip;
3299         entry->fmt                      = fmt;
3300
3301         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3302         if (!call_filter_check_discard(call, entry, buffer, event)) {
3303                 __buffer_unlock_commit(buffer, event);
3304                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3305         }
3306
3307 out:
3308         ring_buffer_nest_end(buffer);
3309 out_put:
3310         put_trace_buf();
3311
3312 out_nobuffer:
3313         preempt_enable_notrace();
3314         unpause_graph_tracing();
3315
3316         return len;
3317 }
3318 EXPORT_SYMBOL_GPL(trace_vbprintk);
3319
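/*
 * Write a plain text printk-style message into @buffer. This is the
 * common helper used by trace_array_vprintk() and trace_array_printk_buf().
 */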
3320 __printf(3, 0)
3321 static int
3322 __trace_array_vprintk(struct trace_buffer *buffer,
3323                       unsigned long ip, const char *fmt, va_list args)
3324 {
3325         struct trace_event_call *call = &event_print;
3326         struct ring_buffer_event *event;
3327         int len = 0, size;
3328         struct print_entry *entry;
3329         unsigned int trace_ctx;
3330         char *tbuffer;
3331
3332         if (tracing_disabled || tracing_selftest_running)
3333                 return 0;
3334
3335         /* Don't pollute graph traces with trace_vprintk internals */
3336         pause_graph_tracing();
3337
3338         trace_ctx = tracing_gen_ctx();
3339         preempt_disable_notrace();
3340
3342         tbuffer = get_trace_buf();
3343         if (!tbuffer) {
3344                 len = 0;
3345                 goto out_nobuffer;
3346         }
3347
3348         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3349
3350         size = sizeof(*entry) + len + 1;
3351         ring_buffer_nest_start(buffer);
3352         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3353                                             trace_ctx);
3354         if (!event)
3355                 goto out;
3356         entry = ring_buffer_event_data(event);
3357         entry->ip = ip;
3358
3359         memcpy(&entry->buf, tbuffer, len + 1);
3360         if (!call_filter_check_discard(call, entry, buffer, event)) {
3361                 __buffer_unlock_commit(buffer, event);
3362                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3363         }
3364
3365 out:
3366         ring_buffer_nest_end(buffer);
3367         put_trace_buf();
3368
3369 out_nobuffer:
3370         preempt_enable_notrace();
3371         unpause_graph_tracing();
3372
3373         return len;
3374 }
3375
3376 __printf(3, 0)
3377 int trace_array_vprintk(struct trace_array *tr,
3378                         unsigned long ip, const char *fmt, va_list args)
3379 {
3380         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3381 }
3382
3383 /**
3384  * trace_array_printk - Print a message to a specific instance
3385  * @tr: The instance trace_array descriptor
3386  * @ip: The instruction pointer that this is called from.
3387  * @fmt: The format to print (printf format)
3388  *
3389  * If a subsystem sets up its own instance, they have the right to
3390  * printk strings into their tracing instance buffer using this
3391  * function. Note, this function will not write into the top level
3392  * buffer (use trace_printk() for that), as writing into the top level
3393  * buffer (use trace_printk() for that), as the top level buffer
3394  * should only contain events that can be individually disabled.
3395  * trace_printk() is only used for debugging a kernel, and should
3396  * never be incorporated into normal use.
3397  * trace_array_printk() can be used, as it will not add noise to the
3398  * top level tracing buffer.
3399  *
3400  * Note, trace_array_init_printk() must be called on @tr before this
3401  * can be used.
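 *
 * A minimal illustrative sketch (assuming @tr was created by the caller,
 * e.g. via trace_array_get_by_name(), and prepared with
 * trace_array_init_printk(); "req_id" is a made-up example variable):
 *
 *	trace_array_printk(tr, _THIS_IP_, "request %d handled\n", req_id);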
3402  */
3403 __printf(3, 0)
3404 int trace_array_printk(struct trace_array *tr,
3405                        unsigned long ip, const char *fmt, ...)
3406 {
3407         int ret;
3408         va_list ap;
3409
3410         if (!tr)
3411                 return -ENOENT;
3412
3413         /* This is only allowed for created instances */
3414         if (tr == &global_trace)
3415                 return 0;
3416
3417         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3418                 return 0;
3419
3420         va_start(ap, fmt);
3421         ret = trace_array_vprintk(tr, ip, fmt, ap);
3422         va_end(ap);
3423         return ret;
3424 }
3425 EXPORT_SYMBOL_GPL(trace_array_printk);
3426
3427 /**
3428  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3429  * @tr: The trace array to initialize the buffers for
3430  *
3431  * As trace_array_printk() only writes into instances, calls to it are OK
3432  * to have in the kernel (unlike trace_printk()). This needs to be called
3433  * before trace_array_printk() can be used on a trace_array.
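 *
 * Illustrative sketch of the expected call order (the instance name
 * "myinst" is an assumption; the matching trace_array_put() is omitted):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("myinst");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");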
3434  */
3435 int trace_array_init_printk(struct trace_array *tr)
3436 {
3437         if (!tr)
3438                 return -ENOENT;
3439
3440         /* This is only allowed for created instances */
3441         if (tr == &global_trace)
3442                 return -EINVAL;
3443
3444         return alloc_percpu_trace_buffer();
3445 }
3446 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3447
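/*
 * Like trace_array_printk(), but writes into an explicit ring buffer and
 * is gated by the top level (global_trace) PRINTK trace flag.
 */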
3448 __printf(3, 4)
3449 int trace_array_printk_buf(struct trace_buffer *buffer,
3450                            unsigned long ip, const char *fmt, ...)
3451 {
3452         int ret;
3453         va_list ap;
3454
3455         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3456                 return 0;
3457
3458         va_start(ap, fmt);
3459         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3460         va_end(ap);
3461         return ret;
3462 }
3463
3464 __printf(2, 0)
3465 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3466 {
3467         return trace_array_vprintk(&global_trace, ip, fmt, args);
3468 }
3469 EXPORT_SYMBOL_GPL(trace_vprintk);
3470
3471 static void trace_iterator_increment(struct trace_iterator *iter)
3472 {
3473         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3474
3475         iter->idx++;
3476         if (buf_iter)
3477                 ring_buffer_iter_advance(buf_iter);
3478 }
3479
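/*
 * Peek at the next entry on @cpu without consuming it. Sets iter->ent_size
 * for the entry found and reports any lost events through @lost_events.
 */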
3480 static struct trace_entry *
3481 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3482                 unsigned long *lost_events)
3483 {
3484         struct ring_buffer_event *event;
3485         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3486
3487         if (buf_iter) {
3488                 event = ring_buffer_iter_peek(buf_iter, ts);
3489                 if (lost_events)
3490                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3491                                 (unsigned long)-1 : 0;
3492         } else {
3493                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3494                                          lost_events);
3495         }
3496
3497         if (event) {
3498                 iter->ent_size = ring_buffer_event_length(event);
3499                 return ring_buffer_event_data(event);
3500         }
3501         iter->ent_size = 0;
3502         return NULL;
3503 }
3504
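/*
 * Return the entry with the oldest timestamp across all CPUs covered by
 * the iterator, or just iter->cpu_file for a per-CPU trace file.
 */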
3505 static struct trace_entry *
3506 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3507                   unsigned long *missing_events, u64 *ent_ts)
3508 {
3509         struct trace_buffer *buffer = iter->array_buffer->buffer;
3510         struct trace_entry *ent, *next = NULL;
3511         unsigned long lost_events = 0, next_lost = 0;
3512         int cpu_file = iter->cpu_file;
3513         u64 next_ts = 0, ts;
3514         int next_cpu = -1;
3515         int next_size = 0;
3516         int cpu;
3517
3518         /*
3519          * If we are in a per_cpu trace file, don't bother iterating over
3520          * all CPUs; just peek at that CPU directly.
3521          */
3522         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3523                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3524                         return NULL;
3525                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3526                 if (ent_cpu)
3527                         *ent_cpu = cpu_file;
3528
3529                 return ent;
3530         }
3531
3532         for_each_tracing_cpu(cpu) {
3533
3534                 if (ring_buffer_empty_cpu(buffer, cpu))
3535                         continue;
3536
3537                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3538
3539                 /*
3540                  * Pick the entry with the smallest timestamp:
3541                  */
3542                 if (ent && (!next || ts < next_ts)) {
3543                         next = ent;
3544                         next_cpu = cpu;
3545                         next_ts = ts;
3546                         next_lost = lost_events;
3547                         next_size = iter->ent_size;
3548                 }
3549         }
3550
3551         iter->ent_size = next_size;
3552
3553         if (ent_cpu)
3554                 *ent_cpu = next_cpu;
3555
3556         if (ent_ts)
3557                 *ent_ts = next_ts;
3558
3559         if (missing_events)
3560                 *missing_events = next_lost;
3561
3562         return next;
3563 }
3564
3565 #define STATIC_FMT_BUF_SIZE     128
3566 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3567
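/*
 * Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes. Returns NULL when the
 * buffer cannot be grown (the static buffer is in use, tp_printk is
 * active, or the allocation fails), leaving iter->fmt untouched.
 */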
3568 static char *trace_iter_expand_format(struct trace_iterator *iter)
3569 {
3570         char *tmp;
3571
3572         /*
3573          * iter->tr is NULL when used with tp_printk, which means this
3574          * can be called from a context where krealloc() is not safe.
3575          */
3576         if (!iter->tr || iter->fmt == static_fmt_buf)
3577                 return NULL;
3578
3579         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3580                        GFP_KERNEL);
3581         if (tmp) {
3582                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3583                 iter->fmt = tmp;
3584         }
3585
3586         return tmp;
3587 }
3588
3589 /* Returns true if the string is safe to dereference from an event */
3590 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3591 {
3592         unsigned long addr = (unsigned long)str;
3593         struct trace_event *trace_event;
3594         struct trace_event_call *event;
3595
3596         /* OK if part of the event data */
3597         if ((addr >= (unsigned long)iter->ent) &&
3598             (addr < (unsigned long)iter->ent + iter->ent_size))
3599                 return true;
3600
3601         /* OK if part of the temp seq buffer */
3602         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3603             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3604                 return true;
3605
3606         /* Core rodata can not be freed */
3607         if (is_kernel_rodata(addr))
3608                 return true;
3609
3610         if (trace_is_tracepoint_string(str))
3611                 return true;
3612
3613         /*
3614          * Now this could be a module event, referencing core module
3615          * data, which is OK.
3616          */
3617         if (!iter->ent)
3618                 return false;
3619
3620         trace_event = ftrace_find_event(iter->ent->type);
3621         if (!trace_event)
3622                 return false;
3623
3624         event = container_of(trace_event, struct trace_event_call, event);
3625         if (!event->mod)
3626                 return false;
3627
3628         /* Would rather have rodata, but this will suffice */
3629         if (within_module_core(addr, event->mod))
3630                 return true;
3631
3632         return false;
3633 }
3634
3635 static const char *show_buffer(struct trace_seq *s)
3636 {
3637         struct seq_buf *seq = &s->seq;
3638
3639         seq_buf_terminate(seq);
3640
3641         return seq->buffer;
3642 }
3643
3644 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3645
3646 static int test_can_verify_check(const char *fmt, ...)
3647 {
3648         char buf[16];
3649         va_list ap;
3650         int ret;
3651
3652         /*
3653          * The verifier depends on vsnprintf() modifying the va_list that
3654          * is passed to it by reference. Some architectures (like x86_32)
3655          * pass it by value, which means vsnprintf() does not modify the
3656          * caller's va_list, and the verifier would then need to understand
3657          * every value that vsnprintf() can consume. If the va_list is
3658          * passed by value, the verifier is disabled.
3660          */
3661         va_start(ap, fmt);
3662         vsnprintf(buf, 16, "%d", ap);
3663         ret = va_arg(ap, int);
3664         va_end(ap);
3665
3666         return ret;
3667 }
3668
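/*
 * Probe whether vsnprintf() advances the caller's va_list. If it does not
 * (the architecture passes va_list by value), disable the trace event
 * string verifier.
 */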
3669 static void test_can_verify(void)
3670 {
3671         if (!test_can_verify_check("%d %d", 0, 1)) {
3672                 pr_info("trace event string verifier disabled\n");
3673                 static_branch_inc(&trace_no_verify);
3674         }
3675 }
3676
3677 /**
3678  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3679  * @iter: The iterator that holds the seq buffer and the event being printed
3680  * @fmt: The format used to print the event
3681  * @ap: The va_list holding the data to print from @fmt.
3682  *
3683  * This writes the data into the @iter->seq buffer using the data from
3684  * @fmt and @ap. If the format has a %s, then the source of the string
3685  * is examined to make sure it is safe to print, otherwise it will
3686  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3687  * pointer.
3688  */
3689 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3690                          va_list ap)
3691 {
3692         const char *p = fmt;
3693         const char *str;
3694         int i, j;
3695
3696         if (WARN_ON_ONCE(!fmt))
3697                 return;
3698
3699         if (static_branch_unlikely(&trace_no_verify))
3700                 goto print;
3701
3702         /* Don't bother checking when doing an ftrace_dump() */
3703         if (iter->fmt == static_fmt_buf)
3704                 goto print;
3705
3706         while (*p) {
3707                 j = 0;
3708
3709                 /* We only care about %s and variants */
3710                 for (i = 0; p[i]; i++) {
3711                         if (i + 1 >= iter->fmt_size) {
3712                                 /*
3713                                  * If we can't expand the copy buffer,
3714                                  * just print it.
3715                                  */
3716                                 if (!trace_iter_expand_format(iter))
3717                                         goto print;
3718                         }
3719
3720                         if (p[i] == '\\' && p[i+1]) {
3721                                 i++;
3722                                 continue;
3723                         }
3724                         if (p[i] == '%') {
3725                                 /* Need to test cases like %08.*s */
3726                                 for (j = 1; p[i+j]; j++) {
3727                                         if (isdigit(p[i+j]) ||
3728                                             p[i+j] == '*' ||
3729                                             p[i+j] == '.')
3730                                                 continue;
3731                                         break;
3732                                 }
3733                                 if (p[i+j] == 's')
3734                                         break;
3735                         }
3736                         j = 0;
3737                 }
3738                 /* If no %s found then just print normally */
3739                 if (!p[i])
3740                         break;
3741
3742                 /* Copy up to the %s, and print that */
3743                 strncpy(iter->fmt, p, i);
3744                 iter->fmt[i] = '\0';
3745                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3746
3747                 /* The ap now points to the string data of the %s */
3748                 str = va_arg(ap, const char *);
3749
3750                 /*
3751                  * If you hit this warning, it is likely that the
3752                  * trace event in question used %s on a string that
3753                  * was saved at the time of the event, but may not be
3754          * around when the trace is read. Use the __string(),
3755          * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3756          * macro instead. See samples/trace_events/trace-events-sample.h
3757                  * for reference.
3758                  */
3759                 if (WARN_ONCE(!trace_safe_str(iter, str),
3760                               "fmt: '%s' current_buffer: '%s'",
3761                               fmt, show_buffer(&iter->seq))) {
3762                         int ret;
3763
3764                         /* Try to safely read the string */
3765                         ret = strncpy_from_kernel_nofault(iter->fmt, str,
3766                                                           iter->fmt_size);
3767                         if (ret < 0)
3768                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3769                         else
3770                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3771                                                  str, iter->fmt);
3772                         str = "[UNSAFE-MEMORY]";
3773                         strcpy(iter->fmt, "%s");
3774                 } else {
3775                         strncpy(iter->fmt, p + i, j + 1);
3776                         iter->fmt[j+1] = '\0';
3777                 }
3778                 trace_seq_printf(&iter->seq, iter->fmt, str);
3779
3780                 p += i + j + 1;
3781         }
3782  print:
3783         if (*p)
3784                 trace_seq_vprintf(&iter->seq, p, ap);
3785 }
3786
3787 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3788 {
3789         const char *p, *new_fmt;
3790         char *q;
3791
3792         if (WARN_ON_ONCE(!fmt))
3793                 return fmt;
3794
3795         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3796                 return fmt;
3797
3798         p = fmt;
3799         new_fmt = q = iter->fmt;
3800         while (*p) {
3801                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3802                         if (!trace_iter_expand_format(iter))
3803                                 return fmt;
3804
3805                         q += iter->fmt - new_fmt;
3806                         new_fmt = iter->fmt;
3807                 }
3808
3809                 *q++ = *p++;
3810
3811                 /* Replace %p with %px */
3812                 if (p[-1] == '%') {
3813                         if (p[0] == '%') {
3814                                 *q++ = *p++;
3815                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3816                                 *q++ = *p++;
3817                                 *q++ = 'x';
3818                         }
3819                 }
3820         }
3821         *q = '\0';
3822
3823         return new_fmt;
3824 }
3825
3826 #define STATIC_TEMP_BUF_SIZE    128
3827 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3828
3829 /* Find the next real entry, without updating the iterator itself */
3830 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3831                                           int *ent_cpu, u64 *ent_ts)
3832 {
3833         /* __find_next_entry will reset ent_size */
3834         int ent_size = iter->ent_size;
3835         struct trace_entry *entry;
3836
3837         /*
3838          * If called from ftrace_dump(), then the iter->temp buffer
3839          * will be the static_temp_buf and not allocated with kmalloc().
3840          * If the entry size is greater than the buffer, we cannot
3841          * save it. Just return NULL in that case. This is only
3842          * used to add markers when two consecutive events' time
3843          * stamps have a large delta. See trace_print_lat_context().
3844          */
3845         if (iter->temp == static_temp_buf &&
3846             STATIC_TEMP_BUF_SIZE < ent_size)
3847                 return NULL;
3848
3849         /*
3850          * __find_next_entry() may call peek_next_entry(), which may
3851          * call ring_buffer_peek(), which can make the contents of
3852          * iter->ent undefined. Copy iter->ent now, before that happens.
3853          */
3854         if (iter->ent && iter->ent != iter->temp) {
3855                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3856                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3857                         void *temp;
3858                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3859                         if (!temp)
3860                                 return NULL;
3861                         kfree(iter->temp);
3862                         iter->temp = temp;
3863                         iter->temp_size = iter->ent_size;
3864                 }
3865                 memcpy(iter->temp, iter->ent, iter->ent_size);
3866                 iter->ent = iter->temp;
3867         }
3868         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3869         /* Put back the original ent_size */
3870         iter->ent_size = ent_size;
3871
3872         return entry;
3873 }
3874
3875 /* Find the next real entry, and increment the iterator to the next entry */
3876 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3877 {
3878         iter->ent = __find_next_entry(iter, &iter->cpu,
3879                                       &iter->lost_events, &iter->ts);
3880
3881         if (iter->ent)
3882                 trace_iterator_increment(iter);
3883
3884         return iter->ent ? iter : NULL;
3885 }
3886
3887 static void trace_consume(struct trace_iterator *iter)
3888 {
3889         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3890                             &iter->lost_events);
3891 }
3892
3893 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3894 {
3895         struct trace_iterator *iter = m->private;
3896         int i = (int)*pos;
3897         void *ent;
3898
3899         WARN_ON_ONCE(iter->leftover);
3900
3901         (*pos)++;
3902
3903         /* can't go backwards */
3904         if (iter->idx > i)
3905                 return NULL;
3906
3907         if (iter->idx < 0)
3908                 ent = trace_find_next_entry_inc(iter);
3909         else
3910                 ent = iter;
3911
3912         while (ent && iter->idx < i)
3913                 ent = trace_find_next_entry_inc(iter);
3914
3915         iter->pos = *pos;
3916
3917         return ent;
3918 }
3919
3920 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3921 {
3922         struct ring_buffer_iter *buf_iter;
3923         unsigned long entries = 0;
3924         u64 ts;
3925
3926         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3927
3928         buf_iter = trace_buffer_iter(iter, cpu);
3929         if (!buf_iter)
3930                 return;
3931
3932         ring_buffer_iter_reset(buf_iter);
3933
3934         /*
3935          * With the max latency tracers, a reset may never have taken
3936          * place on a CPU. This is evident from the timestamps being
3937          * before the start of the buffer.
3938          */
3939         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3940                 if (ts >= iter->array_buffer->time_start)
3941                         break;
3942                 entries++;
3943                 ring_buffer_iter_advance(buf_iter);
3944         }
3945
3946         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3947 }
3948
3949 /*
3950  * The current tracer is copied to avoid taking a global
3951  * lock all around.
3952  */
3953 static void *s_start(struct seq_file *m, loff_t *pos)
3954 {
3955         struct trace_iterator *iter = m->private;
3956         struct trace_array *tr = iter->tr;
3957         int cpu_file = iter->cpu_file;
3958         void *p = NULL;
3959         loff_t l = 0;
3960         int cpu;
3961
3962         /*
3963          * Copy the tracer to avoid using a global lock all around.
3964          * iter->trace is a copy of current_trace, so a pointer comparison
3965          * of the names may be used instead of strcmp(), as iter->trace->name
3966          * will point to the same string as current_trace->name.
3967          */
3968         mutex_lock(&trace_types_lock);
3969         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3970                 *iter->trace = *tr->current_trace;
3971         mutex_unlock(&trace_types_lock);
3972
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974         if (iter->snapshot && iter->trace->use_max_tr)
3975                 return ERR_PTR(-EBUSY);
3976 #endif
3977
3978         if (!iter->snapshot)
3979                 atomic_inc(&trace_record_taskinfo_disabled);
3980
3981         if (*pos != iter->pos) {
3982                 iter->ent = NULL;
3983                 iter->cpu = 0;
3984                 iter->idx = -1;
3985
3986                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3987                         for_each_tracing_cpu(cpu)
3988                                 tracing_iter_reset(iter, cpu);
3989                 } else
3990                         tracing_iter_reset(iter, cpu_file);
3991
3992                 iter->leftover = 0;
3993                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3994                         ;
3995
3996         } else {
3997                 /*
3998                  * If we overflowed the seq_file before, then we want
3999                  * to just reuse the trace_seq buffer again.
4000                  */
4001                 if (iter->leftover)
4002                         p = iter;
4003                 else {
4004                         l = *pos - 1;
4005                         p = s_next(m, p, &l);
4006                 }
4007         }
4008
4009         trace_event_read_lock();
4010         trace_access_lock(cpu_file);
4011         return p;
4012 }
4013
4014 static void s_stop(struct seq_file *m, void *p)
4015 {
4016         struct trace_iterator *iter = m->private;
4017
4018 #ifdef CONFIG_TRACER_MAX_TRACE
4019         if (iter->snapshot && iter->trace->use_max_tr)
4020                 return;
4021 #endif
4022
4023         if (!iter->snapshot)
4024                 atomic_dec(&trace_record_taskinfo_disabled);
4025
4026         trace_access_unlock(iter->cpu_file);
4027         trace_event_read_unlock();
4028 }
4029
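/*
 * Count the entries still present in @buf for @cpu. @entries is what can
 * still be read; @total also includes entries lost to ring buffer overruns.
 */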
4030 static void
4031 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4032                       unsigned long *entries, int cpu)
4033 {
4034         unsigned long count;
4035
4036         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4037         /*
4038          * If this buffer has skipped entries, then it holds all
4039          * entries for the trace and we only need to ignore the
4040          * ones before the start time stamp.
4041          */
4042         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4043                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4044                 /* total is the same as the entries */
4045                 *total = count;
4046         } else
4047                 *total = count +
4048                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4049         *entries = count;
4050 }
4051
4052 static void
4053 get_total_entries(struct array_buffer *buf,
4054                   unsigned long *total, unsigned long *entries)
4055 {
4056         unsigned long t, e;
4057         int cpu;
4058
4059         *total = 0;
4060         *entries = 0;
4061
4062         for_each_tracing_cpu(cpu) {
4063                 get_total_entries_cpu(buf, &t, &e, cpu);
4064                 *total += t;
4065                 *entries += e;
4066         }
4067 }
4068
4069 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4070 {
4071         unsigned long total, entries;
4072
4073         if (!tr)
4074                 tr = &global_trace;
4075
4076         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4077
4078         return entries;
4079 }
4080
4081 unsigned long trace_total_entries(struct trace_array *tr)
4082 {
4083         unsigned long total, entries;
4084
4085         if (!tr)
4086                 tr = &global_trace;
4087
4088         get_total_entries(&tr->array_buffer, &total, &entries);
4089
4090         return entries;
4091 }
4092
4093 static void print_lat_help_header(struct seq_file *m)
4094 {
4095         seq_puts(m, "#                    _------=> CPU#            \n"
4096                     "#                   / _-----=> irqs-off        \n"
4097                     "#                  | / _----=> need-resched    \n"
4098                     "#                  || / _---=> hardirq/softirq \n"
4099                     "#                  ||| / _--=> preempt-depth   \n"
4100                     "#                  |||| /     delay            \n"
4101                     "#  cmd     pid     ||||| time  |   caller      \n"
4102                     "#     \\   /        |||||  \\    |   /         \n");
4103 }
4104
4105 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4106 {
4107         unsigned long total;
4108         unsigned long entries;
4109
4110         get_total_entries(buf, &total, &entries);
4111         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4112                    entries, total, num_online_cpus());
4113         seq_puts(m, "#\n");
4114 }
4115
4116 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4117                                    unsigned int flags)
4118 {
4119         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4120
4121         print_event_info(buf, m);
4122
4123         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4124         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4125 }
4126
4127 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4128                                        unsigned int flags)
4129 {
4130         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4131         const char *space = "            ";
4132         int prec = tgid ? 12 : 2;
4133
4134         print_event_info(buf, m);
4135
4136         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4137         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4138         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4139         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4140         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4141         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4142         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4143 }
4144
4145 void
4146 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4147 {
4148         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4149         struct array_buffer *buf = iter->array_buffer;
4150         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4151         struct tracer *type = iter->trace;
4152         unsigned long entries;
4153         unsigned long total;
4154         const char *name = "preemption";
4155
4156         name = type->name;
4157
4158         get_total_entries(buf, &total, &entries);
4159
4160         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4161                    name, UTS_RELEASE);
4162         seq_puts(m, "# -----------------------------------"
4163                  "---------------------------------\n");
4164         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4165                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4166                    nsecs_to_usecs(data->saved_latency),
4167                    entries,
4168                    total,
4169                    buf->cpu,
4170 #if defined(CONFIG_PREEMPT_NONE)
4171                    "server",
4172 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4173                    "desktop",
4174 #elif defined(CONFIG_PREEMPT)
4175                    "preempt",
4176 #elif defined(CONFIG_PREEMPT_RT)
4177                    "preempt_rt",
4178 #else
4179                    "unknown",
4180 #endif
4181                    /* These are reserved for later use */
4182                    0, 0, 0, 0);
4183 #ifdef CONFIG_SMP
4184         seq_printf(m, " #P:%d)\n", num_online_cpus());
4185 #else
4186         seq_puts(m, ")\n");
4187 #endif
4188         seq_puts(m, "#    -----------------\n");
4189         seq_printf(m, "#    | task: %.16s-%d "
4190                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4191                    data->comm, data->pid,
4192                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4193                    data->policy, data->rt_priority);
4194         seq_puts(m, "#    -----------------\n");
4195
4196         if (data->critical_start) {
4197                 seq_puts(m, "#  => started at: ");
4198                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4199                 trace_print_seq(m, &iter->seq);
4200                 seq_puts(m, "\n#  => ended at:   ");
4201                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4202                 trace_print_seq(m, &iter->seq);
4203                 seq_puts(m, "\n#\n");
4204         }
4205
4206         seq_puts(m, "#\n");
4207 }
4208
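/*
 * Print a "CPU buffer started" annotation the first time an entry from a
 * given CPU is seen, when annotations are enabled (the annotate iterator
 * flag is set after buffer overruns).
 */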
4209 static void test_cpu_buff_start(struct trace_iterator *iter)
4210 {
4211         struct trace_seq *s = &iter->seq;
4212         struct trace_array *tr = iter->tr;
4213
4214         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4215                 return;
4216
4217         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4218                 return;
4219
4220         if (cpumask_available(iter->started) &&
4221             cpumask_test_cpu(iter->cpu, iter->started))
4222                 return;
4223
4224         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4225                 return;
4226
4227         if (cpumask_available(iter->started))
4228                 cpumask_set_cpu(iter->cpu, iter->started);
4229
4230         /* Don't print started cpu buffer for the first entry of the trace */
4231         if (iter->idx > 1)
4232                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4233                                 iter->cpu);
4234 }
4235
4236 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4237 {
4238         struct trace_array *tr = iter->tr;
4239         struct trace_seq *s = &iter->seq;
4240         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4241         struct trace_entry *entry;
4242         struct trace_event *event;
4243
4244         entry = iter->ent;
4245
4246         test_cpu_buff_start(iter);
4247
4248         event = ftrace_find_event(entry->type);
4249
4250         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4251                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4252                         trace_print_lat_context(iter);
4253                 else
4254                         trace_print_context(iter);
4255         }
4256
4257         if (trace_seq_has_overflowed(s))
4258                 return TRACE_TYPE_PARTIAL_LINE;
4259
4260         if (event)
4261                 return event->funcs->trace(iter, sym_flags, event);
4262
4263         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4264
4265         return trace_handle_return(s);
4266 }
4267
4268 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4269 {
4270         struct trace_array *tr = iter->tr;
4271         struct trace_seq *s = &iter->seq;
4272         struct trace_entry *entry;
4273         struct trace_event *event;
4274
4275         entry = iter->ent;
4276
4277         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4278                 trace_seq_printf(s, "%d %d %llu ",
4279                                  entry->pid, iter->cpu, iter->ts);
4280
4281         if (trace_seq_has_overflowed(s))
4282                 return TRACE_TYPE_PARTIAL_LINE;
4283
4284         event = ftrace_find_event(entry->type);
4285         if (event)
4286                 return event->funcs->raw(iter, 0, event);
4287
4288         trace_seq_printf(s, "%d ?\n", entry->type);
4289
4290         return trace_handle_return(s);
4291 }
4292
4293 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4294 {
4295         struct trace_array *tr = iter->tr;
4296         struct trace_seq *s = &iter->seq;
4297         unsigned char newline = '\n';
4298         struct trace_entry *entry;
4299         struct trace_event *event;
4300
4301         entry = iter->ent;
4302
4303         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4304                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4305                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4306                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4307                 if (trace_seq_has_overflowed(s))
4308                         return TRACE_TYPE_PARTIAL_LINE;
4309         }
4310
4311         event = ftrace_find_event(entry->type);
4312         if (event) {
4313                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4314                 if (ret != TRACE_TYPE_HANDLED)
4315                         return ret;
4316         }
4317
4318         SEQ_PUT_FIELD(s, newline);
4319
4320         return trace_handle_return(s);
4321 }
4322
4323 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4324 {
4325         struct trace_array *tr = iter->tr;
4326         struct trace_seq *s = &iter->seq;
4327         struct trace_entry *entry;
4328         struct trace_event *event;
4329
4330         entry = iter->ent;
4331
4332         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4333                 SEQ_PUT_FIELD(s, entry->pid);
4334                 SEQ_PUT_FIELD(s, iter->cpu);
4335                 SEQ_PUT_FIELD(s, iter->ts);
4336                 if (trace_seq_has_overflowed(s))
4337                         return TRACE_TYPE_PARTIAL_LINE;
4338         }
4339
4340         event = ftrace_find_event(entry->type);
4341         return event ? event->funcs->binary(iter, 0, event) :
4342                 TRACE_TYPE_HANDLED;
4343 }
4344
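/* Return 1 if the buffer(s) covered by the iterator have no entries to read. */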
4345 int trace_empty(struct trace_iterator *iter)
4346 {
4347         struct ring_buffer_iter *buf_iter;
4348         int cpu;
4349
4350         /* If we are looking at one CPU buffer, only check that one */
4351         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4352                 cpu = iter->cpu_file;
4353                 buf_iter = trace_buffer_iter(iter, cpu);
4354                 if (buf_iter) {
4355                         if (!ring_buffer_iter_empty(buf_iter))
4356                                 return 0;
4357                 } else {
4358                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4359                                 return 0;
4360                 }
4361                 return 1;
4362         }
4363
4364         for_each_tracing_cpu(cpu) {
4365                 buf_iter = trace_buffer_iter(iter, cpu);
4366                 if (buf_iter) {
4367                         if (!ring_buffer_iter_empty(buf_iter))
4368                                 return 0;
4369                 } else {
4370                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4371                                 return 0;
4372                 }
4373         }
4374
4375         return 1;
4376 }
4377
4378 /*  Called with trace_event_read_lock() held. */
4379 enum print_line_t print_trace_line(struct trace_iterator *iter)
4380 {
4381         struct trace_array *tr = iter->tr;
4382         unsigned long trace_flags = tr->trace_flags;
4383         enum print_line_t ret;
4384
4385         if (iter->lost_events) {
4386                 if (iter->lost_events == (unsigned long)-1)
4387                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4388                                          iter->cpu);
4389                 else
4390                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4391                                          iter->cpu, iter->lost_events);
4392                 if (trace_seq_has_overflowed(&iter->seq))
4393                         return TRACE_TYPE_PARTIAL_LINE;
4394         }
4395
4396         if (iter->trace && iter->trace->print_line) {
4397                 ret = iter->trace->print_line(iter);
4398                 if (ret != TRACE_TYPE_UNHANDLED)
4399                         return ret;
4400         }
4401
4402         if (iter->ent->type == TRACE_BPUTS &&
4403                         trace_flags & TRACE_ITER_PRINTK &&
4404                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4405                 return trace_print_bputs_msg_only(iter);
4406
4407         if (iter->ent->type == TRACE_BPRINT &&
4408                         trace_flags & TRACE_ITER_PRINTK &&
4409                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4410                 return trace_print_bprintk_msg_only(iter);
4411
4412         if (iter->ent->type == TRACE_PRINT &&
4413                         trace_flags & TRACE_ITER_PRINTK &&
4414                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4415                 return trace_print_printk_msg_only(iter);
4416
4417         if (trace_flags & TRACE_ITER_BIN)
4418                 return print_bin_fmt(iter);
4419
4420         if (trace_flags & TRACE_ITER_HEX)
4421                 return print_hex_fmt(iter);
4422
4423         if (trace_flags & TRACE_ITER_RAW)
4424                 return print_raw_fmt(iter);
4425
4426         return print_trace_fmt(iter);
4427 }
4428
4429 void trace_latency_header(struct seq_file *m)
4430 {
4431         struct trace_iterator *iter = m->private;
4432         struct trace_array *tr = iter->tr;
4433
4434         /* print nothing if the buffers are empty */
4435         if (trace_empty(iter))
4436                 return;
4437
4438         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4439                 print_trace_header(m, iter);
4440
4441         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4442                 print_lat_help_header(m);
4443 }
4444
4445 void trace_default_header(struct seq_file *m)
4446 {
4447         struct trace_iterator *iter = m->private;
4448         struct trace_array *tr = iter->tr;
4449         unsigned long trace_flags = tr->trace_flags;
4450
4451         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4452                 return;
4453
4454         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4455                 /* print nothing if the buffers are empty */
4456                 if (trace_empty(iter))
4457                         return;
4458                 print_trace_header(m, iter);
4459                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4460                         print_lat_help_header(m);
4461         } else {
4462                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4463                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4464                                 print_func_help_header_irq(iter->array_buffer,
4465                                                            m, trace_flags);
4466                         else
4467                                 print_func_help_header(iter->array_buffer, m,
4468                                                        trace_flags);
4469                 }
4470         }
4471 }
4472
4473 static void test_ftrace_alive(struct seq_file *m)
4474 {
4475         if (!ftrace_is_dead())
4476                 return;
4477         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4478                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4479 }
4480
4481 #ifdef CONFIG_TRACER_MAX_TRACE
4482 static void show_snapshot_main_help(struct seq_file *m)
4483 {
4484         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4485                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4486                     "#                      Takes a snapshot of the main buffer.\n"
4487                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4488                     "#                      (Doesn't have to be '2'; works with any number that\n"
4489                     "#                       is not a '0' or '1')\n");
4490 }
4491
4492 static void show_snapshot_percpu_help(struct seq_file *m)
4493 {
4494         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4495 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4496         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4497                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4498 #else
4499         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4500                     "#                     Must use main snapshot file to allocate.\n");
4501 #endif
4502         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4503                     "#                      (Doesn't have to be '2'; works with any number that\n"
4504                     "#                       is not a '0' or '1')\n");
4505 }
4506
4507 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4508 {
4509         if (iter->tr->allocated_snapshot)
4510                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4511         else
4512                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4513
4514         seq_puts(m, "# Snapshot commands:\n");
4515         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4516                 show_snapshot_main_help(m);
4517         else
4518                 show_snapshot_percpu_help(m);
4519 }
4520 #else
4521 /* Should never be called */
4522 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4523 #endif
4524
4525 static int s_show(struct seq_file *m, void *v)
4526 {
4527         struct trace_iterator *iter = v;
4528         int ret;
4529
4530         if (iter->ent == NULL) {
4531                 if (iter->tr) {
4532                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4533                         seq_puts(m, "#\n");
4534                         test_ftrace_alive(m);
4535                 }
4536                 if (iter->snapshot && trace_empty(iter))
4537                         print_snapshot_help(m, iter);
4538                 else if (iter->trace && iter->trace->print_header)
4539                         iter->trace->print_header(m);
4540                 else
4541                         trace_default_header(m);
4542
4543         } else if (iter->leftover) {
4544                 /*
4545                  * If we filled the seq_file buffer earlier, we
4546                  * want to just show it now.
4547                  */
4548                 ret = trace_print_seq(m, &iter->seq);
4549
4550                 /* ret should this time be zero, but you never know */
4551                 iter->leftover = ret;
4552
4553         } else {
4554                 print_trace_line(iter);
4555                 ret = trace_print_seq(m, &iter->seq);
4556                 /*
4557                  * If we overflow the seq_file buffer, then it will
4558                  * ask us for this data again at start up.
4559                  * Use that instead.
4560                  *  ret is 0 if seq_file write succeeded.
4561                  *        -1 otherwise.
4562                  */
4563                 iter->leftover = ret;
4564         }
4565
4566         return 0;
4567 }
4568
4569 /*
4570  * Should be used after trace_array_get(); trace_types_lock
4571  * ensures that i_cdev was already initialized.
4572  */
4573 static inline int tracing_get_cpu(struct inode *inode)
4574 {
4575         if (inode->i_cdev) /* See trace_create_cpu_file() */
4576                 return (long)inode->i_cdev - 1;
4577         return RING_BUFFER_ALL_CPUS;
4578 }
4579
4580 static const struct seq_operations tracer_seq_ops = {
4581         .start          = s_start,
4582         .next           = s_next,
4583         .stop           = s_stop,
4584         .show           = s_show,
4585 };
4586
4587 static struct trace_iterator *
4588 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4589 {
4590         struct trace_array *tr = inode->i_private;
4591         struct trace_iterator *iter;
4592         int cpu;
4593
4594         if (tracing_disabled)
4595                 return ERR_PTR(-ENODEV);
4596
4597         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4598         if (!iter)
4599                 return ERR_PTR(-ENOMEM);
4600
4601         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4602                                     GFP_KERNEL);
4603         if (!iter->buffer_iter)
4604                 goto release;
4605
4606         /*
4607          * trace_find_next_entry() may need to save off iter->ent.
4608          * It will place it into the iter->temp buffer. As most
4609          * events are smaller than 128 bytes, allocate a buffer of that
4610          * size. If one is greater, then trace_find_next_entry() will
4611          * allocate a new buffer to adjust for the bigger iter->ent.
4612          * It's not critical if the allocation fails here.
4613          */
4614         iter->temp = kmalloc(128, GFP_KERNEL);
4615         if (iter->temp)
4616                 iter->temp_size = 128;
4617
4618         /*
4619          * trace_event_printf() may need to modify the given format
4620          * string to replace %p with %px so that it shows the real
4621          * address instead of a hashed value. However, that is only
4622          * needed for event tracing; other tracers may not. Defer the allocation
4623          * until it is needed.
4624          */
4625         iter->fmt = NULL;
4626         iter->fmt_size = 0;
4627
4628         /*
4629          * We make a copy of the current tracer to avoid concurrent
4630          * changes on it while we are reading.
4631          */
4632         mutex_lock(&trace_types_lock);
4633         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4634         if (!iter->trace)
4635                 goto fail;
4636
4637         *iter->trace = *tr->current_trace;
4638
4639         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4640                 goto fail;
4641
4642         iter->tr = tr;
4643
4644 #ifdef CONFIG_TRACER_MAX_TRACE
4645         /* Currently only the top directory has a snapshot */
4646         if (tr->current_trace->print_max || snapshot)
4647                 iter->array_buffer = &tr->max_buffer;
4648         else
4649 #endif
4650                 iter->array_buffer = &tr->array_buffer;
4651         iter->snapshot = snapshot;
4652         iter->pos = -1;
4653         iter->cpu_file = tracing_get_cpu(inode);
4654         mutex_init(&iter->mutex);
4655
4656         /* Notify the tracer early, before we stop tracing. */
4657         if (iter->trace->open)
4658                 iter->trace->open(iter);
4659
4660         /* Annotate start of buffers if we had overruns */
4661         if (ring_buffer_overruns(iter->array_buffer->buffer))
4662                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4663
4664         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4665         if (trace_clocks[tr->clock_id].in_ns)
4666                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4667
4668         /*
4669          * If pause-on-trace is enabled, then stop the trace while
4670          * dumping, unless this is the "snapshot" file.
4671          */
4672         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4673                 tracing_stop_tr(tr);
4674
4675         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4676                 for_each_tracing_cpu(cpu) {
4677                         iter->buffer_iter[cpu] =
4678                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4679                                                          cpu, GFP_KERNEL);
4680                 }
4681                 ring_buffer_read_prepare_sync();
4682                 for_each_tracing_cpu(cpu) {
4683                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4684                         tracing_iter_reset(iter, cpu);
4685                 }
4686         } else {
4687                 cpu = iter->cpu_file;
4688                 iter->buffer_iter[cpu] =
4689                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4690                                                  cpu, GFP_KERNEL);
4691                 ring_buffer_read_prepare_sync();
4692                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4693                 tracing_iter_reset(iter, cpu);
4694         }
4695
4696         mutex_unlock(&trace_types_lock);
4697
4698         return iter;
4699
4700  fail:
4701         mutex_unlock(&trace_types_lock);
4702         kfree(iter->trace);
4703         kfree(iter->temp);
4704         kfree(iter->buffer_iter);
4705 release:
4706         seq_release_private(inode, file);
4707         return ERR_PTR(-ENOMEM);
4708 }
4709
4710 int tracing_open_generic(struct inode *inode, struct file *filp)
4711 {
4712         int ret;
4713
4714         ret = tracing_check_open_get_tr(NULL);
4715         if (ret)
4716                 return ret;
4717
4718         filp->private_data = inode->i_private;
4719         return 0;
4720 }
4721
4722 bool tracing_is_disabled(void)
4723 {
4724         return (tracing_disabled) ? true : false;
4725 }
4726
4727 /*
4728  * Open and update trace_array ref count.
4729  * Must have the current trace_array passed to it.
4730  */
4731 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4732 {
4733         struct trace_array *tr = inode->i_private;
4734         int ret;
4735
4736         ret = tracing_check_open_get_tr(tr);
4737         if (ret)
4738                 return ret;
4739
4740         filp->private_data = inode->i_private;
4741
4742         return 0;
4743 }
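
     /*
      * Illustrative sketch (hypothetical file, not used by this code
      * as-is): a tracefs file that pins its trace_array pairs this open
      * helper with tracing_release_generic_tr() in its file_operations:
      *
      *    static const struct file_operations example_tr_fops = {
      *            .open    = tracing_open_generic_tr,
      *            .read    = example_read,    (hypothetical read handler)
      *            .release = tracing_release_generic_tr,
      *    };
      *
      * tracing_cpumask_fops further down in this file is a real example
      * of this pairing.
      */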
4744
4745 static int tracing_release(struct inode *inode, struct file *file)
4746 {
4747         struct trace_array *tr = inode->i_private;
4748         struct seq_file *m = file->private_data;
4749         struct trace_iterator *iter;
4750         int cpu;
4751
4752         if (!(file->f_mode & FMODE_READ)) {
4753                 trace_array_put(tr);
4754                 return 0;
4755         }
4756
4757         /* Writes do not use seq_file */
4758         iter = m->private;
4759         mutex_lock(&trace_types_lock);
4760
4761         for_each_tracing_cpu(cpu) {
4762                 if (iter->buffer_iter[cpu])
4763                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4764         }
4765
4766         if (iter->trace && iter->trace->close)
4767                 iter->trace->close(iter);
4768
4769         if (!iter->snapshot && tr->stop_count)
4770                 /* reenable tracing if it was previously enabled */
4771                 tracing_start_tr(tr);
4772
4773         __trace_array_put(tr);
4774
4775         mutex_unlock(&trace_types_lock);
4776
4777         mutex_destroy(&iter->mutex);
4778         free_cpumask_var(iter->started);
4779         kfree(iter->fmt);
4780         kfree(iter->temp);
4781         kfree(iter->trace);
4782         kfree(iter->buffer_iter);
4783         seq_release_private(inode, file);
4784
4785         return 0;
4786 }
4787
4788 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4789 {
4790         struct trace_array *tr = inode->i_private;
4791
4792         trace_array_put(tr);
4793         return 0;
4794 }
4795
4796 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4797 {
4798         struct trace_array *tr = inode->i_private;
4799
4800         trace_array_put(tr);
4801
4802         return single_release(inode, file);
4803 }
4804
4805 static int tracing_open(struct inode *inode, struct file *file)
4806 {
4807         struct trace_array *tr = inode->i_private;
4808         struct trace_iterator *iter;
4809         int ret;
4810
4811         ret = tracing_check_open_get_tr(tr);
4812         if (ret)
4813                 return ret;
4814
4815         /* If this file was opened for write, then erase its contents */
4816         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4817                 int cpu = tracing_get_cpu(inode);
4818                 struct array_buffer *trace_buf = &tr->array_buffer;
4819
4820 #ifdef CONFIG_TRACER_MAX_TRACE
4821                 if (tr->current_trace->print_max)
4822                         trace_buf = &tr->max_buffer;
4823 #endif
4824
4825                 if (cpu == RING_BUFFER_ALL_CPUS)
4826                         tracing_reset_online_cpus(trace_buf);
4827                 else
4828                         tracing_reset_cpu(trace_buf, cpu);
4829         }
4830
4831         if (file->f_mode & FMODE_READ) {
4832                 iter = __tracing_open(inode, file, false);
4833                 if (IS_ERR(iter))
4834                         ret = PTR_ERR(iter);
4835                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4836                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4837         }
4838
4839         if (ret < 0)
4840                 trace_array_put(tr);
4841
4842         return ret;
4843 }
4844
4845 /*
4846  * Some tracers are not suitable for instance buffers.
4847  * A tracer is always available for the global array (toplevel),
4848  * and available to an instance only if it explicitly states that it is.
4849  */
4850 static bool
4851 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4852 {
4853         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4854 }
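
     /*
      * Illustrative sketch (hypothetical tracer, not part of this file):
      * a tracer opts in to instance buffers by setting .allow_instances
      * in its struct tracer before registering it, e.g.:
      *
      *    static struct tracer example_tracer __read_mostly = {
      *            .name            = "example",
      *            .init            = example_init,   (hypothetical init)
      *            .allow_instances = true,
      *    };
      *
      * Without that flag, trace_ok_for_array() only accepts the tracer
      * for the global (top level) trace array.
      */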
4855
4856 /* Find the next tracer that this trace array may use */
4857 static struct tracer *
4858 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4859 {
4860         while (t && !trace_ok_for_array(t, tr))
4861                 t = t->next;
4862
4863         return t;
4864 }
4865
4866 static void *
4867 t_next(struct seq_file *m, void *v, loff_t *pos)
4868 {
4869         struct trace_array *tr = m->private;
4870         struct tracer *t = v;
4871
4872         (*pos)++;
4873
4874         if (t)
4875                 t = get_tracer_for_array(tr, t->next);
4876
4877         return t;
4878 }
4879
4880 static void *t_start(struct seq_file *m, loff_t *pos)
4881 {
4882         struct trace_array *tr = m->private;
4883         struct tracer *t;
4884         loff_t l = 0;
4885
4886         mutex_lock(&trace_types_lock);
4887
4888         t = get_tracer_for_array(tr, trace_types);
4889         for (; t && l < *pos; t = t_next(m, t, &l))
4890                 ;
4891
4892         return t;
4893 }
4894
4895 static void t_stop(struct seq_file *m, void *p)
4896 {
4897         mutex_unlock(&trace_types_lock);
4898 }
4899
4900 static int t_show(struct seq_file *m, void *v)
4901 {
4902         struct tracer *t = v;
4903
4904         if (!t)
4905                 return 0;
4906
4907         seq_puts(m, t->name);
4908         if (t->next)
4909                 seq_putc(m, ' ');
4910         else
4911                 seq_putc(m, '\n');
4912
4913         return 0;
4914 }
4915
4916 static const struct seq_operations show_traces_seq_ops = {
4917         .start          = t_start,
4918         .next           = t_next,
4919         .stop           = t_stop,
4920         .show           = t_show,
4921 };
4922
4923 static int show_traces_open(struct inode *inode, struct file *file)
4924 {
4925         struct trace_array *tr = inode->i_private;
4926         struct seq_file *m;
4927         int ret;
4928
4929         ret = tracing_check_open_get_tr(tr);
4930         if (ret)
4931                 return ret;
4932
4933         ret = seq_open(file, &show_traces_seq_ops);
4934         if (ret) {
4935                 trace_array_put(tr);
4936                 return ret;
4937         }
4938
4939         m = file->private_data;
4940         m->private = tr;
4941
4942         return 0;
4943 }
4944
4945 static int show_traces_release(struct inode *inode, struct file *file)
4946 {
4947         struct trace_array *tr = inode->i_private;
4948
4949         trace_array_put(tr);
4950         return seq_release(inode, file);
4951 }
4952
4953 static ssize_t
4954 tracing_write_stub(struct file *filp, const char __user *ubuf,
4955                    size_t count, loff_t *ppos)
4956 {
4957         return count;
4958 }
4959
4960 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4961 {
4962         int ret;
4963
4964         if (file->f_mode & FMODE_READ)
4965                 ret = seq_lseek(file, offset, whence);
4966         else
4967                 file->f_pos = ret = 0;
4968
4969         return ret;
4970 }
4971
4972 static const struct file_operations tracing_fops = {
4973         .open           = tracing_open,
4974         .read           = seq_read,
4975         .write          = tracing_write_stub,
4976         .llseek         = tracing_lseek,
4977         .release        = tracing_release,
4978 };
4979
4980 static const struct file_operations show_traces_fops = {
4981         .open           = show_traces_open,
4982         .read           = seq_read,
4983         .llseek         = seq_lseek,
4984         .release        = show_traces_release,
4985 };
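
     /*
      * show_traces_fops backs the "available_tracers" file (created
      * elsewhere in this file). Reading it yields the registered tracer
      * names on one space-separated line, e.g. (the exact list depends
      * on the kernel configuration):
      *
      *    # cat /sys/kernel/tracing/available_tracers
      *    function_graph function nop
      */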
4986
4987 static ssize_t
4988 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4989                      size_t count, loff_t *ppos)
4990 {
4991         struct trace_array *tr = file_inode(filp)->i_private;
4992         char *mask_str;
4993         int len;
4994
4995         len = snprintf(NULL, 0, "%*pb\n",
4996                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4997         mask_str = kmalloc(len, GFP_KERNEL);
4998         if (!mask_str)
4999                 return -ENOMEM;
5000
5001         len = snprintf(mask_str, len, "%*pb\n",
5002                        cpumask_pr_args(tr->tracing_cpumask));
5003         if (len >= count) {
5004                 count = -EINVAL;
5005                 goto out_err;
5006         }
5007         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5008
5009 out_err:
5010         kfree(mask_str);
5011
5012         return count;
5013 }
5014
5015 int tracing_set_cpumask(struct trace_array *tr,
5016                         cpumask_var_t tracing_cpumask_new)
5017 {
5018         int cpu;
5019
5020         if (!tr)
5021                 return -EINVAL;
5022
5023         local_irq_disable();
5024         arch_spin_lock(&tr->max_lock);
5025         for_each_tracing_cpu(cpu) {
5026                 /*
5027                  * Increase/decrease the disabled counter if we are
5028                  * about to flip a bit in the cpumask:
5029                  */
5030                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5031                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5032                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5033                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5034                 }
5035                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5036                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5037                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5038                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5039                 }
5040         }
5041         arch_spin_unlock(&tr->max_lock);
5042         local_irq_enable();
5043
5044         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5045
5046         return 0;
5047 }
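
     /*
      * From user space this is driven through the "tracing_cpumask" file,
      * which takes a hex CPU mask. For example, to limit tracing to CPUs
      * 0 and 1 (assuming tracefs is mounted at /sys/kernel/tracing):
      *
      *    # echo 3 > /sys/kernel/tracing/tracing_cpumask
      */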
5048
5049 static ssize_t
5050 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5051                       size_t count, loff_t *ppos)
5052 {
5053         struct trace_array *tr = file_inode(filp)->i_private;
5054         cpumask_var_t tracing_cpumask_new;
5055         int err;
5056
5057         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5058                 return -ENOMEM;
5059
5060         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5061         if (err)
5062                 goto err_free;
5063
5064         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5065         if (err)
5066                 goto err_free;
5067
5068         free_cpumask_var(tracing_cpumask_new);
5069
5070         return count;
5071
5072 err_free:
5073         free_cpumask_var(tracing_cpumask_new);
5074
5075         return err;
5076 }
5077
5078 static const struct file_operations tracing_cpumask_fops = {
5079         .open           = tracing_open_generic_tr,
5080         .read           = tracing_cpumask_read,
5081         .write          = tracing_cpumask_write,
5082         .release        = tracing_release_generic_tr,
5083         .llseek         = generic_file_llseek,
5084 };
5085
5086 static int tracing_trace_options_show(struct seq_file *m, void *v)
5087 {
5088         struct tracer_opt *trace_opts;
5089         struct trace_array *tr = m->private;
5090         u32 tracer_flags;
5091         int i;
5092
5093         mutex_lock(&trace_types_lock);
5094         tracer_flags = tr->current_trace->flags->val;
5095         trace_opts = tr->current_trace->flags->opts;
5096
5097         for (i = 0; trace_options[i]; i++) {
5098                 if (tr->trace_flags & (1 << i))
5099                         seq_printf(m, "%s\n", trace_options[i]);
5100                 else
5101                         seq_printf(m, "no%s\n", trace_options[i]);
5102         }
5103
5104         for (i = 0; trace_opts[i].name; i++) {
5105                 if (tracer_flags & trace_opts[i].bit)
5106                         seq_printf(m, "%s\n", trace_opts[i].name);
5107                 else
5108                         seq_printf(m, "no%s\n", trace_opts[i].name);
5109         }
5110         mutex_unlock(&trace_types_lock);
5111
5112         return 0;
5113 }
5114
5115 static int __set_tracer_option(struct trace_array *tr,
5116                                struct tracer_flags *tracer_flags,
5117                                struct tracer_opt *opts, int neg)
5118 {
5119         struct tracer *trace = tracer_flags->trace;
5120         int ret;
5121
5122         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5123         if (ret)
5124                 return ret;
5125
5126         if (neg)
5127                 tracer_flags->val &= ~opts->bit;
5128         else
5129                 tracer_flags->val |= opts->bit;
5130         return 0;
5131 }
5132
5133 /* Try to assign a tracer-specific option */
5134 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5135 {
5136         struct tracer *trace = tr->current_trace;
5137         struct tracer_flags *tracer_flags = trace->flags;
5138         struct tracer_opt *opts = NULL;
5139         int i;
5140
5141         for (i = 0; tracer_flags->opts[i].name; i++) {
5142                 opts = &tracer_flags->opts[i];
5143
5144                 if (strcmp(cmp, opts->name) == 0)
5145                         return __set_tracer_option(tr, trace->flags, opts, neg);
5146         }
5147
5148         return -EINVAL;
5149 }
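
     /*
      * Tracer-specific flags are toggled through the same "trace_options"
      * interface as the core flags. For example, assuming the function
      * tracer is current (it provides a "func_stack_trace" option):
      *
      *    # echo function > current_tracer
      *    # echo func_stack_trace > trace_options
      *    # echo nofunc_stack_trace > trace_options
      */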
5150
5151 /* Some tracers require overwrite to stay enabled */
5152 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5153 {
5154         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5155                 return -1;
5156
5157         return 0;
5158 }
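
     /*
      * Illustrative sketch (hypothetical tracer, not part of this file):
      * a tracer that must keep overwrite mode on calls this helper from
      * its flag_changed callback, letting set_tracer_flag() below reject
      * an attempt to clear the "overwrite" option while it is active:
      *
      *    static int example_flag_changed(struct trace_array *tr, u32 mask,
      *                                    int set)
      *    {
      *            return trace_keep_overwrite(tr->current_trace, mask, set);
      *    }
      *
      *    static struct tracer example_tracer __read_mostly = {
      *            .name           = "example",
      *            .flag_changed   = example_flag_changed,
      *    };
      */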
5159
5160 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5161 {
5162         if ((mask == TRACE_ITER_RECORD_TGID) ||
5163             (mask == TRACE_ITER_RECORD_CMD))
5164                 lockdep_assert_held(&event_mutex);
5165
5166         /* do nothing if the flag is already in the requested state */
5167         if (!!(tr->trace_flags & mask) == !!enabled)
5168                 return 0;
5169
5170         /* Give the tracer a chance to approve the change */
5171         if (tr->current_trace->flag_changed)
5172                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5173                         return -EINVAL;
5174
5175         if (enabled)
5176                 tr->trace_flags |= mask;
5177         else
5178                 tr->trace_flags &= ~mask;
5179
5180         if (mask == TRACE_ITER_RECORD_CMD)
5181                 trace_event_enable_cmd_record(enabled);
5182
5183         if (mask == TRACE_ITER_RECORD_TGID) {
5184                 if (!tgid_map)
5185                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5186                                            sizeof(*tgid_map),
5187                                            GFP_KERNEL);
5188                 if (!tgid_map) {
5189                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5190                         return -ENOMEM;
5191                 }
5192
5193                 trace_event_enable_tgid_record(enabled);
5194         }
5195
5196         if (mask == TRACE_ITER_EVENT_FORK)
5197                 trace_event_follow_fork(tr, enabled);
5198
5199         if (mask == TRACE_ITER_FUNC_FORK)
5200                 ftrace_pid_follow_fork(tr, enabled);
5201
5202         if (mask == TRACE_ITER_OVERWRITE) {
5203                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5204 #ifdef CONFIG_TRACER_MAX_TRACE
5205                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5206 #endif
5207         }
5208
5209         if (mask == TRACE_ITER_PRINTK) {
5210                 trace_printk_start_stop_comm(enabled);
5211                 trace_printk_control(enabled);
5212         }
5213
5214         return 0;
5215 }
5216
5217 int trace_set_options(struct trace_array *tr, char *option)
5218 {
5219         char *cmp;
5220         int neg = 0;
5221         int ret;
5222         size_t orig_len = strlen(option);
5223         int len;
5224
5225         cmp = strstrip(option);
5226
5227         len = str_has_prefix(cmp, "no");
5228         if (len)
5229                 neg = 1;
5230
5231         cmp += len;
5232
5233         mutex_lock(&event_mutex);
5234         mutex_lock(&trace_types_lock);
5235
5236         ret = match_string(trace_options, -1, cmp);
5237         /* If the option is not a core option, test the tracer-specific options */
5238         if (ret < 0)
5239                 ret = set_tracer_option(tr, cmp, neg);
5240         else
5241                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5242
5243         mutex_unlock(&trace_types_lock);
5244         mutex_unlock(&event_mutex);
5245
5246         /*
5247          * If the first trailing whitespace is replaced with '\0' by strstrip,
5248          * turn it back into a space.
5249          */
5250         if (orig_len > strlen(option))
5251                 option[strlen(option)] = ' ';
5252
5253         return ret;
5254 }
5255
5256 static void __init apply_trace_boot_options(void)
5257 {
5258         char *buf = trace_boot_options_buf;
5259         char *option;
5260
5261         while (true) {
5262                 option = strsep(&buf, ",");
5263
5264                 if (!option)
5265                         break;
5266
5267                 if (*option)
5268                         trace_set_options(&global_trace, option);
5269
5270                 /* Put back the comma to allow this to be called again */
5271                 if (buf)
5272                         *(buf - 1) = ',';
5273         }
5274 }
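
     /*
      * The boot-time string parsed above comes from the "trace_options="
      * kernel command line parameter; the names are the same ones the
      * trace_options file accepts, for example:
      *
      *    trace_options=sym-addr,stacktrace,noprint-parent
      */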
5275
5276 static ssize_t
5277 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5278                         size_t cnt, loff_t *ppos)
5279 {
5280         struct seq_file *m = filp->private_data;
5281         struct trace_array *tr = m->private;
5282         char buf[64];
5283         int ret;
5284
5285         if (cnt >= sizeof(buf))
5286                 return -EINVAL;
5287
5288         if (copy_from_user(buf, ubuf, cnt))
5289                 return -EFAULT;
5290
5291         buf[cnt] = 0;
5292
5293         ret = trace_set_options(tr, buf);
5294         if (ret < 0)
5295                 return ret;
5296
5297         *ppos += cnt;
5298
5299         return cnt;
5300 }
5301
5302 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5303 {
5304         struct trace_array *tr = inode->i_private;
5305         int ret;
5306
5307         ret = tracing_check_open_get_tr(tr);
5308         if (ret)
5309                 return ret;
5310
5311         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5312         if (ret < 0)
5313                 trace_array_put(tr);
5314
5315         return ret;
5316 }
5317
5318 static const struct file_operations tracing_iter_fops = {
5319         .open           = tracing_trace_options_open,
5320         .read           = seq_read,
5321         .llseek         = seq_lseek,
5322         .release        = tracing_single_release_tr,
5323         .write          = tracing_trace_options_write,
5324 };
5325
5326 static const char readme_msg[] =
5327         "tracing mini-HOWTO:\n\n"
5328         "# echo 0 > tracing_on : quick way to disable tracing\n"
5329         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5330         " Important files:\n"
5331         "  trace\t\t\t- The static contents of the buffer\n"
5332         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5333         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5334         "  current_tracer\t- function and latency tracers\n"
5335         "  available_tracers\t- list of configured tracers for current_tracer\n"
5336         "  error_log\t- error log for failed commands (that support it)\n"
5337         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5338         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5339         "  trace_clock\t\t-change the clock used to order events\n"
5340         "       local:   Per cpu clock but may not be synced across CPUs\n"
5341         "      global:   Synced across CPUs but slows tracing down.\n"
5342         "     counter:   Not a clock, but just an increment\n"
5343         "      uptime:   Jiffy counter from time of boot\n"
5344         "        perf:   Same clock that perf events use\n"
5345 #ifdef CONFIG_X86_64
5346         "     x86-tsc:   TSC cycle counter\n"
5347 #endif
5348         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5349         "       delta:   Delta difference against a buffer-wide timestamp\n"
5350         "    absolute:   Absolute (standalone) timestamp\n"
5351         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5352         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5353         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5354         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5355         "\t\t\t  Remove sub-buffer with rmdir\n"
5356         "  trace_options\t\t- Set format or modify how tracing happens\n"
5357         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5358         "\t\t\t  option name\n"
5359         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5360 #ifdef CONFIG_DYNAMIC_FTRACE
5361         "\n  available_filter_functions - list of functions that can be filtered on\n"
5362         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5363         "\t\t\t  functions\n"
5364         "\t     accepts: func_full_name or glob-matching-pattern\n"
5365         "\t     modules: Can select a group via module\n"
5366         "\t      Format: :mod:<module-name>\n"
5367         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5368         "\t    triggers: a command to perform when function is hit\n"
5369         "\t      Format: <function>:<trigger>[:count]\n"
5370         "\t     trigger: traceon, traceoff\n"
5371         "\t\t      enable_event:<system>:<event>\n"
5372         "\t\t      disable_event:<system>:<event>\n"
5373 #ifdef CONFIG_STACKTRACE
5374         "\t\t      stacktrace\n"
5375 #endif
5376 #ifdef CONFIG_TRACER_SNAPSHOT
5377         "\t\t      snapshot\n"
5378 #endif
5379         "\t\t      dump\n"
5380         "\t\t      cpudump\n"
5381         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5382         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5383         "\t     The first one will disable tracing every time do_fault is hit\n"
5384         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5385         "\t       The first time do trap is hit and it disables tracing, the\n"
5386         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5387         "\t       the counter will not decrement. It only decrements when the\n"
5388         "\t       trigger did work\n"
5389         "\t     To remove trigger without count:\n"
5390         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5391         "\t     To remove trigger with a count:\n"
5392         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5393         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5394         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5395         "\t    modules: Can select a group via module command :mod:\n"
5396         "\t    Does not accept triggers\n"
5397 #endif /* CONFIG_DYNAMIC_FTRACE */
5398 #ifdef CONFIG_FUNCTION_TRACER
5399         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5400         "\t\t    (function)\n"
5401         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5402         "\t\t    (function)\n"
5403 #endif
5404 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5405         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5406         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5407         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5408 #endif
5409 #ifdef CONFIG_TRACER_SNAPSHOT
5410         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5411         "\t\t\t  snapshot buffer. Read the contents for more\n"
5412         "\t\t\t  information\n"
5413 #endif
5414 #ifdef CONFIG_STACK_TRACER
5415         "  stack_trace\t\t- Shows the max stack trace when active\n"
5416         "  stack_max_size\t- Shows current max stack size that was traced\n"
5417         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5418         "\t\t\t  new trace)\n"
5419 #ifdef CONFIG_DYNAMIC_FTRACE
5420         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5421         "\t\t\t  traces\n"
5422 #endif
5423 #endif /* CONFIG_STACK_TRACER */
5424 #ifdef CONFIG_DYNAMIC_EVENTS
5425         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5426         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5427 #endif
5428 #ifdef CONFIG_KPROBE_EVENTS
5429         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5430         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5431 #endif
5432 #ifdef CONFIG_UPROBE_EVENTS
5433         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5434         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5435 #endif
5436 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5437         "\t  accepts: event-definitions (one definition per line)\n"
5438         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5439         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5440 #ifdef CONFIG_HIST_TRIGGERS
5441         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5442 #endif
5443         "\t           -:[<group>/]<event>\n"
5444 #ifdef CONFIG_KPROBE_EVENTS
5445         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5446   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5447 #endif
5448 #ifdef CONFIG_UPROBE_EVENTS
5449   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5450 #endif
5451         "\t     args: <name>=fetcharg[:type]\n"
5452         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5453 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5454         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5455 #else
5456         "\t           $stack<index>, $stack, $retval, $comm,\n"
5457 #endif
5458         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5459         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5460         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5461         "\t           <type>\\[<array-size>\\]\n"
5462 #ifdef CONFIG_HIST_TRIGGERS
5463         "\t    field: <stype> <name>;\n"
5464         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5465         "\t           [unsigned] char/int/long\n"
5466 #endif
5467 #endif
5468         "  events/\t\t- Directory containing all trace event subsystems:\n"
5469         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5470         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5471         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5472         "\t\t\t  events\n"
5473         "      filter\t\t- If set, only events passing filter are traced\n"
5474         "  events/<system>/<event>/\t- Directory containing control files for\n"
5475         "\t\t\t  <event>:\n"
5476         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5477         "      filter\t\t- If set, only events passing filter are traced\n"
5478         "      trigger\t\t- If set, a command to perform when event is hit\n"
5479         "\t    Format: <trigger>[:count][if <filter>]\n"
5480         "\t   trigger: traceon, traceoff\n"
5481         "\t            enable_event:<system>:<event>\n"
5482         "\t            disable_event:<system>:<event>\n"
5483 #ifdef CONFIG_HIST_TRIGGERS
5484         "\t            enable_hist:<system>:<event>\n"
5485         "\t            disable_hist:<system>:<event>\n"
5486 #endif
5487 #ifdef CONFIG_STACKTRACE
5488         "\t\t    stacktrace\n"
5489 #endif
5490 #ifdef CONFIG_TRACER_SNAPSHOT
5491         "\t\t    snapshot\n"
5492 #endif
5493 #ifdef CONFIG_HIST_TRIGGERS
5494         "\t\t    hist (see below)\n"
5495 #endif
5496         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5497         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5498         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5499         "\t                  events/block/block_unplug/trigger\n"
5500         "\t   The first disables tracing every time block_unplug is hit.\n"
5501         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5502         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5503         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5504         "\t   Like function triggers, the counter is only decremented if it\n"
5505         "\t    enabled or disabled tracing.\n"
5506         "\t   To remove a trigger without a count:\n"
5507         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5508         "\t   To remove a trigger with a count:\n"
5509         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5510         "\t   Filters can be ignored when removing a trigger.\n"
5511 #ifdef CONFIG_HIST_TRIGGERS
5512         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5513         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5514         "\t            [:values=<field1[,field2,...]>]\n"
5515         "\t            [:sort=<field1[,field2,...]>]\n"
5516         "\t            [:size=#entries]\n"
5517         "\t            [:pause][:continue][:clear]\n"
5518         "\t            [:name=histname1]\n"
5519         "\t            [:<handler>.<action>]\n"
5520         "\t            [if <filter>]\n\n"
5521         "\t    When a matching event is hit, an entry is added to a hash\n"
5522         "\t    table using the key(s) and value(s) named, and the value of a\n"
5523         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5524         "\t    correspond to fields in the event's format description.  Keys\n"
5525         "\t    can be any field, or the special string 'stacktrace'.\n"
5526         "\t    Compound keys consisting of up to two fields can be specified\n"
5527         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5528         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5529         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5530         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5531         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5532         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5533         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5534         "\t    its histogram data will be shared with other triggers of the\n"
5535         "\t    same name, and trigger hits will update this common data.\n\n"
5536         "\t    Reading the 'hist' file for the event will dump the hash\n"
5537         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5538         "\t    triggers attached to an event, there will be a table for each\n"
5539         "\t    trigger in the output.  The table displayed for a named\n"
5540         "\t    trigger will be the same as any other instance having the\n"
5541         "\t    same name.  The default format used to display a given field\n"
5542         "\t    can be modified by appending any of the following modifiers\n"
5543         "\t    to the field name, as applicable:\n\n"
5544         "\t            .hex        display a number as a hex value\n"
5545         "\t            .sym        display an address as a symbol\n"
5546         "\t            .sym-offset display an address as a symbol and offset\n"
5547         "\t            .execname   display a common_pid as a program name\n"
5548         "\t            .syscall    display a syscall id as a syscall name\n"
5549         "\t            .log2       display log2 value rather than raw number\n"
5550         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5551         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5552         "\t    trigger or to start a hist trigger but not log any events\n"
5553         "\t    until told to do so.  'continue' can be used to start or\n"
5554         "\t    restart a paused hist trigger.\n\n"
5555         "\t    The 'clear' parameter will clear the contents of a running\n"
5556         "\t    hist trigger and leave its current paused/active state\n"
5557         "\t    unchanged.\n\n"
5558         "\t    The enable_hist and disable_hist triggers can be used to\n"
5559         "\t    have one event conditionally start and stop another event's\n"
5560         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5561         "\t    the enable_event and disable_event triggers.\n\n"
5562         "\t    Hist trigger handlers and actions are executed whenever a\n"
5563         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5564         "\t        <handler>.<action>\n\n"
5565         "\t    The available handlers are:\n\n"
5566         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5567         "\t        onmax(var)               - invoke if var exceeds current max\n"
5568         "\t        onchange(var)            - invoke action if var changes\n\n"
5569         "\t    The available actions are:\n\n"
5570         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5571         "\t        save(field,...)                      - save current event fields\n"
5572 #ifdef CONFIG_TRACER_SNAPSHOT
5573         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5574 #endif
5575 #ifdef CONFIG_SYNTH_EVENTS
5576         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5577         "\t  Write into this file to define/undefine new synthetic events.\n"
5578         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5579 #endif
5580 #endif
5581 ;
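
     /*
      * A concrete hist trigger example matching the help text above
      * (assumes CONFIG_HIST_TRIGGERS and the kmem:kmalloc event; the
      * field names come from that event's format file):
      *
      *    # cd /sys/kernel/tracing
      *    # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
      *          > events/kmem/kmalloc/trigger
      *    # cat events/kmem/kmalloc/hist
      */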
5582
5583 static ssize_t
5584 tracing_readme_read(struct file *filp, char __user *ubuf,
5585                        size_t cnt, loff_t *ppos)
5586 {
5587         return simple_read_from_buffer(ubuf, cnt, ppos,
5588                                         readme_msg, strlen(readme_msg));
5589 }
5590
5591 static const struct file_operations tracing_readme_fops = {
5592         .open           = tracing_open_generic,
5593         .read           = tracing_readme_read,
5594         .llseek         = generic_file_llseek,
5595 };
5596
5597 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5598 {
5599         int *ptr = v;
5600
5601         if (*pos || m->count)
5602                 ptr++;
5603
5604         (*pos)++;
5605
5606         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5607                 if (trace_find_tgid(*ptr))
5608                         return ptr;
5609         }
5610
5611         return NULL;
5612 }
5613
5614 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5615 {
5616         void *v;
5617         loff_t l = 0;
5618
5619         if (!tgid_map)
5620                 return NULL;
5621
5622         v = &tgid_map[0];
5623         while (l <= *pos) {
5624                 v = saved_tgids_next(m, v, &l);
5625                 if (!v)
5626                         return NULL;
5627         }
5628
5629         return v;
5630 }
5631
5632 static void saved_tgids_stop(struct seq_file *m, void *v)
5633 {
5634 }
5635
5636 static int saved_tgids_show(struct seq_file *m, void *v)
5637 {
5638         int pid = (int *)v - tgid_map;
5639
5640         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5641         return 0;
5642 }
5643
5644 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5645         .start          = saved_tgids_start,
5646         .stop           = saved_tgids_stop,
5647         .next           = saved_tgids_next,
5648         .show           = saved_tgids_show,
5649 };
5650
5651 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5652 {
5653         int ret;
5654
5655         ret = tracing_check_open_get_tr(NULL);
5656         if (ret)
5657                 return ret;
5658
5659         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5660 }
5661
5662
5663 static const struct file_operations tracing_saved_tgids_fops = {
5664         .open           = tracing_saved_tgids_open,
5665         .read           = seq_read,
5666         .llseek         = seq_lseek,
5667         .release        = seq_release,
5668 };
5669
5670 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5671 {
5672         unsigned int *ptr = v;
5673
5674         if (*pos || m->count)
5675                 ptr++;
5676
5677         (*pos)++;
5678
5679         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5680              ptr++) {
5681                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5682                         continue;
5683
5684                 return ptr;
5685         }
5686
5687         return NULL;
5688 }
5689
5690 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5691 {
5692         void *v;
5693         loff_t l = 0;
5694
5695         preempt_disable();
5696         arch_spin_lock(&trace_cmdline_lock);
5697
5698         v = &savedcmd->map_cmdline_to_pid[0];
5699         while (l <= *pos) {
5700                 v = saved_cmdlines_next(m, v, &l);
5701                 if (!v)
5702                         return NULL;
5703         }
5704
5705         return v;
5706 }
5707
5708 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5709 {
5710         arch_spin_unlock(&trace_cmdline_lock);
5711         preempt_enable();
5712 }
5713
5714 static int saved_cmdlines_show(struct seq_file *m, void *v)
5715 {
5716         char buf[TASK_COMM_LEN];
5717         unsigned int *pid = v;
5718
5719         __trace_find_cmdline(*pid, buf);
5720         seq_printf(m, "%d %s\n", *pid, buf);
5721         return 0;
5722 }
5723
5724 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5725         .start          = saved_cmdlines_start,
5726         .next           = saved_cmdlines_next,
5727         .stop           = saved_cmdlines_stop,
5728         .show           = saved_cmdlines_show,
5729 };
5730
5731 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5732 {
5733         int ret;
5734
5735         ret = tracing_check_open_get_tr(NULL);
5736         if (ret)
5737                 return ret;
5738
5739         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5740 }
5741
5742 static const struct file_operations tracing_saved_cmdlines_fops = {
5743         .open           = tracing_saved_cmdlines_open,
5744         .read           = seq_read,
5745         .llseek         = seq_lseek,
5746         .release        = seq_release,
5747 };
5748
5749 static ssize_t
5750 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5751                                  size_t cnt, loff_t *ppos)
5752 {
5753         char buf[64];
5754         int r;
5755
5756         arch_spin_lock(&trace_cmdline_lock);
5757         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5758         arch_spin_unlock(&trace_cmdline_lock);
5759
5760         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5761 }
5762
5763 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5764 {
5765         kfree(s->saved_cmdlines);
5766         kfree(s->map_cmdline_to_pid);
5767         kfree(s);
5768 }
5769
5770 static int tracing_resize_saved_cmdlines(unsigned int val)
5771 {
5772         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5773
5774         s = kmalloc(sizeof(*s), GFP_KERNEL);
5775         if (!s)
5776                 return -ENOMEM;
5777
5778         if (allocate_cmdlines_buffer(val, s) < 0) {
5779                 kfree(s);
5780                 return -ENOMEM;
5781         }
5782
5783         arch_spin_lock(&trace_cmdline_lock);
5784         savedcmd_temp = savedcmd;
5785         savedcmd = s;
5786         arch_spin_unlock(&trace_cmdline_lock);
5787         free_saved_cmdlines_buffer(savedcmd_temp);
5788
5789         return 0;
5790 }
5791
5792 static ssize_t
5793 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5794                                   size_t cnt, loff_t *ppos)
5795 {
5796         unsigned long val;
5797         int ret;
5798
5799         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5800         if (ret)
5801                 return ret;
5802
5803         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5804         if (!val || val > PID_MAX_DEFAULT)
5805                 return -EINVAL;
5806
5807         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5808         if (ret < 0)
5809                 return ret;
5810
5811         *ppos += cnt;
5812
5813         return cnt;
5814 }
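
     /*
      * Example usage (paths assume tracefs at /sys/kernel/tracing):
      * growing the saved comm cache so more pids resolve to task names
      * in the trace output:
      *
      *    # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
      */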
5815
5816 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5817         .open           = tracing_open_generic,
5818         .read           = tracing_saved_cmdlines_size_read,
5819         .write          = tracing_saved_cmdlines_size_write,
5820 };
5821
5822 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5823 static union trace_eval_map_item *
5824 update_eval_map(union trace_eval_map_item *ptr)
5825 {
5826         if (!ptr->map.eval_string) {
5827                 if (ptr->tail.next) {
5828                         ptr = ptr->tail.next;
5829                         /* Set ptr to the next real item (skip head) */
5830                         ptr++;
5831                 } else
5832                         return NULL;
5833         }
5834         return ptr;
5835 }
5836
5837 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5838 {
5839         union trace_eval_map_item *ptr = v;
5840
5841         /*
5842          * Paranoid! If ptr points to end, we don't want to increment past it.
5843          * This really should never happen.
5844          */
5845         (*pos)++;
5846         ptr = update_eval_map(ptr);
5847         if (WARN_ON_ONCE(!ptr))
5848                 return NULL;
5849
5850         ptr++;
5851         ptr = update_eval_map(ptr);
5852
5853         return ptr;
5854 }
5855
5856 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5857 {
5858         union trace_eval_map_item *v;
5859         loff_t l = 0;
5860
5861         mutex_lock(&trace_eval_mutex);
5862
5863         v = trace_eval_maps;
5864         if (v)
5865                 v++;
5866
5867         while (v && l < *pos) {
5868                 v = eval_map_next(m, v, &l);
5869         }
5870
5871         return v;
5872 }
5873
5874 static void eval_map_stop(struct seq_file *m, void *v)
5875 {
5876         mutex_unlock(&trace_eval_mutex);
5877 }
5878
5879 static int eval_map_show(struct seq_file *m, void *v)
5880 {
5881         union trace_eval_map_item *ptr = v;
5882
5883         seq_printf(m, "%s %ld (%s)\n",
5884                    ptr->map.eval_string, ptr->map.eval_value,
5885                    ptr->map.system);
5886
5887         return 0;
5888 }
5889
5890 static const struct seq_operations tracing_eval_map_seq_ops = {
5891         .start          = eval_map_start,
5892         .next           = eval_map_next,
5893         .stop           = eval_map_stop,
5894         .show           = eval_map_show,
5895 };
5896
5897 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5898 {
5899         int ret;
5900
5901         ret = tracing_check_open_get_tr(NULL);
5902         if (ret)
5903                 return ret;
5904
5905         return seq_open(filp, &tracing_eval_map_seq_ops);
5906 }
5907
5908 static const struct file_operations tracing_eval_map_fops = {
5909         .open           = tracing_eval_map_open,
5910         .read           = seq_read,
5911         .llseek         = seq_lseek,
5912         .release        = seq_release,
5913 };
5914
5915 static inline union trace_eval_map_item *
5916 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5917 {
5918         /* Return tail of array given the head */
5919         return ptr + ptr->head.length + 1;
5920 }
5921
5922 static void
5923 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5924                            int len)
5925 {
5926         struct trace_eval_map **stop;
5927         struct trace_eval_map **map;
5928         union trace_eval_map_item *map_array;
5929         union trace_eval_map_item *ptr;
5930
5931         stop = start + len;
5932
5933         /*
5934          * Each array in the trace_eval_maps list contains the maps plus a head
5935          * and a tail item, where the head holds the module and the length of
5936          * the array, and the tail holds a pointer to the next array in the list.
5937          */
5938         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5939         if (!map_array) {
5940                 pr_warn("Unable to allocate trace eval mapping\n");
5941                 return;
5942         }
5943
5944         mutex_lock(&trace_eval_mutex);
5945
5946         if (!trace_eval_maps)
5947                 trace_eval_maps = map_array;
5948         else {
5949                 ptr = trace_eval_maps;
5950                 for (;;) {
5951                         ptr = trace_eval_jmp_to_tail(ptr);
5952                         if (!ptr->tail.next)
5953                                 break;
5954                         ptr = ptr->tail.next;
5955
5956                 }
5957                 ptr->tail.next = map_array;
5958         }
5959         map_array->head.mod = mod;
5960         map_array->head.length = len;
5961         map_array++;
5962
5963         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5964                 map_array->map = **map;
5965                 map_array++;
5966         }
5967         memset(map_array, 0, sizeof(*map_array));
5968
5969         mutex_unlock(&trace_eval_mutex);
5970 }
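
     /*
      * Layout of one map_array allocation built above: len maps plus a
      * head and a tail item. The final memset() leaves the tail's next
      * pointer NULL until another module chains a new array onto it:
      *
      *    [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
      */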
5971
5972 static void trace_create_eval_file(struct dentry *d_tracer)
5973 {
5974         trace_create_file("eval_map", 0444, d_tracer,
5975                           NULL, &tracing_eval_map_fops);
5976 }
5977
5978 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5979 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5980 static inline void trace_insert_eval_map_file(struct module *mod,
5981                               struct trace_eval_map **start, int len) { }
5982 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5983
5984 static void trace_insert_eval_map(struct module *mod,
5985                                   struct trace_eval_map **start, int len)
5986 {
5987         struct trace_eval_map **map;
5988
5989         if (len <= 0)
5990                 return;
5991
5992         map = start;
5993
5994         trace_event_eval_update(map, len);
5995
5996         trace_insert_eval_map_file(mod, start, len);
5997 }
5998
5999 static ssize_t
6000 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6001                        size_t cnt, loff_t *ppos)
6002 {
6003         struct trace_array *tr = filp->private_data;
6004         char buf[MAX_TRACER_SIZE+2];
6005         int r;
6006
6007         mutex_lock(&trace_types_lock);
6008         r = sprintf(buf, "%s\n", tr->current_trace->name);
6009         mutex_unlock(&trace_types_lock);
6010
6011         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6012 }
6013
6014 int tracer_init(struct tracer *t, struct trace_array *tr)
6015 {
6016         tracing_reset_online_cpus(&tr->array_buffer);
6017         return t->init(tr);
6018 }
6019
6020 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6021 {
6022         int cpu;
6023
6024         for_each_tracing_cpu(cpu)
6025                 per_cpu_ptr(buf->data, cpu)->entries = val;
6026 }
6027
6028 #ifdef CONFIG_TRACER_MAX_TRACE
6029 /* resize @trace_buf's per-cpu entry counts to match @size_buf's entries */
6030 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6031                                         struct array_buffer *size_buf, int cpu_id)
6032 {
6033         int cpu, ret = 0;
6034
6035         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6036                 for_each_tracing_cpu(cpu) {
6037                         ret = ring_buffer_resize(trace_buf->buffer,
6038                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6039                         if (ret < 0)
6040                                 break;
6041                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6042                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6043                 }
6044         } else {
6045                 ret = ring_buffer_resize(trace_buf->buffer,
6046                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6047                 if (ret == 0)
6048                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6049                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6050         }
6051
6052         return ret;
6053 }
6054 #endif /* CONFIG_TRACER_MAX_TRACE */
6055
6056 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6057                                         unsigned long size, int cpu)
6058 {
6059         int ret;
6060
6061         /*
6062          * If the kernel or the user changes the size of the ring buffer,
6063          * use the size that was given, and forget about expanding it to
6064          * the default size later.
6065          */
6066         ring_buffer_expanded = true;
6067
6068         /* May be called before buffers are initialized */
6069         if (!tr->array_buffer.buffer)
6070                 return 0;
6071
6072         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6073         if (ret < 0)
6074                 return ret;
6075
6076 #ifdef CONFIG_TRACER_MAX_TRACE
6077         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6078             !tr->current_trace->use_max_tr)
6079                 goto out;
6080
6081         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6082         if (ret < 0) {
6083                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6084                                                      &tr->array_buffer, cpu);
6085                 if (r < 0) {
6086                         /*
6087                          * AARGH! We are left with a max buffer of a
6088                          * different size!
6089                          * The max buffer is our "snapshot" buffer.
6090                          * When a tracer needs a snapshot (one of the
6091                          * latency tracers), it swaps the max buffer
6092                          * with the saved snapshot. We succeeded in
6093                          * updating the size of the main buffer, but failed
6094                          * to update the size of the max buffer. Then, when
6095                          * we tried to reset the main buffer to its original
6096                          * size, we failed there too. This is very unlikely
6097                          * to happen, but if it does, warn and kill all
6098                          * tracing.
6099                          */
6100                         WARN_ON(1);
6101                         tracing_disabled = 1;
6102                 }
6103                 return ret;
6104         }
6105
6106         if (cpu == RING_BUFFER_ALL_CPUS)
6107                 set_buffer_entries(&tr->max_buffer, size);
6108         else
6109                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6110
6111  out:
6112 #endif /* CONFIG_TRACER_MAX_TRACE */
6113
6114         if (cpu == RING_BUFFER_ALL_CPUS)
6115                 set_buffer_entries(&tr->array_buffer, size);
6116         else
6117                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6118
6119         return ret;
6120 }
6121
6122 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6123                                   unsigned long size, int cpu_id)
6124 {
6125         int ret = size;
6126
6127         mutex_lock(&trace_types_lock);
6128
6129         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6130                 /* make sure this cpu is enabled in the mask */
6131                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6132                         ret = -EINVAL;
6133                         goto out;
6134                 }
6135         }
6136
6137         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6138         if (ret < 0)
6139                 ret = -ENOMEM;
6140
6141 out:
6142         mutex_unlock(&trace_types_lock);
6143
6144         return ret;
6145 }
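
     /*
      * From user space the resize is driven through "buffer_size_kb"
      * (a per-CPU variant lives under per_cpu/cpuN/), with the size
      * given in kilobytes, for example:
      *
      *    # echo 4096 > /sys/kernel/tracing/buffer_size_kb
      */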
6146
6147
6148 /**
6149  * tracing_update_buffers - used by tracing facility to expand ring buffers
6150  *
6151  * To save memory on systems where tracing is configured in but never
6152  * used, the ring buffers start out at a minimum size. Once a user
6153  * starts to use the tracing facility, the buffers need to grow to
6154  * their default size.
6155  *
6156  * This function is to be called when a tracer is about to be used.
6157  */
6158 int tracing_update_buffers(void)
6159 {
6160         int ret = 0;
6161
6162         mutex_lock(&trace_types_lock);
6163         if (!ring_buffer_expanded)
6164                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6165                                                 RING_BUFFER_ALL_CPUS);
6166         mutex_unlock(&trace_types_lock);
6167
6168         return ret;
6169 }
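
     /*
      * Sketch of the expected calling pattern for code that is about to
      * start tracing (for example, paths that enable trace events do
      * this before flipping the event on): expand the buffers first and
      * bail out on failure:
      *
      *    ret = tracing_update_buffers();
      *    if (ret < 0)
      *            return ret;
      */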
6170
6171 struct trace_option_dentry;
6172
6173 static void
6174 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6175
6176 /*
6177  * Used to clear out the tracer before deletion of an instance.
6178  * Must have trace_types_lock held.
6179  */
6180 static void tracing_set_nop(struct trace_array *tr)
6181 {
6182         if (tr->current_trace == &nop_trace)
6183                 return;
6184
6185         tr->current_trace->enabled--;
6186
6187         if (tr->current_trace->reset)
6188                 tr->current_trace->reset(tr);
6189
6190         tr->current_trace = &nop_trace;
6191 }
6192
6193 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6194 {
6195         /* Only enable if the directory has been created already. */
6196         if (!tr->dir)
6197                 return;
6198
6199         create_trace_option_files(tr, t);
6200 }
6201
6202 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6203 {
6204         struct tracer *t;
6205 #ifdef CONFIG_TRACER_MAX_TRACE
6206         bool had_max_tr;
6207 #endif
6208         int ret = 0;
6209
6210         mutex_lock(&trace_types_lock);
6211
6212         if (!ring_buffer_expanded) {
6213                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6214                                                 RING_BUFFER_ALL_CPUS);
6215                 if (ret < 0)
6216                         goto out;
6217                 ret = 0;
6218         }
6219
6220         for (t = trace_types; t; t = t->next) {
6221                 if (strcmp(t->name, buf) == 0)
6222                         break;
6223         }
6224         if (!t) {
6225                 ret = -EINVAL;
6226                 goto out;
6227         }
6228         if (t == tr->current_trace)
6229                 goto out;
6230
6231 #ifdef CONFIG_TRACER_SNAPSHOT
6232         if (t->use_max_tr) {
6233                 arch_spin_lock(&tr->max_lock);
6234                 if (tr->cond_snapshot)
6235                         ret = -EBUSY;
6236                 arch_spin_unlock(&tr->max_lock);
6237                 if (ret)
6238                         goto out;
6239         }
6240 #endif
6241         /* Some tracers won't work on kernel command line */
6242         if (system_state < SYSTEM_RUNNING && t->noboot) {
6243                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6244                         t->name);
6245                 goto out;
6246         }
6247
6248         /* Some tracers are only allowed for the top level buffer */
6249         if (!trace_ok_for_array(t, tr)) {
6250                 ret = -EINVAL;
6251                 goto out;
6252         }
6253
6254         /* If trace pipe files are being read, we can't change the tracer */
6255         if (tr->trace_ref) {
6256                 ret = -EBUSY;
6257                 goto out;
6258         }
6259
6260         trace_branch_disable();
6261
6262         tr->current_trace->enabled--;
6263
6264         if (tr->current_trace->reset)
6265                 tr->current_trace->reset(tr);
6266
6267         /* Current trace needs to be nop_trace before synchronize_rcu */
6268         tr->current_trace = &nop_trace;
6269
6270 #ifdef CONFIG_TRACER_MAX_TRACE
6271         had_max_tr = tr->allocated_snapshot;
6272
6273         if (had_max_tr && !t->use_max_tr) {
6274                 /*
6275                  * We need to make sure that the update_max_tr sees that
6276                  * current_trace changed to nop_trace to keep it from
6277                  * swapping the buffers after we resize it.
6278                  * The update_max_tr is called with interrupts disabled
6279                  * so a synchronize_rcu() is sufficient.
6280                  */
6281                 synchronize_rcu();
6282                 free_snapshot(tr);
6283         }
6284 #endif
6285
6286 #ifdef CONFIG_TRACER_MAX_TRACE
6287         if (t->use_max_tr && !had_max_tr) {
6288                 ret = tracing_alloc_snapshot_instance(tr);
6289                 if (ret < 0)
6290                         goto out;
6291         }
6292 #endif
6293
6294         if (t->init) {
6295                 ret = tracer_init(t, tr);
6296                 if (ret)
6297                         goto out;
6298         }
6299
6300         tr->current_trace = t;
6301         tr->current_trace->enabled++;
6302         trace_branch_enable(tr);
6303  out:
6304         mutex_unlock(&trace_types_lock);
6305
6306         return ret;
6307 }
6308
6309 static ssize_t
6310 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6311                         size_t cnt, loff_t *ppos)
6312 {
6313         struct trace_array *tr = filp->private_data;
6314         char buf[MAX_TRACER_SIZE+1];
6315         int i;
6316         size_t ret;
6317         int err;
6318
6319         ret = cnt;
6320
6321         if (cnt > MAX_TRACER_SIZE)
6322                 cnt = MAX_TRACER_SIZE;
6323
6324         if (copy_from_user(buf, ubuf, cnt))
6325                 return -EFAULT;
6326
6327         buf[cnt] = 0;
6328
6329         /* strip trailing whitespace. */
6330         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6331                 buf[i] = 0;
6332
6333         err = tracing_set_tracer(tr, buf);
6334         if (err)
6335                 return err;
6336
6337         *ppos += ret;
6338
6339         return ret;
6340 }
6341
6342 static ssize_t
6343 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6344                    size_t cnt, loff_t *ppos)
6345 {
6346         char buf[64];
6347         int r;
6348
6349         r = snprintf(buf, sizeof(buf), "%ld\n",
6350                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6351         if (r > sizeof(buf))
6352                 r = sizeof(buf);
6353         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6354 }
6355
6356 static ssize_t
6357 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6358                     size_t cnt, loff_t *ppos)
6359 {
6360         unsigned long val;
6361         int ret;
6362
6363         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6364         if (ret)
6365                 return ret;
6366
6367         *ptr = val * 1000;
6368
6369         return cnt;
6370 }
6371
6372 static ssize_t
6373 tracing_thresh_read(struct file *filp, char __user *ubuf,
6374                     size_t cnt, loff_t *ppos)
6375 {
6376         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6377 }
6378
6379 static ssize_t
6380 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6381                      size_t cnt, loff_t *ppos)
6382 {
6383         struct trace_array *tr = filp->private_data;
6384         int ret;
6385
6386         mutex_lock(&trace_types_lock);
6387         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6388         if (ret < 0)
6389                 goto out;
6390
6391         if (tr->current_trace->update_thresh) {
6392                 ret = tr->current_trace->update_thresh(tr);
6393                 if (ret < 0)
6394                         goto out;
6395         }
6396
6397         ret = cnt;
6398 out:
6399         mutex_unlock(&trace_types_lock);
6400
6401         return ret;
6402 }
6403
6404 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6405
6406 static ssize_t
6407 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6408                      size_t cnt, loff_t *ppos)
6409 {
6410         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6411 }
6412
6413 static ssize_t
6414 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6415                       size_t cnt, loff_t *ppos)
6416 {
6417         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6418 }
6419
6420 #endif
6421
6422 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6423 {
6424         struct trace_array *tr = inode->i_private;
6425         struct trace_iterator *iter;
6426         int ret;
6427
6428         ret = tracing_check_open_get_tr(tr);
6429         if (ret)
6430                 return ret;
6431
6432         mutex_lock(&trace_types_lock);
6433
6434         /* create a buffer to store the information to pass to userspace */
6435         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6436         if (!iter) {
6437                 ret = -ENOMEM;
6438                 __trace_array_put(tr);
6439                 goto out;
6440         }
6441
6442         trace_seq_init(&iter->seq);
6443         iter->trace = tr->current_trace;
6444
6445         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6446                 ret = -ENOMEM;
6447                 goto fail;
6448         }
6449
6450         /* trace pipe does not show start of buffer */
6451         cpumask_setall(iter->started);
6452
6453         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6454                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6455
6456         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6457         if (trace_clocks[tr->clock_id].in_ns)
6458                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6459
6460         iter->tr = tr;
6461         iter->array_buffer = &tr->array_buffer;
6462         iter->cpu_file = tracing_get_cpu(inode);
6463         mutex_init(&iter->mutex);
6464         filp->private_data = iter;
6465
6466         if (iter->trace->pipe_open)
6467                 iter->trace->pipe_open(iter);
6468
6469         nonseekable_open(inode, filp);
6470
6471         tr->trace_ref++;
6472 out:
6473         mutex_unlock(&trace_types_lock);
6474         return ret;
6475
6476 fail:
6477         kfree(iter);
6478         __trace_array_put(tr);
6479         mutex_unlock(&trace_types_lock);
6480         return ret;
6481 }
6482
6483 static int tracing_release_pipe(struct inode *inode, struct file *file)
6484 {
6485         struct trace_iterator *iter = file->private_data;
6486         struct trace_array *tr = inode->i_private;
6487
6488         mutex_lock(&trace_types_lock);
6489
6490         tr->trace_ref--;
6491
6492         if (iter->trace->pipe_close)
6493                 iter->trace->pipe_close(iter);
6494
6495         mutex_unlock(&trace_types_lock);
6496
6497         free_cpumask_var(iter->started);
6498         mutex_destroy(&iter->mutex);
6499         kfree(iter);
6500
6501         trace_array_put(tr);
6502
6503         return 0;
6504 }
6505
6506 static __poll_t
6507 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6508 {
6509         struct trace_array *tr = iter->tr;
6510
6511         /* Iterators are static; they should be either filled or empty */
6512         if (trace_buffer_iter(iter, iter->cpu_file))
6513                 return EPOLLIN | EPOLLRDNORM;
6514
6515         if (tr->trace_flags & TRACE_ITER_BLOCK)
6516                 /*
6517                  * Always select as readable when in blocking mode
6518                  */
6519                 return EPOLLIN | EPOLLRDNORM;
6520         else
6521                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6522                                              filp, poll_table);
6523 }
6524
6525 static __poll_t
6526 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6527 {
6528         struct trace_iterator *iter = filp->private_data;
6529
6530         return trace_poll(iter, filp, poll_table);
6531 }
6532
6533 /* Must be called with iter->mutex held. */
6534 static int tracing_wait_pipe(struct file *filp)
6535 {
6536         struct trace_iterator *iter = filp->private_data;
6537         int ret;
6538
6539         while (trace_empty(iter)) {
6540
6541                 if ((filp->f_flags & O_NONBLOCK)) {
6542                         return -EAGAIN;
6543                 }
6544
6545                 /*
6546                  * We block while the trace is empty. We keep blocking even
6547                  * if tracing is disabled, as long as we have never read
6548                  * anything. This allows a user to cat this file, and
6549                  * then enable tracing. But after we have read something,
6550                  * we give an EOF when tracing is again disabled.
6551                  *
6552                  * iter->pos will be 0 if we haven't read anything.
6553                  */
6554                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6555                         break;
6556
6557                 mutex_unlock(&iter->mutex);
6558
6559                 ret = wait_on_pipe(iter, 0);
6560
6561                 mutex_lock(&iter->mutex);
6562
6563                 if (ret)
6564                         return ret;
6565         }
6566
6567         return 1;
6568 }
6569
6570 /*
6571  * Consumer reader.
6572  */
6573 static ssize_t
6574 tracing_read_pipe(struct file *filp, char __user *ubuf,
6575                   size_t cnt, loff_t *ppos)
6576 {
6577         struct trace_iterator *iter = filp->private_data;
6578         ssize_t sret;
6579
6580         /*
6581          * Avoid more than one consumer on a single file descriptor.
6582          * This is just a matter of trace coherency; the ring buffer itself
6583          * is protected.
6584          */
6585         mutex_lock(&iter->mutex);
6586
6587         /* return any leftover data */
6588         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6589         if (sret != -EBUSY)
6590                 goto out;
6591
6592         trace_seq_init(&iter->seq);
6593
6594         if (iter->trace->read) {
6595                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6596                 if (sret)
6597                         goto out;
6598         }
6599
6600 waitagain:
6601         sret = tracing_wait_pipe(filp);
6602         if (sret <= 0)
6603                 goto out;
6604
6605         /* stop when tracing is finished */
6606         if (trace_empty(iter)) {
6607                 sret = 0;
6608                 goto out;
6609         }
6610
6611         if (cnt >= PAGE_SIZE)
6612                 cnt = PAGE_SIZE - 1;
6613
6614         /* reset all but tr, trace, and overruns */
6615         memset(&iter->seq, 0,
6616                sizeof(struct trace_iterator) -
6617                offsetof(struct trace_iterator, seq));
6618         cpumask_clear(iter->started);
6619         trace_seq_init(&iter->seq);
6620         iter->pos = -1;
6621
6622         trace_event_read_lock();
6623         trace_access_lock(iter->cpu_file);
6624         while (trace_find_next_entry_inc(iter) != NULL) {
6625                 enum print_line_t ret;
6626                 int save_len = iter->seq.seq.len;
6627
6628                 ret = print_trace_line(iter);
6629                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6630                         /* don't print partial lines */
6631                         iter->seq.seq.len = save_len;
6632                         break;
6633                 }
6634                 if (ret != TRACE_TYPE_NO_CONSUME)
6635                         trace_consume(iter);
6636
6637                 if (trace_seq_used(&iter->seq) >= cnt)
6638                         break;
6639
6640                 /*
6641                  * Setting the full flag means we reached the trace_seq buffer
6642                  * size and should have left via the partial output condition
6643                  * above. One of the trace_seq_* functions is not used properly.
6644                  */
6645                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6646                           iter->ent->type);
6647         }
6648         trace_access_unlock(iter->cpu_file);
6649         trace_event_read_unlock();
6650
6651         /* Now copy what we have to the user */
6652         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6653         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6654                 trace_seq_init(&iter->seq);
6655
6656         /*
6657          * If there was nothing to send to user, in spite of consuming trace
6658          * entries, go back to wait for more entries.
6659          */
6660         if (sret == -EBUSY)
6661                 goto waitagain;
6662
6663 out:
6664         mutex_unlock(&iter->mutex);
6665
6666         return sret;
6667 }
6668
6669 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6670                                      unsigned int idx)
6671 {
6672         __free_page(spd->pages[idx]);
6673 }
6674
6675 static size_t
6676 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6677 {
6678         size_t count;
6679         int save_len;
6680         int ret;
6681
6682         /* Seq buffer is page-sized, exactly what we need. */
6683         for (;;) {
6684                 save_len = iter->seq.seq.len;
6685                 ret = print_trace_line(iter);
6686
6687                 if (trace_seq_has_overflowed(&iter->seq)) {
6688                         iter->seq.seq.len = save_len;
6689                         break;
6690                 }
6691
6692                 /*
6693                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6694                  * should only be returned if the iter->seq overflowed.
6695                  * But check it anyway to be safe.
6696                  */
6697                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6698                         iter->seq.seq.len = save_len;
6699                         break;
6700                 }
6701
6702                 count = trace_seq_used(&iter->seq) - save_len;
6703                 if (rem < count) {
6704                         rem = 0;
6705                         iter->seq.seq.len = save_len;
6706                         break;
6707                 }
6708
6709                 if (ret != TRACE_TYPE_NO_CONSUME)
6710                         trace_consume(iter);
6711                 rem -= count;
6712                 if (!trace_find_next_entry_inc(iter))   {
6713                         rem = 0;
6714                         iter->ent = NULL;
6715                         break;
6716                 }
6717         }
6718
6719         return rem;
6720 }
6721
6722 static ssize_t tracing_splice_read_pipe(struct file *filp,
6723                                         loff_t *ppos,
6724                                         struct pipe_inode_info *pipe,
6725                                         size_t len,
6726                                         unsigned int flags)
6727 {
6728         struct page *pages_def[PIPE_DEF_BUFFERS];
6729         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6730         struct trace_iterator *iter = filp->private_data;
6731         struct splice_pipe_desc spd = {
6732                 .pages          = pages_def,
6733                 .partial        = partial_def,
6734                 .nr_pages       = 0, /* This gets updated below. */
6735                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6736                 .ops            = &default_pipe_buf_ops,
6737                 .spd_release    = tracing_spd_release_pipe,
6738         };
6739         ssize_t ret;
6740         size_t rem;
6741         unsigned int i;
6742
6743         if (splice_grow_spd(pipe, &spd))
6744                 return -ENOMEM;
6745
6746         mutex_lock(&iter->mutex);
6747
6748         if (iter->trace->splice_read) {
6749                 ret = iter->trace->splice_read(iter, filp,
6750                                                ppos, pipe, len, flags);
6751                 if (ret)
6752                         goto out_err;
6753         }
6754
6755         ret = tracing_wait_pipe(filp);
6756         if (ret <= 0)
6757                 goto out_err;
6758
6759         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6760                 ret = -EFAULT;
6761                 goto out_err;
6762         }
6763
6764         trace_event_read_lock();
6765         trace_access_lock(iter->cpu_file);
6766
6767         /* Fill as many pages as possible. */
6768         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6769                 spd.pages[i] = alloc_page(GFP_KERNEL);
6770                 if (!spd.pages[i])
6771                         break;
6772
6773                 rem = tracing_fill_pipe_page(rem, iter);
6774
6775                 /* Copy the data into the page, so we can start over. */
6776                 ret = trace_seq_to_buffer(&iter->seq,
6777                                           page_address(spd.pages[i]),
6778                                           trace_seq_used(&iter->seq));
6779                 if (ret < 0) {
6780                         __free_page(spd.pages[i]);
6781                         break;
6782                 }
6783                 spd.partial[i].offset = 0;
6784                 spd.partial[i].len = trace_seq_used(&iter->seq);
6785
6786                 trace_seq_init(&iter->seq);
6787         }
6788
6789         trace_access_unlock(iter->cpu_file);
6790         trace_event_read_unlock();
6791         mutex_unlock(&iter->mutex);
6792
6793         spd.nr_pages = i;
6794
6795         if (i)
6796                 ret = splice_to_pipe(pipe, &spd);
6797         else
6798                 ret = 0;
6799 out:
6800         splice_shrink_spd(&spd);
6801         return ret;
6802
6803 out_err:
6804         mutex_unlock(&iter->mutex);
6805         goto out;
6806 }
6807
6808 static ssize_t
6809 tracing_entries_read(struct file *filp, char __user *ubuf,
6810                      size_t cnt, loff_t *ppos)
6811 {
6812         struct inode *inode = file_inode(filp);
6813         struct trace_array *tr = inode->i_private;
6814         int cpu = tracing_get_cpu(inode);
6815         char buf[64];
6816         int r = 0;
6817         ssize_t ret;
6818
6819         mutex_lock(&trace_types_lock);
6820
6821         if (cpu == RING_BUFFER_ALL_CPUS) {
6822                 int cpu, buf_size_same;
6823                 unsigned long size;
6824
6825                 size = 0;
6826                 buf_size_same = 1;
6827                 /* check if all cpu sizes are the same */
6828                 for_each_tracing_cpu(cpu) {
6829                         /* fill in the size from the first enabled cpu */
6830                         if (size == 0)
6831                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6832                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6833                                 buf_size_same = 0;
6834                                 break;
6835                         }
6836                 }
6837
6838                 if (buf_size_same) {
6839                         if (!ring_buffer_expanded)
6840                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6841                                             size >> 10,
6842                                             trace_buf_size >> 10);
6843                         else
6844                                 r = sprintf(buf, "%lu\n", size >> 10);
6845                 } else
6846                         r = sprintf(buf, "X\n");
6847         } else
6848                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6849
6850         mutex_unlock(&trace_types_lock);
6851
6852         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6853         return ret;
6854 }
6855
6856 static ssize_t
6857 tracing_entries_write(struct file *filp, const char __user *ubuf,
6858                       size_t cnt, loff_t *ppos)
6859 {
6860         struct inode *inode = file_inode(filp);
6861         struct trace_array *tr = inode->i_private;
6862         unsigned long val;
6863         int ret;
6864
6865         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6866         if (ret)
6867                 return ret;
6868
6869         /* must have at least 1 entry */
6870         if (!val)
6871                 return -EINVAL;
6872
6873         /* value is in KB */
6874         val <<= 10;
6875         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6876         if (ret < 0)
6877                 return ret;
6878
6879         *ppos += cnt;
6880
6881         return cnt;
6882 }
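
/*
 * Illustrative userspace usage of the per-instance buffer-size file that
 * this handler typically backs (the path assumes tracefs is mounted at
 * /sys/kernel/tracing); the value written is interpreted in KB per CPU:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	cat /sys/kernel/tracing/buffer_size_kb
 */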
6883
6884 static ssize_t
6885 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6886                                 size_t cnt, loff_t *ppos)
6887 {
6888         struct trace_array *tr = filp->private_data;
6889         char buf[64];
6890         int r, cpu;
6891         unsigned long size = 0, expanded_size = 0;
6892
6893         mutex_lock(&trace_types_lock);
6894         for_each_tracing_cpu(cpu) {
6895                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6896                 if (!ring_buffer_expanded)
6897                         expanded_size += trace_buf_size >> 10;
6898         }
6899         if (ring_buffer_expanded)
6900                 r = sprintf(buf, "%lu\n", size);
6901         else
6902                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6903         mutex_unlock(&trace_types_lock);
6904
6905         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6906 }
6907
6908 static ssize_t
6909 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6910                           size_t cnt, loff_t *ppos)
6911 {
6912         /*
6913          * There is no need to read what the user has written; this function
6914          * exists just to make sure that there is no error when "echo" is used.
6915          */
6916
6917         *ppos += cnt;
6918
6919         return cnt;
6920 }
6921
6922 static int
6923 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6924 {
6925         struct trace_array *tr = inode->i_private;
6926
6927         /* disable tracing? */
6928         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6929                 tracer_tracing_off(tr);
6930         /* resize the ring buffer to 0 */
6931         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6932
6933         trace_array_put(tr);
6934
6935         return 0;
6936 }
6937
6938 static ssize_t
6939 tracing_mark_write(struct file *filp, const char __user *ubuf,
6940                                         size_t cnt, loff_t *fpos)
6941 {
6942         struct trace_array *tr = filp->private_data;
6943         struct ring_buffer_event *event;
6944         enum event_trigger_type tt = ETT_NONE;
6945         struct trace_buffer *buffer;
6946         struct print_entry *entry;
6947         ssize_t written;
6948         int size;
6949         int len;
6950
6951 /* Used in tracing_mark_raw_write() as well */
6952 #define FAULTED_STR "<faulted>"
6953 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6954
6955         if (tracing_disabled)
6956                 return -EINVAL;
6957
6958         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6959                 return -EINVAL;
6960
6961         if (cnt > TRACE_BUF_SIZE)
6962                 cnt = TRACE_BUF_SIZE;
6963
6964         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6965
6966         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6967
6968         /* If less than "<faulted>", then make sure we can still add that */
6969         if (cnt < FAULTED_SIZE)
6970                 size += FAULTED_SIZE - cnt;
6971
6972         buffer = tr->array_buffer.buffer;
6973         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6974                                             tracing_gen_ctx());
6975         if (unlikely(!event))
6976                 /* Ring buffer disabled, return as if not open for write */
6977                 return -EBADF;
6978
6979         entry = ring_buffer_event_data(event);
6980         entry->ip = _THIS_IP_;
6981
6982         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6983         if (len) {
6984                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6985                 cnt = FAULTED_SIZE;
6986                 written = -EFAULT;
6987         } else
6988                 written = cnt;
6989
6990         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6991                 /* do not add \n before testing triggers, but add \0 */
6992                 entry->buf[cnt] = '\0';
6993                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6994         }
6995
6996         if (entry->buf[cnt - 1] != '\n') {
6997                 entry->buf[cnt] = '\n';
6998                 entry->buf[cnt + 1] = '\0';
6999         } else
7000                 entry->buf[cnt] = '\0';
7001
7002         if (static_branch_unlikely(&trace_marker_exports_enabled))
7003                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7004         __buffer_unlock_commit(buffer, event);
7005
7006         if (tt)
7007                 event_triggers_post_call(tr->trace_marker_file, tt);
7008
7009         if (written > 0)
7010                 *fpos += written;
7011
7012         return written;
7013 }
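
/*
 * Illustrative userspace usage of the trace_marker interface handled
 * above (the path assumes tracefs is mounted at /sys/kernel/tracing);
 * writes longer than TRACE_BUF_SIZE are truncated:
 *
 *	echo "hit the interesting code path" > /sys/kernel/tracing/trace_marker
 */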
7014
7015 /* Limit it for now to 3K (including tag) */
7016 #define RAW_DATA_MAX_SIZE (1024*3)
7017
7018 static ssize_t
7019 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7020                                         size_t cnt, loff_t *fpos)
7021 {
7022         struct trace_array *tr = filp->private_data;
7023         struct ring_buffer_event *event;
7024         struct trace_buffer *buffer;
7025         struct raw_data_entry *entry;
7026         ssize_t written;
7027         int size;
7028         int len;
7029
7030 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7031
7032         if (tracing_disabled)
7033                 return -EINVAL;
7034
7035         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7036                 return -EINVAL;
7037
7038         /* The marker must at least have a tag id */
7039         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7040                 return -EINVAL;
7041
7042         if (cnt > TRACE_BUF_SIZE)
7043                 cnt = TRACE_BUF_SIZE;
7044
7045         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7046
7047         size = sizeof(*entry) + cnt;
7048         if (cnt < FAULT_SIZE_ID)
7049                 size += FAULT_SIZE_ID - cnt;
7050
7051         buffer = tr->array_buffer.buffer;
7052         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7053                                             tracing_gen_ctx());
7054         if (!event)
7055                 /* Ring buffer disabled, return as if not open for write */
7056                 return -EBADF;
7057
7058         entry = ring_buffer_event_data(event);
7059
7060         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7061         if (len) {
7062                 entry->id = -1;
7063                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7064                 written = -EFAULT;
7065         } else
7066                 written = cnt;
7067
7068         __buffer_unlock_commit(buffer, event);
7069
7070         if (written > 0)
7071                 *fpos += written;
7072
7073         return written;
7074 }
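
/*
 * Illustrative userspace sketch (not kernel code) for the raw marker
 * interface handled above: the write must begin with an int tag id,
 * followed by the raw payload. The path and tag value are examples only.
 *
 *	struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */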
7075
7076 static int tracing_clock_show(struct seq_file *m, void *v)
7077 {
7078         struct trace_array *tr = m->private;
7079         int i;
7080
7081         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7082                 seq_printf(m,
7083                         "%s%s%s%s", i ? " " : "",
7084                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7085                         i == tr->clock_id ? "]" : "");
7086         seq_putc(m, '\n');
7087
7088         return 0;
7089 }
7090
7091 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7092 {
7093         int i;
7094
7095         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7096                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7097                         break;
7098         }
7099         if (i == ARRAY_SIZE(trace_clocks))
7100                 return -EINVAL;
7101
7102         mutex_lock(&trace_types_lock);
7103
7104         tr->clock_id = i;
7105
7106         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7107
7108         /*
7109          * New clock may not be consistent with the previous clock.
7110          * Reset the buffer so that it doesn't have incomparable timestamps.
7111          */
7112         tracing_reset_online_cpus(&tr->array_buffer);
7113
7114 #ifdef CONFIG_TRACER_MAX_TRACE
7115         if (tr->max_buffer.buffer)
7116                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7117         tracing_reset_online_cpus(&tr->max_buffer);
7118 #endif
7119
7120         mutex_unlock(&trace_types_lock);
7121
7122         return 0;
7123 }
7124
7125 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7126                                    size_t cnt, loff_t *fpos)
7127 {
7128         struct seq_file *m = filp->private_data;
7129         struct trace_array *tr = m->private;
7130         char buf[64];
7131         const char *clockstr;
7132         int ret;
7133
7134         if (cnt >= sizeof(buf))
7135                 return -EINVAL;
7136
7137         if (copy_from_user(buf, ubuf, cnt))
7138                 return -EFAULT;
7139
7140         buf[cnt] = 0;
7141
7142         clockstr = strstrip(buf);
7143
7144         ret = tracing_set_clock(tr, clockstr);
7145         if (ret)
7146                 return ret;
7147
7148         *fpos += cnt;
7149
7150         return cnt;
7151 }
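
/*
 * Illustrative userspace usage of the trace_clock file that this handler
 * typically backs (the path assumes tracefs is mounted at
 * /sys/kernel/tracing); the currently selected clock is shown in brackets
 * by tracing_clock_show():
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	echo global > /sys/kernel/tracing/trace_clock
 */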
7152
7153 static int tracing_clock_open(struct inode *inode, struct file *file)
7154 {
7155         struct trace_array *tr = inode->i_private;
7156         int ret;
7157
7158         ret = tracing_check_open_get_tr(tr);
7159         if (ret)
7160                 return ret;
7161
7162         ret = single_open(file, tracing_clock_show, inode->i_private);
7163         if (ret < 0)
7164                 trace_array_put(tr);
7165
7166         return ret;
7167 }
7168
7169 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7170 {
7171         struct trace_array *tr = m->private;
7172
7173         mutex_lock(&trace_types_lock);
7174
7175         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7176                 seq_puts(m, "delta [absolute]\n");
7177         else
7178                 seq_puts(m, "[delta] absolute\n");
7179
7180         mutex_unlock(&trace_types_lock);
7181
7182         return 0;
7183 }
7184
7185 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7186 {
7187         struct trace_array *tr = inode->i_private;
7188         int ret;
7189
7190         ret = tracing_check_open_get_tr(tr);
7191         if (ret)
7192                 return ret;
7193
7194         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7195         if (ret < 0)
7196                 trace_array_put(tr);
7197
7198         return ret;
7199 }
7200
7201 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7202 {
7203         if (rbe == this_cpu_read(trace_buffered_event))
7204                 return ring_buffer_time_stamp(buffer);
7205
7206         return ring_buffer_event_time_stamp(buffer, rbe);
7207 }
7208
7209 /*
7210  * Set or disable using the per CPU trace_buffered_event when possible.
7211  */
7212 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7213 {
7214         int ret = 0;
7215
7216         mutex_lock(&trace_types_lock);
7217
7218         if (set && tr->no_filter_buffering_ref++)
7219                 goto out;
7220
7221         if (!set) {
7222                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7223                         ret = -EINVAL;
7224                         goto out;
7225                 }
7226
7227                 --tr->no_filter_buffering_ref;
7228         }
7229  out:
7230         mutex_unlock(&trace_types_lock);
7231
7232         return ret;
7233 }
7234
7235 struct ftrace_buffer_info {
7236         struct trace_iterator   iter;
7237         void                    *spare;
7238         unsigned int            spare_cpu;
7239         unsigned int            read;
7240 };
7241
7242 #ifdef CONFIG_TRACER_SNAPSHOT
7243 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7244 {
7245         struct trace_array *tr = inode->i_private;
7246         struct trace_iterator *iter;
7247         struct seq_file *m;
7248         int ret;
7249
7250         ret = tracing_check_open_get_tr(tr);
7251         if (ret)
7252                 return ret;
7253
7254         if (file->f_mode & FMODE_READ) {
7255                 iter = __tracing_open(inode, file, true);
7256                 if (IS_ERR(iter))
7257                         ret = PTR_ERR(iter);
7258         } else {
7259                 /* Writes still need the seq_file to hold the private data */
7260                 ret = -ENOMEM;
7261                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7262                 if (!m)
7263                         goto out;
7264                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7265                 if (!iter) {
7266                         kfree(m);
7267                         goto out;
7268                 }
7269                 ret = 0;
7270
7271                 iter->tr = tr;
7272                 iter->array_buffer = &tr->max_buffer;
7273                 iter->cpu_file = tracing_get_cpu(inode);
7274                 m->private = iter;
7275                 file->private_data = m;
7276         }
7277 out:
7278         if (ret < 0)
7279                 trace_array_put(tr);
7280
7281         return ret;
7282 }
7283
7284 static ssize_t
7285 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7286                        loff_t *ppos)
7287 {
7288         struct seq_file *m = filp->private_data;
7289         struct trace_iterator *iter = m->private;
7290         struct trace_array *tr = iter->tr;
7291         unsigned long val;
7292         int ret;
7293
7294         ret = tracing_update_buffers();
7295         if (ret < 0)
7296                 return ret;
7297
7298         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7299         if (ret)
7300                 return ret;
7301
7302         mutex_lock(&trace_types_lock);
7303
7304         if (tr->current_trace->use_max_tr) {
7305                 ret = -EBUSY;
7306                 goto out;
7307         }
7308
7309         arch_spin_lock(&tr->max_lock);
7310         if (tr->cond_snapshot)
7311                 ret = -EBUSY;
7312         arch_spin_unlock(&tr->max_lock);
7313         if (ret)
7314                 goto out;
7315
7316         switch (val) {
7317         case 0:
7318                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7319                         ret = -EINVAL;
7320                         break;
7321                 }
7322                 if (tr->allocated_snapshot)
7323                         free_snapshot(tr);
7324                 break;
7325         case 1:
7326 /* Only allow per-cpu swap if the ring buffer supports it */
7327 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7328                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7329                         ret = -EINVAL;
7330                         break;
7331                 }
7332 #endif
7333                 if (tr->allocated_snapshot)
7334                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7335                                         &tr->array_buffer, iter->cpu_file);
7336                 else
7337                         ret = tracing_alloc_snapshot_instance(tr);
7338                 if (ret < 0)
7339                         break;
7340                 local_irq_disable();
7341                 /* Now, we're going to swap */
7342                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7343                         update_max_tr(tr, current, smp_processor_id(), NULL);
7344                 else
7345                         update_max_tr_single(tr, current, iter->cpu_file);
7346                 local_irq_enable();
7347                 break;
7348         default:
7349                 if (tr->allocated_snapshot) {
7350                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7351                                 tracing_reset_online_cpus(&tr->max_buffer);
7352                         else
7353                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7354                 }
7355                 break;
7356         }
7357
7358         if (ret >= 0) {
7359                 *ppos += cnt;
7360                 ret = cnt;
7361         }
7362 out:
7363         mutex_unlock(&trace_types_lock);
7364         return ret;
7365 }
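
/*
 * Illustrative userspace usage of the snapshot file wired up below in
 * snapshot_fops; the written values mirror the switch cases above (the
 * path assumes tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# allocate if needed and swap
 *	cat /sys/kernel/tracing/snapshot	# read the snapshotted trace
 *	echo 0 > /sys/kernel/tracing/snapshot	# free the snapshot buffer
 *	echo 2 > /sys/kernel/tracing/snapshot	# clear the snapshot contents
 */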
7366
7367 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7368 {
7369         struct seq_file *m = file->private_data;
7370         int ret;
7371
7372         ret = tracing_release(inode, file);
7373
7374         if (file->f_mode & FMODE_READ)
7375                 return ret;
7376
7377         /* If write only, the seq_file is just a stub */
7378         if (m)
7379                 kfree(m->private);
7380         kfree(m);
7381
7382         return 0;
7383 }
7384
7385 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7386 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7387                                     size_t count, loff_t *ppos);
7388 static int tracing_buffers_release(struct inode *inode, struct file *file);
7389 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7390                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7391
7392 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7393 {
7394         struct ftrace_buffer_info *info;
7395         int ret;
7396
7397         /* The following checks for tracefs lockdown */
7398         ret = tracing_buffers_open(inode, filp);
7399         if (ret < 0)
7400                 return ret;
7401
7402         info = filp->private_data;
7403
7404         if (info->iter.trace->use_max_tr) {
7405                 tracing_buffers_release(inode, filp);
7406                 return -EBUSY;
7407         }
7408
7409         info->iter.snapshot = true;
7410         info->iter.array_buffer = &info->iter.tr->max_buffer;
7411
7412         return ret;
7413 }
7414
7415 #endif /* CONFIG_TRACER_SNAPSHOT */
7416
7417
7418 static const struct file_operations tracing_thresh_fops = {
7419         .open           = tracing_open_generic,
7420         .read           = tracing_thresh_read,
7421         .write          = tracing_thresh_write,
7422         .llseek         = generic_file_llseek,
7423 };
7424
7425 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7426 static const struct file_operations tracing_max_lat_fops = {
7427         .open           = tracing_open_generic,
7428         .read           = tracing_max_lat_read,
7429         .write          = tracing_max_lat_write,
7430         .llseek         = generic_file_llseek,
7431 };
7432 #endif
7433
7434 static const struct file_operations set_tracer_fops = {
7435         .open           = tracing_open_generic,
7436         .read           = tracing_set_trace_read,
7437         .write          = tracing_set_trace_write,
7438         .llseek         = generic_file_llseek,
7439 };
7440
7441 static const struct file_operations tracing_pipe_fops = {
7442         .open           = tracing_open_pipe,
7443         .poll           = tracing_poll_pipe,
7444         .read           = tracing_read_pipe,
7445         .splice_read    = tracing_splice_read_pipe,
7446         .release        = tracing_release_pipe,
7447         .llseek         = no_llseek,
7448 };
7449
7450 static const struct file_operations tracing_entries_fops = {
7451         .open           = tracing_open_generic_tr,
7452         .read           = tracing_entries_read,
7453         .write          = tracing_entries_write,
7454         .llseek         = generic_file_llseek,
7455         .release        = tracing_release_generic_tr,
7456 };
7457
7458 static const struct file_operations tracing_total_entries_fops = {
7459         .open           = tracing_open_generic_tr,
7460         .read           = tracing_total_entries_read,
7461         .llseek         = generic_file_llseek,
7462         .release        = tracing_release_generic_tr,
7463 };
7464
7465 static const struct file_operations tracing_free_buffer_fops = {
7466         .open           = tracing_open_generic_tr,
7467         .write          = tracing_free_buffer_write,
7468         .release        = tracing_free_buffer_release,
7469 };
7470
7471 static const struct file_operations tracing_mark_fops = {
7472         .open           = tracing_open_generic_tr,
7473         .write          = tracing_mark_write,
7474         .llseek         = generic_file_llseek,
7475         .release        = tracing_release_generic_tr,
7476 };
7477
7478 static const struct file_operations tracing_mark_raw_fops = {
7479         .open           = tracing_open_generic_tr,
7480         .write          = tracing_mark_raw_write,
7481         .llseek         = generic_file_llseek,
7482         .release        = tracing_release_generic_tr,
7483 };
7484
7485 static const struct file_operations trace_clock_fops = {
7486         .open           = tracing_clock_open,
7487         .read           = seq_read,
7488         .llseek         = seq_lseek,
7489         .release        = tracing_single_release_tr,
7490         .write          = tracing_clock_write,
7491 };
7492
7493 static const struct file_operations trace_time_stamp_mode_fops = {
7494         .open           = tracing_time_stamp_mode_open,
7495         .read           = seq_read,
7496         .llseek         = seq_lseek,
7497         .release        = tracing_single_release_tr,
7498 };
7499
7500 #ifdef CONFIG_TRACER_SNAPSHOT
7501 static const struct file_operations snapshot_fops = {
7502         .open           = tracing_snapshot_open,
7503         .read           = seq_read,
7504         .write          = tracing_snapshot_write,
7505         .llseek         = tracing_lseek,
7506         .release        = tracing_snapshot_release,
7507 };
7508
7509 static const struct file_operations snapshot_raw_fops = {
7510         .open           = snapshot_raw_open,
7511         .read           = tracing_buffers_read,
7512         .release        = tracing_buffers_release,
7513         .splice_read    = tracing_buffers_splice_read,
7514         .llseek         = no_llseek,
7515 };
7516
7517 #endif /* CONFIG_TRACER_SNAPSHOT */
7518
7519 #define TRACING_LOG_ERRS_MAX    8
7520 #define TRACING_LOG_LOC_MAX     128
7521
7522 #define CMD_PREFIX "  Command: "
7523
7524 struct err_info {
7525         const char      **errs; /* ptr to loc-specific array of err strings */
7526         u8              type;   /* index into errs -> specific err string */
7527         u8              pos;    /* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7528         u64             ts;
7529 };
7530
7531 struct tracing_log_err {
7532         struct list_head        list;
7533         struct err_info         info;
7534         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7535         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7536 };
7537
7538 static DEFINE_MUTEX(tracing_err_log_lock);
7539
7540 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7541 {
7542         struct tracing_log_err *err;
7543
7544         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7545                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7546                 if (!err)
7547                         err = ERR_PTR(-ENOMEM);
7548                 tr->n_err_log_entries++;
7549
7550                 return err;
7551         }
7552
7553         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7554         list_del(&err->list);
7555
7556         return err;
7557 }
7558
7559 /**
7560  * err_pos - find the position of a string within a command for error caret placement
7561  * @cmd: The tracing command that caused the error
7562  * @str: The string to position the caret at within @cmd
7563  *
7564  * Finds the position of the first occurrence of @str within @cmd.  The
7565  * return value can be passed to tracing_log_err() for caret placement
7566  * within @cmd.
7567  *
7568  * Returns the index within @cmd of the first occurrence of @str or 0
7569  * if @str was not found.
7570  */
7571 unsigned int err_pos(char *cmd, const char *str)
7572 {
7573         char *found;
7574
7575         if (WARN_ON(!strlen(cmd)))
7576                 return 0;
7577
7578         found = strstr(cmd, str);
7579         if (found)
7580                 return found - cmd;
7581
7582         return 0;
7583 }
7584
7585 /**
7586  * tracing_log_err - write an error to the tracing error log
7587  * @tr: The associated trace array for the error (NULL for top level array)
7588  * @loc: A string describing where the error occurred
7589  * @cmd: The tracing command that caused the error
7590  * @errs: The array of loc-specific static error strings
7591  * @type: The index into errs[], which produces the specific static err string
7592  * @pos: The position the caret should be placed in the cmd
7593  *
7594  * Writes an error into tracing/error_log of the form:
7595  *
7596  * <loc>: error: <text>
7597  *   Command: <cmd>
7598  *              ^
7599  *
7600  * tracing/error_log is a small log file containing the last
7601  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7602  * unless there has been a tracing error, and the error log can be
7603  * cleared and have its memory freed by writing the empty string in
7604  * truncation mode to it, i.e. echo > tracing/error_log.
7605  *
7606  * NOTE: the @errs array along with the @type param are used to
7607  * produce a static error string - this string is not copied and saved
7608  * when the error is logged - only a pointer to it is saved.  See
7609  * existing callers for examples of how static strings are typically
7610  * defined for use with tracing_log_err().
7611  */
7612 void tracing_log_err(struct trace_array *tr,
7613                      const char *loc, const char *cmd,
7614                      const char **errs, u8 type, u8 pos)
7615 {
7616         struct tracing_log_err *err;
7617
7618         if (!tr)
7619                 tr = &global_trace;
7620
7621         mutex_lock(&tracing_err_log_lock);
7622         err = get_tracing_log_err(tr);
7623         if (PTR_ERR(err) == -ENOMEM) {
7624                 mutex_unlock(&tracing_err_log_lock);
7625                 return;
7626         }
7627
7628         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7629         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7630
7631         err->info.errs = errs;
7632         err->info.type = type;
7633         err->info.pos = pos;
7634         err->info.ts = local_clock();
7635
7636         list_add_tail(&err->list, &tr->err_log);
7637         mutex_unlock(&tracing_err_log_lock);
7638 }
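
/*
 * Illustrative sketch of the calling convention documented above; the
 * error-string array, index macro, location string and command variable
 * below are hypothetical, not taken from an in-tree caller:
 *
 *	static const char *example_errs[] = { "Field not found", "Bad type" };
 *	#define EXAMPLE_ERR_FIELD	0
 *
 *	tracing_log_err(tr, "example_loc", cmd, example_errs,
 *			EXAMPLE_ERR_FIELD, err_pos(cmd, field_name));
 */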
7639
7640 static void clear_tracing_err_log(struct trace_array *tr)
7641 {
7642         struct tracing_log_err *err, *next;
7643
7644         mutex_lock(&tracing_err_log_lock);
7645         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7646                 list_del(&err->list);
7647                 kfree(err);
7648         }
7649
7650         tr->n_err_log_entries = 0;
7651         mutex_unlock(&tracing_err_log_lock);
7652 }
7653
7654 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7655 {
7656         struct trace_array *tr = m->private;
7657
7658         mutex_lock(&tracing_err_log_lock);
7659
7660         return seq_list_start(&tr->err_log, *pos);
7661 }
7662
7663 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7664 {
7665         struct trace_array *tr = m->private;
7666
7667         return seq_list_next(v, &tr->err_log, pos);
7668 }
7669
7670 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7671 {
7672         mutex_unlock(&tracing_err_log_lock);
7673 }
7674
7675 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7676 {
7677         u8 i;
7678
7679         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7680                 seq_putc(m, ' ');
7681         for (i = 0; i < pos; i++)
7682                 seq_putc(m, ' ');
7683         seq_puts(m, "^\n");
7684 }
7685
7686 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7687 {
7688         struct tracing_log_err *err = v;
7689
7690         if (err) {
7691                 const char *err_text = err->info.errs[err->info.type];
7692                 u64 sec = err->info.ts;
7693                 u32 nsec;
7694
7695                 nsec = do_div(sec, NSEC_PER_SEC);
7696                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7697                            err->loc, err_text);
7698                 seq_printf(m, "%s", err->cmd);
7699                 tracing_err_log_show_pos(m, err->info.pos);
7700         }
7701
7702         return 0;
7703 }
7704
7705 static const struct seq_operations tracing_err_log_seq_ops = {
7706         .start  = tracing_err_log_seq_start,
7707         .next   = tracing_err_log_seq_next,
7708         .stop   = tracing_err_log_seq_stop,
7709         .show   = tracing_err_log_seq_show
7710 };
7711
7712 static int tracing_err_log_open(struct inode *inode, struct file *file)
7713 {
7714         struct trace_array *tr = inode->i_private;
7715         int ret = 0;
7716
7717         ret = tracing_check_open_get_tr(tr);
7718         if (ret)
7719                 return ret;
7720
7721         /* If this file was opened for write, then erase contents */
7722         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7723                 clear_tracing_err_log(tr);
7724
7725         if (file->f_mode & FMODE_READ) {
7726                 ret = seq_open(file, &tracing_err_log_seq_ops);
7727                 if (!ret) {
7728                         struct seq_file *m = file->private_data;
7729                         m->private = tr;
7730                 } else {
7731                         trace_array_put(tr);
7732                 }
7733         }
7734         return ret;
7735 }
7736
7737 static ssize_t tracing_err_log_write(struct file *file,
7738                                      const char __user *buffer,
7739                                      size_t count, loff_t *ppos)
7740 {
7741         return count;
7742 }
7743
7744 static int tracing_err_log_release(struct inode *inode, struct file *file)
7745 {
7746         struct trace_array *tr = inode->i_private;
7747
7748         trace_array_put(tr);
7749
7750         if (file->f_mode & FMODE_READ)
7751                 seq_release(inode, file);
7752
7753         return 0;
7754 }
7755
7756 static const struct file_operations tracing_err_log_fops = {
7757         .open           = tracing_err_log_open,
7758         .write          = tracing_err_log_write,
7759         .read           = seq_read,
7760         .llseek         = seq_lseek,
7761         .release        = tracing_err_log_release,
7762 };
7763
7764 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7765 {
7766         struct trace_array *tr = inode->i_private;
7767         struct ftrace_buffer_info *info;
7768         int ret;
7769
7770         ret = tracing_check_open_get_tr(tr);
7771         if (ret)
7772                 return ret;
7773
7774         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7775         if (!info) {
7776                 trace_array_put(tr);
7777                 return -ENOMEM;
7778         }
7779
7780         mutex_lock(&trace_types_lock);
7781
7782         info->iter.tr           = tr;
7783         info->iter.cpu_file     = tracing_get_cpu(inode);
7784         info->iter.trace        = tr->current_trace;
7785         info->iter.array_buffer = &tr->array_buffer;
7786         info->spare             = NULL;
7787         /* Force reading ring buffer for first read */
7788         info->read              = (unsigned int)-1;
7789
7790         filp->private_data = info;
7791
7792         tr->trace_ref++;
7793
7794         mutex_unlock(&trace_types_lock);
7795
7796         ret = nonseekable_open(inode, filp);
7797         if (ret < 0)
7798                 trace_array_put(tr);
7799
7800         return ret;
7801 }
7802
7803 static __poll_t
7804 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7805 {
7806         struct ftrace_buffer_info *info = filp->private_data;
7807         struct trace_iterator *iter = &info->iter;
7808
7809         return trace_poll(iter, filp, poll_table);
7810 }
7811
7812 static ssize_t
7813 tracing_buffers_read(struct file *filp, char __user *ubuf,
7814                      size_t count, loff_t *ppos)
7815 {
7816         struct ftrace_buffer_info *info = filp->private_data;
7817         struct trace_iterator *iter = &info->iter;
7818         ssize_t ret = 0;
7819         ssize_t size;
7820
7821         if (!count)
7822                 return 0;
7823
7824 #ifdef CONFIG_TRACER_MAX_TRACE
7825         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7826                 return -EBUSY;
7827 #endif
7828
7829         if (!info->spare) {
7830                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7831                                                           iter->cpu_file);
7832                 if (IS_ERR(info->spare)) {
7833                         ret = PTR_ERR(info->spare);
7834                         info->spare = NULL;
7835                 } else {
7836                         info->spare_cpu = iter->cpu_file;
7837                 }
7838         }
7839         if (!info->spare)
7840                 return ret;
7841
7842         /* Do we have previous read data to read? */
7843         if (info->read < PAGE_SIZE)
7844                 goto read;
7845
7846  again:
7847         trace_access_lock(iter->cpu_file);
7848         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7849                                     &info->spare,
7850                                     count,
7851                                     iter->cpu_file, 0);
7852         trace_access_unlock(iter->cpu_file);
7853
7854         if (ret < 0) {
7855                 if (trace_empty(iter)) {
7856                         if ((filp->f_flags & O_NONBLOCK))
7857                                 return -EAGAIN;
7858
7859                         ret = wait_on_pipe(iter, 0);
7860                         if (ret)
7861                                 return ret;
7862
7863                         goto again;
7864                 }
7865                 return 0;
7866         }
7867
7868         info->read = 0;
7869  read:
7870         size = PAGE_SIZE - info->read;
7871         if (size > count)
7872                 size = count;
7873
7874         ret = copy_to_user(ubuf, info->spare + info->read, size);
7875         if (ret == size)
7876                 return -EFAULT;
7877
7878         size -= ret;
7879
7880         *ppos += size;
7881         info->read += size;
7882
7883         return size;
7884 }
7885
7886 static int tracing_buffers_release(struct inode *inode, struct file *file)
7887 {
7888         struct ftrace_buffer_info *info = file->private_data;
7889         struct trace_iterator *iter = &info->iter;
7890
7891         mutex_lock(&trace_types_lock);
7892
7893         iter->tr->trace_ref--;
7894
7895         __trace_array_put(iter->tr);
7896
7897         if (info->spare)
7898                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7899                                            info->spare_cpu, info->spare);
7900         kvfree(info);
7901
7902         mutex_unlock(&trace_types_lock);
7903
7904         return 0;
7905 }
7906
7907 struct buffer_ref {
7908         struct trace_buffer     *buffer;
7909         void                    *page;
7910         int                     cpu;
7911         refcount_t              refcount;
7912 };
7913
7914 static void buffer_ref_release(struct buffer_ref *ref)
7915 {
7916         if (!refcount_dec_and_test(&ref->refcount))
7917                 return;
7918         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7919         kfree(ref);
7920 }
7921
7922 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7923                                     struct pipe_buffer *buf)
7924 {
7925         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7926
7927         buffer_ref_release(ref);
7928         buf->private = 0;
7929 }
7930
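/*
 * A pipe buffer holding a ring buffer page may be duplicated (e.g. by
 * tee(2)). Take another reference on the page, but refuse once the
 * count becomes implausibly large, to guard against overflow.
 */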
7931 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7932                                 struct pipe_buffer *buf)
7933 {
7934         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7935
7936         if (refcount_read(&ref->refcount) > INT_MAX/2)
7937                 return false;
7938
7939         refcount_inc(&ref->refcount);
7940         return true;
7941 }
7942
7943 /* Pipe buffer operations for a buffer. */
7944 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7945         .release                = buffer_pipe_buf_release,
7946         .get                    = buffer_pipe_buf_get,
7947 };
7948
7949 /*
7950  * Callback from splice_to_pipe(); release the pages still attached to
7951  * the spd if we errored out while filling the pipe.
7952  */
7953 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7954 {
7955         struct buffer_ref *ref =
7956                 (struct buffer_ref *)spd->partial[i].private;
7957
7958         buffer_ref_release(ref);
7959         spd->partial[i].private = 0;
7960 }
7961
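/*
 * splice() handler for the per-CPU trace_pipe_raw files: hand complete
 * ring buffer pages to the pipe without copying. Each page is wrapped
 * in a refcounted buffer_ref so it is returned to the ring buffer only
 * after every pipe reader has released it.
 */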
7962 static ssize_t
7963 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7964                             struct pipe_inode_info *pipe, size_t len,
7965                             unsigned int flags)
7966 {
7967         struct ftrace_buffer_info *info = file->private_data;
7968         struct trace_iterator *iter = &info->iter;
7969         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7970         struct page *pages_def[PIPE_DEF_BUFFERS];
7971         struct splice_pipe_desc spd = {
7972                 .pages          = pages_def,
7973                 .partial        = partial_def,
7974                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7975                 .ops            = &buffer_pipe_buf_ops,
7976                 .spd_release    = buffer_spd_release,
7977         };
7978         struct buffer_ref *ref;
7979         int entries, i;
7980         ssize_t ret = 0;
7981
7982 #ifdef CONFIG_TRACER_MAX_TRACE
7983         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7984                 return -EBUSY;
7985 #endif
7986
7987         if (*ppos & (PAGE_SIZE - 1))
7988                 return -EINVAL;
7989
7990         if (len & (PAGE_SIZE - 1)) {
7991                 if (len < PAGE_SIZE)
7992                         return -EINVAL;
7993                 len &= PAGE_MASK;
7994         }
7995
7996         if (splice_grow_spd(pipe, &spd))
7997                 return -ENOMEM;
7998
7999  again:
8000         trace_access_lock(iter->cpu_file);
8001         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8002
8003         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8004                 struct page *page;
8005                 int r;
8006
8007                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8008                 if (!ref) {
8009                         ret = -ENOMEM;
8010                         break;
8011                 }
8012
8013                 refcount_set(&ref->refcount, 1);
8014                 ref->buffer = iter->array_buffer->buffer;
8015                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8016                 if (IS_ERR(ref->page)) {
8017                         ret = PTR_ERR(ref->page);
8018                         ref->page = NULL;
8019                         kfree(ref);
8020                         break;
8021                 }
8022                 ref->cpu = iter->cpu_file;
8023
8024                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8025                                           len, iter->cpu_file, 1);
8026                 if (r < 0) {
8027                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8028                                                    ref->page);
8029                         kfree(ref);
8030                         break;
8031                 }
8032
8033                 page = virt_to_page(ref->page);
8034
8035                 spd.pages[i] = page;
8036                 spd.partial[i].len = PAGE_SIZE;
8037                 spd.partial[i].offset = 0;
8038                 spd.partial[i].private = (unsigned long)ref;
8039                 spd.nr_pages++;
8040                 *ppos += PAGE_SIZE;
8041
8042                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8043         }
8044
8045         trace_access_unlock(iter->cpu_file);
8046         spd.nr_pages = i;
8047
8048         /* did we read anything? */
8049         if (!spd.nr_pages) {
8050                 if (ret)
8051                         goto out;
8052
8053                 ret = -EAGAIN;
8054                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8055                         goto out;
8056
8057                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8058                 if (ret)
8059                         goto out;
8060
8061                 goto again;
8062         }
8063
8064         ret = splice_to_pipe(pipe, &spd);
8065 out:
8066         splice_shrink_spd(&spd);
8067
8068         return ret;
8069 }
8070
8071 static const struct file_operations tracing_buffers_fops = {
8072         .open           = tracing_buffers_open,
8073         .read           = tracing_buffers_read,
8074         .poll           = tracing_buffers_poll,
8075         .release        = tracing_buffers_release,
8076         .splice_read    = tracing_buffers_splice_read,
8077         .llseek         = no_llseek,
8078 };
8079
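/*
 * Backs the per_cpu/cpuN/stats files: report per-CPU ring buffer
 * statistics (entries, overruns, bytes, timestamps, ...) as simple
 * "name: value" text.
 */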
8080 static ssize_t
8081 tracing_stats_read(struct file *filp, char __user *ubuf,
8082                    size_t count, loff_t *ppos)
8083 {
8084         struct inode *inode = file_inode(filp);
8085         struct trace_array *tr = inode->i_private;
8086         struct array_buffer *trace_buf = &tr->array_buffer;
8087         int cpu = tracing_get_cpu(inode);
8088         struct trace_seq *s;
8089         unsigned long cnt;
8090         unsigned long long t;
8091         unsigned long usec_rem;
8092
8093         s = kmalloc(sizeof(*s), GFP_KERNEL);
8094         if (!s)
8095                 return -ENOMEM;
8096
8097         trace_seq_init(s);
8098
8099         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8100         trace_seq_printf(s, "entries: %ld\n", cnt);
8101
8102         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8103         trace_seq_printf(s, "overrun: %ld\n", cnt);
8104
8105         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8106         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8107
8108         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8109         trace_seq_printf(s, "bytes: %ld\n", cnt);
8110
8111         if (trace_clocks[tr->clock_id].in_ns) {
8112                 /* local or global for trace_clock */
8113                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8114                 usec_rem = do_div(t, USEC_PER_SEC);
8115                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8116                                                                 t, usec_rem);
8117
8118                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8119                 usec_rem = do_div(t, USEC_PER_SEC);
8120                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8121         } else {
8122                 /* counter or tsc mode for trace_clock */
8123                 trace_seq_printf(s, "oldest event ts: %llu\n",
8124                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8125
8126                 trace_seq_printf(s, "now ts: %llu\n",
8127                                 ring_buffer_time_stamp(trace_buf->buffer));
8128         }
8129
8130         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8131         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8132
8133         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8134         trace_seq_printf(s, "read events: %ld\n", cnt);
8135
8136         count = simple_read_from_buffer(ubuf, count, ppos,
8137                                         s->buffer, trace_seq_used(s));
8138
8139         kfree(s);
8140
8141         return count;
8142 }
8143
8144 static const struct file_operations tracing_stats_fops = {
8145         .open           = tracing_open_generic_tr,
8146         .read           = tracing_stats_read,
8147         .llseek         = generic_file_llseek,
8148         .release        = tracing_release_generic_tr,
8149 };
8150
8151 #ifdef CONFIG_DYNAMIC_FTRACE
8152
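/*
 * Backs dyn_ftrace_total_info: report how many functions dynamic
 * ftrace has accounted for, and the number of pages and groups used
 * to store their records.
 */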
8153 static ssize_t
8154 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8155                   size_t cnt, loff_t *ppos)
8156 {
8157         ssize_t ret;
8158         char *buf;
8159         int r;
8160
8161         /* 256 should be plenty to hold the amount needed */
8162         buf = kmalloc(256, GFP_KERNEL);
8163         if (!buf)
8164                 return -ENOMEM;
8165
8166         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8167                       ftrace_update_tot_cnt,
8168                       ftrace_number_of_pages,
8169                       ftrace_number_of_groups);
8170
8171         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8172         kfree(buf);
8173         return ret;
8174 }
8175
8176 static const struct file_operations tracing_dyn_info_fops = {
8177         .open           = tracing_open_generic,
8178         .read           = tracing_read_dyn_info,
8179         .llseek         = generic_file_llseek,
8180 };
8181 #endif /* CONFIG_DYNAMIC_FTRACE */
8182
8183 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8184 static void
8185 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8186                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8187                 void *data)
8188 {
8189         tracing_snapshot_instance(tr);
8190 }
8191
8192 static void
8193 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8194                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8195                       void *data)
8196 {
8197         struct ftrace_func_mapper *mapper = data;
8198         long *count = NULL;
8199
8200         if (mapper)
8201                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8202
8203         if (count) {
8204
8205                 if (*count <= 0)
8206                         return;
8207
8208                 (*count)--;
8209         }
8210
8211         tracing_snapshot_instance(tr);
8212 }
8213
8214 static int
8215 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8216                       struct ftrace_probe_ops *ops, void *data)
8217 {
8218         struct ftrace_func_mapper *mapper = data;
8219         long *count = NULL;
8220
8221         seq_printf(m, "%ps:", (void *)ip);
8222
8223         seq_puts(m, "snapshot");
8224
8225         if (mapper)
8226                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8227
8228         if (count)
8229                 seq_printf(m, ":count=%ld\n", *count);
8230         else
8231                 seq_puts(m, ":unlimited\n");
8232
8233         return 0;
8234 }
8235
8236 static int
8237 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8238                      unsigned long ip, void *init_data, void **data)
8239 {
8240         struct ftrace_func_mapper *mapper = *data;
8241
8242         if (!mapper) {
8243                 mapper = allocate_ftrace_func_mapper();
8244                 if (!mapper)
8245                         return -ENOMEM;
8246                 *data = mapper;
8247         }
8248
8249         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8250 }
8251
8252 static void
8253 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8254                      unsigned long ip, void *data)
8255 {
8256         struct ftrace_func_mapper *mapper = data;
8257
8258         if (!ip) {
8259                 if (!mapper)
8260                         return;
8261                 free_ftrace_func_mapper(mapper, NULL);
8262                 return;
8263         }
8264
8265         ftrace_func_mapper_remove_ip(mapper, ip);
8266 }
8267
8268 static struct ftrace_probe_ops snapshot_probe_ops = {
8269         .func                   = ftrace_snapshot,
8270         .print                  = ftrace_snapshot_print,
8271 };
8272
8273 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8274         .func                   = ftrace_count_snapshot,
8275         .print                  = ftrace_snapshot_print,
8276         .init                   = ftrace_snapshot_init,
8277         .free                   = ftrace_snapshot_free,
8278 };
8279
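/*
 * Implements the "snapshot" command of set_ftrace_filter, e.g.
 * (see Documentation/trace/ftrace.rst):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter
 *   echo 'schedule:snapshot:3' > set_ftrace_filter
 *
 * The optional count limits how many snapshots the probe may trigger.
 */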
8280 static int
8281 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8282                                char *glob, char *cmd, char *param, int enable)
8283 {
8284         struct ftrace_probe_ops *ops;
8285         void *count = (void *)-1;
8286         char *number;
8287         int ret;
8288
8289         if (!tr)
8290                 return -ENODEV;
8291
8292         /* hash funcs only work with set_ftrace_filter */
8293         if (!enable)
8294                 return -EINVAL;
8295
8296         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8297
8298         if (glob[0] == '!')
8299                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8300
8301         if (!param)
8302                 goto out_reg;
8303
8304         number = strsep(&param, ":");
8305
8306         if (!strlen(number))
8307                 goto out_reg;
8308
8309         /*
8310          * We use the callback data field (which is a pointer)
8311          * as our counter.
8312          */
8313         ret = kstrtoul(number, 0, (unsigned long *)&count);
8314         if (ret)
8315                 return ret;
8316
8317  out_reg:
8318         ret = tracing_alloc_snapshot_instance(tr);
8319         if (ret < 0)
8320                 goto out;
8321
8322         ret = register_ftrace_function_probe(glob, tr, ops, count);
8323
8324  out:
8325         return ret < 0 ? ret : 0;
8326 }
8327
8328 static struct ftrace_func_command ftrace_snapshot_cmd = {
8329         .name                   = "snapshot",
8330         .func                   = ftrace_trace_snapshot_callback,
8331 };
8332
8333 static __init int register_snapshot_cmd(void)
8334 {
8335         return register_ftrace_command(&ftrace_snapshot_cmd);
8336 }
8337 #else
8338 static inline __init int register_snapshot_cmd(void) { return 0; }
8339 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8340
8341 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8342 {
8343         if (WARN_ON(!tr->dir))
8344                 return ERR_PTR(-ENODEV);
8345
8346         /* Top directory uses NULL as the parent */
8347         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8348                 return NULL;
8349
8350         /* All sub buffers have a descriptor */
8351         return tr->dir;
8352 }
8353
8354 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8355 {
8356         struct dentry *d_tracer;
8357
8358         if (tr->percpu_dir)
8359                 return tr->percpu_dir;
8360
8361         d_tracer = tracing_get_dentry(tr);
8362         if (IS_ERR(d_tracer))
8363                 return NULL;
8364
8365         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8366
8367         MEM_FAIL(!tr->percpu_dir,
8368                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8369
8370         return tr->percpu_dir;
8371 }
8372
8373 static struct dentry *
8374 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8375                       void *data, long cpu, const struct file_operations *fops)
8376 {
8377         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8378
8379         if (ret) /* See tracing_get_cpu() */
8380                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8381         return ret;
8382 }
8383
8384 static void
8385 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8386 {
8387         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8388         struct dentry *d_cpu;
8389         char cpu_dir[30]; /* 30 characters should be more than enough */
8390
8391         if (!d_percpu)
8392                 return;
8393
8394         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8395         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8396         if (!d_cpu) {
8397                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8398                 return;
8399         }
8400
8401         /* per cpu trace_pipe */
8402         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8403                                 tr, cpu, &tracing_pipe_fops);
8404
8405         /* per cpu trace */
8406         trace_create_cpu_file("trace", 0644, d_cpu,
8407                                 tr, cpu, &tracing_fops);
8408
8409         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8410                                 tr, cpu, &tracing_buffers_fops);
8411
8412         trace_create_cpu_file("stats", 0444, d_cpu,
8413                                 tr, cpu, &tracing_stats_fops);
8414
8415         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8416                                 tr, cpu, &tracing_entries_fops);
8417
8418 #ifdef CONFIG_TRACER_SNAPSHOT
8419         trace_create_cpu_file("snapshot", 0644, d_cpu,
8420                                 tr, cpu, &snapshot_fops);
8421
8422         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8423                                 tr, cpu, &snapshot_raw_fops);
8424 #endif
8425 }
8426
8427 #ifdef CONFIG_FTRACE_SELFTEST
8428 /* Let selftest have access to static functions in this file */
8429 #include "trace_selftest.c"
8430 #endif
8431
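/*
 * Read handler for the tracer specific files under options/: report
 * "1\n" or "0\n" depending on whether the option bit is currently set.
 */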
8432 static ssize_t
8433 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8434                         loff_t *ppos)
8435 {
8436         struct trace_option_dentry *topt = filp->private_data;
8437         char *buf;
8438
8439         if (topt->flags->val & topt->opt->bit)
8440                 buf = "1\n";
8441         else
8442                 buf = "0\n";
8443
8444         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8445 }
8446
8447 static ssize_t
8448 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8449                          loff_t *ppos)
8450 {
8451         struct trace_option_dentry *topt = filp->private_data;
8452         unsigned long val;
8453         int ret;
8454
8455         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8456         if (ret)
8457                 return ret;
8458
8459         if (val != 0 && val != 1)
8460                 return -EINVAL;
8461
8462         if (!!(topt->flags->val & topt->opt->bit) != val) {
8463                 mutex_lock(&trace_types_lock);
8464                 ret = __set_tracer_option(topt->tr, topt->flags,
8465                                           topt->opt, !val);
8466                 mutex_unlock(&trace_types_lock);
8467                 if (ret)
8468                         return ret;
8469         }
8470
8471         *ppos += cnt;
8472
8473         return cnt;
8474 }
8475
8476
8477 static const struct file_operations trace_options_fops = {
8478         .open = tracing_open_generic,
8479         .read = trace_options_read,
8480         .write = trace_options_write,
8481         .llseek = generic_file_llseek,
8482 };
8483
8484 /*
8485  * In order to pass in both the trace_array descriptor as well as the index
8486  * to the flag that the trace option file represents, the trace_array
8487  * has a character array of trace_flags_index[], which holds the index
8488  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8489  * The address of this character array is passed to the flag option file
8490  * read/write callbacks.
8491  *
8492  * In order to extract both the index and the trace_array descriptor,
8493  * get_tr_index() uses the following algorithm.
8494  *
8495  *   idx = *ptr;
8496  *
8497  * The pointer itself points at the index entry whose value equals its
8498  * own position in the array (remember index[1] == 1).
8499  *
8500  * Then, to get the trace_array descriptor, subtract that index from
8501  * the pointer to arrive at the start of the index array:
8502  *
8503  *   ptr - idx == &index[0]
8504  *
8505  * Then a simple container_of() from that pointer gets us to the
8506  * trace_array descriptor.
8507  */
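/*
 * For example (illustrative): if data points at trace_flags_index[3],
 * then *data == 3 and data - 3 == &trace_flags_index[0], from which
 * container_of() recovers the enclosing trace_array.
 */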
8508 static void get_tr_index(void *data, struct trace_array **ptr,
8509                          unsigned int *pindex)
8510 {
8511         *pindex = *(unsigned char *)data;
8512
8513         *ptr = container_of(data - *pindex, struct trace_array,
8514                             trace_flags_index);
8515 }
8516
8517 static ssize_t
8518 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8519                         loff_t *ppos)
8520 {
8521         void *tr_index = filp->private_data;
8522         struct trace_array *tr;
8523         unsigned int index;
8524         char *buf;
8525
8526         get_tr_index(tr_index, &tr, &index);
8527
8528         if (tr->trace_flags & (1 << index))
8529                 buf = "1\n";
8530         else
8531                 buf = "0\n";
8532
8533         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8534 }
8535
8536 static ssize_t
8537 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8538                          loff_t *ppos)
8539 {
8540         void *tr_index = filp->private_data;
8541         struct trace_array *tr;
8542         unsigned int index;
8543         unsigned long val;
8544         int ret;
8545
8546         get_tr_index(tr_index, &tr, &index);
8547
8548         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8549         if (ret)
8550                 return ret;
8551
8552         if (val != 0 && val != 1)
8553                 return -EINVAL;
8554
8555         mutex_lock(&event_mutex);
8556         mutex_lock(&trace_types_lock);
8557         ret = set_tracer_flag(tr, 1 << index, val);
8558         mutex_unlock(&trace_types_lock);
8559         mutex_unlock(&event_mutex);
8560
8561         if (ret < 0)
8562                 return ret;
8563
8564         *ppos += cnt;
8565
8566         return cnt;
8567 }
8568
8569 static const struct file_operations trace_options_core_fops = {
8570         .open = tracing_open_generic,
8571         .read = trace_options_core_read,
8572         .write = trace_options_core_write,
8573         .llseek = generic_file_llseek,
8574 };
8575
8576 struct dentry *trace_create_file(const char *name,
8577                                  umode_t mode,
8578                                  struct dentry *parent,
8579                                  void *data,
8580                                  const struct file_operations *fops)
8581 {
8582         struct dentry *ret;
8583
8584         ret = tracefs_create_file(name, mode, parent, data, fops);
8585         if (!ret)
8586                 pr_warn("Could not create tracefs '%s' entry\n", name);
8587
8588         return ret;
8589 }
8590
8591
8592 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8593 {
8594         struct dentry *d_tracer;
8595
8596         if (tr->options)
8597                 return tr->options;
8598
8599         d_tracer = tracing_get_dentry(tr);
8600         if (IS_ERR(d_tracer))
8601                 return NULL;
8602
8603         tr->options = tracefs_create_dir("options", d_tracer);
8604         if (!tr->options) {
8605                 pr_warn("Could not create tracefs directory 'options'\n");
8606                 return NULL;
8607         }
8608
8609         return tr->options;
8610 }
8611
8612 static void
8613 create_trace_option_file(struct trace_array *tr,
8614                          struct trace_option_dentry *topt,
8615                          struct tracer_flags *flags,
8616                          struct tracer_opt *opt)
8617 {
8618         struct dentry *t_options;
8619
8620         t_options = trace_options_init_dentry(tr);
8621         if (!t_options)
8622                 return;
8623
8624         topt->flags = flags;
8625         topt->opt = opt;
8626         topt->tr = tr;
8627
8628         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8629                                     &trace_options_fops);
8630
8631 }
8632
8633 static void
8634 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8635 {
8636         struct trace_option_dentry *topts;
8637         struct trace_options *tr_topts;
8638         struct tracer_flags *flags;
8639         struct tracer_opt *opts;
8640         int cnt;
8641         int i;
8642
8643         if (!tracer)
8644                 return;
8645
8646         flags = tracer->flags;
8647
8648         if (!flags || !flags->opts)
8649                 return;
8650
8651         /*
8652          * If this is an instance, only create flags for tracers
8653          * the instance may have.
8654          */
8655         if (!trace_ok_for_array(tracer, tr))
8656                 return;
8657
8658         for (i = 0; i < tr->nr_topts; i++) {
8659                 /* Make sure there are no duplicate flags. */
8660                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8661                         return;
8662         }
8663
8664         opts = flags->opts;
8665
8666         for (cnt = 0; opts[cnt].name; cnt++)
8667                 ;
8668
8669         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8670         if (!topts)
8671                 return;
8672
8673         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8674                             GFP_KERNEL);
8675         if (!tr_topts) {
8676                 kfree(topts);
8677                 return;
8678         }
8679
8680         tr->topts = tr_topts;
8681         tr->topts[tr->nr_topts].tracer = tracer;
8682         tr->topts[tr->nr_topts].topts = topts;
8683         tr->nr_topts++;
8684
8685         for (cnt = 0; opts[cnt].name; cnt++) {
8686                 create_trace_option_file(tr, &topts[cnt], flags,
8687                                          &opts[cnt]);
8688                 MEM_FAIL(topts[cnt].entry == NULL,
8689                           "Failed to create trace option: %s",
8690                           opts[cnt].name);
8691         }
8692 }
8693
8694 static struct dentry *
8695 create_trace_option_core_file(struct trace_array *tr,
8696                               const char *option, long index)
8697 {
8698         struct dentry *t_options;
8699
8700         t_options = trace_options_init_dentry(tr);
8701         if (!t_options)
8702                 return NULL;
8703
8704         return trace_create_file(option, 0644, t_options,
8705                                  (void *)&tr->trace_flags_index[index],
8706                                  &trace_options_core_fops);
8707 }
8708
8709 static void create_trace_options_dir(struct trace_array *tr)
8710 {
8711         struct dentry *t_options;
8712         bool top_level = tr == &global_trace;
8713         int i;
8714
8715         t_options = trace_options_init_dentry(tr);
8716         if (!t_options)
8717                 return;
8718
8719         for (i = 0; trace_options[i]; i++) {
8720                 if (top_level ||
8721                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8722                         create_trace_option_core_file(tr, trace_options[i], i);
8723         }
8724 }
8725
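/*
 * Backs the tracing_on file: reading reports whether writing to the
 * ring buffer is currently enabled, and writing 0 or 1 turns it off
 * or on, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */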
8726 static ssize_t
8727 rb_simple_read(struct file *filp, char __user *ubuf,
8728                size_t cnt, loff_t *ppos)
8729 {
8730         struct trace_array *tr = filp->private_data;
8731         char buf[64];
8732         int r;
8733
8734         r = tracer_tracing_is_on(tr);
8735         r = sprintf(buf, "%d\n", r);
8736
8737         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8738 }
8739
8740 static ssize_t
8741 rb_simple_write(struct file *filp, const char __user *ubuf,
8742                 size_t cnt, loff_t *ppos)
8743 {
8744         struct trace_array *tr = filp->private_data;
8745         struct trace_buffer *buffer = tr->array_buffer.buffer;
8746         unsigned long val;
8747         int ret;
8748
8749         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8750         if (ret)
8751                 return ret;
8752
8753         if (buffer) {
8754                 mutex_lock(&trace_types_lock);
8755                 if (!!val == tracer_tracing_is_on(tr)) {
8756                         val = 0; /* do nothing */
8757                 } else if (val) {
8758                         tracer_tracing_on(tr);
8759                         if (tr->current_trace->start)
8760                                 tr->current_trace->start(tr);
8761                 } else {
8762                         tracer_tracing_off(tr);
8763                         if (tr->current_trace->stop)
8764                                 tr->current_trace->stop(tr);
8765                 }
8766                 mutex_unlock(&trace_types_lock);
8767         }
8768
8769         (*ppos)++;
8770
8771         return cnt;
8772 }
8773
8774 static const struct file_operations rb_simple_fops = {
8775         .open           = tracing_open_generic_tr,
8776         .read           = rb_simple_read,
8777         .write          = rb_simple_write,
8778         .release        = tracing_release_generic_tr,
8779         .llseek         = default_llseek,
8780 };
8781
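/*
 * Backs the buffer_percent file: how full (in percent) the ring buffer
 * must be before a reader blocked in wait_on_pipe() is woken up. The
 * default is 50, set in init_tracer_tracefs().
 */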
8782 static ssize_t
8783 buffer_percent_read(struct file *filp, char __user *ubuf,
8784                     size_t cnt, loff_t *ppos)
8785 {
8786         struct trace_array *tr = filp->private_data;
8787         char buf[64];
8788         int r;
8789
8790         r = tr->buffer_percent;
8791         r = sprintf(buf, "%d\n", r);
8792
8793         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8794 }
8795
8796 static ssize_t
8797 buffer_percent_write(struct file *filp, const char __user *ubuf,
8798                      size_t cnt, loff_t *ppos)
8799 {
8800         struct trace_array *tr = filp->private_data;
8801         unsigned long val;
8802         int ret;
8803
8804         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8805         if (ret)
8806                 return ret;
8807
8808         if (val > 100)
8809                 return -EINVAL;
8810
8811         if (!val)
8812                 val = 1;
8813
8814         tr->buffer_percent = val;
8815
8816         (*ppos)++;
8817
8818         return cnt;
8819 }
8820
8821 static const struct file_operations buffer_percent_fops = {
8822         .open           = tracing_open_generic_tr,
8823         .read           = buffer_percent_read,
8824         .write          = buffer_percent_write,
8825         .release        = tracing_release_generic_tr,
8826         .llseek         = default_llseek,
8827 };
8828
8829 static struct dentry *trace_instance_dir;
8830
8831 static void
8832 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8833
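/*
 * Allocate one array_buffer: the ring buffer itself plus the per-CPU
 * trace_array_cpu bookkeeping data.
 */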
8834 static int
8835 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8836 {
8837         enum ring_buffer_flags rb_flags;
8838
8839         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8840
8841         buf->tr = tr;
8842
8843         buf->buffer = ring_buffer_alloc(size, rb_flags);
8844         if (!buf->buffer)
8845                 return -ENOMEM;
8846
8847         buf->data = alloc_percpu(struct trace_array_cpu);
8848         if (!buf->data) {
8849                 ring_buffer_free(buf->buffer);
8850                 buf->buffer = NULL;
8851                 return -ENOMEM;
8852         }
8853
8854         /* Allocate the first page for all buffers */
8855         set_buffer_entries(&tr->array_buffer,
8856                            ring_buffer_size(tr->array_buffer.buffer, 0));
8857
8858         return 0;
8859 }
8860
8861 static int allocate_trace_buffers(struct trace_array *tr, int size)
8862 {
8863         int ret;
8864
8865         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8866         if (ret)
8867                 return ret;
8868
8869 #ifdef CONFIG_TRACER_MAX_TRACE
8870         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8871                                     allocate_snapshot ? size : 1);
8872         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8873                 ring_buffer_free(tr->array_buffer.buffer);
8874                 tr->array_buffer.buffer = NULL;
8875                 free_percpu(tr->array_buffer.data);
8876                 tr->array_buffer.data = NULL;
8877                 return -ENOMEM;
8878         }
8879         tr->allocated_snapshot = allocate_snapshot;
8880
8881         /*
8882          * Only the top level trace array gets its snapshot allocated
8883          * from the kernel command line.
8884          */
8885         allocate_snapshot = false;
8886 #endif
8887
8888         return 0;
8889 }
8890
8891 static void free_trace_buffer(struct array_buffer *buf)
8892 {
8893         if (buf->buffer) {
8894                 ring_buffer_free(buf->buffer);
8895                 buf->buffer = NULL;
8896                 free_percpu(buf->data);
8897                 buf->data = NULL;
8898         }
8899 }
8900
8901 static void free_trace_buffers(struct trace_array *tr)
8902 {
8903         if (!tr)
8904                 return;
8905
8906         free_trace_buffer(&tr->array_buffer);
8907
8908 #ifdef CONFIG_TRACER_MAX_TRACE
8909         free_trace_buffer(&tr->max_buffer);
8910 #endif
8911 }
8912
8913 static void init_trace_flags_index(struct trace_array *tr)
8914 {
8915         int i;
8916
8917         /* Used by the trace options files */
8918         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8919                 tr->trace_flags_index[i] = i;
8920 }
8921
8922 static void __update_tracer_options(struct trace_array *tr)
8923 {
8924         struct tracer *t;
8925
8926         for (t = trace_types; t; t = t->next)
8927                 add_tracer_options(tr, t);
8928 }
8929
8930 static void update_tracer_options(struct trace_array *tr)
8931 {
8932         mutex_lock(&trace_types_lock);
8933         __update_tracer_options(tr);
8934         mutex_unlock(&trace_types_lock);
8935 }
8936
8937 /* Must have trace_types_lock held */
8938 struct trace_array *trace_array_find(const char *instance)
8939 {
8940         struct trace_array *tr, *found = NULL;
8941
8942         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8943                 if (tr->name && strcmp(tr->name, instance) == 0) {
8944                         found = tr;
8945                         break;
8946                 }
8947         }
8948
8949         return found;
8950 }
8951
8952 struct trace_array *trace_array_find_get(const char *instance)
8953 {
8954         struct trace_array *tr;
8955
8956         mutex_lock(&trace_types_lock);
8957         tr = trace_array_find(instance);
8958         if (tr)
8959                 tr->ref++;
8960         mutex_unlock(&trace_types_lock);
8961
8962         return tr;
8963 }
8964
8965 static int trace_array_create_dir(struct trace_array *tr)
8966 {
8967         int ret;
8968
8969         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8970         if (!tr->dir)
8971                 return -EINVAL;
8972
8973         ret = event_trace_add_tracer(tr->dir, tr);
8974         if (ret)
8975                 tracefs_remove(tr->dir);
8976
8977         init_tracer_tracefs(tr, tr->dir);
8978         __update_tracer_options(tr);
8979
8980         return ret;
8981 }
8982
8983 static struct trace_array *trace_array_create(const char *name)
8984 {
8985         struct trace_array *tr;
8986         int ret;
8987
8988         ret = -ENOMEM;
8989         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8990         if (!tr)
8991                 return ERR_PTR(ret);
8992
8993         tr->name = kstrdup(name, GFP_KERNEL);
8994         if (!tr->name)
8995                 goto out_free_tr;
8996
8997         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8998                 goto out_free_tr;
8999
9000         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9001
9002         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9003
9004         raw_spin_lock_init(&tr->start_lock);
9005
9006         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9007
9008         tr->current_trace = &nop_trace;
9009
9010         INIT_LIST_HEAD(&tr->systems);
9011         INIT_LIST_HEAD(&tr->events);
9012         INIT_LIST_HEAD(&tr->hist_vars);
9013         INIT_LIST_HEAD(&tr->err_log);
9014
9015         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9016                 goto out_free_tr;
9017
9018         if (ftrace_allocate_ftrace_ops(tr) < 0)
9019                 goto out_free_tr;
9020
9021         ftrace_init_trace_array(tr);
9022
9023         init_trace_flags_index(tr);
9024
9025         if (trace_instance_dir) {
9026                 ret = trace_array_create_dir(tr);
9027                 if (ret)
9028                         goto out_free_tr;
9029         } else
9030                 __trace_early_add_events(tr);
9031
9032         list_add(&tr->list, &ftrace_trace_arrays);
9033
9034         tr->ref++;
9035
9036         return tr;
9037
9038  out_free_tr:
9039         ftrace_free_ftrace_ops(tr);
9040         free_trace_buffers(tr);
9041         free_cpumask_var(tr->tracing_cpumask);
9042         kfree(tr->name);
9043         kfree(tr);
9044
9045         return ERR_PTR(ret);
9046 }
9047
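/*
 * Called by tracefs when user space creates a directory under
 * instances/, e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 */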
9048 static int instance_mkdir(const char *name)
9049 {
9050         struct trace_array *tr;
9051         int ret;
9052
9053         mutex_lock(&event_mutex);
9054         mutex_lock(&trace_types_lock);
9055
9056         ret = -EEXIST;
9057         if (trace_array_find(name))
9058                 goto out_unlock;
9059
9060         tr = trace_array_create(name);
9061
9062         ret = PTR_ERR_OR_ZERO(tr);
9063
9064 out_unlock:
9065         mutex_unlock(&trace_types_lock);
9066         mutex_unlock(&event_mutex);
9067         return ret;
9068 }
9069
9070 /**
9071  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9072  * @name: The name of the trace array to be looked up/created.
9073  *
9074  * Returns a pointer to the trace array with the given name, or
9075  * NULL if it cannot be created.
9076  *
9077  * NOTE: This function increments the reference counter associated with the
9078  * trace array returned. This makes sure it cannot be freed while in use.
9079  * Use trace_array_put() once the trace array is no longer needed.
9080  * If the trace_array is to be freed, trace_array_destroy() needs to
9081  * be called after the trace_array_put(), or simply let user space
9082  * delete it from the tracefs instances directory. But until
9083  * trace_array_put() is called, user space cannot delete it.
9084  *
9085  */
9086 struct trace_array *trace_array_get_by_name(const char *name)
9087 {
9088         struct trace_array *tr;
9089
9090         mutex_lock(&event_mutex);
9091         mutex_lock(&trace_types_lock);
9092
9093         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9094                 if (tr->name && strcmp(tr->name, name) == 0)
9095                         goto out_unlock;
9096         }
9097
9098         tr = trace_array_create(name);
9099
9100         if (IS_ERR(tr))
9101                 tr = NULL;
9102 out_unlock:
9103         if (tr)
9104                 tr->ref++;
9105
9106         mutex_unlock(&trace_types_lock);
9107         mutex_unlock(&event_mutex);
9108         return tr;
9109 }
9110 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
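/*
 * Illustrative in-kernel usage sketch (not part of this file; see
 * samples/ftrace/sample-trace-array.c for a complete example):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (tr) {
 *           trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *           ...
 *           trace_array_put(tr);
 *           trace_array_destroy(tr);
 *   }
 */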
9111
9112 static int __remove_instance(struct trace_array *tr)
9113 {
9114         int i;
9115
9116         /* Reference counter for a newly created trace array = 1. */
9117         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9118                 return -EBUSY;
9119
9120         list_del(&tr->list);
9121
9122         /* Disable all the flags that were enabled coming in */
9123         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9124                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9125                         set_tracer_flag(tr, 1 << i, 0);
9126         }
9127
9128         tracing_set_nop(tr);
9129         clear_ftrace_function_probes(tr);
9130         event_trace_del_tracer(tr);
9131         ftrace_clear_pids(tr);
9132         ftrace_destroy_function_files(tr);
9133         tracefs_remove(tr->dir);
9134         free_percpu(tr->last_func_repeats);
9135         free_trace_buffers(tr);
9136
9137         for (i = 0; i < tr->nr_topts; i++) {
9138                 kfree(tr->topts[i].topts);
9139         }
9140         kfree(tr->topts);
9141
9142         free_cpumask_var(tr->tracing_cpumask);
9143         kfree(tr->name);
9144         kfree(tr);
9145
9146         return 0;
9147 }
9148
9149 int trace_array_destroy(struct trace_array *this_tr)
9150 {
9151         struct trace_array *tr;
9152         int ret;
9153
9154         if (!this_tr)
9155                 return -EINVAL;
9156
9157         mutex_lock(&event_mutex);
9158         mutex_lock(&trace_types_lock);
9159
9160         ret = -ENODEV;
9161
9162         /* Make sure the trace array exists before destroying it. */
9163         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9164                 if (tr == this_tr) {
9165                         ret = __remove_instance(tr);
9166                         break;
9167                 }
9168         }
9169
9170         mutex_unlock(&trace_types_lock);
9171         mutex_unlock(&event_mutex);
9172
9173         return ret;
9174 }
9175 EXPORT_SYMBOL_GPL(trace_array_destroy);
9176
9177 static int instance_rmdir(const char *name)
9178 {
9179         struct trace_array *tr;
9180         int ret;
9181
9182         mutex_lock(&event_mutex);
9183         mutex_lock(&trace_types_lock);
9184
9185         ret = -ENODEV;
9186         tr = trace_array_find(name);
9187         if (tr)
9188                 ret = __remove_instance(tr);
9189
9190         mutex_unlock(&trace_types_lock);
9191         mutex_unlock(&event_mutex);
9192
9193         return ret;
9194 }
9195
9196 static __init void create_trace_instances(struct dentry *d_tracer)
9197 {
9198         struct trace_array *tr;
9199
9200         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9201                                                          instance_mkdir,
9202                                                          instance_rmdir);
9203         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9204                 return;
9205
9206         mutex_lock(&event_mutex);
9207         mutex_lock(&trace_types_lock);
9208
9209         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9210                 if (!tr->name)
9211                         continue;
9212                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9213                              "Failed to create instance directory\n"))
9214                         break;
9215         }
9216
9217         mutex_unlock(&trace_types_lock);
9218         mutex_unlock(&event_mutex);
9219 }
9220
9221 static void
9222 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9223 {
9224         struct trace_event_file *file;
9225         int cpu;
9226
9227         trace_create_file("available_tracers", 0444, d_tracer,
9228                         tr, &show_traces_fops);
9229
9230         trace_create_file("current_tracer", 0644, d_tracer,
9231                         tr, &set_tracer_fops);
9232
9233         trace_create_file("tracing_cpumask", 0644, d_tracer,
9234                           tr, &tracing_cpumask_fops);
9235
9236         trace_create_file("trace_options", 0644, d_tracer,
9237                           tr, &tracing_iter_fops);
9238
9239         trace_create_file("trace", 0644, d_tracer,
9240                           tr, &tracing_fops);
9241
9242         trace_create_file("trace_pipe", 0444, d_tracer,
9243                           tr, &tracing_pipe_fops);
9244
9245         trace_create_file("buffer_size_kb", 0644, d_tracer,
9246                           tr, &tracing_entries_fops);
9247
9248         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9249                           tr, &tracing_total_entries_fops);
9250
9251         trace_create_file("free_buffer", 0200, d_tracer,
9252                           tr, &tracing_free_buffer_fops);
9253
9254         trace_create_file("trace_marker", 0220, d_tracer,
9255                           tr, &tracing_mark_fops);
9256
9257         file = __find_event_file(tr, "ftrace", "print");
9258         if (file && file->dir)
9259                 trace_create_file("trigger", 0644, file->dir, file,
9260                                   &event_trigger_fops);
9261         tr->trace_marker_file = file;
9262
9263         trace_create_file("trace_marker_raw", 0220, d_tracer,
9264                           tr, &tracing_mark_raw_fops);
9265
9266         trace_create_file("trace_clock", 0644, d_tracer, tr,
9267                           &trace_clock_fops);
9268
9269         trace_create_file("tracing_on", 0644, d_tracer,
9270                           tr, &rb_simple_fops);
9271
9272         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9273                           &trace_time_stamp_mode_fops);
9274
9275         tr->buffer_percent = 50;
9276
9277         trace_create_file("buffer_percent", 0444, d_tracer,
9278                         tr, &buffer_percent_fops);
9279
9280         create_trace_options_dir(tr);
9281
9282 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9283         trace_create_maxlat_file(tr, d_tracer);
9284 #endif
9285
9286         if (ftrace_create_function_files(tr, d_tracer))
9287                 MEM_FAIL(1, "Could not allocate function filter files");
9288
9289 #ifdef CONFIG_TRACER_SNAPSHOT
9290         trace_create_file("snapshot", 0644, d_tracer,
9291                           tr, &snapshot_fops);
9292 #endif
9293
9294         trace_create_file("error_log", 0644, d_tracer,
9295                           tr, &tracing_err_log_fops);
9296
9297         for_each_tracing_cpu(cpu)
9298                 tracing_init_tracefs_percpu(tr, cpu);
9299
9300         ftrace_init_tracefs(tr, d_tracer);
9301 }
9302
9303 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9304 {
9305         struct vfsmount *mnt;
9306         struct file_system_type *type;
9307
9308         /*
9309          * To maintain backward compatibility for tools that mount
9310          * debugfs to get to the tracing facility, tracefs is automatically
9311          * mounted to the debugfs/tracing directory.
9312          */
9313         type = get_fs_type("tracefs");
9314         if (!type)
9315                 return NULL;
9316         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9317         put_filesystem(type);
9318         if (IS_ERR(mnt))
9319                 return NULL;
9320         mntget(mnt);
9321
9322         return mnt;
9323 }
9324
9325 /**
9326  * tracing_init_dentry - initialize top level trace array
9327  *
9328  * This is called when creating files or directories in the tracing
9329  * directory. It is called via fs_initcall() by any of the boot up code
9330  * and returns 0 once the top level tracing directory has been set up.
9331  */
9332 int tracing_init_dentry(void)
9333 {
9334         struct trace_array *tr = &global_trace;
9335
9336         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9337                 pr_warn("Tracing disabled due to lockdown\n");
9338                 return -EPERM;
9339         }
9340
9341         /* The top level trace array uses NULL as parent */
9342         if (tr->dir)
9343                 return 0;
9344
9345         if (WARN_ON(!tracefs_initialized()))
9346                 return -ENODEV;
9347
9348         /*
9349          * As there may still be users that expect the tracing
9350          * files to exist in debugfs/tracing, we must automount
9351          * the tracefs file system there, so older tools still
9352          * work with the newer kernel.
9353          */
9354         tr->dir = debugfs_create_automount("tracing", NULL,
9355                                            trace_automount, NULL);
9356
9357         return 0;
9358 }
9359
9360 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9361 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9362
9363 static struct workqueue_struct *eval_map_wq __initdata;
9364 static struct work_struct eval_map_work __initdata;
9365
9366 static void __init eval_map_work_func(struct work_struct *work)
9367 {
9368         int len;
9369
9370         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9371         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9372 }
9373
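/*
 * Insert the built-in eval (enum) maps from a workqueue so that the
 * potentially large insertion does not slow down boot. trace_eval_sync()
 * below waits for the work to finish.
 */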
9374 static int __init trace_eval_init(void)
9375 {
9376         INIT_WORK(&eval_map_work, eval_map_work_func);
9377
9378         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9379         if (!eval_map_wq) {
9380                 pr_err("Unable to allocate eval_map_wq\n");
9381                 /* Do work here */
9382                 eval_map_work_func(&eval_map_work);
9383                 return -ENOMEM;
9384         }
9385
9386         queue_work(eval_map_wq, &eval_map_work);
9387         return 0;
9388 }
9389
9390 static int __init trace_eval_sync(void)
9391 {
9392         /* Make sure the eval map updates are finished */
9393         if (eval_map_wq)
9394                 destroy_workqueue(eval_map_wq);
9395         return 0;
9396 }
9397
9398 late_initcall_sync(trace_eval_sync);
9399
9400
9401 #ifdef CONFIG_MODULES
9402 static void trace_module_add_evals(struct module *mod)
9403 {
9404         if (!mod->num_trace_evals)
9405                 return;
9406
9407         /*
9408          * Modules with bad taint do not have events created, so do
9409          * not bother with their enums (eval maps) either.
9410          */
9411         if (trace_module_has_bad_taint(mod))
9412                 return;
9413
9414         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9415 }
9416
9417 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9418 static void trace_module_remove_evals(struct module *mod)
9419 {
9420         union trace_eval_map_item *map;
9421         union trace_eval_map_item **last = &trace_eval_maps;
9422
9423         if (!mod->num_trace_evals)
9424                 return;
9425
9426         mutex_lock(&trace_eval_mutex);
9427
9428         map = trace_eval_maps;
9429
9430         while (map) {
9431                 if (map->head.mod == mod)
9432                         break;
9433                 map = trace_eval_jmp_to_tail(map);
9434                 last = &map->tail.next;
9435                 map = map->tail.next;
9436         }
9437         if (!map)
9438                 goto out;
9439
9440         *last = trace_eval_jmp_to_tail(map)->tail.next;
9441         kfree(map);
9442  out:
9443         mutex_unlock(&trace_eval_mutex);
9444 }
9445 #else
9446 static inline void trace_module_remove_evals(struct module *mod) { }
9447 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9448
9449 static int trace_module_notify(struct notifier_block *self,
9450                                unsigned long val, void *data)
9451 {
9452         struct module *mod = data;
9453
9454         switch (val) {
9455         case MODULE_STATE_COMING:
9456                 trace_module_add_evals(mod);
9457                 break;
9458         case MODULE_STATE_GOING:
9459                 trace_module_remove_evals(mod);
9460                 break;
9461         }
9462
9463         return NOTIFY_OK;
9464 }
9465
9466 static struct notifier_block trace_module_nb = {
9467         .notifier_call = trace_module_notify,
9468         .priority = 0,
9469 };
9470 #endif /* CONFIG_MODULES */
9471
9472 static __init int tracer_init_tracefs(void)
9473 {
9474         int ret;
9475
9476         trace_access_lock_init();
9477
9478         ret = tracing_init_dentry();
9479         if (ret)
9480                 return 0;
9481
9482         event_trace_init();
9483
9484         init_tracer_tracefs(&global_trace, NULL);
9485         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9486
9487         trace_create_file("tracing_thresh", 0644, NULL,
9488                         &global_trace, &tracing_thresh_fops);
9489
9490         trace_create_file("README", 0444, NULL,
9491                         NULL, &tracing_readme_fops);
9492
9493         trace_create_file("saved_cmdlines", 0444, NULL,
9494                         NULL, &tracing_saved_cmdlines_fops);
9495
9496         trace_create_file("saved_cmdlines_size", 0644, NULL,
9497                           NULL, &tracing_saved_cmdlines_size_fops);
9498
9499         trace_create_file("saved_tgids", 0444, NULL,
9500                         NULL, &tracing_saved_tgids_fops);
9501
9502         trace_eval_init();
9503
9504         trace_create_eval_file(NULL);
9505
9506 #ifdef CONFIG_MODULES
9507         register_module_notifier(&trace_module_nb);
9508 #endif
9509
9510 #ifdef CONFIG_DYNAMIC_FTRACE
9511         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9512                         NULL, &tracing_dyn_info_fops);
9513 #endif
9514
9515         create_trace_instances(NULL);
9516
9517         update_tracer_options(&global_trace);
9518
9519         return 0;
9520 }
9521
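/*
 * Panic/die notifiers: if ftrace_dump_on_oops is set (via the kernel
 * command line or sysctl), dump the trace buffers to the console on a
 * panic or oops.
 */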
9522 static int trace_panic_handler(struct notifier_block *this,
9523                                unsigned long event, void *unused)
9524 {
9525         if (ftrace_dump_on_oops)
9526                 ftrace_dump(ftrace_dump_on_oops);
9527         return NOTIFY_OK;
9528 }
9529
9530 static struct notifier_block trace_panic_notifier = {
9531         .notifier_call  = trace_panic_handler,
9532         .next           = NULL,
9533         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9534 };
9535
9536 static int trace_die_handler(struct notifier_block *self,
9537                              unsigned long val,
9538                              void *data)
9539 {
9540         switch (val) {
9541         case DIE_OOPS:
9542                 if (ftrace_dump_on_oops)
9543                         ftrace_dump(ftrace_dump_on_oops);
9544                 break;
9545         default:
9546                 break;
9547         }
9548         return NOTIFY_OK;
9549 }
9550
9551 static struct notifier_block trace_die_notifier = {
9552         .notifier_call = trace_die_handler,
9553         .priority = 200
9554 };
9555
9556 /*
9557  * printk is set to a max of 1024; we really don't need it that big.
9558  * Nothing should be printing 1000 characters anyway.
9559  */
9560 #define TRACE_MAX_PRINT         1000
9561
9562 /*
9563  * Define KERN_TRACE here so that we have one place to modify
9564  * it if we decide to change what log level the ftrace dump
9565  * should be at.
9566  */
9567 #define KERN_TRACE              KERN_EMERG
9568
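/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level,
 * clamped to TRACE_MAX_PRINT characters, then reset the trace_seq for reuse.
 */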
9569 void
9570 trace_printk_seq(struct trace_seq *s)
9571 {
9572         /* Probably should print a warning here. */
9573         if (s->seq.len >= TRACE_MAX_PRINT)
9574                 s->seq.len = TRACE_MAX_PRINT;
9575
9576         /*
9577          * Extra paranoia: the buffer size is PAGE_SIZE and TRACE_MAX_PRINT
9578          * is only 1000, so this should never trigger; it is just an
9579          * additional layer of protection.
9580          */
9581         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9582                 s->seq.len = s->seq.size - 1;
9583
9584         /* Should already be NUL-terminated, but we are paranoid. */
9585         s->buffer[s->seq.len] = 0;
9586
9587         printk(KERN_TRACE "%s", s->buffer);
9588
9589         trace_seq_init(s);
9590 }
9591
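/*
 * Initialize an iterator over the global trace array covering all CPUs
 * (used by ftrace_dump() below).
 */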
9592 void trace_init_global_iter(struct trace_iterator *iter)
9593 {
9594         iter->tr = &global_trace;
9595         iter->trace = iter->tr->current_trace;
9596         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9597         iter->array_buffer = &global_trace.array_buffer;
9598
9599         if (iter->trace && iter->trace->open)
9600                 iter->trace->open(iter);
9601
9602         /* Annotate start of buffers if we had overruns */
9603         if (ring_buffer_overruns(iter->array_buffer->buffer))
9604                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9605
9606         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9607         if (trace_clocks[iter->tr->clock_id].in_ns)
9608                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9609 }
9610
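/*
 * Dump the contents of the global ring buffer to the console. Tracing is
 * turned off first and only one dumper may run at a time; no memory is
 * allocated, so this is safe from the oops/panic and sysrq paths.
 */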
9611 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9612 {
9613         /* use static because iter can be a bit big for the stack */
9614         static struct trace_iterator iter;
9615         static atomic_t dump_running;
9616         struct trace_array *tr = &global_trace;
9617         unsigned int old_userobj;
9618         unsigned long flags;
9619         int cnt = 0, cpu;
9620
9621         /* Only allow one dump user at a time. */
9622         if (atomic_inc_return(&dump_running) != 1) {
9623                 atomic_dec(&dump_running);
9624                 return;
9625         }
9626
9627         /*
9628          * Always turn off tracing when we dump.
9629          * We don't need to show trace output of what happens
9630          * between multiple crashes.
9631          *
9632          * If the user does a sysrq-z, then they can re-enable
9633          * tracing with echo 1 > tracing_on.
9634          */
9635         tracing_off();
9636
9637         local_irq_save(flags);
9638         printk_nmi_direct_enter();
9639
9640         /* Simulate the iterator */
9641         trace_init_global_iter(&iter);
9642         /* Cannot use kmalloc for iter.temp and iter.fmt (may run in panic/NMI context) */
9643         iter.temp = static_temp_buf;
9644         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9645         iter.fmt = static_fmt_buf;
9646         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9647
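	/*
	 * Bump each CPU's "disabled" count so that no new events are
	 * recorded into the buffers while we read them out.
	 */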
9648         for_each_tracing_cpu(cpu) {
9649                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9650         }
9651
9652         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9653
9654         /* don't look at user memory in panic mode */
9655         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9656
9657         switch (oops_dump_mode) {
9658         case DUMP_ALL:
9659                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9660                 break;
9661         case DUMP_ORIG:
9662                 iter.cpu_file = raw_smp_processor_id();
9663                 break;
9664         case DUMP_NONE:
9665                 goto out_enable;
9666         default:
9667                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9668                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9669         }
9670
9671         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9672
9673         /* Did function tracer already get disabled? */
9674         if (ftrace_is_dead()) {
9675                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9676                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9677         }
9678
9679         /*
9680          * We need to stop all tracing on all CPUs to read
9681          * the next buffer. This is a bit expensive, but is
9682          * not done often. We read everything that we can,
9683          * and then release the locks again.
9684          */
9685
9686         while (!trace_empty(&iter)) {
9687
9688                 if (!cnt)
9689                         printk(KERN_TRACE "---------------------------------\n");
9690
9691                 cnt++;
9692
9693                 trace_iterator_reset(&iter);
9694                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9695
9696                 if (trace_find_next_entry_inc(&iter) != NULL) {
9697                         int ret;
9698
9699                         ret = print_trace_line(&iter);
9700                         if (ret != TRACE_TYPE_NO_CONSUME)
9701                                 trace_consume(&iter);
9702                 }
9703                 touch_nmi_watchdog();
9704
9705                 trace_printk_seq(&iter.seq);
9706         }
9707
9708         if (!cnt)
9709                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9710         else
9711                 printk(KERN_TRACE "---------------------------------\n");
9712
9713  out_enable:
9714         tr->trace_flags |= old_userobj;
9715
9716         for_each_tracing_cpu(cpu) {
9717                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9718         }
9719         atomic_dec(&dump_running);
9720         printk_nmi_direct_exit();
9721         local_irq_restore(flags);
9722 }
9723 EXPORT_SYMBOL_GPL(ftrace_dump);
9724
9725 #define WRITE_BUFSIZE  4096
9726
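/*
 * Copy a user-supplied buffer in WRITE_BUFSIZE chunks, split it into
 * newline-terminated commands, strip '#' comments and hand each command
 * to createfn(). Returns the number of bytes consumed or a negative error.
 */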
9727 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9728                                 size_t count, loff_t *ppos,
9729                                 int (*createfn)(const char *))
9730 {
9731         char *kbuf, *buf, *tmp;
9732         int ret = 0;
9733         size_t done = 0;
9734         size_t size;
9735
9736         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9737         if (!kbuf)
9738                 return -ENOMEM;
9739
9740         while (done < count) {
9741                 size = count - done;
9742
9743                 if (size >= WRITE_BUFSIZE)
9744                         size = WRITE_BUFSIZE - 1;
9745
9746                 if (copy_from_user(kbuf, buffer + done, size)) {
9747                         ret = -EFAULT;
9748                         goto out;
9749                 }
9750                 kbuf[size] = '\0';
9751                 buf = kbuf;
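		/*
		 * Walk the chunk one newline-terminated command at a time.
		 * A command cut off at the chunk boundary is left for the next
		 * iteration of the outer loop, which re-reads it from user
		 * space starting at the unconsumed offset.
		 */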
9752                 do {
9753                         tmp = strchr(buf, '\n');
9754                         if (tmp) {
9755                                 *tmp = '\0';
9756                                 size = tmp - buf + 1;
9757                         } else {
9758                                 size = strlen(buf);
9759                                 if (done + size < count) {
9760                                         if (buf != kbuf)
9761                                                 break;
9762                         /* A line can be at most WRITE_BUFSIZE - 2 bytes ('\n' + '\0' need room) */
9763                         pr_warn("Line is too long: must be less than %d characters\n",
9764                                 WRITE_BUFSIZE - 2);
9765                                         ret = -EINVAL;
9766                                         goto out;
9767                                 }
9768                         }
9769                         done += size;
9770
9771                         /* Remove comments */
9772                         tmp = strchr(buf, '#');
9773
9774                         if (tmp)
9775                                 *tmp = '\0';
9776
9777                         ret = createfn(buf);
9778                         if (ret)
9779                                 goto out;
9780                         buf += size;
9781
9782                 } while (done < count);
9783         }
9784         ret = done;
9785
9786 out:
9787         kfree(kbuf);
9788
9789         return ret;
9790 }
9791
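/*
 * Core tracing setup, called from early_trace_init(): allocate the CPU
 * masks and ring buffers for the global trace array, register the nop
 * tracer and the panic/die notifiers, and apply boot-time trace options.
 */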
9792 __init static int tracer_alloc_buffers(void)
9793 {
9794         int ring_buf_size;
9795         int ret = -ENOMEM;
9796
9798         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9799                 pr_warn("Tracing disabled due to lockdown\n");
9800                 return -EPERM;
9801         }
9802
9803         /*
9804          * Make sure we don't accidentally add more trace options
9805          * than we have bits for.
9806          */
9807         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9808
9809         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9810                 goto out;
9811
9812         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9813                 goto out_free_buffer_mask;
9814
9815         /* Only allocate trace_printk buffers if a trace_printk exists */
9816         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9817                 /* Must be called before global_trace.buffer is allocated */
9818                 trace_printk_init_buffers();
9819
9820         /* To save memory, keep the ring buffer size at its minimum */
9821         if (ring_buffer_expanded)
9822                 ring_buf_size = trace_buf_size;
9823         else
9824                 ring_buf_size = 1;
9825
9826         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9827         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9828
9829         raw_spin_lock_init(&global_trace.start_lock);
9830
9831         /*
9832          * The prepare callback allocates some memory for the ring buffer. We
9833          * don't free the buffer if the CPU goes down. If we were to free
9834          * the buffer, then the user would lose any trace that was in the
9835          * buffer. The memory will be removed once the "instance" is removed.
9836          */
9837         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9838                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9839                                       NULL);
9840         if (ret < 0)
9841                 goto out_free_cpumask;
9842         /* Used for event triggers */
9843         ret = -ENOMEM;
9844         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9845         if (!temp_buffer)
9846                 goto out_rm_hp_state;
9847
9848         if (trace_create_savedcmd() < 0)
9849                 goto out_free_temp_buffer;
9850
9851         /* TODO: make the number of buffers hot-pluggable with CPUs */
9852         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9853                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9854                 goto out_free_savedcmd;
9855         }
9856
9857         if (global_trace.buffer_disabled)
9858                 tracing_off();
9859
9860         if (trace_boot_clock) {
9861                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9862                 if (ret < 0)
9863                         pr_warn("Trace clock %s not defined, falling back to the default\n",
9864                                 trace_boot_clock);
9865         }
9866
9867         /*
9868          * register_tracer() might reference current_trace, so it
9869          * needs to be set before we register anything. This is
9870          * just a bootstrap of current_trace anyway.
9871          */
9872         global_trace.current_trace = &nop_trace;
9873
9874         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9875
9876         ftrace_init_global_array_ops(&global_trace);
9877
9878         init_trace_flags_index(&global_trace);
9879
9880         register_tracer(&nop_trace);
9881
9882         /* Function tracing may start here (via kernel command line) */
9883         init_function_trace();
9884
9885         /* All seems OK, enable tracing */
9886         tracing_disabled = 0;
9887
9888         atomic_notifier_chain_register(&panic_notifier_list,
9889                                        &trace_panic_notifier);
9890
9891         register_die_notifier(&trace_die_notifier);
9892
9893         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9894
9895         INIT_LIST_HEAD(&global_trace.systems);
9896         INIT_LIST_HEAD(&global_trace.events);
9897         INIT_LIST_HEAD(&global_trace.hist_vars);
9898         INIT_LIST_HEAD(&global_trace.err_log);
9899         list_add(&global_trace.list, &ftrace_trace_arrays);
9900
9901         apply_trace_boot_options();
9902
9903         register_snapshot_cmd();
9904
9905         test_can_verify();
9906
9907         return 0;
9908
9909 out_free_savedcmd:
9910         free_saved_cmdlines_buffer(savedcmd);
9911 out_free_temp_buffer:
9912         ring_buffer_free(temp_buffer);
9913 out_rm_hp_state:
9914         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9915 out_free_cpumask:
9916         free_cpumask_var(global_trace.tracing_cpumask);
9917 out_free_buffer_mask:
9918         free_cpumask_var(tracing_buffer_mask);
9919 out:
9920         return ret;
9921 }
9922
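/*
 * Boot-time entry points called from start_kernel(): early_trace_init()
 * allocates the ring buffers early so that boot-time output can be
 * captured, and trace_init() brings up the trace event infrastructure.
 */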
9923 void __init early_trace_init(void)
9924 {
9925         if (tracepoint_printk) {
9926                 tracepoint_print_iter =
9927                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9928                 if (MEM_FAIL(!tracepoint_print_iter,
9929                              "Failed to allocate trace iterator\n"))
9930                         tracepoint_printk = 0;
9931                 else
9932                         static_key_enable(&tracepoint_printk_key.key);
9933         }
9934         tracer_alloc_buffers();
9935 }
9936
9937 void __init trace_init(void)
9938 {
9939         trace_event_init();
9940 }
9941
9942 __init static int clear_boot_tracer(void)
9943 {
9944         /*
9945          * The buffer holding the default bootup tracer name lives in an
9946          * init section. This function runs as a late initcall: if the
9947          * boot tracer was never registered, clear the pointer so that a
9948          * later registration does not access init memory that is about
9949          * to be freed.
9950          */
9951         if (!default_bootup_tracer)
9952                 return 0;
9953
9954         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9955                default_bootup_tracer);
9956         default_bootup_tracer = NULL;
9957
9958         return 0;
9959 }
9960
9961 fs_initcall(tracer_init_tracefs);
9962 late_initcall_sync(clear_boot_tracer);
9963
9964 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
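/*
 * If the architecture's sched_clock() turns out to be unstable, switch the
 * default trace clock from "local" to "global" so that timestamps stay
 * comparable across CPUs (unless the user picked a clock on the command line).
 */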
9965 __init static int tracing_set_default_clock(void)
9966 {
9967         /* sched_clock_stable() is determined in late_initcall */
9968         if (!trace_boot_clock && !sched_clock_stable()) {
9969                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9970                         pr_warn("Can not set tracing clock due to lockdown\n");
9971                         return -EPERM;
9972                 }
9973
9974                 printk(KERN_WARNING
9975                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9976                        "If you want to keep using the local clock, then add:\n"
9977                        "  \"trace_clock=local\"\n"
9978                        "on the kernel command line\n");
9979                 tracing_set_clock(&global_trace, "global");
9980         }
9981
9982         return 0;
9983 }
9984 late_initcall_sync(tracing_set_default_clock);
9985 #endif