kernel/trace/trace.c (linux-2.6-microblaze.git, commit 66a4ad93b5e984ad68a07d82490a69da02aaef26)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing (including tracers/events set up via the kernel
72  * cmdline) is running, we do not want to run the selftests.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs,
132  * or set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
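/*
 * Worked example (illustrative only): with the +500 rounding above,
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2, i.e. the result is
 * rounded to the nearest microsecond rather than truncated.
 */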
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are inserting the export into the list, but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included in the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
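/*
 * Illustrative sketch (not part of this file): a user of this API provides
 * a trace_export with a ->write() callback and registers/unregisters it.
 * The names my_export_write and my_export below are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	ret = register_ftrace_export(&my_export);
 *	...
 *	ret = unregister_ftrace_export(&my_export);
 */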
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
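/*
 * Illustrative only: the get/put pair above is meant to bracket any use of
 * a trace array looked up by name (the instance name below is made up):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		... use tr: enable events, write markers, etc. ...
 *		trace_array_put(tr);
 *	}
 */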
475
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
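/*
 * Illustrative only: the three helpers above are meant to back a seq_file
 * that dumps a pid list. Callers typically wrap trace_pid_start() and
 * trace_pid_next() so the pid_list can be looked up from the seq_file,
 * while trace_pid_show() can be used directly. The my_pids_* names below
 * are hypothetical:
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = ...;	(e.g. from m->private)
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static const struct seq_operations my_pids_sops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,		(wraps trace_pid_next())
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */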
667
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always create a new array. The write is an all-or-nothing
689          * operation: when the user adds new pids, a new array is built,
690          * and if the operation fails, the current list is
691          * not modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled", which is meant for fast paths such as
790  * the irqsoff tracer, but it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on(), which is a
792  * little slower but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If a dump on oops happens, it is much nicer not to have to
812  * wait for all that output. In any case, this is configurable
813  * at both boot time and run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * Serialize access to the ring buffer.
829  *
830  * The ring buffer serializes readers, but that is only low-level protection.
831  * The validity of the events (returned by ring_buffer_peek() etc.)
832  * is not protected by the ring buffer itself.
833  *
834  * The content of events may become garbage if we allow other processes to
835  * consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the event producer.
839  *   B) the page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different per-cpu
843  * ring buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
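/*
 * Illustrative only: consuming readers in this file bracket their ring
 * buffer accesses with the helpers above, roughly:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events for iter->cpu_file ...
 *	trace_access_unlock(iter->cpu_file);
 *
 * where iter->cpu_file is either a single CPU or RING_BUFFER_ALL_CPUS.
 */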
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races of where it gets disabled but we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, snapshot can not be used when the tracer uses it */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot either with
1142  * tracing_snapshot_alloc(), or manually with:
1143  * echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already taken.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want to preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
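/*
 * Illustrative only: a typical in-kernel user allocates the snapshot
 * buffer once from a context that may sleep and then triggers snapshots
 * wherever the interesting condition is detected:
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		tracing_snapshot();
 *	}
 *
 * or simply calls tracing_snapshot_alloc() when sleeping is allowed.
 */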
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
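/*
 * Illustrative only: a conditional-snapshot user supplies an update
 * callback matching cond_update_fn_t and later triggers snapshots with
 * tracing_snapshot_cond(). The my_update/my_data names are hypothetical:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	(true means: take the snapshot)
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	ret = tracing_snapshot_cond_disable(tr);
 */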
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races of where it gets disabled but we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
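/*
 * Illustrative only: tracing_on()/tracing_off() let kernel code freeze the
 * ring buffer around an interesting event so that the trace leading up to
 * it is preserved, e.g.:
 *
 *	if (something_went_wrong)
 *		tracing_off();	(the buffer keeps its current contents)
 */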
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array whose ring buffer state is being queried
1460  *
1461  * Shows the real state of the ring buffer: whether it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
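/*
 * Illustrative only: with the C(a, b) definition above, a TRACE_FLAGS entry
 * such as C(PRINT_PARENT, "print-parent") expands to just "print-parent"
 * here, while trace.h redefines C() to build the matching TRACE_ITER_*
 * bit masks, keeping the option strings and bits in sync by construction.
 */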
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found, the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * If the parser is not finished with the last write,
1607          * continue reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got finished input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
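/*
 * A minimal sketch of how a write() handler might drive the parser helpers
 * above (the handler name and the buffer size of 64 are assumptions for
 * illustration; trace_parser_loaded() comes from kernel/trace/trace.h):
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && trace_parser_loaded(&parser))
 *			pr_info("parsed token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */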
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
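/*
 * For clarity, the notification path built above is: a latency tracer that
 * records a new maximum calls latency_fsnotify(tr), which only queues
 * tr->fsnotify_irqwork; the irq_work handler then queues tr->fsnotify_work
 * on fsnotify_wq; the workqueue callback finally calls fsnotify_inode() on
 * the tracing_max_latency dentry. The double deferral keeps fsnotify out of
 * scheduler and idle context.
 */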
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* record this task's comm */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: trace array whose buffers are snapshotted
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
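/*
 * A sketch of the expected call pattern (an assumption modelled on how the
 * latency tracers use this interface): with interrupts already disabled, a
 * tracer that observes a new worst-case latency typically does
 *
 *	if (latency > tr->max_latency) {
 *		tr->max_latency = latency;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */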
1836
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: trace array whose CPU buffer is snapshotted
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record, but we reset
1868                  * the max trace buffer (no one writes directly to it)
1869                  * and flag that it failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static, they should be filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftests yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
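/*
 * A sketch of what the ->selftest() callback invoked above is expected to
 * look like (the name is hypothetical): it starts the tracer on @tr,
 * generates some activity, checks the recorded entries and returns 0 on
 * success.
 *
 *	static int trace_selftest_startup_example(struct tracer *trace,
 *						  struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = trace->init(tr);
 *		if (ret)
 *			return ret;
 *
 *		... produce events and verify they were recorded ...
 *
 *		trace->reset(tr);
 *		return 0;
 *	}
 */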
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * let's make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /* Allocate a dummy tracer_flags */
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* Disable other selftests, since this will break them. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
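/*
 * A minimal sketch of a built-in tracer registration (all "example"
 * identifiers are hypothetical; real tracers typically also provide
 * callbacks such as ->start, ->stop or ->print_line):
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */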
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 static int *tgid_map;
2188
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194         unsigned *map_cmdline_to_pid;
2195         unsigned cmdline_num;
2196         int cmdline_idx;
2197         char *saved_cmdlines;
2198 };
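/*
 * Layout of the structure above: saved_cmdlines is a ring of cmdline_num
 * slots of TASK_COMM_LEN bytes each. map_pid_to_cmdline[pid] holds the slot
 * index that stores that pid's comm (or NO_CMDLINE_MAP), and
 * map_cmdline_to_pid[slot] holds the pid currently occupying the slot, so
 * that the stale mapping can be invalidated when trace_save_cmdline()
 * recycles a slot.
 */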
2199 static struct saved_cmdlines_buffer *savedcmd;
2200
2201 /* temporarily disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215                                     struct saved_cmdlines_buffer *s)
2216 {
2217         s->map_cmdline_to_pid = kmalloc_array(val,
2218                                               sizeof(*s->map_cmdline_to_pid),
2219                                               GFP_KERNEL);
2220         if (!s->map_cmdline_to_pid)
2221                 return -ENOMEM;
2222
2223         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224         if (!s->saved_cmdlines) {
2225                 kfree(s->map_cmdline_to_pid);
2226                 return -ENOMEM;
2227         }
2228
2229         s->cmdline_idx = 0;
2230         s->cmdline_num = val;
2231         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232                sizeof(s->map_pid_to_cmdline));
2233         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234                val * sizeof(*s->map_cmdline_to_pid));
2235
2236         return 0;
2237 }
2238
2239 static int trace_create_savedcmd(void)
2240 {
2241         int ret;
2242
2243         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244         if (!savedcmd)
2245                 return -ENOMEM;
2246
2247         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248         if (ret < 0) {
2249                 kfree(savedcmd);
2250                 savedcmd = NULL;
2251                 return -ENOMEM;
2252         }
2253
2254         return 0;
2255 }
2256
2257 int is_tracing_stopped(void)
2258 {
2259         return global_trace.stop_count;
2260 }
2261
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270         struct trace_buffer *buffer;
2271         unsigned long flags;
2272
2273         if (tracing_disabled)
2274                 return;
2275
2276         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277         if (--global_trace.stop_count) {
2278                 if (global_trace.stop_count < 0) {
2279                         /* Someone screwed up their debugging */
2280                         WARN_ON_ONCE(1);
2281                         global_trace.stop_count = 0;
2282                 }
2283                 goto out;
2284         }
2285
2286         /* Prevent the buffers from switching */
2287         arch_spin_lock(&global_trace.max_lock);
2288
2289         buffer = global_trace.array_buffer.buffer;
2290         if (buffer)
2291                 ring_buffer_record_enable(buffer);
2292
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294         buffer = global_trace.max_buffer.buffer;
2295         if (buffer)
2296                 ring_buffer_record_enable(buffer);
2297 #endif
2298
2299         arch_spin_unlock(&global_trace.max_lock);
2300
2301  out:
2302         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307         struct trace_buffer *buffer;
2308         unsigned long flags;
2309
2310         if (tracing_disabled)
2311                 return;
2312
2313         /* If global, we need to also start the max tracer */
2314         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315                 return tracing_start();
2316
2317         raw_spin_lock_irqsave(&tr->start_lock, flags);
2318
2319         if (--tr->stop_count) {
2320                 if (tr->stop_count < 0) {
2321                         /* Someone screwed up their debugging */
2322                         WARN_ON_ONCE(1);
2323                         tr->stop_count = 0;
2324                 }
2325                 goto out;
2326         }
2327
2328         buffer = tr->array_buffer.buffer;
2329         if (buffer)
2330                 ring_buffer_record_enable(buffer);
2331
2332  out:
2333         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348         if (global_trace.stop_count++)
2349                 goto out;
2350
2351         /* Prevent the buffers from switching */
2352         arch_spin_lock(&global_trace.max_lock);
2353
2354         buffer = global_trace.array_buffer.buffer;
2355         if (buffer)
2356                 ring_buffer_record_disable(buffer);
2357
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359         buffer = global_trace.max_buffer.buffer;
2360         if (buffer)
2361                 ring_buffer_record_disable(buffer);
2362 #endif
2363
2364         arch_spin_unlock(&global_trace.max_lock);
2365
2366  out:
2367         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
2369
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372         struct trace_buffer *buffer;
2373         unsigned long flags;
2374
2375         /* If global, we need to also stop the max tracer */
2376         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377                 return tracing_stop();
2378
2379         raw_spin_lock_irqsave(&tr->start_lock, flags);
2380         if (tr->stop_count++)
2381                 goto out;
2382
2383         buffer = tr->array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387  out:
2388         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393         unsigned pid, idx;
2394
2395         /* treat recording of idle task as a success */
2396         if (!tsk->pid)
2397                 return 1;
2398
2399         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2400                 return 0;
2401
2402         /*
2403          * It's not the end of the world if we don't get
2404          * the lock, but we also don't want to spin
2405          * nor do we want to disable interrupts,
2406          * so if we miss here, then better luck next time.
2407          */
2408         if (!arch_spin_trylock(&trace_cmdline_lock))
2409                 return 0;
2410
2411         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2412         if (idx == NO_CMDLINE_MAP) {
2413                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2414
2415                 /*
2416                  * Check whether the cmdline buffer at idx has a pid
2417                  * mapped. We are going to overwrite that entry so we
2418                  * need to clear the map_pid_to_cmdline. Otherwise we
2419                  * would read the new comm for the old pid.
2420                  */
2421                 pid = savedcmd->map_cmdline_to_pid[idx];
2422                 if (pid != NO_CMDLINE_MAP)
2423                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2424
2425                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2426                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2427
2428                 savedcmd->cmdline_idx = idx;
2429         }
2430
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441
2442         if (!pid) {
2443                 strcpy(comm, "<idle>");
2444                 return;
2445         }
2446
2447         if (WARN_ON_ONCE(pid < 0)) {
2448                 strcpy(comm, "<XXX>");
2449                 return;
2450         }
2451
2452         if (pid > PID_MAX_DEFAULT) {
2453                 strcpy(comm, "<...>");
2454                 return;
2455         }
2456
2457         map = savedcmd->map_pid_to_cmdline[pid];
2458         if (map != NO_CMDLINE_MAP)
2459                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2460         else
2461                 strcpy(comm, "<...>");
2462 }
2463
2464 void trace_find_cmdline(int pid, char comm[])
2465 {
2466         preempt_disable();
2467         arch_spin_lock(&trace_cmdline_lock);
2468
2469         __trace_find_cmdline(pid, comm);
2470
2471         arch_spin_unlock(&trace_cmdline_lock);
2472         preempt_enable();
2473 }
2474
2475 int trace_find_tgid(int pid)
2476 {
2477         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2478                 return 0;
2479
2480         return tgid_map[pid];
2481 }
2482
2483 static int trace_save_tgid(struct task_struct *tsk)
2484 {
2485         /* treat recording of idle task as a success */
2486         if (!tsk->pid)
2487                 return 1;
2488
2489         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2490                 return 0;
2491
2492         tgid_map[tsk->pid] = tsk->tgid;
2493         return 1;
2494 }
2495
2496 static bool tracing_record_taskinfo_skip(int flags)
2497 {
2498         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2499                 return true;
2500         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2501                 return true;
2502         if (!__this_cpu_read(trace_taskinfo_save))
2503                 return true;
2504         return false;
2505 }
2506
2507 /**
2508  * tracing_record_taskinfo - record the task info of a task
2509  *
2510  * @task:  task to record
2511  * @flags: TRACE_RECORD_CMDLINE for recording comm
2512  *         TRACE_RECORD_TGID for recording tgid
2513  */
2514 void tracing_record_taskinfo(struct task_struct *task, int flags)
2515 {
2516         bool done;
2517
2518         if (tracing_record_taskinfo_skip(flags))
2519                 return;
2520
2521         /*
2522          * Record as much task information as possible. If some fail, continue
2523          * to try to record the others.
2524          */
2525         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2526         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2527
2528         /* If recording any information failed, retry soon. */
2529         if (!done)
2530                 return;
2531
2532         __this_cpu_write(trace_taskinfo_save, false);
2533 }
2534
2535 /**
2536  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2537  *
2538  * @prev: previous task during sched_switch
2539  * @next: next task during sched_switch
2540  * @flags: TRACE_RECORD_CMDLINE for recording comm
2541  *         TRACE_RECORD_TGID for recording tgid
2542  */
2543 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2544                                           struct task_struct *next, int flags)
2545 {
2546         bool done;
2547
2548         if (tracing_record_taskinfo_skip(flags))
2549                 return;
2550
2551         /*
2552          * Record as much task information as possible. If some fail, continue
2553          * to try to record the others.
2554          */
2555         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2556         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2557         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2558         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2559
2560         /* If recording any information failed, retry soon. */
2561         if (!done)
2562                 return;
2563
2564         __this_cpu_write(trace_taskinfo_save, false);
2565 }
2566
2567 /* Helpers to record specific task information */
2568 void tracing_record_cmdline(struct task_struct *task)
2569 {
2570         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2571 }
2572
2573 void tracing_record_tgid(struct task_struct *task)
2574 {
2575         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2576 }
2577
2578 /*
2579  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2580  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2581  * simplifies those functions and keeps them in sync.
2582  */
2583 enum print_line_t trace_handle_return(struct trace_seq *s)
2584 {
2585         return trace_seq_has_overflowed(s) ?
2586                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2587 }
2588 EXPORT_SYMBOL_GPL(trace_handle_return);
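/*
 * A sketch of the intended use (the event output function below is
 * hypothetical): a trace_event print callback formats into iter->seq and
 * ends with trace_handle_return(), so an overflowed trace_seq is reported
 * as a partial line.
 *
 *	static enum print_line_t
 *	example_event_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example event\n");
 *		return trace_handle_return(s);
 *	}
 */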
2589
2590 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2591 {
2592         unsigned int trace_flags = irqs_status;
2593         unsigned int pc;
2594
2595         pc = preempt_count();
2596
2597         if (pc & NMI_MASK)
2598                 trace_flags |= TRACE_FLAG_NMI;
2599         if (pc & HARDIRQ_MASK)
2600                 trace_flags |= TRACE_FLAG_HARDIRQ;
2601         if (in_serving_softirq())
2602                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2603
2604         if (tif_need_resched())
2605                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2606         if (test_preempt_need_resched())
2607                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2608         return (trace_flags << 16) | (pc & 0xff);
2609 }
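/*
 * Layout of the value returned above: bits 0-7 carry the preemption-disable
 * depth from preempt_count() and bits 16 and up carry the TRACE_FLAG_* bits
 * (the irq state passed in via @irqs_status plus the NMI/hardirq/softirq
 * and resched flags computed here). For example, a hardirq-context caller
 * holding one preempt_disable(), with no other flags set, yields
 * (TRACE_FLAG_HARDIRQ << 16) | 0x01.
 */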
2610
2611 struct ring_buffer_event *
2612 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2613                           int type,
2614                           unsigned long len,
2615                           unsigned int trace_ctx)
2616 {
2617         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2618 }
2619
2620 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2621 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2622 static int trace_buffered_event_ref;
2623
2624 /**
2625  * trace_buffered_event_enable - enable buffering events
2626  *
2627  * When events are being filtered, it is quicker to use a temporary
2628  * buffer to write the event data into if there's a likely chance
2629  * that it will not be committed. Discarding an event from the ring
2630  * buffer is not as fast as committing one, and is much slower than
2631  * copying the data over and committing that.
2632  *
2633  * When an event is to be filtered, allocate per-CPU buffers to
2634  * write the event data into; if the event is filtered and discarded,
2635  * it is simply dropped, otherwise the entire data is committed
2636  * in one shot.
2637  */
2638 void trace_buffered_event_enable(void)
2639 {
2640         struct ring_buffer_event *event;
2641         struct page *page;
2642         int cpu;
2643
2644         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2645
2646         if (trace_buffered_event_ref++)
2647                 return;
2648
2649         for_each_tracing_cpu(cpu) {
2650                 page = alloc_pages_node(cpu_to_node(cpu),
2651                                         GFP_KERNEL | __GFP_NORETRY, 0);
2652                 if (!page)
2653                         goto failed;
2654
2655                 event = page_address(page);
2656                 memset(event, 0, sizeof(*event));
2657
2658                 per_cpu(trace_buffered_event, cpu) = event;
2659
2660                 preempt_disable();
2661                 if (cpu == smp_processor_id() &&
2662                     __this_cpu_read(trace_buffered_event) !=
2663                     per_cpu(trace_buffered_event, cpu))
2664                         WARN_ON_ONCE(1);
2665                 preempt_enable();
2666         }
2667
2668         return;
2669  failed:
2670         trace_buffered_event_disable();
2671 }
2672
2673 static void enable_trace_buffered_event(void *data)
2674 {
2675         /* Probably not needed, but do it anyway */
2676         smp_rmb();
2677         this_cpu_dec(trace_buffered_event_cnt);
2678 }
2679
2680 static void disable_trace_buffered_event(void *data)
2681 {
2682         this_cpu_inc(trace_buffered_event_cnt);
2683 }
2684
2685 /**
2686  * trace_buffered_event_disable - disable buffering events
2687  *
2688  * When a filter is removed, it is faster to not use the buffered
2689  * events, and to commit directly into the ring buffer. Free up
2690  * the temp buffers when there are no more users. This requires
2691  * special synchronization with current events.
2692  */
2693 void trace_buffered_event_disable(void)
2694 {
2695         int cpu;
2696
2697         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2698
2699         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2700                 return;
2701
2702         if (--trace_buffered_event_ref)
2703                 return;
2704
2705         preempt_disable();
2706         /* For each CPU, set the buffer as used. */
2707         smp_call_function_many(tracing_buffer_mask,
2708                                disable_trace_buffered_event, NULL, 1);
2709         preempt_enable();
2710
2711         /* Wait for all current users to finish */
2712         synchronize_rcu();
2713
2714         for_each_tracing_cpu(cpu) {
2715                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2716                 per_cpu(trace_buffered_event, cpu) = NULL;
2717         }
2718         /*
2719          * Make sure trace_buffered_event is NULL before clearing
2720          * trace_buffered_event_cnt.
2721          */
2722         smp_wmb();
2723
2724         preempt_disable();
2725         /* Do the work on each cpu */
2726         smp_call_function_many(tracing_buffer_mask,
2727                                enable_trace_buffered_event, NULL, 1);
2728         preempt_enable();
2729 }
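/*
 * A sketch of the expected pairing for the two functions above (an
 * assumption based on the event_mutex checks they perform): enable when a
 * filter is attached and disable when the last filter goes away, both
 * under event_mutex.
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */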
2730
2731 static struct trace_buffer *temp_buffer;
2732
2733 struct ring_buffer_event *
2734 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2735                           struct trace_event_file *trace_file,
2736                           int type, unsigned long len,
2737                           unsigned int trace_ctx)
2738 {
2739         struct ring_buffer_event *entry;
2740         struct trace_array *tr = trace_file->tr;
2741         int val;
2742
2743         *current_rb = tr->array_buffer.buffer;
2744
2745         if (!tr->no_filter_buffering_ref &&
2746             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2747             (entry = this_cpu_read(trace_buffered_event))) {
2748                 /* Try to use the per cpu buffer first */
2749                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2750                 if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2751                         trace_event_setup(entry, type, trace_ctx);
2752                         entry->array[0] = len;
2753                         return entry;
2754                 }
2755                 this_cpu_dec(trace_buffered_event_cnt);
2756         }
2757
2758         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2759                                             trace_ctx);
2760         /*
2761          * If tracing is off, but we have triggers enabled,
2762          * we still need to look at the event data. Use the temp_buffer
2763          * to store the trace event for the trigger to use. It's recursion
2764          * safe and will not be recorded anywhere.
2765          */
2766         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2767                 *current_rb = temp_buffer;
2768                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2769                                                     trace_ctx);
2770         }
2771         return entry;
2772 }
2773 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2774
2775 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2776 static DEFINE_MUTEX(tracepoint_printk_mutex);
2777
2778 static void output_printk(struct trace_event_buffer *fbuffer)
2779 {
2780         struct trace_event_call *event_call;
2781         struct trace_event_file *file;
2782         struct trace_event *event;
2783         unsigned long flags;
2784         struct trace_iterator *iter = tracepoint_print_iter;
2785
2786         /* We should never get here if iter is NULL */
2787         if (WARN_ON_ONCE(!iter))
2788                 return;
2789
2790         event_call = fbuffer->trace_file->event_call;
2791         if (!event_call || !event_call->event.funcs ||
2792             !event_call->event.funcs->trace)
2793                 return;
2794
2795         file = fbuffer->trace_file;
2796         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2797             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2798              !filter_match_preds(file->filter, fbuffer->entry)))
2799                 return;
2800
2801         event = &fbuffer->trace_file->event_call->event;
2802
2803         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2804         trace_seq_init(&iter->seq);
2805         iter->ent = fbuffer->entry;
2806         event_call->event.funcs->trace(iter, 0, event);
2807         trace_seq_putc(&iter->seq, 0);
2808         printk("%s", iter->seq.buffer);
2809
2810         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2811 }
2812
2813 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2814                              void *buffer, size_t *lenp,
2815                              loff_t *ppos)
2816 {
2817         int save_tracepoint_printk;
2818         int ret;
2819
2820         mutex_lock(&tracepoint_printk_mutex);
2821         save_tracepoint_printk = tracepoint_printk;
2822
2823         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2824
2825         /*
2826          * This will force exiting early, as tracepoint_printk
2827          * is always zero when tracepoint_print_iter is not allocated.
2828          */
2829         if (!tracepoint_print_iter)
2830                 tracepoint_printk = 0;
2831
2832         if (save_tracepoint_printk == tracepoint_printk)
2833                 goto out;
2834
2835         if (tracepoint_printk)
2836                 static_key_enable(&tracepoint_printk_key.key);
2837         else
2838                 static_key_disable(&tracepoint_printk_key.key);
2839
2840  out:
2841         mutex_unlock(&tracepoint_printk_mutex);
2842
2843         return ret;
2844 }
2845
2846 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2847 {
2848         if (static_key_false(&tracepoint_printk_key.key))
2849                 output_printk(fbuffer);
2850
2851         if (static_branch_unlikely(&trace_event_exports_enabled))
2852                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2853         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2854                                     fbuffer->event, fbuffer->entry,
2855                                     fbuffer->trace_ctx, fbuffer->regs);
2856 }
2857 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2858
2859 /*
2860  * Skip 3:
2861  *
2862  *   trace_buffer_unlock_commit_regs()
2863  *   trace_event_buffer_commit()
2864  *   trace_event_raw_event_xxx()
2865  */
2866 # define STACK_SKIP 3
2867
2868 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2869                                      struct trace_buffer *buffer,
2870                                      struct ring_buffer_event *event,
2871                                      unsigned int trace_ctx,
2872                                      struct pt_regs *regs)
2873 {
2874         __buffer_unlock_commit(buffer, event);
2875
2876         /*
2877          * If regs is not set, then skip the necessary functions.
2878          * Note, we can still get here via blktrace, wakeup tracer
2879          * and mmiotrace, but that's ok if they lose a function or
2880          * two. They are not that meaningful.
2881          */
2882         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2883         ftrace_trace_userstack(tr, buffer, trace_ctx);
2884 }
2885
2886 /*
2887  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2888  */
2889 void
2890 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2891                                    struct ring_buffer_event *event)
2892 {
2893         __buffer_unlock_commit(buffer, event);
2894 }
2895
2896 void
2897 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2898                parent_ip, unsigned int trace_ctx)
2899 {
2900         struct trace_event_call *call = &event_function;
2901         struct trace_buffer *buffer = tr->array_buffer.buffer;
2902         struct ring_buffer_event *event;
2903         struct ftrace_entry *entry;
2904
2905         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2906                                             trace_ctx);
2907         if (!event)
2908                 return;
2909         entry   = ring_buffer_event_data(event);
2910         entry->ip                       = ip;
2911         entry->parent_ip                = parent_ip;
2912
2913         if (!call_filter_check_discard(call, entry, buffer, event)) {
2914                 if (static_branch_unlikely(&trace_function_exports_enabled))
2915                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916                 __buffer_unlock_commit(buffer, event);
2917         }
2918 }
2919
2920 #ifdef CONFIG_STACKTRACE
2921
2922 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2923 #define FTRACE_KSTACK_NESTING   4
2924
2925 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2926
2927 struct ftrace_stack {
2928         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2929 };
2930
2931
2932 struct ftrace_stacks {
2933         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2934 };
2935
2936 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2937 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2938
2939 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2940                                  unsigned int trace_ctx,
2941                                  int skip, struct pt_regs *regs)
2942 {
2943         struct trace_event_call *call = &event_kernel_stack;
2944         struct ring_buffer_event *event;
2945         unsigned int size, nr_entries;
2946         struct ftrace_stack *fstack;
2947         struct stack_entry *entry;
2948         int stackidx;
2949
2950         /*
2951          * Add one, for this function and the call to stack_trace_save().
2952          * If regs is set, then these functions will not be in the way.
2953          */
2954 #ifndef CONFIG_UNWINDER_ORC
2955         if (!regs)
2956                 skip++;
2957 #endif
2958
2959         preempt_disable_notrace();
2960
2961         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2962
2963         /* This should never happen. If it does, yell once and skip */
2964         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2965                 goto out;
2966
2967         /*
2968          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2969          * interrupt will either see the value pre-increment or
2970          * post-increment. If the interrupt happens pre-increment, it will
2971          * have restored the counter when it returns. We just need a barrier to
2972          * keep gcc from moving things around.
2973          */
2974         barrier();
2975
2976         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2977         size = ARRAY_SIZE(fstack->calls);
2978
2979         if (regs) {
2980                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2981                                                    size, skip);
2982         } else {
2983                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2984         }
2985
2986         size = nr_entries * sizeof(unsigned long);
2987         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2988                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2989                                     trace_ctx);
2990         if (!event)
2991                 goto out;
2992         entry = ring_buffer_event_data(event);
2993
2994         memcpy(&entry->caller, fstack->calls, size);
2995         entry->size = nr_entries;
2996
2997         if (!call_filter_check_discard(call, entry, buffer, event))
2998                 __buffer_unlock_commit(buffer, event);
2999
3000  out:
3001         /* Again, don't let gcc optimize things here */
3002         barrier();
3003         __this_cpu_dec(ftrace_stack_reserve);
3004         preempt_enable_notrace();
3005
3006 }
3007
3008 static inline void ftrace_trace_stack(struct trace_array *tr,
3009                                       struct trace_buffer *buffer,
3010                                       unsigned int trace_ctx,
3011                                       int skip, struct pt_regs *regs)
3012 {
3013         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3014                 return;
3015
3016         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3017 }
3018
3019 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3020                    int skip)
3021 {
3022         struct trace_buffer *buffer = tr->array_buffer.buffer;
3023
3024         if (rcu_is_watching()) {
3025                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3026                 return;
3027         }
3028
3029         /*
3030          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3031          * but if the above rcu_is_watching() failed, then the NMI
3032          * triggered someplace critical, and rcu_irq_enter() should
3033          * not be called from NMI.
3034          */
3035         if (unlikely(in_nmi()))
3036                 return;
3037
3038         rcu_irq_enter_irqson();
3039         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3040         rcu_irq_exit_irqson();
3041 }
3042
3043 /**
3044  * trace_dump_stack - record a stack back trace in the trace buffer
3045  * @skip: Number of functions to skip (helper handlers)
3046  */
3047 void trace_dump_stack(int skip)
3048 {
3049         if (tracing_disabled || tracing_selftest_running)
3050                 return;
3051
3052 #ifndef CONFIG_UNWINDER_ORC
3053         /* Skip 1 to skip this function. */
3054         skip++;
3055 #endif
3056         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3057                              tracing_gen_ctx(), skip, NULL);
3058 }
3059 EXPORT_SYMBOL_GPL(trace_dump_stack);
3060
3061 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3062 static DEFINE_PER_CPU(int, user_stack_count);
3063
3064 static void
3065 ftrace_trace_userstack(struct trace_array *tr,
3066                        struct trace_buffer *buffer, unsigned int trace_ctx)
3067 {
3068         struct trace_event_call *call = &event_user_stack;
3069         struct ring_buffer_event *event;
3070         struct userstack_entry *entry;
3071
3072         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3073                 return;
3074
3075         /*
3076          * NMIs cannot handle page faults, even with fixups.
3077          * Saving the user stack can (and often does) fault.
3078          */
3079         if (unlikely(in_nmi()))
3080                 return;
3081
3082         /*
3083          * prevent recursion, since the user stack tracing may
3084          * trigger other kernel events.
3085          */
3086         preempt_disable();
3087         if (__this_cpu_read(user_stack_count))
3088                 goto out;
3089
3090         __this_cpu_inc(user_stack_count);
3091
3092         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3093                                             sizeof(*entry), trace_ctx);
3094         if (!event)
3095                 goto out_drop_count;
3096         entry   = ring_buffer_event_data(event);
3097
3098         entry->tgid             = current->tgid;
3099         memset(&entry->caller, 0, sizeof(entry->caller));
3100
3101         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3102         if (!call_filter_check_discard(call, entry, buffer, event))
3103                 __buffer_unlock_commit(buffer, event);
3104
3105  out_drop_count:
3106         __this_cpu_dec(user_stack_count);
3107  out:
3108         preempt_enable();
3109 }
3110 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3111 static void ftrace_trace_userstack(struct trace_array *tr,
3112                                    struct trace_buffer *buffer,
3113                                    unsigned int trace_ctx)
3114 {
3115 }
3116 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3117
3118 #endif /* CONFIG_STACKTRACE */
3119
3120 static inline void
3121 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3122                           unsigned long long delta)
3123 {
3124         entry->bottom_delta_ts = delta & U32_MAX;
3125         entry->top_delta_ts = (delta >> 32);
3126 }
3127
3128 void trace_last_func_repeats(struct trace_array *tr,
3129                              struct trace_func_repeats *last_info,
3130                              unsigned int trace_ctx)
3131 {
3132         struct trace_buffer *buffer = tr->array_buffer.buffer;
3133         struct func_repeats_entry *entry;
3134         struct ring_buffer_event *event;
3135         u64 delta;
3136
3137         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3138                                             sizeof(*entry), trace_ctx);
3139         if (!event)
3140                 return;
3141
3142         delta = ring_buffer_event_time_stamp(buffer, event) -
3143                 last_info->ts_last_call;
3144
3145         entry = ring_buffer_event_data(event);
3146         entry->ip = last_info->ip;
3147         entry->parent_ip = last_info->parent_ip;
3148         entry->count = last_info->count;
3149         func_repeats_set_delta_ts(entry, delta);
3150
3151         __buffer_unlock_commit(buffer, event);
3152 }
3153
3154 /* created for use with alloc_percpu */
3155 struct trace_buffer_struct {
3156         int nesting;
3157         char buffer[4][TRACE_BUF_SIZE];
3158 };
3159
3160 static struct trace_buffer_struct *trace_percpu_buffer;
3161
3162 /*
3163  * This allows for lockless recording.  If we're nested too deeply, then
3164  * this returns NULL.
3165  */
3166 static char *get_trace_buf(void)
3167 {
3168         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3169
3170         if (!buffer || buffer->nesting >= 4)
3171                 return NULL;
3172
3173         buffer->nesting++;
3174
3175         /* Interrupts must see nesting incremented before we use the buffer */
3176         barrier();
3177         return &buffer->buffer[buffer->nesting - 1][0];
3178 }
3179
3180 static void put_trace_buf(void)
3181 {
3182         /* Don't let the decrement of nesting leak before this */
3183         barrier();
3184         this_cpu_dec(trace_percpu_buffer->nesting);
3185 }
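/*
 * The expected usage pattern for the two helpers above, mirroring
 * trace_vbprintk() below: preemption must stay disabled between
 * get_trace_buf() and put_trace_buf() so the nesting count and the buffer
 * stay on the same CPU.
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */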
3186
3187 static int alloc_percpu_trace_buffer(void)
3188 {
3189         struct trace_buffer_struct *buffers;
3190
3191         if (trace_percpu_buffer)
3192                 return 0;
3193
3194         buffers = alloc_percpu(struct trace_buffer_struct);
3195         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3196                 return -ENOMEM;
3197
3198         trace_percpu_buffer = buffers;
3199         return 0;
3200 }
3201
3202 static int buffers_allocated;
3203
3204 void trace_printk_init_buffers(void)
3205 {
3206         if (buffers_allocated)
3207                 return;
3208
3209         if (alloc_percpu_trace_buffer())
3210                 return;
3211
3212         /* trace_printk() is for debug use only. Don't use it in production. */
3213
3214         pr_warn("\n");
3215         pr_warn("**********************************************************\n");
3216         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3217         pr_warn("**                                                      **\n");
3218         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3219         pr_warn("**                                                      **\n");
3220         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3221         pr_warn("** unsafe for production use.                           **\n");
3222         pr_warn("**                                                      **\n");
3223         pr_warn("** If you see this message and you are not debugging    **\n");
3224         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3225         pr_warn("**                                                      **\n");
3226         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3227         pr_warn("**********************************************************\n");
3228
3229         /* Expand the buffers to set size */
3230         tracing_update_buffers();
3231
3232         buffers_allocated = 1;
3233
3234         /*
3235          * trace_printk_init_buffers() can be called by modules.
3236          * If that happens, then we need to start cmdline recording
3237          * directly here. If global_trace.array_buffer.buffer is already
3238          * allocated here, then this was called by module code.
3239          */
3240         if (global_trace.array_buffer.buffer)
3241                 tracing_start_cmdline_record();
3242 }
3243 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3244
3245 void trace_printk_start_comm(void)
3246 {
3247         /* Start tracing comms if trace printk is set */
3248         if (!buffers_allocated)
3249                 return;
3250         tracing_start_cmdline_record();
3251 }
3252
3253 static void trace_printk_start_stop_comm(int enabled)
3254 {
3255         if (!buffers_allocated)
3256                 return;
3257
3258         if (enabled)
3259                 tracing_start_cmdline_record();
3260         else
3261                 tracing_stop_cmdline_record();
3262 }
3263
3264 /**
3265  * trace_vbprintk - write binary msg to tracing buffer
3266  * @ip:    The address of the caller
3267  * @fmt:   The string format to write to the buffer
3268  * @args:  Arguments for @fmt
3269  */
3270 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3271 {
3272         struct trace_event_call *call = &event_bprint;
3273         struct ring_buffer_event *event;
3274         struct trace_buffer *buffer;
3275         struct trace_array *tr = &global_trace;
3276         struct bprint_entry *entry;
3277         unsigned int trace_ctx;
3278         char *tbuffer;
3279         int len = 0, size;
3280
3281         if (unlikely(tracing_selftest_running || tracing_disabled))
3282                 return 0;
3283
3284         /* Don't pollute graph traces with trace_vprintk internals */
3285         pause_graph_tracing();
3286
3287         trace_ctx = tracing_gen_ctx();
3288         preempt_disable_notrace();
3289
3290         tbuffer = get_trace_buf();
3291         if (!tbuffer) {
3292                 len = 0;
3293                 goto out_nobuffer;
3294         }
3295
3296         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3297
3298         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3299                 goto out_put;
3300
3301         size = sizeof(*entry) + sizeof(u32) * len;
3302         buffer = tr->array_buffer.buffer;
3303         ring_buffer_nest_start(buffer);
3304         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3305                                             trace_ctx);
3306         if (!event)
3307                 goto out;
3308         entry = ring_buffer_event_data(event);
3309         entry->ip                       = ip;
3310         entry->fmt                      = fmt;
3311
3312         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3313         if (!call_filter_check_discard(call, entry, buffer, event)) {
3314                 __buffer_unlock_commit(buffer, event);
3315                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3316         }
3317
3318 out:
3319         ring_buffer_nest_end(buffer);
3320 out_put:
3321         put_trace_buf();
3322
3323 out_nobuffer:
3324         preempt_enable_notrace();
3325         unpause_graph_tracing();
3326
3327         return len;
3328 }
3329 EXPORT_SYMBOL_GPL(trace_vbprintk);
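/*
 * A minimal sketch of a varargs front end for trace_vbprintk(); the
 * example_bprintk() name is hypothetical.  Note that only the pointer to
 * @fmt is stored in the event (entry->fmt above), so the format string
 * must stay valid for as long as the trace can be read.
 *
 *	static __printf(2, 3)
 *	int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 *
 * A caller would typically pass its own address, e.g.
 * example_bprintk(_THIS_IP_, "x=%d\n", x).
 */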
3330
3331 __printf(3, 0)
3332 static int
3333 __trace_array_vprintk(struct trace_buffer *buffer,
3334                       unsigned long ip, const char *fmt, va_list args)
3335 {
3336         struct trace_event_call *call = &event_print;
3337         struct ring_buffer_event *event;
3338         int len = 0, size;
3339         struct print_entry *entry;
3340         unsigned int trace_ctx;
3341         char *tbuffer;
3342
3343         if (tracing_disabled || tracing_selftest_running)
3344                 return 0;
3345
3346         /* Don't pollute graph traces with trace_vprintk internals */
3347         pause_graph_tracing();
3348
3349         trace_ctx = tracing_gen_ctx();
3350         preempt_disable_notrace();
3351
3352
3353         tbuffer = get_trace_buf();
3354         if (!tbuffer) {
3355                 len = 0;
3356                 goto out_nobuffer;
3357         }
3358
3359         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3360
3361         size = sizeof(*entry) + len + 1;
3362         ring_buffer_nest_start(buffer);
3363         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3364                                             trace_ctx);
3365         if (!event)
3366                 goto out;
3367         entry = ring_buffer_event_data(event);
3368         entry->ip = ip;
3369
3370         memcpy(&entry->buf, tbuffer, len + 1);
3371         if (!call_filter_check_discard(call, entry, buffer, event)) {
3372                 __buffer_unlock_commit(buffer, event);
3373                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3374         }
3375
3376 out:
3377         ring_buffer_nest_end(buffer);
3378         put_trace_buf();
3379
3380 out_nobuffer:
3381         preempt_enable_notrace();
3382         unpause_graph_tracing();
3383
3384         return len;
3385 }
3386
3387 __printf(3, 0)
3388 int trace_array_vprintk(struct trace_array *tr,
3389                         unsigned long ip, const char *fmt, va_list args)
3390 {
3391         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3392 }
3393
3394 /**
3395  * trace_array_printk - Print a message to a specific instance
3396  * @tr: The instance trace_array descriptor
3397  * @ip: The instruction pointer that this is called from.
3398  * @fmt: The format to print (printf format)
3399  *
3400  * If a subsystem sets up its own instance, they have the right to
3401  * printk strings into their tracing instance buffer using this
3402  * function. Note, this function will not write into the top level
3403  * buffer (use trace_printk() for that), as writing into the top level
3404  * buffer should only have events that can be individually disabled.
3405  * trace_printk() is only used for debugging a kernel, and should not
3406  * be ever incorporated in normal use.
3407  *
3408  * trace_array_printk() can be used, as it will not add noise to the
3409  * top level tracing buffer.
3410  *
3411  * Note, trace_array_init_printk() must be called on @tr before this
3412  * can be used.
3413  */
3414 __printf(3, 0)
3415 int trace_array_printk(struct trace_array *tr,
3416                        unsigned long ip, const char *fmt, ...)
3417 {
3418         int ret;
3419         va_list ap;
3420
3421         if (!tr)
3422                 return -ENOENT;
3423
3424         /* This is only allowed for created instances */
3425         if (tr == &global_trace)
3426                 return 0;
3427
3428         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3429                 return 0;
3430
3431         va_start(ap, fmt);
3432         ret = trace_array_vprintk(tr, ip, fmt, ap);
3433         va_end(ap);
3434         return ret;
3435 }
3436 EXPORT_SYMBOL_GPL(trace_array_printk);
3437
3438 /**
3439  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3440  * @tr: The trace array to initialize the buffers for
3441  *
3442  * As trace_array_printk() only writes into instances, they are OK to
3443  * have in the kernel (unlike trace_printk()). This needs to be called
3444  * before trace_array_printk() can be used on a trace_array.
3445  */
3446 int trace_array_init_printk(struct trace_array *tr)
3447 {
3448         if (!tr)
3449                 return -ENOENT;
3450
3451         /* This is only allowed for created instances */
3452         if (tr == &global_trace)
3453                 return -EINVAL;
3454
3455         return alloc_percpu_trace_buffer();
3456 }
3457 EXPORT_SYMBOL_GPL(trace_array_init_printk);
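/*
 * Sketch of the calling sequence expected from a subsystem that owns a
 * trace instance.  The example_instance_setup() name is hypothetical and
 * the instance pointer is assumed to have been obtained when the
 * subsystem created its instance.
 *
 *	static int example_instance_setup(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = trace_array_init_printk(tr);
 *		if (ret)
 *			return ret;
 *
 *		trace_array_printk(tr, _THIS_IP_, "instance ready\n");
 *		return 0;
 *	}
 */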
3458
3459 __printf(3, 4)
3460 int trace_array_printk_buf(struct trace_buffer *buffer,
3461                            unsigned long ip, const char *fmt, ...)
3462 {
3463         int ret;
3464         va_list ap;
3465
3466         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3467                 return 0;
3468
3469         va_start(ap, fmt);
3470         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3471         va_end(ap);
3472         return ret;
3473 }
3474
3475 __printf(2, 0)
3476 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3477 {
3478         return trace_array_vprintk(&global_trace, ip, fmt, args);
3479 }
3480 EXPORT_SYMBOL_GPL(trace_vprintk);
3481
3482 static void trace_iterator_increment(struct trace_iterator *iter)
3483 {
3484         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3485
3486         iter->idx++;
3487         if (buf_iter)
3488                 ring_buffer_iter_advance(buf_iter);
3489 }
3490
3491 static struct trace_entry *
3492 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3493                 unsigned long *lost_events)
3494 {
3495         struct ring_buffer_event *event;
3496         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3497
3498         if (buf_iter) {
3499                 event = ring_buffer_iter_peek(buf_iter, ts);
3500                 if (lost_events)
3501                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3502                                 (unsigned long)-1 : 0;
3503         } else {
3504                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3505                                          lost_events);
3506         }
3507
3508         if (event) {
3509                 iter->ent_size = ring_buffer_event_length(event);
3510                 return ring_buffer_event_data(event);
3511         }
3512         iter->ent_size = 0;
3513         return NULL;
3514 }
3515
3516 static struct trace_entry *
3517 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3518                   unsigned long *missing_events, u64 *ent_ts)
3519 {
3520         struct trace_buffer *buffer = iter->array_buffer->buffer;
3521         struct trace_entry *ent, *next = NULL;
3522         unsigned long lost_events = 0, next_lost = 0;
3523         int cpu_file = iter->cpu_file;
3524         u64 next_ts = 0, ts;
3525         int next_cpu = -1;
3526         int next_size = 0;
3527         int cpu;
3528
3529         /*
3530          * If we are in a per_cpu trace file, don't bother iterating over
3531          * all CPUs; peek at this CPU directly.
3532          */
3533         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3534                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3535                         return NULL;
3536                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3537                 if (ent_cpu)
3538                         *ent_cpu = cpu_file;
3539
3540                 return ent;
3541         }
3542
3543         for_each_tracing_cpu(cpu) {
3544
3545                 if (ring_buffer_empty_cpu(buffer, cpu))
3546                         continue;
3547
3548                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3549
3550                 /*
3551                  * Pick the entry with the smallest timestamp:
3552                  */
3553                 if (ent && (!next || ts < next_ts)) {
3554                         next = ent;
3555                         next_cpu = cpu;
3556                         next_ts = ts;
3557                         next_lost = lost_events;
3558                         next_size = iter->ent_size;
3559                 }
3560         }
3561
3562         iter->ent_size = next_size;
3563
3564         if (ent_cpu)
3565                 *ent_cpu = next_cpu;
3566
3567         if (ent_ts)
3568                 *ent_ts = next_ts;
3569
3570         if (missing_events)
3571                 *missing_events = next_lost;
3572
3573         return next;
3574 }
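/*
 * For example, with pending entries at ts=105 on CPU 0 and ts=100 on
 * CPU 2, __find_next_entry() returns the CPU 2 entry first, so readers
 * see a single, globally time-ordered stream merged from the per-CPU
 * buffers.
 */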
3575
3576 #define STATIC_FMT_BUF_SIZE     128
3577 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3578
3579 static char *trace_iter_expand_format(struct trace_iterator *iter)
3580 {
3581         char *tmp;
3582
3583         if (iter->fmt == static_fmt_buf)
3584                 return NULL;
3585
3586         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3587                        GFP_KERNEL);
3588         if (tmp) {
3589                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3590                 iter->fmt = tmp;
3591         }
3592
3593         return tmp;
3594 }
3595
3596 /* Returns true if the string is safe to dereference from an event */
3597 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3598 {
3599         unsigned long addr = (unsigned long)str;
3600         struct trace_event *trace_event;
3601         struct trace_event_call *event;
3602
3603         /* OK if part of the event data */
3604         if ((addr >= (unsigned long)iter->ent) &&
3605             (addr < (unsigned long)iter->ent + iter->ent_size))
3606                 return true;
3607
3608         /* OK if part of the temp seq buffer */
3609         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3610             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3611                 return true;
3612
3613         /* Core rodata cannot be freed */
3614         if (is_kernel_rodata(addr))
3615                 return true;
3616
3617         if (trace_is_tracepoint_string(str))
3618                 return true;
3619
3620         /*
3621          * Now this could be a module event, referencing core module
3622          * data, which is OK.
3623          */
3624         if (!iter->ent)
3625                 return false;
3626
3627         trace_event = ftrace_find_event(iter->ent->type);
3628         if (!trace_event)
3629                 return false;
3630
3631         event = container_of(trace_event, struct trace_event_call, event);
3632         if (!event->mod)
3633                 return false;
3634
3635         /* Would rather have rodata, but this will suffice */
3636         if (within_module_core(addr, event->mod))
3637                 return true;
3638
3639         return false;
3640 }
3641
3642 static const char *show_buffer(struct trace_seq *s)
3643 {
3644         struct seq_buf *seq = &s->seq;
3645
3646         seq_buf_terminate(seq);
3647
3648         return seq->buffer;
3649 }
3650
3651 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3652
3653 static int test_can_verify_check(const char *fmt, ...)
3654 {
3655         char buf[16];
3656         va_list ap;
3657         int ret;
3658
3659         /*
3660          * The verifier depends on vsnprintf() modifying the va_list that is
3661          * passed to it, i.e. on the va_list being passed by reference. Some
3662          * architectures (like x86_32) pass it by value, which means that
3663          * vsnprintf() does not modify the caller's va_list, and the verifier
3664          * would then need to understand every conversion that vsnprintf()
3665          * can perform. If the va_list is passed by value, the verifier is
3666          * disabled.
3667          */
3668         va_start(ap, fmt);
3669         vsnprintf(buf, 16, "%d", ap);
3670         ret = va_arg(ap, int);
3671         va_end(ap);
3672
3673         return ret;
3674 }
3675
3676 static void test_can_verify(void)
3677 {
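        /*
         * Probe with "%d %d", 0, 1: test_can_verify_check() hands its
         * va_list to vsnprintf() with a format that consumes only the
         * first argument (the 0).  If the architecture passes va_list by
         * reference, the following va_arg() sees the second argument (1)
         * and the verifier stays enabled; if it is passed by value,
         * va_arg() reads the 0 again and the verifier is disabled.
         */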
3678         if (!test_can_verify_check("%d %d", 0, 1)) {
3679                 pr_info("trace event string verifier disabled\n");
3680                 static_branch_inc(&trace_no_verify);
3681         }
3682 }
3683
3684 /**
3685  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3686  * @iter: The iterator that holds the seq buffer and the event being printed
3687  * @fmt: The format used to print the event
3688  * @ap: The va_list holding the data to print from @fmt.
3689  *
3690  * This writes the data into the @iter->seq buffer using the data from
3691  * @fmt and @ap. If the format has a %s, then the source of the string
3692  * is examined to make sure it is safe to print, otherwise it will
3693  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3694  * pointer.
3695  */
3696 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3697                          va_list ap)
3698 {
3699         const char *p = fmt;
3700         const char *str;
3701         int i, j;
3702
3703         if (WARN_ON_ONCE(!fmt))
3704                 return;
3705
3706         if (static_branch_unlikely(&trace_no_verify))
3707                 goto print;
3708
3709         /* Don't bother checking when doing a ftrace_dump() */
3710         if (iter->fmt == static_fmt_buf)
3711                 goto print;
3712
3713         while (*p) {
3714                 j = 0;
3715
3716                 /* We only care about %s and variants */
3717                 for (i = 0; p[i]; i++) {
3718                         if (i + 1 >= iter->fmt_size) {
3719                                 /*
3720                                  * If we can't expand the copy buffer,
3721                                  * just print it.
3722                                  */
3723                                 if (!trace_iter_expand_format(iter))
3724                                         goto print;
3725                         }
3726
3727                         if (p[i] == '\\' && p[i+1]) {
3728                                 i++;
3729                                 continue;
3730                         }
3731                         if (p[i] == '%') {
3732                                 /* Need to test cases like %08.*s */
3733                                 for (j = 1; p[i+j]; j++) {
3734                                         if (isdigit(p[i+j]) ||
3735                                             p[i+j] == '*' ||
3736                                             p[i+j] == '.')
3737                                                 continue;
3738                                         break;
3739                                 }
3740                                 if (p[i+j] == 's')
3741                                         break;
3742                         }
3743                         j = 0;
3744                 }
3745                 /* If no %s found then just print normally */
3746                 if (!p[i])
3747                         break;
3748
3749                 /* Copy up to the %s, and print that */
3750                 strncpy(iter->fmt, p, i);
3751                 iter->fmt[i] = '\0';
3752                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3753
3754                 /* The ap now points to the string data of the %s */
3755                 str = va_arg(ap, const char *);
3756
3757                 /*
3758                  * If you hit this warning, it is likely that the
3759                  * trace event in question used %s on a string that
3760                  * was saved at the time of the event, but may not be
3761                  * around when the trace is read. Use __string(),
3762                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3763                  * instead. See samples/trace_events/trace-events-sample.h
3764                  * for reference.
3765                  */
3766                 if (WARN_ONCE(!trace_safe_str(iter, str),
3767                               "fmt: '%s' current_buffer: '%s'",
3768                               fmt, show_buffer(&iter->seq))) {
3769                         int ret;
3770
3771                         /* Try to safely read the string */
3772                         ret = strncpy_from_kernel_nofault(iter->fmt, str,
3773                                                           iter->fmt_size);
3774                         if (ret < 0)
3775                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3776                         else
3777                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3778                                                  str, iter->fmt);
3779                         str = "[UNSAFE-MEMORY]";
3780                         strcpy(iter->fmt, "%s");
3781                 } else {
3782                         strncpy(iter->fmt, p + i, j + 1);
3783                         iter->fmt[j+1] = '\0';
3784                 }
3785                 trace_seq_printf(&iter->seq, iter->fmt, str);
3786
3787                 p += i + j + 1;
3788         }
3789  print:
3790         if (*p)
3791                 trace_seq_vprintf(&iter->seq, p, ap);
3792 }
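/*
 * Sketch of the safe pattern that the warning in trace_check_vprintf()
 * points at: copy the string into the event with __string()/__assign_str()
 * and print it with __get_str(), instead of recording a pointer that may
 * be gone by the time the trace is read.  Modeled on
 * samples/trace_events/trace-events-sample.h; the event name
 * sample_safe_str is hypothetical.
 *
 *	TRACE_EVENT(sample_safe_str,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */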
3793
3794 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3795 {
3796         const char *p, *new_fmt;
3797         char *q;
3798
3799         if (WARN_ON_ONCE(!fmt))
3800                 return fmt;
3801
3802         if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3803                 return fmt;
3804
3805         p = fmt;
3806         new_fmt = q = iter->fmt;
3807         while (*p) {
3808                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3809                         if (!trace_iter_expand_format(iter))
3810                                 return fmt;
3811
3812                         q += iter->fmt - new_fmt;
3813                         new_fmt = iter->fmt;
3814                 }
3815
3816                 *q++ = *p++;
3817
3818                 /* Replace %p with %px */
3819                 if (p[-1] == '%') {
3820                         if (p[0] == '%') {
3821                                 *q++ = *p++;
3822                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3823                                 *q++ = *p++;
3824                                 *q++ = 'x';
3825                         }
3826                 }
3827         }
3828         *q = '\0';
3829
3830         return new_fmt;
3831 }
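/*
 * For example, with TRACE_ITER_HASH_PTR clear, a format such as
 * "comm=%s ptr=%p" is rewritten above to "comm=%s ptr=%px", so the raw
 * pointer value is printed rather than the hashed one.  Plain "%%" and
 * extended specifiers such as "%pS" are left untouched.
 */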
3832
3833 #define STATIC_TEMP_BUF_SIZE    128
3834 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3835
3836 /* Find the next real entry, without updating the iterator itself */
3837 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3838                                           int *ent_cpu, u64 *ent_ts)
3839 {
3840         /* __find_next_entry will reset ent_size */
3841         int ent_size = iter->ent_size;
3842         struct trace_entry *entry;
3843
3844         /*
3845          * If called from ftrace_dump(), then the iter->temp buffer
3846          * will be the static_temp_buf and not created from kmalloc.
3847          * If the entry size is greater than the buffer, we cannot
3848          * save it. Just return NULL in that case. This is only used
3849          * to add markers when two consecutive events' timestamps have
3850          * a large delta. See trace_print_lat_context().
3851          */
3852         if (iter->temp == static_temp_buf &&
3853             STATIC_TEMP_BUF_SIZE < ent_size)
3854                 return NULL;
3855
3856         /*
3857          * __find_next_entry() may call peek_next_entry(), which may call
3858          * ring_buffer_peek(), which can make the contents of iter->ent
3859          * undefined. iter->ent needs to be copied now.
3860          */
3861         if (iter->ent && iter->ent != iter->temp) {
3862                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3863                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3864                         void *temp;
3865                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3866                         if (!temp)
3867                                 return NULL;
3868                         kfree(iter->temp);
3869                         iter->temp = temp;
3870                         iter->temp_size = iter->ent_size;
3871                 }
3872                 memcpy(iter->temp, iter->ent, iter->ent_size);
3873                 iter->ent = iter->temp;
3874         }
3875         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3876         /* Put back the original ent_size */
3877         iter->ent_size = ent_size;
3878
3879         return entry;
3880 }
3881
3882 /* Find the next real entry, and increment the iterator to the next entry */
3883 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3884 {
3885         iter->ent = __find_next_entry(iter, &iter->cpu,
3886                                       &iter->lost_events, &iter->ts);
3887
3888         if (iter->ent)
3889                 trace_iterator_increment(iter);
3890
3891         return iter->ent ? iter : NULL;
3892 }
3893
3894 static void trace_consume(struct trace_iterator *iter)
3895 {
3896         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3897                             &iter->lost_events);
3898 }
3899
3900 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3901 {
3902         struct trace_iterator *iter = m->private;
3903         int i = (int)*pos;
3904         void *ent;
3905
3906         WARN_ON_ONCE(iter->leftover);
3907
3908         (*pos)++;
3909
3910         /* can't go backwards */
3911         if (iter->idx > i)
3912                 return NULL;
3913
3914         if (iter->idx < 0)
3915                 ent = trace_find_next_entry_inc(iter);
3916         else
3917                 ent = iter;
3918
3919         while (ent && iter->idx < i)
3920                 ent = trace_find_next_entry_inc(iter);
3921
3922         iter->pos = *pos;
3923
3924         return ent;
3925 }
3926
3927 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3928 {
3929         struct ring_buffer_iter *buf_iter;
3930         unsigned long entries = 0;
3931         u64 ts;
3932
3933         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3934
3935         buf_iter = trace_buffer_iter(iter, cpu);
3936         if (!buf_iter)
3937                 return;
3938
3939         ring_buffer_iter_reset(buf_iter);
3940
3941         /*
3942          * With the max latency tracers, it is possible that a reset
3943          * never took place on a CPU. This is evident by the timestamp
3944          * being before the start of the buffer.
3945          */
3946         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3947                 if (ts >= iter->array_buffer->time_start)
3948                         break;
3949                 entries++;
3950                 ring_buffer_iter_advance(buf_iter);
3951         }
3952
3953         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3954 }
3955
3956 /*
3957  * The current tracer is copied to avoid taking a global lock
3958  * all around.
3959  */
3960 static void *s_start(struct seq_file *m, loff_t *pos)
3961 {
3962         struct trace_iterator *iter = m->private;
3963         struct trace_array *tr = iter->tr;
3964         int cpu_file = iter->cpu_file;
3965         void *p = NULL;
3966         loff_t l = 0;
3967         int cpu;
3968
3969         /*
3970          * Copy the tracer to avoid using a global lock all around.
3971          * iter->trace is a copy of current_trace; the name pointer can
3972          * be compared instead of using strcmp(), as iter->trace->name
3973          * will point to the same string as current_trace->name.
3974          */
3975         mutex_lock(&trace_types_lock);
3976         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3977                 *iter->trace = *tr->current_trace;
3978         mutex_unlock(&trace_types_lock);
3979
3980 #ifdef CONFIG_TRACER_MAX_TRACE
3981         if (iter->snapshot && iter->trace->use_max_tr)
3982                 return ERR_PTR(-EBUSY);
3983 #endif
3984
3985         if (!iter->snapshot)
3986                 atomic_inc(&trace_record_taskinfo_disabled);
3987
3988         if (*pos != iter->pos) {
3989                 iter->ent = NULL;
3990                 iter->cpu = 0;
3991                 iter->idx = -1;
3992
3993                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3994                         for_each_tracing_cpu(cpu)
3995                                 tracing_iter_reset(iter, cpu);
3996                 } else
3997                         tracing_iter_reset(iter, cpu_file);
3998
3999                 iter->leftover = 0;
4000                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4001                         ;
4002
4003         } else {
4004                 /*
4005                  * If we overflowed the seq_file before, then we want
4006                  * to just reuse the trace_seq buffer again.
4007                  */
4008                 if (iter->leftover)
4009                         p = iter;
4010                 else {
4011                         l = *pos - 1;
4012                         p = s_next(m, p, &l);
4013                 }
4014         }
4015
4016         trace_event_read_lock();
4017         trace_access_lock(cpu_file);
4018         return p;
4019 }
4020
4021 static void s_stop(struct seq_file *m, void *p)
4022 {
4023         struct trace_iterator *iter = m->private;
4024
4025 #ifdef CONFIG_TRACER_MAX_TRACE
4026         if (iter->snapshot && iter->trace->use_max_tr)
4027                 return;
4028 #endif
4029
4030         if (!iter->snapshot)
4031                 atomic_dec(&trace_record_taskinfo_disabled);
4032
4033         trace_access_unlock(iter->cpu_file);
4034         trace_event_read_unlock();
4035 }
4036
4037 static void
4038 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4039                       unsigned long *entries, int cpu)
4040 {
4041         unsigned long count;
4042
4043         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4044         /*
4045          * If this buffer has skipped entries, then we hold all
4046          * entries for the trace and we need to ignore the
4047          * ones before the time stamp.
4048          */
4049         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4050                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4051                 /* total is the same as the entries */
4052                 *total = count;
4053         } else
4054                 *total = count +
4055                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4056         *entries = count;
4057 }
4058
4059 static void
4060 get_total_entries(struct array_buffer *buf,
4061                   unsigned long *total, unsigned long *entries)
4062 {
4063         unsigned long t, e;
4064         int cpu;
4065
4066         *total = 0;
4067         *entries = 0;
4068
4069         for_each_tracing_cpu(cpu) {
4070                 get_total_entries_cpu(buf, &t, &e, cpu);
4071                 *total += t;
4072                 *entries += e;
4073         }
4074 }
4075
4076 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4077 {
4078         unsigned long total, entries;
4079
4080         if (!tr)
4081                 tr = &global_trace;
4082
4083         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4084
4085         return entries;
4086 }
4087
4088 unsigned long trace_total_entries(struct trace_array *tr)
4089 {
4090         unsigned long total, entries;
4091
4092         if (!tr)
4093                 tr = &global_trace;
4094
4095         get_total_entries(&tr->array_buffer, &total, &entries);
4096
4097         return entries;
4098 }
4099
4100 static void print_lat_help_header(struct seq_file *m)
4101 {
4102         seq_puts(m, "#                    _------=> CPU#            \n"
4103                     "#                   / _-----=> irqs-off        \n"
4104                     "#                  | / _----=> need-resched    \n"
4105                     "#                  || / _---=> hardirq/softirq \n"
4106                     "#                  ||| / _--=> preempt-depth   \n"
4107                     "#                  |||| /     delay            \n"
4108                     "#  cmd     pid     ||||| time  |   caller      \n"
4109                     "#     \\   /        |||||  \\    |   /         \n");
4110 }
4111
4112 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4113 {
4114         unsigned long total;
4115         unsigned long entries;
4116
4117         get_total_entries(buf, &total, &entries);
4118         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4119                    entries, total, num_online_cpus());
4120         seq_puts(m, "#\n");
4121 }
4122
4123 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4124                                    unsigned int flags)
4125 {
4126         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4127
4128         print_event_info(buf, m);
4129
4130         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4131         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4132 }
4133
4134 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4135                                        unsigned int flags)
4136 {
4137         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4138         const char *space = "            ";
4139         int prec = tgid ? 12 : 2;
4140
4141         print_event_info(buf, m);
4142
4143         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4144         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4145         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4146         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4147         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4148         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4149         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4150 }
4151
4152 void
4153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4154 {
4155         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4156         struct array_buffer *buf = iter->array_buffer;
4157         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4158         struct tracer *type = iter->trace;
4159         unsigned long entries;
4160         unsigned long total;
4161         const char *name = "preemption";
4162
4163         name = type->name;
4164
4165         get_total_entries(buf, &total, &entries);
4166
4167         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4168                    name, UTS_RELEASE);
4169         seq_puts(m, "# -----------------------------------"
4170                  "---------------------------------\n");
4171         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4172                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4173                    nsecs_to_usecs(data->saved_latency),
4174                    entries,
4175                    total,
4176                    buf->cpu,
4177 #if defined(CONFIG_PREEMPT_NONE)
4178                    "server",
4179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4180                    "desktop",
4181 #elif defined(CONFIG_PREEMPT)
4182                    "preempt",
4183 #elif defined(CONFIG_PREEMPT_RT)
4184                    "preempt_rt",
4185 #else
4186                    "unknown",
4187 #endif
4188                    /* These are reserved for later use */
4189                    0, 0, 0, 0);
4190 #ifdef CONFIG_SMP
4191         seq_printf(m, " #P:%d)\n", num_online_cpus());
4192 #else
4193         seq_puts(m, ")\n");
4194 #endif
4195         seq_puts(m, "#    -----------------\n");
4196         seq_printf(m, "#    | task: %.16s-%d "
4197                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4198                    data->comm, data->pid,
4199                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4200                    data->policy, data->rt_priority);
4201         seq_puts(m, "#    -----------------\n");
4202
4203         if (data->critical_start) {
4204                 seq_puts(m, "#  => started at: ");
4205                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4206                 trace_print_seq(m, &iter->seq);
4207                 seq_puts(m, "\n#  => ended at:   ");
4208                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4209                 trace_print_seq(m, &iter->seq);
4210                 seq_puts(m, "\n#\n");
4211         }
4212
4213         seq_puts(m, "#\n");
4214 }
4215
4216 static void test_cpu_buff_start(struct trace_iterator *iter)
4217 {
4218         struct trace_seq *s = &iter->seq;
4219         struct trace_array *tr = iter->tr;
4220
4221         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4222                 return;
4223
4224         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4225                 return;
4226
4227         if (cpumask_available(iter->started) &&
4228             cpumask_test_cpu(iter->cpu, iter->started))
4229                 return;
4230
4231         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4232                 return;
4233
4234         if (cpumask_available(iter->started))
4235                 cpumask_set_cpu(iter->cpu, iter->started);
4236
4237         /* Don't print the "buffer started" annotation for the first entry of the trace */
4238         if (iter->idx > 1)
4239                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4240                                 iter->cpu);
4241 }
4242
4243 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4244 {
4245         struct trace_array *tr = iter->tr;
4246         struct trace_seq *s = &iter->seq;
4247         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4248         struct trace_entry *entry;
4249         struct trace_event *event;
4250
4251         entry = iter->ent;
4252
4253         test_cpu_buff_start(iter);
4254
4255         event = ftrace_find_event(entry->type);
4256
4257         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4258                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4259                         trace_print_lat_context(iter);
4260                 else
4261                         trace_print_context(iter);
4262         }
4263
4264         if (trace_seq_has_overflowed(s))
4265                 return TRACE_TYPE_PARTIAL_LINE;
4266
4267         if (event)
4268                 return event->funcs->trace(iter, sym_flags, event);
4269
4270         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4271
4272         return trace_handle_return(s);
4273 }
4274
4275 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4276 {
4277         struct trace_array *tr = iter->tr;
4278         struct trace_seq *s = &iter->seq;
4279         struct trace_entry *entry;
4280         struct trace_event *event;
4281
4282         entry = iter->ent;
4283
4284         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4285                 trace_seq_printf(s, "%d %d %llu ",
4286                                  entry->pid, iter->cpu, iter->ts);
4287
4288         if (trace_seq_has_overflowed(s))
4289                 return TRACE_TYPE_PARTIAL_LINE;
4290
4291         event = ftrace_find_event(entry->type);
4292         if (event)
4293                 return event->funcs->raw(iter, 0, event);
4294
4295         trace_seq_printf(s, "%d ?\n", entry->type);
4296
4297         return trace_handle_return(s);
4298 }
4299
4300 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4301 {
4302         struct trace_array *tr = iter->tr;
4303         struct trace_seq *s = &iter->seq;
4304         unsigned char newline = '\n';
4305         struct trace_entry *entry;
4306         struct trace_event *event;
4307
4308         entry = iter->ent;
4309
4310         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4311                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4312                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4313                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4314                 if (trace_seq_has_overflowed(s))
4315                         return TRACE_TYPE_PARTIAL_LINE;
4316         }
4317
4318         event = ftrace_find_event(entry->type);
4319         if (event) {
4320                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4321                 if (ret != TRACE_TYPE_HANDLED)
4322                         return ret;
4323         }
4324
4325         SEQ_PUT_FIELD(s, newline);
4326
4327         return trace_handle_return(s);
4328 }
4329
4330 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4331 {
4332         struct trace_array *tr = iter->tr;
4333         struct trace_seq *s = &iter->seq;
4334         struct trace_entry *entry;
4335         struct trace_event *event;
4336
4337         entry = iter->ent;
4338
4339         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4340                 SEQ_PUT_FIELD(s, entry->pid);
4341                 SEQ_PUT_FIELD(s, iter->cpu);
4342                 SEQ_PUT_FIELD(s, iter->ts);
4343                 if (trace_seq_has_overflowed(s))
4344                         return TRACE_TYPE_PARTIAL_LINE;
4345         }
4346
4347         event = ftrace_find_event(entry->type);
4348         return event ? event->funcs->binary(iter, 0, event) :
4349                 TRACE_TYPE_HANDLED;
4350 }
4351
4352 int trace_empty(struct trace_iterator *iter)
4353 {
4354         struct ring_buffer_iter *buf_iter;
4355         int cpu;
4356
4357         /* If we are looking at one CPU buffer, only check that one */
4358         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4359                 cpu = iter->cpu_file;
4360                 buf_iter = trace_buffer_iter(iter, cpu);
4361                 if (buf_iter) {
4362                         if (!ring_buffer_iter_empty(buf_iter))
4363                                 return 0;
4364                 } else {
4365                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4366                                 return 0;
4367                 }
4368                 return 1;
4369         }
4370
4371         for_each_tracing_cpu(cpu) {
4372                 buf_iter = trace_buffer_iter(iter, cpu);
4373                 if (buf_iter) {
4374                         if (!ring_buffer_iter_empty(buf_iter))
4375                                 return 0;
4376                 } else {
4377                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4378                                 return 0;
4379                 }
4380         }
4381
4382         return 1;
4383 }
4384
4385 /*  Called with trace_event_read_lock() held. */
4386 enum print_line_t print_trace_line(struct trace_iterator *iter)
4387 {
4388         struct trace_array *tr = iter->tr;
4389         unsigned long trace_flags = tr->trace_flags;
4390         enum print_line_t ret;
4391
4392         if (iter->lost_events) {
4393                 if (iter->lost_events == (unsigned long)-1)
4394                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4395                                          iter->cpu);
4396                 else
4397                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4398                                          iter->cpu, iter->lost_events);
4399                 if (trace_seq_has_overflowed(&iter->seq))
4400                         return TRACE_TYPE_PARTIAL_LINE;
4401         }
4402
4403         if (iter->trace && iter->trace->print_line) {
4404                 ret = iter->trace->print_line(iter);
4405                 if (ret != TRACE_TYPE_UNHANDLED)
4406                         return ret;
4407         }
4408
4409         if (iter->ent->type == TRACE_BPUTS &&
4410                         trace_flags & TRACE_ITER_PRINTK &&
4411                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4412                 return trace_print_bputs_msg_only(iter);
4413
4414         if (iter->ent->type == TRACE_BPRINT &&
4415                         trace_flags & TRACE_ITER_PRINTK &&
4416                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4417                 return trace_print_bprintk_msg_only(iter);
4418
4419         if (iter->ent->type == TRACE_PRINT &&
4420                         trace_flags & TRACE_ITER_PRINTK &&
4421                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4422                 return trace_print_printk_msg_only(iter);
4423
4424         if (trace_flags & TRACE_ITER_BIN)
4425                 return print_bin_fmt(iter);
4426
4427         if (trace_flags & TRACE_ITER_HEX)
4428                 return print_hex_fmt(iter);
4429
4430         if (trace_flags & TRACE_ITER_RAW)
4431                 return print_raw_fmt(iter);
4432
4433         return print_trace_fmt(iter);
4434 }
4435
4436 void trace_latency_header(struct seq_file *m)
4437 {
4438         struct trace_iterator *iter = m->private;
4439         struct trace_array *tr = iter->tr;
4440
4441         /* print nothing if the buffers are empty */
4442         if (trace_empty(iter))
4443                 return;
4444
4445         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4446                 print_trace_header(m, iter);
4447
4448         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4449                 print_lat_help_header(m);
4450 }
4451
4452 void trace_default_header(struct seq_file *m)
4453 {
4454         struct trace_iterator *iter = m->private;
4455         struct trace_array *tr = iter->tr;
4456         unsigned long trace_flags = tr->trace_flags;
4457
4458         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4459                 return;
4460
4461         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4462                 /* print nothing if the buffers are empty */
4463                 if (trace_empty(iter))
4464                         return;
4465                 print_trace_header(m, iter);
4466                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4467                         print_lat_help_header(m);
4468         } else {
4469                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4470                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4471                                 print_func_help_header_irq(iter->array_buffer,
4472                                                            m, trace_flags);
4473                         else
4474                                 print_func_help_header(iter->array_buffer, m,
4475                                                        trace_flags);
4476                 }
4477         }
4478 }
4479
4480 static void test_ftrace_alive(struct seq_file *m)
4481 {
4482         if (!ftrace_is_dead())
4483                 return;
4484         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4485                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4486 }
4487
4488 #ifdef CONFIG_TRACER_MAX_TRACE
4489 static void show_snapshot_main_help(struct seq_file *m)
4490 {
4491         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4492                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4493                     "#                      Takes a snapshot of the main buffer.\n"
4494                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4495                     "#                      (Doesn't have to be '2'; works with any number that\n"
4496                     "#                       is not a '0' or '1')\n");
4497 }
4498
4499 static void show_snapshot_percpu_help(struct seq_file *m)
4500 {
4501         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4502 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4503         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4504                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4505 #else
4506         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4507                     "#                     Must use main snapshot file to allocate.\n");
4508 #endif
4509         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4510                     "#                      (Doesn't have to be '2'; works with any number that\n"
4511                     "#                       is not a '0' or '1')\n");
4512 }
4513
4514 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4515 {
4516         if (iter->tr->allocated_snapshot)
4517                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4518         else
4519                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4520
4521         seq_puts(m, "# Snapshot commands:\n");
4522         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4523                 show_snapshot_main_help(m);
4524         else
4525                 show_snapshot_percpu_help(m);
4526 }
4527 #else
4528 /* Should never be called */
4529 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4530 #endif
4531
4532 static int s_show(struct seq_file *m, void *v)
4533 {
4534         struct trace_iterator *iter = v;
4535         int ret;
4536
4537         if (iter->ent == NULL) {
4538                 if (iter->tr) {
4539                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4540                         seq_puts(m, "#\n");
4541                         test_ftrace_alive(m);
4542                 }
4543                 if (iter->snapshot && trace_empty(iter))
4544                         print_snapshot_help(m, iter);
4545                 else if (iter->trace && iter->trace->print_header)
4546                         iter->trace->print_header(m);
4547                 else
4548                         trace_default_header(m);
4549
4550         } else if (iter->leftover) {
4551                 /*
4552                  * If we filled the seq_file buffer earlier, we
4553                  * want to just show it now.
4554                  */
4555                 ret = trace_print_seq(m, &iter->seq);
4556
4557                 /* ret should this time be zero, but you never know */
4558                 iter->leftover = ret;
4559
4560         } else {
4561                 print_trace_line(iter);
4562                 ret = trace_print_seq(m, &iter->seq);
4563                 /*
4564                  * If we overflow the seq_file buffer, then it will
4565                  * ask us for this data again at start up.
4566                  * Use that instead.
4567                  *  ret is 0 if seq_file write succeeded.
4568                  *        -1 otherwise.
4569                  */
4570                 iter->leftover = ret;
4571         }
4572
4573         return 0;
4574 }
4575
4576 /*
4577  * Should be used after trace_array_get(); trace_types_lock
4578  * ensures that i_cdev was already initialized.
4579  */
4580 static inline int tracing_get_cpu(struct inode *inode)
4581 {
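        /*
         * trace_create_cpu_file() stores cpu + 1 in i_cdev, so a NULL
         * i_cdev means the file is not bound to a single CPU and all
         * CPUs are read.
         */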
4582         if (inode->i_cdev) /* See trace_create_cpu_file() */
4583                 return (long)inode->i_cdev - 1;
4584         return RING_BUFFER_ALL_CPUS;
4585 }
4586
4587 static const struct seq_operations tracer_seq_ops = {
4588         .start          = s_start,
4589         .next           = s_next,
4590         .stop           = s_stop,
4591         .show           = s_show,
4592 };
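/*
 * The seq_file core drives the iterator above as s_start() (take the
 * locks and position the iterator), then alternating s_show()/s_next()
 * for each record, then s_stop() (drop the locks).  A read that fills
 * the seq_file buffer resumes at s_start() with the saved *pos, which is
 * what the iter->leftover handling in s_show() relies on.
 */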
4593
4594 static struct trace_iterator *
4595 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4596 {
4597         struct trace_array *tr = inode->i_private;
4598         struct trace_iterator *iter;
4599         int cpu;
4600
4601         if (tracing_disabled)
4602                 return ERR_PTR(-ENODEV);
4603
4604         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4605         if (!iter)
4606                 return ERR_PTR(-ENOMEM);
4607
4608         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4609                                     GFP_KERNEL);
4610         if (!iter->buffer_iter)
4611                 goto release;
4612
4613         /*
4614          * trace_find_next_entry() may need to save off iter->ent.
4615          * events are less than 128 bytes, allocate a buffer of that size.
4616          * events are less than 128, allocate a buffer of that size.
4617          * If one is greater, then trace_find_next_entry() will
4618          * allocate a new buffer to adjust for the bigger iter->ent.
4619          * It's not critical if it fails to get allocated here.
4620          */
4621         iter->temp = kmalloc(128, GFP_KERNEL);
4622         if (iter->temp)
4623                 iter->temp_size = 128;
4624
4625         /*
4626          * trace_event_printf() may need to modify the given format
4627          * string to replace %p with %px so that it shows the real address
4628          * instead of a hashed value. However, that is only needed for
4629          * event tracing; other tracers may not need it. Defer the
4630          * allocation until it is needed.
4631          */
4632         iter->fmt = NULL;
4633         iter->fmt_size = 0;
4634
4635         /*
4636          * We make a copy of the current tracer to avoid concurrent
4637          * changes on it while we are reading.
4638          */
4639         mutex_lock(&trace_types_lock);
4640         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4641         if (!iter->trace)
4642                 goto fail;
4643
4644         *iter->trace = *tr->current_trace;
4645
4646         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4647                 goto fail;
4648
4649         iter->tr = tr;
4650
4651 #ifdef CONFIG_TRACER_MAX_TRACE
4652         /* Currently only the top directory has a snapshot */
4653         if (tr->current_trace->print_max || snapshot)
4654                 iter->array_buffer = &tr->max_buffer;
4655         else
4656 #endif
4657                 iter->array_buffer = &tr->array_buffer;
4658         iter->snapshot = snapshot;
4659         iter->pos = -1;
4660         iter->cpu_file = tracing_get_cpu(inode);
4661         mutex_init(&iter->mutex);
4662
4663         /* Notify the tracer early; before we stop tracing. */
4664         if (iter->trace->open)
4665                 iter->trace->open(iter);
4666
4667         /* Annotate start of buffers if we had overruns */
4668         if (ring_buffer_overruns(iter->array_buffer->buffer))
4669                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4670
4671         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4672         if (trace_clocks[tr->clock_id].in_ns)
4673                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4674
4675         /*
4676          * If pause-on-trace is enabled, then stop the trace while
4677          * dumping, unless this is the "snapshot" file
4678          */
4679         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4680                 tracing_stop_tr(tr);
4681
4682         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4683                 for_each_tracing_cpu(cpu) {
4684                         iter->buffer_iter[cpu] =
4685                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4686                                                          cpu, GFP_KERNEL);
4687                 }
4688                 ring_buffer_read_prepare_sync();
4689                 for_each_tracing_cpu(cpu) {
4690                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4691                         tracing_iter_reset(iter, cpu);
4692                 }
4693         } else {
4694                 cpu = iter->cpu_file;
4695                 iter->buffer_iter[cpu] =
4696                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4697                                                  cpu, GFP_KERNEL);
4698                 ring_buffer_read_prepare_sync();
4699                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4700                 tracing_iter_reset(iter, cpu);
4701         }
4702
4703         mutex_unlock(&trace_types_lock);
4704
4705         return iter;
4706
4707  fail:
4708         mutex_unlock(&trace_types_lock);
4709         kfree(iter->trace);
4710         kfree(iter->temp);
4711         kfree(iter->buffer_iter);
4712 release:
4713         seq_release_private(inode, file);
4714         return ERR_PTR(-ENOMEM);
4715 }
4716
4717 int tracing_open_generic(struct inode *inode, struct file *filp)
4718 {
4719         int ret;
4720
4721         ret = tracing_check_open_get_tr(NULL);
4722         if (ret)
4723                 return ret;
4724
4725         filp->private_data = inode->i_private;
4726         return 0;
4727 }
4728
4729 bool tracing_is_disabled(void)
4730 {
4731         return (tracing_disabled) ? true : false;
4732 }
4733
4734 /*
4735  * Open and update trace_array ref count.
4736  * Must have the current trace_array passed to it.
4737  */
4738 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4739 {
4740         struct trace_array *tr = inode->i_private;
4741         int ret;
4742
4743         ret = tracing_check_open_get_tr(tr);
4744         if (ret)
4745                 return ret;
4746
4747         filp->private_data = inode->i_private;
4748
4749         return 0;
4750 }
4751
4752 static int tracing_release(struct inode *inode, struct file *file)
4753 {
4754         struct trace_array *tr = inode->i_private;
4755         struct seq_file *m = file->private_data;
4756         struct trace_iterator *iter;
4757         int cpu;
4758
4759         if (!(file->f_mode & FMODE_READ)) {
4760                 trace_array_put(tr);
4761                 return 0;
4762         }
4763
4764         /* Writes do not use seq_file */
4765         iter = m->private;
4766         mutex_lock(&trace_types_lock);
4767
4768         for_each_tracing_cpu(cpu) {
4769                 if (iter->buffer_iter[cpu])
4770                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4771         }
4772
4773         if (iter->trace && iter->trace->close)
4774                 iter->trace->close(iter);
4775
4776         if (!iter->snapshot && tr->stop_count)
4777                 /* reenable tracing if it was previously enabled */
4778                 tracing_start_tr(tr);
4779
4780         __trace_array_put(tr);
4781
4782         mutex_unlock(&trace_types_lock);
4783
4784         mutex_destroy(&iter->mutex);
4785         free_cpumask_var(iter->started);
4786         kfree(iter->fmt);
4787         kfree(iter->temp);
4788         kfree(iter->trace);
4789         kfree(iter->buffer_iter);
4790         seq_release_private(inode, file);
4791
4792         return 0;
4793 }
4794
4795 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4796 {
4797         struct trace_array *tr = inode->i_private;
4798
4799         trace_array_put(tr);
4800         return 0;
4801 }
4802
4803 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4804 {
4805         struct trace_array *tr = inode->i_private;
4806
4807         trace_array_put(tr);
4808
4809         return single_release(inode, file);
4810 }
4811
4812 static int tracing_open(struct inode *inode, struct file *file)
4813 {
4814         struct trace_array *tr = inode->i_private;
4815         struct trace_iterator *iter;
4816         int ret;
4817
4818         ret = tracing_check_open_get_tr(tr);
4819         if (ret)
4820                 return ret;
4821
4822         /* If this file was open for write, then erase contents */
4823         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4824                 int cpu = tracing_get_cpu(inode);
4825                 struct array_buffer *trace_buf = &tr->array_buffer;
4826
4827 #ifdef CONFIG_TRACER_MAX_TRACE
4828                 if (tr->current_trace->print_max)
4829                         trace_buf = &tr->max_buffer;
4830 #endif
4831
4832                 if (cpu == RING_BUFFER_ALL_CPUS)
4833                         tracing_reset_online_cpus(trace_buf);
4834                 else
4835                         tracing_reset_cpu(trace_buf, cpu);
4836         }
4837
4838         if (file->f_mode & FMODE_READ) {
4839                 iter = __tracing_open(inode, file, false);
4840                 if (IS_ERR(iter))
4841                         ret = PTR_ERR(iter);
4842                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4843                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4844         }
4845
4846         if (ret < 0)
4847                 trace_array_put(tr);
4848
4849         return ret;
4850 }
4851
4852 /*
4853  * Some tracers are not suitable for instance buffers.
4854  * A tracer is always available for the global array (toplevel)
4855  * or if it explicitly states that it is.
4856  */
4857 static bool
4858 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4859 {
4860         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4861 }
4862
4863 /* Find the next tracer that this trace array may use */
4864 static struct tracer *
4865 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4866 {
4867         while (t && !trace_ok_for_array(t, tr))
4868                 t = t->next;
4869
4870         return t;
4871 }
4872
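/*
 * seq_file iterator used by the available_tracers file: t_start() takes
 * trace_types_lock and walks the global trace_types list up to the
 * requested position, t_next() advances to the next tracer usable by
 * this trace array, and t_stop() drops the lock again.
 */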
4873 static void *
4874 t_next(struct seq_file *m, void *v, loff_t *pos)
4875 {
4876         struct trace_array *tr = m->private;
4877         struct tracer *t = v;
4878
4879         (*pos)++;
4880
4881         if (t)
4882                 t = get_tracer_for_array(tr, t->next);
4883
4884         return t;
4885 }
4886
4887 static void *t_start(struct seq_file *m, loff_t *pos)
4888 {
4889         struct trace_array *tr = m->private;
4890         struct tracer *t;
4891         loff_t l = 0;
4892
4893         mutex_lock(&trace_types_lock);
4894
4895         t = get_tracer_for_array(tr, trace_types);
4896         for (; t && l < *pos; t = t_next(m, t, &l))
4897                 ;
4898
4899         return t;
4900 }
4901
4902 static void t_stop(struct seq_file *m, void *p)
4903 {
4904         mutex_unlock(&trace_types_lock);
4905 }
4906
4907 static int t_show(struct seq_file *m, void *v)
4908 {
4909         struct tracer *t = v;
4910
4911         if (!t)
4912                 return 0;
4913
4914         seq_puts(m, t->name);
4915         if (t->next)
4916                 seq_putc(m, ' ');
4917         else
4918                 seq_putc(m, '\n');
4919
4920         return 0;
4921 }
4922
4923 static const struct seq_operations show_traces_seq_ops = {
4924         .start          = t_start,
4925         .next           = t_next,
4926         .stop           = t_stop,
4927         .show           = t_show,
4928 };
4929
4930 static int show_traces_open(struct inode *inode, struct file *file)
4931 {
4932         struct trace_array *tr = inode->i_private;
4933         struct seq_file *m;
4934         int ret;
4935
4936         ret = tracing_check_open_get_tr(tr);
4937         if (ret)
4938                 return ret;
4939
4940         ret = seq_open(file, &show_traces_seq_ops);
4941         if (ret) {
4942                 trace_array_put(tr);
4943                 return ret;
4944         }
4945
4946         m = file->private_data;
4947         m->private = tr;
4948
4949         return 0;
4950 }
4951
4952 static int show_traces_release(struct inode *inode, struct file *file)
4953 {
4954         struct trace_array *tr = inode->i_private;
4955
4956         trace_array_put(tr);
4957         return seq_release(inode, file);
4958 }
4959
4960 static ssize_t
4961 tracing_write_stub(struct file *filp, const char __user *ubuf,
4962                    size_t count, loff_t *ppos)
4963 {
4964         return count;
4965 }
4966
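/*
 * Generic lseek for tracing files: files opened for read are backed by
 * a seq_file, so defer to seq_lseek(); for write-only opens there is
 * nothing to seek over, so simply reset the file position to zero.
 */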
4967 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4968 {
4969         int ret;
4970
4971         if (file->f_mode & FMODE_READ)
4972                 ret = seq_lseek(file, offset, whence);
4973         else
4974                 file->f_pos = ret = 0;
4975
4976         return ret;
4977 }
4978
4979 static const struct file_operations tracing_fops = {
4980         .open           = tracing_open,
4981         .read           = seq_read,
4982         .write          = tracing_write_stub,
4983         .llseek         = tracing_lseek,
4984         .release        = tracing_release,
4985 };
4986
4987 static const struct file_operations show_traces_fops = {
4988         .open           = show_traces_open,
4989         .read           = seq_read,
4990         .llseek         = seq_lseek,
4991         .release        = show_traces_release,
4992 };
4993
4994 static ssize_t
4995 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4996                      size_t count, loff_t *ppos)
4997 {
4998         struct trace_array *tr = file_inode(filp)->i_private;
4999         char *mask_str;
5000         int len;
5001
5002         len = snprintf(NULL, 0, "%*pb\n",
5003                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5004         mask_str = kmalloc(len, GFP_KERNEL);
5005         if (!mask_str)
5006                 return -ENOMEM;
5007
5008         len = snprintf(mask_str, len, "%*pb\n",
5009                        cpumask_pr_args(tr->tracing_cpumask));
5010         if (len >= count) {
5011                 count = -EINVAL;
5012                 goto out_err;
5013         }
5014         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5015
5016 out_err:
5017         kfree(mask_str);
5018
5019         return count;
5020 }
5021
5022 int tracing_set_cpumask(struct trace_array *tr,
5023                         cpumask_var_t tracing_cpumask_new)
5024 {
5025         int cpu;
5026
5027         if (!tr)
5028                 return -EINVAL;
5029
5030         local_irq_disable();
5031         arch_spin_lock(&tr->max_lock);
5032         for_each_tracing_cpu(cpu) {
5033                 /*
5034                  * Increase/decrease the disabled counter if we are
5035                  * about to flip a bit in the cpumask:
5036                  */
5037                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5038                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5039                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5040                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5041                 }
5042                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5043                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5044                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5045                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5046                 }
5047         }
5048         arch_spin_unlock(&tr->max_lock);
5049         local_irq_enable();
5050
5051         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5052
5053         return 0;
5054 }
5055
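/*
 * Write handler for the tracing_cpumask file. The buffer is parsed as
 * the standard comma-separated hex CPU mask format; for example,
 * writing "3" restricts tracing to CPUs 0 and 1 (illustrative usage,
 * any valid CPU mask works the same way).
 */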
5056 static ssize_t
5057 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5058                       size_t count, loff_t *ppos)
5059 {
5060         struct trace_array *tr = file_inode(filp)->i_private;
5061         cpumask_var_t tracing_cpumask_new;
5062         int err;
5063
5064         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5065                 return -ENOMEM;
5066
5067         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5068         if (err)
5069                 goto err_free;
5070
5071         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5072         if (err)
5073                 goto err_free;
5074
5075         free_cpumask_var(tracing_cpumask_new);
5076
5077         return count;
5078
5079 err_free:
5080         free_cpumask_var(tracing_cpumask_new);
5081
5082         return err;
5083 }
5084
5085 static const struct file_operations tracing_cpumask_fops = {
5086         .open           = tracing_open_generic_tr,
5087         .read           = tracing_cpumask_read,
5088         .write          = tracing_cpumask_write,
5089         .release        = tracing_release_generic_tr,
5090         .llseek         = generic_file_llseek,
5091 };
5092
5093 static int tracing_trace_options_show(struct seq_file *m, void *v)
5094 {
5095         struct tracer_opt *trace_opts;
5096         struct trace_array *tr = m->private;
5097         u32 tracer_flags;
5098         int i;
5099
5100         mutex_lock(&trace_types_lock);
5101         tracer_flags = tr->current_trace->flags->val;
5102         trace_opts = tr->current_trace->flags->opts;
5103
5104         for (i = 0; trace_options[i]; i++) {
5105                 if (tr->trace_flags & (1 << i))
5106                         seq_printf(m, "%s\n", trace_options[i]);
5107                 else
5108                         seq_printf(m, "no%s\n", trace_options[i]);
5109         }
5110
5111         for (i = 0; trace_opts[i].name; i++) {
5112                 if (tracer_flags & trace_opts[i].bit)
5113                         seq_printf(m, "%s\n", trace_opts[i].name);
5114                 else
5115                         seq_printf(m, "no%s\n", trace_opts[i].name);
5116         }
5117         mutex_unlock(&trace_types_lock);
5118
5119         return 0;
5120 }
5121
5122 static int __set_tracer_option(struct trace_array *tr,
5123                                struct tracer_flags *tracer_flags,
5124                                struct tracer_opt *opts, int neg)
5125 {
5126         struct tracer *trace = tracer_flags->trace;
5127         int ret;
5128
5129         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5130         if (ret)
5131                 return ret;
5132
5133         if (neg)
5134                 tracer_flags->val &= ~opts->bit;
5135         else
5136                 tracer_flags->val |= opts->bit;
5137         return 0;
5138 }
5139
5140 /* Try to assign a tracer specific option */
5141 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5142 {
5143         struct tracer *trace = tr->current_trace;
5144         struct tracer_flags *tracer_flags = trace->flags;
5145         struct tracer_opt *opts = NULL;
5146         int i;
5147
5148         for (i = 0; tracer_flags->opts[i].name; i++) {
5149                 opts = &tracer_flags->opts[i];
5150
5151                 if (strcmp(cmp, opts->name) == 0)
5152                         return __set_tracer_option(tr, trace->flags, opts, neg);
5153         }
5154
5155         return -EINVAL;
5156 }
5157
5158 /* Some tracers require overwrite to stay enabled */
5159 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5160 {
5161         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5162                 return -1;
5163
5164         return 0;
5165 }
5166
5167 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5168 {
5169         if ((mask == TRACE_ITER_RECORD_TGID) ||
5170             (mask == TRACE_ITER_RECORD_CMD))
5171                 lockdep_assert_held(&event_mutex);
5172
5173         /* do nothing if flag is already set */
5174         if (!!(tr->trace_flags & mask) == !!enabled)
5175                 return 0;
5176
5177         /* Give the tracer a chance to approve the change */
5178         if (tr->current_trace->flag_changed)
5179                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5180                         return -EINVAL;
5181
5182         if (enabled)
5183                 tr->trace_flags |= mask;
5184         else
5185                 tr->trace_flags &= ~mask;
5186
5187         if (mask == TRACE_ITER_RECORD_CMD)
5188                 trace_event_enable_cmd_record(enabled);
5189
5190         if (mask == TRACE_ITER_RECORD_TGID) {
5191                 if (!tgid_map)
5192                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5193                                            sizeof(*tgid_map),
5194                                            GFP_KERNEL);
5195                 if (!tgid_map) {
5196                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5197                         return -ENOMEM;
5198                 }
5199
5200                 trace_event_enable_tgid_record(enabled);
5201         }
5202
5203         if (mask == TRACE_ITER_EVENT_FORK)
5204                 trace_event_follow_fork(tr, enabled);
5205
5206         if (mask == TRACE_ITER_FUNC_FORK)
5207                 ftrace_pid_follow_fork(tr, enabled);
5208
5209         if (mask == TRACE_ITER_OVERWRITE) {
5210                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5211 #ifdef CONFIG_TRACER_MAX_TRACE
5212                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5213 #endif
5214         }
5215
5216         if (mask == TRACE_ITER_PRINTK) {
5217                 trace_printk_start_stop_comm(enabled);
5218                 trace_printk_control(enabled);
5219         }
5220
5221         return 0;
5222 }
5223
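/*
 * Parse and apply a single option string from the trace_options file
 * or the trace_options= boot parameter. A "no" prefix clears the flag;
 * e.g. writing "noprint-parent" would clear the print-parent flag
 * (illustrative; any core or tracer-specific option name is accepted).
 */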
5224 int trace_set_options(struct trace_array *tr, char *option)
5225 {
5226         char *cmp;
5227         int neg = 0;
5228         int ret;
5229         size_t orig_len = strlen(option);
5230         int len;
5231
5232         cmp = strstrip(option);
5233
5234         len = str_has_prefix(cmp, "no");
5235         if (len)
5236                 neg = 1;
5237
5238         cmp += len;
5239
5240         mutex_lock(&event_mutex);
5241         mutex_lock(&trace_types_lock);
5242
5243         ret = match_string(trace_options, -1, cmp);
5244         /* If the flag was not found, try the tracer-specific options */
5245         if (ret < 0)
5246                 ret = set_tracer_option(tr, cmp, neg);
5247         else
5248                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5249
5250         mutex_unlock(&trace_types_lock);
5251         mutex_unlock(&event_mutex);
5252
5253         /*
5254          * If the first trailing whitespace is replaced with '\0' by strstrip,
5255          * turn it back into a space.
5256          */
5257         if (orig_len > strlen(option))
5258                 option[strlen(option)] = ' ';
5259
5260         return ret;
5261 }
5262
5263 static void __init apply_trace_boot_options(void)
5264 {
5265         char *buf = trace_boot_options_buf;
5266         char *option;
5267
5268         while (true) {
5269                 option = strsep(&buf, ",");
5270
5271                 if (!option)
5272                         break;
5273
5274                 if (*option)
5275                         trace_set_options(&global_trace, option);
5276
5277                 /* Put back the comma to allow this to be called again */
5278                 if (buf)
5279                         *(buf - 1) = ',';
5280         }
5281 }
5282
5283 static ssize_t
5284 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5285                         size_t cnt, loff_t *ppos)
5286 {
5287         struct seq_file *m = filp->private_data;
5288         struct trace_array *tr = m->private;
5289         char buf[64];
5290         int ret;
5291
5292         if (cnt >= sizeof(buf))
5293                 return -EINVAL;
5294
5295         if (copy_from_user(buf, ubuf, cnt))
5296                 return -EFAULT;
5297
5298         buf[cnt] = 0;
5299
5300         ret = trace_set_options(tr, buf);
5301         if (ret < 0)
5302                 return ret;
5303
5304         *ppos += cnt;
5305
5306         return cnt;
5307 }
5308
5309 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5310 {
5311         struct trace_array *tr = inode->i_private;
5312         int ret;
5313
5314         ret = tracing_check_open_get_tr(tr);
5315         if (ret)
5316                 return ret;
5317
5318         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5319         if (ret < 0)
5320                 trace_array_put(tr);
5321
5322         return ret;
5323 }
5324
5325 static const struct file_operations tracing_iter_fops = {
5326         .open           = tracing_trace_options_open,
5327         .read           = seq_read,
5328         .llseek         = seq_lseek,
5329         .release        = tracing_single_release_tr,
5330         .write          = tracing_trace_options_write,
5331 };
5332
5333 static const char readme_msg[] =
5334         "tracing mini-HOWTO:\n\n"
5335         "# echo 0 > tracing_on : quick way to disable tracing\n"
5336         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5337         " Important files:\n"
5338         "  trace\t\t\t- The static contents of the buffer\n"
5339         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5340         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5341         "  current_tracer\t- function and latency tracers\n"
5342         "  available_tracers\t- list of configured tracers for current_tracer\n"
5343         "  error_log\t- error log for failed commands (that support it)\n"
5344         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5345         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5346         "  trace_clock\t\t- change the clock used to order events\n"
5347         "       local:   Per cpu clock but may not be synced across CPUs\n"
5348         "      global:   Synced across CPUs but slows tracing down.\n"
5349         "     counter:   Not a clock, but just an increment\n"
5350         "      uptime:   Jiffy counter from time of boot\n"
5351         "        perf:   Same clock that perf events use\n"
5352 #ifdef CONFIG_X86_64
5353         "     x86-tsc:   TSC cycle counter\n"
5354 #endif
5355         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5356         "       delta:   Delta difference against a buffer-wide timestamp\n"
5357         "    absolute:   Absolute (standalone) timestamp\n"
5358         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5359         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5360         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5361         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5362         "\t\t\t  Remove sub-buffer with rmdir\n"
5363         "  trace_options\t\t- Set format or modify how tracing happens\n"
5364         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5365         "\t\t\t  option name\n"
5366         "  saved_cmdlines_size\t- echo the number of comm-pid mappings to save in here\n"
5367 #ifdef CONFIG_DYNAMIC_FTRACE
5368         "\n  available_filter_functions - list of functions that can be filtered on\n"
5369         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5370         "\t\t\t  functions\n"
5371         "\t     accepts: func_full_name or glob-matching-pattern\n"
5372         "\t     modules: Can select a group via module\n"
5373         "\t      Format: :mod:<module-name>\n"
5374         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5375         "\t    triggers: a command to perform when function is hit\n"
5376         "\t      Format: <function>:<trigger>[:count]\n"
5377         "\t     trigger: traceon, traceoff\n"
5378         "\t\t      enable_event:<system>:<event>\n"
5379         "\t\t      disable_event:<system>:<event>\n"
5380 #ifdef CONFIG_STACKTRACE
5381         "\t\t      stacktrace\n"
5382 #endif
5383 #ifdef CONFIG_TRACER_SNAPSHOT
5384         "\t\t      snapshot\n"
5385 #endif
5386         "\t\t      dump\n"
5387         "\t\t      cpudump\n"
5388         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5389         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5390         "\t     The first one will disable tracing every time do_fault is hit\n"
5391         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5392         "\t       The first time do_trap is hit and it disables tracing, the\n"
5393         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5394         "\t       the counter will not decrement. It only decrements when the\n"
5395         "\t       trigger actually took effect\n"
5396         "\t     To remove trigger without count:\n"
5397         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5398         "\t     To remove trigger with a count:\n"
5399         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5400         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5401         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5402         "\t    modules: Can select a group via module command :mod:\n"
5403         "\t    Does not accept triggers\n"
5404 #endif /* CONFIG_DYNAMIC_FTRACE */
5405 #ifdef CONFIG_FUNCTION_TRACER
5406         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5407         "\t\t    (function)\n"
5408         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5409         "\t\t    (function)\n"
5410 #endif
5411 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5412         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5413         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5414         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5415 #endif
5416 #ifdef CONFIG_TRACER_SNAPSHOT
5417         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5418         "\t\t\t  snapshot buffer. Read the contents for more\n"
5419         "\t\t\t  information\n"
5420 #endif
5421 #ifdef CONFIG_STACK_TRACER
5422         "  stack_trace\t\t- Shows the max stack trace when active\n"
5423         "  stack_max_size\t- Shows current max stack size that was traced\n"
5424         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5425         "\t\t\t  new trace)\n"
5426 #ifdef CONFIG_DYNAMIC_FTRACE
5427         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5428         "\t\t\t  traces\n"
5429 #endif
5430 #endif /* CONFIG_STACK_TRACER */
5431 #ifdef CONFIG_DYNAMIC_EVENTS
5432         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5433         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5434 #endif
5435 #ifdef CONFIG_KPROBE_EVENTS
5436         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5437         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5438 #endif
5439 #ifdef CONFIG_UPROBE_EVENTS
5440         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5441         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5442 #endif
5443 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5444         "\t  accepts: event-definitions (one definition per line)\n"
5445         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5446         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5447 #ifdef CONFIG_HIST_TRIGGERS
5448         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5449 #endif
5450         "\t           -:[<group>/]<event>\n"
5451 #ifdef CONFIG_KPROBE_EVENTS
5452         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5453   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5454 #endif
5455 #ifdef CONFIG_UPROBE_EVENTS
5456   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5457 #endif
5458         "\t     args: <name>=fetcharg[:type]\n"
5459         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5460 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5461         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5462 #else
5463         "\t           $stack<index>, $stack, $retval, $comm,\n"
5464 #endif
5465         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5466         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5467         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5468         "\t           <type>\\[<array-size>\\]\n"
5469 #ifdef CONFIG_HIST_TRIGGERS
5470         "\t    field: <stype> <name>;\n"
5471         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5472         "\t           [unsigned] char/int/long\n"
5473 #endif
5474 #endif
5475         "  events/\t\t- Directory containing all trace event subsystems:\n"
5476         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5477         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5478         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5479         "\t\t\t  events\n"
5480         "      filter\t\t- If set, only events passing filter are traced\n"
5481         "  events/<system>/<event>/\t- Directory containing control files for\n"
5482         "\t\t\t  <event>:\n"
5483         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5484         "      filter\t\t- If set, only events passing filter are traced\n"
5485         "      trigger\t\t- If set, a command to perform when event is hit\n"
5486         "\t    Format: <trigger>[:count][if <filter>]\n"
5487         "\t   trigger: traceon, traceoff\n"
5488         "\t            enable_event:<system>:<event>\n"
5489         "\t            disable_event:<system>:<event>\n"
5490 #ifdef CONFIG_HIST_TRIGGERS
5491         "\t            enable_hist:<system>:<event>\n"
5492         "\t            disable_hist:<system>:<event>\n"
5493 #endif
5494 #ifdef CONFIG_STACKTRACE
5495         "\t\t    stacktrace\n"
5496 #endif
5497 #ifdef CONFIG_TRACER_SNAPSHOT
5498         "\t\t    snapshot\n"
5499 #endif
5500 #ifdef CONFIG_HIST_TRIGGERS
5501         "\t\t    hist (see below)\n"
5502 #endif
5503         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5504         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5505         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5506         "\t                  events/block/block_unplug/trigger\n"
5507         "\t   The first disables tracing every time block_unplug is hit.\n"
5508         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5509         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5510         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
5511         "\t   Like function triggers, the counter is only decremented if it\n"
5512         "\t    enabled or disabled tracing.\n"
5513         "\t   To remove a trigger without a count:\n"
5514         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5515         "\t   To remove a trigger with a count:\n"
5516         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5517         "\t   Filters can be ignored when removing a trigger.\n"
5518 #ifdef CONFIG_HIST_TRIGGERS
5519         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5520         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5521         "\t            [:values=<field1[,field2,...]>]\n"
5522         "\t            [:sort=<field1[,field2,...]>]\n"
5523         "\t            [:size=#entries]\n"
5524         "\t            [:pause][:continue][:clear]\n"
5525         "\t            [:name=histname1]\n"
5526         "\t            [:<handler>.<action>]\n"
5527         "\t            [if <filter>]\n\n"
5528         "\t    When a matching event is hit, an entry is added to a hash\n"
5529         "\t    table using the key(s) and value(s) named, and the value of a\n"
5530         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5531         "\t    correspond to fields in the event's format description.  Keys\n"
5532         "\t    can be any field, or the special string 'stacktrace'.\n"
5533         "\t    Compound keys consisting of up to two fields can be specified\n"
5534         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5535         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5536         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5537         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5538         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5539         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5540         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5541         "\t    its histogram data will be shared with other triggers of the\n"
5542         "\t    same name, and trigger hits will update this common data.\n\n"
5543         "\t    Reading the 'hist' file for the event will dump the hash\n"
5544         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5545         "\t    triggers attached to an event, there will be a table for each\n"
5546         "\t    trigger in the output.  The table displayed for a named\n"
5547         "\t    trigger will be the same as any other instance having the\n"
5548         "\t    same name.  The default format used to display a given field\n"
5549         "\t    can be modified by appending any of the following modifiers\n"
5550         "\t    to the field name, as applicable:\n\n"
5551         "\t            .hex        display a number as a hex value\n"
5552         "\t            .sym        display an address as a symbol\n"
5553         "\t            .sym-offset display an address as a symbol and offset\n"
5554         "\t            .execname   display a common_pid as a program name\n"
5555         "\t            .syscall    display a syscall id as a syscall name\n"
5556         "\t            .log2       display log2 value rather than raw number\n"
5557         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5558         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5559         "\t    trigger or to start a hist trigger but not log any events\n"
5560         "\t    until told to do so.  'continue' can be used to start or\n"
5561         "\t    restart a paused hist trigger.\n\n"
5562         "\t    The 'clear' parameter will clear the contents of a running\n"
5563         "\t    hist trigger and leave its current paused/active state\n"
5564         "\t    unchanged.\n\n"
5565         "\t    The enable_hist and disable_hist triggers can be used to\n"
5566         "\t    have one event conditionally start and stop another event's\n"
5567         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5568         "\t    the enable_event and disable_event triggers.\n\n"
5569         "\t    Hist trigger handlers and actions are executed whenever a\n"
5570         "\t    histogram entry is added or updated.  They take the form:\n\n"
5571         "\t        <handler>.<action>\n\n"
5572         "\t    The available handlers are:\n\n"
5573         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5574         "\t        onmax(var)               - invoke if var exceeds current max\n"
5575         "\t        onchange(var)            - invoke action if var changes\n\n"
5576         "\t    The available actions are:\n\n"
5577         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5578         "\t        save(field,...)                      - save current event fields\n"
5579 #ifdef CONFIG_TRACER_SNAPSHOT
5580         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5581 #endif
5582 #ifdef CONFIG_SYNTH_EVENTS
5583         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5584         "\t  Write into this file to define/undefine new synthetic events.\n"
5585         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5586 #endif
5587 #endif
5588 ;
5589
5590 static ssize_t
5591 tracing_readme_read(struct file *filp, char __user *ubuf,
5592                        size_t cnt, loff_t *ppos)
5593 {
5594         return simple_read_from_buffer(ubuf, cnt, ppos,
5595                                         readme_msg, strlen(readme_msg));
5596 }
5597
5598 static const struct file_operations tracing_readme_fops = {
5599         .open           = tracing_open_generic,
5600         .read           = tracing_readme_read,
5601         .llseek         = generic_file_llseek,
5602 };
5603
5604 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5605 {
5606         int *ptr = v;
5607
5608         if (*pos || m->count)
5609                 ptr++;
5610
5611         (*pos)++;
5612
5613         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5614                 if (trace_find_tgid(*ptr))
5615                         return ptr;
5616         }
5617
5618         return NULL;
5619 }
5620
5621 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5622 {
5623         void *v;
5624         loff_t l = 0;
5625
5626         if (!tgid_map)
5627                 return NULL;
5628
5629         v = &tgid_map[0];
5630         while (l <= *pos) {
5631                 v = saved_tgids_next(m, v, &l);
5632                 if (!v)
5633                         return NULL;
5634         }
5635
5636         return v;
5637 }
5638
5639 static void saved_tgids_stop(struct seq_file *m, void *v)
5640 {
5641 }
5642
5643 static int saved_tgids_show(struct seq_file *m, void *v)
5644 {
5645         int pid = (int *)v - tgid_map;
5646
5647         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5648         return 0;
5649 }
5650
5651 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5652         .start          = saved_tgids_start,
5653         .stop           = saved_tgids_stop,
5654         .next           = saved_tgids_next,
5655         .show           = saved_tgids_show,
5656 };
5657
5658 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5659 {
5660         int ret;
5661
5662         ret = tracing_check_open_get_tr(NULL);
5663         if (ret)
5664                 return ret;
5665
5666         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5667 }
5668
5669
5670 static const struct file_operations tracing_saved_tgids_fops = {
5671         .open           = tracing_saved_tgids_open,
5672         .read           = seq_read,
5673         .llseek         = seq_lseek,
5674         .release        = seq_release,
5675 };
5676
5677 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5678 {
5679         unsigned int *ptr = v;
5680
5681         if (*pos || m->count)
5682                 ptr++;
5683
5684         (*pos)++;
5685
5686         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5687              ptr++) {
5688                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5689                         continue;
5690
5691                 return ptr;
5692         }
5693
5694         return NULL;
5695 }
5696
5697 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5698 {
5699         void *v;
5700         loff_t l = 0;
5701
5702         preempt_disable();
5703         arch_spin_lock(&trace_cmdline_lock);
5704
5705         v = &savedcmd->map_cmdline_to_pid[0];
5706         while (l <= *pos) {
5707                 v = saved_cmdlines_next(m, v, &l);
5708                 if (!v)
5709                         return NULL;
5710         }
5711
5712         return v;
5713 }
5714
5715 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5716 {
5717         arch_spin_unlock(&trace_cmdline_lock);
5718         preempt_enable();
5719 }
5720
5721 static int saved_cmdlines_show(struct seq_file *m, void *v)
5722 {
5723         char buf[TASK_COMM_LEN];
5724         unsigned int *pid = v;
5725
5726         __trace_find_cmdline(*pid, buf);
5727         seq_printf(m, "%d %s\n", *pid, buf);
5728         return 0;
5729 }
5730
5731 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5732         .start          = saved_cmdlines_start,
5733         .next           = saved_cmdlines_next,
5734         .stop           = saved_cmdlines_stop,
5735         .show           = saved_cmdlines_show,
5736 };
5737
5738 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5739 {
5740         int ret;
5741
5742         ret = tracing_check_open_get_tr(NULL);
5743         if (ret)
5744                 return ret;
5745
5746         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5747 }
5748
5749 static const struct file_operations tracing_saved_cmdlines_fops = {
5750         .open           = tracing_saved_cmdlines_open,
5751         .read           = seq_read,
5752         .llseek         = seq_lseek,
5753         .release        = seq_release,
5754 };
5755
5756 static ssize_t
5757 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5758                                  size_t cnt, loff_t *ppos)
5759 {
5760         char buf[64];
5761         int r;
5762
5763         arch_spin_lock(&trace_cmdline_lock);
5764         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5765         arch_spin_unlock(&trace_cmdline_lock);
5766
5767         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5768 }
5769
5770 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5771 {
5772         kfree(s->saved_cmdlines);
5773         kfree(s->map_cmdline_to_pid);
5774         kfree(s);
5775 }
5776
5777 static int tracing_resize_saved_cmdlines(unsigned int val)
5778 {
5779         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5780
5781         s = kmalloc(sizeof(*s), GFP_KERNEL);
5782         if (!s)
5783                 return -ENOMEM;
5784
5785         if (allocate_cmdlines_buffer(val, s) < 0) {
5786                 kfree(s);
5787                 return -ENOMEM;
5788         }
5789
5790         arch_spin_lock(&trace_cmdline_lock);
5791         savedcmd_temp = savedcmd;
5792         savedcmd = s;
5793         arch_spin_unlock(&trace_cmdline_lock);
5794         free_saved_cmdlines_buffer(savedcmd_temp);
5795
5796         return 0;
5797 }
5798
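/*
 * Write handler for the saved_cmdlines_size file: the value written is
 * the number of comm-pid mappings to keep, and the saved_cmdlines
 * buffer is reallocated to that size (e.g. "echo 1024 >
 * saved_cmdlines_size" is a typical illustrative use).
 */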
5799 static ssize_t
5800 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5801                                   size_t cnt, loff_t *ppos)
5802 {
5803         unsigned long val;
5804         int ret;
5805
5806         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5807         if (ret)
5808                 return ret;
5809
5810         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5811         if (!val || val > PID_MAX_DEFAULT)
5812                 return -EINVAL;
5813
5814         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5815         if (ret < 0)
5816                 return ret;
5817
5818         *ppos += cnt;
5819
5820         return cnt;
5821 }
5822
5823 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5824         .open           = tracing_open_generic,
5825         .read           = tracing_saved_cmdlines_size_read,
5826         .write          = tracing_saved_cmdlines_size_write,
5827 };
5828
5829 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5830 static union trace_eval_map_item *
5831 update_eval_map(union trace_eval_map_item *ptr)
5832 {
5833         if (!ptr->map.eval_string) {
5834                 if (ptr->tail.next) {
5835                         ptr = ptr->tail.next;
5836                         /* Set ptr to the next real item (skip head) */
5837                         ptr++;
5838                 } else
5839                         return NULL;
5840         }
5841         return ptr;
5842 }
5843
5844 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5845 {
5846         union trace_eval_map_item *ptr = v;
5847
5848         /*
5849          * Paranoid! If ptr points to end, we don't want to increment past it.
5850          * This really should never happen.
5851          */
5852         (*pos)++;
5853         ptr = update_eval_map(ptr);
5854         if (WARN_ON_ONCE(!ptr))
5855                 return NULL;
5856
5857         ptr++;
5858         ptr = update_eval_map(ptr);
5859
5860         return ptr;
5861 }
5862
5863 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5864 {
5865         union trace_eval_map_item *v;
5866         loff_t l = 0;
5867
5868         mutex_lock(&trace_eval_mutex);
5869
5870         v = trace_eval_maps;
5871         if (v)
5872                 v++;
5873
5874         while (v && l < *pos) {
5875                 v = eval_map_next(m, v, &l);
5876         }
5877
5878         return v;
5879 }
5880
5881 static void eval_map_stop(struct seq_file *m, void *v)
5882 {
5883         mutex_unlock(&trace_eval_mutex);
5884 }
5885
5886 static int eval_map_show(struct seq_file *m, void *v)
5887 {
5888         union trace_eval_map_item *ptr = v;
5889
5890         seq_printf(m, "%s %ld (%s)\n",
5891                    ptr->map.eval_string, ptr->map.eval_value,
5892                    ptr->map.system);
5893
5894         return 0;
5895 }
5896
5897 static const struct seq_operations tracing_eval_map_seq_ops = {
5898         .start          = eval_map_start,
5899         .next           = eval_map_next,
5900         .stop           = eval_map_stop,
5901         .show           = eval_map_show,
5902 };
5903
5904 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5905 {
5906         int ret;
5907
5908         ret = tracing_check_open_get_tr(NULL);
5909         if (ret)
5910                 return ret;
5911
5912         return seq_open(filp, &tracing_eval_map_seq_ops);
5913 }
5914
5915 static const struct file_operations tracing_eval_map_fops = {
5916         .open           = tracing_eval_map_open,
5917         .read           = seq_read,
5918         .llseek         = seq_lseek,
5919         .release        = seq_release,
5920 };
5921
5922 static inline union trace_eval_map_item *
5923 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5924 {
5925         /* Return tail of array given the head */
5926         return ptr + ptr->head.length + 1;
5927 }
5928
5929 static void
5930 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5931                            int len)
5932 {
5933         struct trace_eval_map **stop;
5934         struct trace_eval_map **map;
5935         union trace_eval_map_item *map_array;
5936         union trace_eval_map_item *ptr;
5937
5938         stop = start + len;
5939
5940         /*
5941          * The trace_eval_maps contains the map plus a head and tail item,
5942          * where the head holds the module and length of array, and the
5943          * tail holds a pointer to the next list.
5944          */
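        /*
         * Illustrative layout for len == 3 (the head and tail are the
         * two extra items allocated below):
         *
         *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
         *
         * trace_eval_jmp_to_tail() steps from the head over 'length'
         * maps to reach the tail, which chains to the next array.
         */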
5945         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5946         if (!map_array) {
5947                 pr_warn("Unable to allocate trace eval mapping\n");
5948                 return;
5949         }
5950
5951         mutex_lock(&trace_eval_mutex);
5952
5953         if (!trace_eval_maps)
5954                 trace_eval_maps = map_array;
5955         else {
5956                 ptr = trace_eval_maps;
5957                 for (;;) {
5958                         ptr = trace_eval_jmp_to_tail(ptr);
5959                         if (!ptr->tail.next)
5960                                 break;
5961                         ptr = ptr->tail.next;
5962
5963                 }
5964                 ptr->tail.next = map_array;
5965         }
5966         map_array->head.mod = mod;
5967         map_array->head.length = len;
5968         map_array++;
5969
5970         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5971                 map_array->map = **map;
5972                 map_array++;
5973         }
5974         memset(map_array, 0, sizeof(*map_array));
5975
5976         mutex_unlock(&trace_eval_mutex);
5977 }
5978
5979 static void trace_create_eval_file(struct dentry *d_tracer)
5980 {
5981         trace_create_file("eval_map", 0444, d_tracer,
5982                           NULL, &tracing_eval_map_fops);
5983 }
5984
5985 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5986 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5987 static inline void trace_insert_eval_map_file(struct module *mod,
5988                               struct trace_eval_map **start, int len) { }
5989 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5990
5991 static void trace_insert_eval_map(struct module *mod,
5992                                   struct trace_eval_map **start, int len)
5993 {
5994         struct trace_eval_map **map;
5995
5996         if (len <= 0)
5997                 return;
5998
5999         map = start;
6000
6001         trace_event_eval_update(map, len);
6002
6003         trace_insert_eval_map_file(mod, start, len);
6004 }
6005
6006 static ssize_t
6007 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6008                        size_t cnt, loff_t *ppos)
6009 {
6010         struct trace_array *tr = filp->private_data;
6011         char buf[MAX_TRACER_SIZE+2];
6012         int r;
6013
6014         mutex_lock(&trace_types_lock);
6015         r = sprintf(buf, "%s\n", tr->current_trace->name);
6016         mutex_unlock(&trace_types_lock);
6017
6018         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6019 }
6020
6021 int tracer_init(struct tracer *t, struct trace_array *tr)
6022 {
6023         tracing_reset_online_cpus(&tr->array_buffer);
6024         return t->init(tr);
6025 }
6026
6027 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6028 {
6029         int cpu;
6030
6031         for_each_tracing_cpu(cpu)
6032                 per_cpu_ptr(buf->data, cpu)->entries = val;
6033 }
6034
6035 #ifdef CONFIG_TRACER_MAX_TRACE
6036 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6037 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6038                                         struct array_buffer *size_buf, int cpu_id)
6039 {
6040         int cpu, ret = 0;
6041
6042         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6043                 for_each_tracing_cpu(cpu) {
6044                         ret = ring_buffer_resize(trace_buf->buffer,
6045                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6046                         if (ret < 0)
6047                                 break;
6048                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6049                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6050                 }
6051         } else {
6052                 ret = ring_buffer_resize(trace_buf->buffer,
6053                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6054                 if (ret == 0)
6055                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6056                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6057         }
6058
6059         return ret;
6060 }
6061 #endif /* CONFIG_TRACER_MAX_TRACE */
6062
6063 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6064                                         unsigned long size, int cpu)
6065 {
6066         int ret;
6067
6068         /*
6069          * If kernel or user changes the size of the ring buffer
6070          * we use the size that was given, and we can forget about
6071          * expanding it later.
6072          */
6073         ring_buffer_expanded = true;
6074
6075         /* May be called before buffers are initialized */
6076         if (!tr->array_buffer.buffer)
6077                 return 0;
6078
6079         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6080         if (ret < 0)
6081                 return ret;
6082
6083 #ifdef CONFIG_TRACER_MAX_TRACE
6084         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6085             !tr->current_trace->use_max_tr)
6086                 goto out;
6087
6088         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6089         if (ret < 0) {
6090                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6091                                                      &tr->array_buffer, cpu);
6092                 if (r < 0) {
6093                         /*
6094                          * AARGH! We are left with a different
6095                          * sized max buffer!!!!
6096                          * The max buffer is our "snapshot" buffer.
6097                          * When a tracer needs a snapshot (one of the
6098                          * latency tracers), it swaps the max buffer
6099                          * with the saved snapshot. We succeeded in
6100                          * updating the size of the main buffer, but failed to
6101                          * update the size of the max buffer. But when we tried
6102                          * to reset the main buffer to the original size, we
6103                          * failed there too. This is very unlikely to
6104                          * happen, but if it does, warn and kill all
6105                          * tracing.
6106                          */
6107                         WARN_ON(1);
6108                         tracing_disabled = 1;
6109                 }
6110                 return ret;
6111         }
6112
6113         if (cpu == RING_BUFFER_ALL_CPUS)
6114                 set_buffer_entries(&tr->max_buffer, size);
6115         else
6116                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6117
6118  out:
6119 #endif /* CONFIG_TRACER_MAX_TRACE */
6120
6121         if (cpu == RING_BUFFER_ALL_CPUS)
6122                 set_buffer_entries(&tr->array_buffer, size);
6123         else
6124                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6125
6126         return ret;
6127 }
6128
6129 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6130                                   unsigned long size, int cpu_id)
6131 {
6132         int ret = size;
6133
6134         mutex_lock(&trace_types_lock);
6135
6136         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6137                 /* make sure this cpu is enabled in the mask */
6138                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6139                         ret = -EINVAL;
6140                         goto out;
6141                 }
6142         }
6143
6144         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6145         if (ret < 0)
6146                 ret = -ENOMEM;
6147
6148 out:
6149         mutex_unlock(&trace_types_lock);
6150
6151         return ret;
6152 }
6153
6154
6155 /**
6156  * tracing_update_buffers - used by tracing facility to expand ring buffers
6157  *
6158  * To save on memory when tracing is never used on a system with it
6159  * configured in, the ring buffers are set to a minimum size. But once
6160  * a user starts to use the tracing facility, then they need to grow
6161  * to their default size.
6162  *
6163  * This function is to be called when a tracer is about to be used.
6164  */
6165 int tracing_update_buffers(void)
6166 {
6167         int ret = 0;
6168
6169         mutex_lock(&trace_types_lock);
6170         if (!ring_buffer_expanded)
6171                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6172                                                 RING_BUFFER_ALL_CPUS);
6173         mutex_unlock(&trace_types_lock);
6174
6175         return ret;
6176 }
6177
6178 struct trace_option_dentry;
6179
6180 static void
6181 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6182
6183 /*
6184  * Used to clear out the tracer before deletion of an instance.
6185  * Must have trace_types_lock held.
6186  */
6187 static void tracing_set_nop(struct trace_array *tr)
6188 {
6189         if (tr->current_trace == &nop_trace)
6190                 return;
6191
6192         tr->current_trace->enabled--;
6193
6194         if (tr->current_trace->reset)
6195                 tr->current_trace->reset(tr);
6196
6197         tr->current_trace = &nop_trace;
6198 }
6199
6200 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6201 {
6202         /* Only enable if the directory has been created already. */
6203         if (!tr->dir)
6204                 return;
6205
6206         create_trace_option_files(tr, t);
6207 }
6208
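/*
 * Switch @tr to the tracer named in @buf. The name must match one of
 * the registered tracers listed in available_tracers; e.g. writing
 * "nop" through the current_tracer file selects the no-op tracer
 * (illustrative usage).
 */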
6209 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6210 {
6211         struct tracer *t;
6212 #ifdef CONFIG_TRACER_MAX_TRACE
6213         bool had_max_tr;
6214 #endif
6215         int ret = 0;
6216
6217         mutex_lock(&trace_types_lock);
6218
6219         if (!ring_buffer_expanded) {
6220                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6221                                                 RING_BUFFER_ALL_CPUS);
6222                 if (ret < 0)
6223                         goto out;
6224                 ret = 0;
6225         }
6226
6227         for (t = trace_types; t; t = t->next) {
6228                 if (strcmp(t->name, buf) == 0)
6229                         break;
6230         }
6231         if (!t) {
6232                 ret = -EINVAL;
6233                 goto out;
6234         }
6235         if (t == tr->current_trace)
6236                 goto out;
6237
6238 #ifdef CONFIG_TRACER_SNAPSHOT
6239         if (t->use_max_tr) {
6240                 arch_spin_lock(&tr->max_lock);
6241                 if (tr->cond_snapshot)
6242                         ret = -EBUSY;
6243                 arch_spin_unlock(&tr->max_lock);
6244                 if (ret)
6245                         goto out;
6246         }
6247 #endif
6248         /* Some tracers cannot be started from the kernel command line */
6249         if (system_state < SYSTEM_RUNNING && t->noboot) {
6250                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6251                         t->name);
6252                 goto out;
6253         }
6254
6255         /* Some tracers are only allowed for the top level buffer */
6256         if (!trace_ok_for_array(t, tr)) {
6257                 ret = -EINVAL;
6258                 goto out;
6259         }
6260
6261         /* If trace pipe files are being read, we can't change the tracer */
6262         if (tr->trace_ref) {
6263                 ret = -EBUSY;
6264                 goto out;
6265         }
6266
6267         trace_branch_disable();
6268
6269         tr->current_trace->enabled--;
6270
6271         if (tr->current_trace->reset)
6272                 tr->current_trace->reset(tr);
6273
6274         /* Current trace needs to be nop_trace before synchronize_rcu */
6275         tr->current_trace = &nop_trace;
6276
6277 #ifdef CONFIG_TRACER_MAX_TRACE
6278         had_max_tr = tr->allocated_snapshot;
6279
6280         if (had_max_tr && !t->use_max_tr) {
6281                 /*
6282                  * We need to make sure that update_max_tr() sees that
6283                  * current_trace changed to nop_trace to keep it from
6284                  * swapping the buffers after we resize it.
6285                  * update_max_tr() is called with interrupts disabled,
6286                  * so a synchronize_rcu() is sufficient.
6287                  */
6288                 synchronize_rcu();
6289                 free_snapshot(tr);
6290         }
6291 #endif
6292
6293 #ifdef CONFIG_TRACER_MAX_TRACE
6294         if (t->use_max_tr && !had_max_tr) {
6295                 ret = tracing_alloc_snapshot_instance(tr);
6296                 if (ret < 0)
6297                         goto out;
6298         }
6299 #endif
6300
6301         if (t->init) {
6302                 ret = tracer_init(t, tr);
6303                 if (ret)
6304                         goto out;
6305         }
6306
6307         tr->current_trace = t;
6308         tr->current_trace->enabled++;
6309         trace_branch_enable(tr);
6310  out:
6311         mutex_unlock(&trace_types_lock);
6312
6313         return ret;
6314 }
6315
6316 static ssize_t
6317 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6318                         size_t cnt, loff_t *ppos)
6319 {
6320         struct trace_array *tr = filp->private_data;
6321         char buf[MAX_TRACER_SIZE+1];
6322         int i;
6323         size_t ret;
6324         int err;
6325
6326         ret = cnt;
6327
6328         if (cnt > MAX_TRACER_SIZE)
6329                 cnt = MAX_TRACER_SIZE;
6330
6331         if (copy_from_user(buf, ubuf, cnt))
6332                 return -EFAULT;
6333
6334         buf[cnt] = 0;
6335
6336         /* strip trailing whitespace. */
6337         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6338                 buf[i] = 0;
6339
6340         err = tracing_set_tracer(tr, buf);
6341         if (err)
6342                 return err;
6343
6344         *ppos += ret;
6345
6346         return ret;
6347 }
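
/*
 * Illustrative usage of the current_tracer file that
 * tracing_set_trace_write() backs (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * A name that does not match any registered tracer makes
 * tracing_set_tracer() fail with -EINVAL.
 */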
6348
6349 static ssize_t
6350 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6351                    size_t cnt, loff_t *ppos)
6352 {
6353         char buf[64];
6354         int r;
6355
6356         r = snprintf(buf, sizeof(buf), "%ld\n",
6357                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6358         if (r > sizeof(buf))
6359                 r = sizeof(buf);
6360         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6361 }
6362
6363 static ssize_t
6364 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6365                     size_t cnt, loff_t *ppos)
6366 {
6367         unsigned long val;
6368         int ret;
6369
6370         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6371         if (ret)
6372                 return ret;
6373
6374         *ptr = val * 1000;
6375
6376         return cnt;
6377 }
6378
6379 static ssize_t
6380 tracing_thresh_read(struct file *filp, char __user *ubuf,
6381                     size_t cnt, loff_t *ppos)
6382 {
6383         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6384 }
6385
6386 static ssize_t
6387 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6388                      size_t cnt, loff_t *ppos)
6389 {
6390         struct trace_array *tr = filp->private_data;
6391         int ret;
6392
6393         mutex_lock(&trace_types_lock);
6394         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6395         if (ret < 0)
6396                 goto out;
6397
6398         if (tr->current_trace->update_thresh) {
6399                 ret = tr->current_trace->update_thresh(tr);
6400                 if (ret < 0)
6401                         goto out;
6402         }
6403
6404         ret = cnt;
6405 out:
6406         mutex_unlock(&trace_types_lock);
6407
6408         return ret;
6409 }
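
/*
 * Illustrative use of the tracing_thresh file these handlers back
 * (assuming tracefs is mounted at /sys/kernel/tracing). The value is
 * given in microseconds; tracing_nsecs_write() stores it internally in
 * nanoseconds (val * 1000):
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *	# cat /sys/kernel/tracing/tracing_thresh
 *	100
 */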
6410
6411 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6412
6413 static ssize_t
6414 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6415                      size_t cnt, loff_t *ppos)
6416 {
6417         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6418 }
6419
6420 static ssize_t
6421 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6422                       size_t cnt, loff_t *ppos)
6423 {
6424         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6425 }
6426
6427 #endif
6428
6429 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6430 {
6431         struct trace_array *tr = inode->i_private;
6432         struct trace_iterator *iter;
6433         int ret;
6434
6435         ret = tracing_check_open_get_tr(tr);
6436         if (ret)
6437                 return ret;
6438
6439         mutex_lock(&trace_types_lock);
6440
6441         /* create a buffer to store the information to pass to userspace */
6442         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6443         if (!iter) {
6444                 ret = -ENOMEM;
6445                 __trace_array_put(tr);
6446                 goto out;
6447         }
6448
6449         trace_seq_init(&iter->seq);
6450         iter->trace = tr->current_trace;
6451
6452         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6453                 ret = -ENOMEM;
6454                 goto fail;
6455         }
6456
6457         /* trace pipe does not show start of buffer */
6458         cpumask_setall(iter->started);
6459
6460         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6461                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6462
6463         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6464         if (trace_clocks[tr->clock_id].in_ns)
6465                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6466
6467         iter->tr = tr;
6468         iter->array_buffer = &tr->array_buffer;
6469         iter->cpu_file = tracing_get_cpu(inode);
6470         mutex_init(&iter->mutex);
6471         filp->private_data = iter;
6472
6473         if (iter->trace->pipe_open)
6474                 iter->trace->pipe_open(iter);
6475
6476         nonseekable_open(inode, filp);
6477
6478         tr->trace_ref++;
6479 out:
6480         mutex_unlock(&trace_types_lock);
6481         return ret;
6482
6483 fail:
6484         kfree(iter);
6485         __trace_array_put(tr);
6486         mutex_unlock(&trace_types_lock);
6487         return ret;
6488 }
6489
6490 static int tracing_release_pipe(struct inode *inode, struct file *file)
6491 {
6492         struct trace_iterator *iter = file->private_data;
6493         struct trace_array *tr = inode->i_private;
6494
6495         mutex_lock(&trace_types_lock);
6496
6497         tr->trace_ref--;
6498
6499         if (iter->trace->pipe_close)
6500                 iter->trace->pipe_close(iter);
6501
6502         mutex_unlock(&trace_types_lock);
6503
6504         free_cpumask_var(iter->started);
6505         mutex_destroy(&iter->mutex);
6506         kfree(iter);
6507
6508         trace_array_put(tr);
6509
6510         return 0;
6511 }
6512
6513 static __poll_t
6514 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6515 {
6516         struct trace_array *tr = iter->tr;
6517
6518         /* Iterators are static, they should be filled or empty */
6519         if (trace_buffer_iter(iter, iter->cpu_file))
6520                 return EPOLLIN | EPOLLRDNORM;
6521
6522         if (tr->trace_flags & TRACE_ITER_BLOCK)
6523                 /*
6524                  * Always select as readable when in blocking mode
6525                  */
6526                 return EPOLLIN | EPOLLRDNORM;
6527         else
6528                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6529                                              filp, poll_table);
6530 }
6531
6532 static __poll_t
6533 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6534 {
6535         struct trace_iterator *iter = filp->private_data;
6536
6537         return trace_poll(iter, filp, poll_table);
6538 }
6539
6540 /* Must be called with iter->mutex held. */
6541 static int tracing_wait_pipe(struct file *filp)
6542 {
6543         struct trace_iterator *iter = filp->private_data;
6544         int ret;
6545
6546         while (trace_empty(iter)) {
6547
6548                 if ((filp->f_flags & O_NONBLOCK)) {
6549                         return -EAGAIN;
6550                 }
6551
6552                 /*
6553                  * We block until we read something and tracing is disabled.
6554                  * We still block if tracing is disabled, but we have never
6555                  * read anything. This allows a user to cat this file, and
6556                  * then enable tracing. But after we have read something,
6557                  * we give an EOF when tracing is again disabled.
6558                  *
6559                  * iter->pos will be 0 if we haven't read anything.
6560                  */
6561                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6562                         break;
6563
6564                 mutex_unlock(&iter->mutex);
6565
6566                 ret = wait_on_pipe(iter, 0);
6567
6568                 mutex_lock(&iter->mutex);
6569
6570                 if (ret)
6571                         return ret;
6572         }
6573
6574         return 1;
6575 }
6576
6577 /*
6578  * Consumer reader.
6579  */
6580 static ssize_t
6581 tracing_read_pipe(struct file *filp, char __user *ubuf,
6582                   size_t cnt, loff_t *ppos)
6583 {
6584         struct trace_iterator *iter = filp->private_data;
6585         ssize_t sret;
6586
6587         /*
6588          * Avoid more than one consumer on a single file descriptor.
6589          * This is just a matter of trace coherency; the ring buffer itself
6590          * is protected.
6591          */
6592         mutex_lock(&iter->mutex);
6593
6594         /* return any leftover data */
6595         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6596         if (sret != -EBUSY)
6597                 goto out;
6598
6599         trace_seq_init(&iter->seq);
6600
6601         if (iter->trace->read) {
6602                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6603                 if (sret)
6604                         goto out;
6605         }
6606
6607 waitagain:
6608         sret = tracing_wait_pipe(filp);
6609         if (sret <= 0)
6610                 goto out;
6611
6612         /* stop when tracing is finished */
6613         if (trace_empty(iter)) {
6614                 sret = 0;
6615                 goto out;
6616         }
6617
6618         if (cnt >= PAGE_SIZE)
6619                 cnt = PAGE_SIZE - 1;
6620
6621         /* reset all but tr, trace, and overruns */
6622         memset(&iter->seq, 0,
6623                sizeof(struct trace_iterator) -
6624                offsetof(struct trace_iterator, seq));
6625         cpumask_clear(iter->started);
6626         trace_seq_init(&iter->seq);
6627         iter->pos = -1;
6628
6629         trace_event_read_lock();
6630         trace_access_lock(iter->cpu_file);
6631         while (trace_find_next_entry_inc(iter) != NULL) {
6632                 enum print_line_t ret;
6633                 int save_len = iter->seq.seq.len;
6634
6635                 ret = print_trace_line(iter);
6636                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6637                         /* don't print partial lines */
6638                         iter->seq.seq.len = save_len;
6639                         break;
6640                 }
6641                 if (ret != TRACE_TYPE_NO_CONSUME)
6642                         trace_consume(iter);
6643
6644                 if (trace_seq_used(&iter->seq) >= cnt)
6645                         break;
6646
6647                 /*
6648                  * Setting the full flag means we reached the trace_seq buffer
6649                  * size and should have left via the partial-line check above;
6650                  * one of the trace_seq_* functions is not being used properly.
6651                  */
6652                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6653                           iter->ent->type);
6654         }
6655         trace_access_unlock(iter->cpu_file);
6656         trace_event_read_unlock();
6657
6658         /* Now copy what we have to the user */
6659         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6660         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6661                 trace_seq_init(&iter->seq);
6662
6663         /*
6664          * If there was nothing to send to user, in spite of consuming trace
6665          * entries, go back to wait for more entries.
6666          */
6667         if (sret == -EBUSY)
6668                 goto waitagain;
6669
6670 out:
6671         mutex_unlock(&iter->mutex);
6672
6673         return sret;
6674 }
6675
6676 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6677                                      unsigned int idx)
6678 {
6679         __free_page(spd->pages[idx]);
6680 }
6681
6682 static size_t
6683 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6684 {
6685         size_t count;
6686         int save_len;
6687         int ret;
6688
6689         /* Seq buffer is page-sized, exactly what we need. */
6690         for (;;) {
6691                 save_len = iter->seq.seq.len;
6692                 ret = print_trace_line(iter);
6693
6694                 if (trace_seq_has_overflowed(&iter->seq)) {
6695                         iter->seq.seq.len = save_len;
6696                         break;
6697                 }
6698
6699                 /*
6700                  * This should not be hit, because a partial line should
6701                  * only be returned if iter->seq overflowed. But check it
6702                  * anyway to be safe.
6703                  */
6704                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6705                         iter->seq.seq.len = save_len;
6706                         break;
6707                 }
6708
6709                 count = trace_seq_used(&iter->seq) - save_len;
6710                 if (rem < count) {
6711                         rem = 0;
6712                         iter->seq.seq.len = save_len;
6713                         break;
6714                 }
6715
6716                 if (ret != TRACE_TYPE_NO_CONSUME)
6717                         trace_consume(iter);
6718                 rem -= count;
6719                 if (!trace_find_next_entry_inc(iter))   {
6720                         rem = 0;
6721                         iter->ent = NULL;
6722                         break;
6723                 }
6724         }
6725
6726         return rem;
6727 }
6728
6729 static ssize_t tracing_splice_read_pipe(struct file *filp,
6730                                         loff_t *ppos,
6731                                         struct pipe_inode_info *pipe,
6732                                         size_t len,
6733                                         unsigned int flags)
6734 {
6735         struct page *pages_def[PIPE_DEF_BUFFERS];
6736         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6737         struct trace_iterator *iter = filp->private_data;
6738         struct splice_pipe_desc spd = {
6739                 .pages          = pages_def,
6740                 .partial        = partial_def,
6741                 .nr_pages       = 0, /* This gets updated below. */
6742                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6743                 .ops            = &default_pipe_buf_ops,
6744                 .spd_release    = tracing_spd_release_pipe,
6745         };
6746         ssize_t ret;
6747         size_t rem;
6748         unsigned int i;
6749
6750         if (splice_grow_spd(pipe, &spd))
6751                 return -ENOMEM;
6752
6753         mutex_lock(&iter->mutex);
6754
6755         if (iter->trace->splice_read) {
6756                 ret = iter->trace->splice_read(iter, filp,
6757                                                ppos, pipe, len, flags);
6758                 if (ret)
6759                         goto out_err;
6760         }
6761
6762         ret = tracing_wait_pipe(filp);
6763         if (ret <= 0)
6764                 goto out_err;
6765
6766         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6767                 ret = -EFAULT;
6768                 goto out_err;
6769         }
6770
6771         trace_event_read_lock();
6772         trace_access_lock(iter->cpu_file);
6773
6774         /* Fill as many pages as possible. */
6775         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6776                 spd.pages[i] = alloc_page(GFP_KERNEL);
6777                 if (!spd.pages[i])
6778                         break;
6779
6780                 rem = tracing_fill_pipe_page(rem, iter);
6781
6782                 /* Copy the data into the page, so we can start over. */
6783                 ret = trace_seq_to_buffer(&iter->seq,
6784                                           page_address(spd.pages[i]),
6785                                           trace_seq_used(&iter->seq));
6786                 if (ret < 0) {
6787                         __free_page(spd.pages[i]);
6788                         break;
6789                 }
6790                 spd.partial[i].offset = 0;
6791                 spd.partial[i].len = trace_seq_used(&iter->seq);
6792
6793                 trace_seq_init(&iter->seq);
6794         }
6795
6796         trace_access_unlock(iter->cpu_file);
6797         trace_event_read_unlock();
6798         mutex_unlock(&iter->mutex);
6799
6800         spd.nr_pages = i;
6801
6802         if (i)
6803                 ret = splice_to_pipe(pipe, &spd);
6804         else
6805                 ret = 0;
6806 out:
6807         splice_shrink_spd(&spd);
6808         return ret;
6809
6810 out_err:
6811         mutex_unlock(&iter->mutex);
6812         goto out;
6813 }
6814
6815 static ssize_t
6816 tracing_entries_read(struct file *filp, char __user *ubuf,
6817                      size_t cnt, loff_t *ppos)
6818 {
6819         struct inode *inode = file_inode(filp);
6820         struct trace_array *tr = inode->i_private;
6821         int cpu = tracing_get_cpu(inode);
6822         char buf[64];
6823         int r = 0;
6824         ssize_t ret;
6825
6826         mutex_lock(&trace_types_lock);
6827
6828         if (cpu == RING_BUFFER_ALL_CPUS) {
6829                 int cpu, buf_size_same;
6830                 unsigned long size;
6831
6832                 size = 0;
6833                 buf_size_same = 1;
6834                 /* check if all cpu sizes are same */
6835                 for_each_tracing_cpu(cpu) {
6836                         /* fill in the size from first enabled cpu */
6837                         if (size == 0)
6838                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6839                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6840                                 buf_size_same = 0;
6841                                 break;
6842                         }
6843                 }
6844
6845                 if (buf_size_same) {
6846                         if (!ring_buffer_expanded)
6847                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6848                                             size >> 10,
6849                                             trace_buf_size >> 10);
6850                         else
6851                                 r = sprintf(buf, "%lu\n", size >> 10);
6852                 } else
6853                         r = sprintf(buf, "X\n");
6854         } else
6855                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6856
6857         mutex_unlock(&trace_types_lock);
6858
6859         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6860         return ret;
6861 }
6862
6863 static ssize_t
6864 tracing_entries_write(struct file *filp, const char __user *ubuf,
6865                       size_t cnt, loff_t *ppos)
6866 {
6867         struct inode *inode = file_inode(filp);
6868         struct trace_array *tr = inode->i_private;
6869         unsigned long val;
6870         int ret;
6871
6872         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6873         if (ret)
6874                 return ret;
6875
6876         /* must have at least 1 entry */
6877         if (!val)
6878                 return -EINVAL;
6879
6880         /* value is in KB */
6881         val <<= 10;
6882         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6883         if (ret < 0)
6884                 return ret;
6885
6886         *ppos += cnt;
6887
6888         return cnt;
6889 }
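
/*
 * Illustrative use of the buffer_size_kb interface backed by
 * tracing_entries_read()/write() (assuming tracefs is mounted at
 * /sys/kernel/tracing). Writes are in kilobytes and resize either one
 * CPU's buffer (via the per_cpu files) or all of them:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 *
 * If the per-CPU sizes differ, the top-level file prints "X" instead
 * of a single size.
 */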
6890
6891 static ssize_t
6892 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6893                                 size_t cnt, loff_t *ppos)
6894 {
6895         struct trace_array *tr = filp->private_data;
6896         char buf[64];
6897         int r, cpu;
6898         unsigned long size = 0, expanded_size = 0;
6899
6900         mutex_lock(&trace_types_lock);
6901         for_each_tracing_cpu(cpu) {
6902                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6903                 if (!ring_buffer_expanded)
6904                         expanded_size += trace_buf_size >> 10;
6905         }
6906         if (ring_buffer_expanded)
6907                 r = sprintf(buf, "%lu\n", size);
6908         else
6909                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6910         mutex_unlock(&trace_types_lock);
6911
6912         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6913 }
6914
6915 static ssize_t
6916 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6917                           size_t cnt, loff_t *ppos)
6918 {
6919         /*
6920          * There is no need to read what the user has written; this function
6921          * exists just so that "echo" into the file does not return an error.
6922          */
6923
6924         *ppos += cnt;
6925
6926         return cnt;
6927 }
6928
6929 static int
6930 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6931 {
6932         struct trace_array *tr = inode->i_private;
6933
6934         /* disable tracing? */
6935         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6936                 tracer_tracing_off(tr);
6937         /* resize the ring buffer to 0 */
6938         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6939
6940         trace_array_put(tr);
6941
6942         return 0;
6943 }
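
/*
 * Illustrative use of the free_buffer file implemented by these two
 * handlers (assuming tracefs is mounted at /sys/kernel/tracing). When
 * the file is released after a write, the ring buffer is shrunk to
 * zero, and with TRACE_ITER_STOP_ON_FREE set tracing is turned off
 * first:
 *
 *	# echo 1 > /sys/kernel/tracing/free_buffer
 */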
6944
6945 static ssize_t
6946 tracing_mark_write(struct file *filp, const char __user *ubuf,
6947                                         size_t cnt, loff_t *fpos)
6948 {
6949         struct trace_array *tr = filp->private_data;
6950         struct ring_buffer_event *event;
6951         enum event_trigger_type tt = ETT_NONE;
6952         struct trace_buffer *buffer;
6953         struct print_entry *entry;
6954         ssize_t written;
6955         int size;
6956         int len;
6957
6958 /* Used in tracing_mark_raw_write() as well */
6959 #define FAULTED_STR "<faulted>"
6960 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6961
6962         if (tracing_disabled)
6963                 return -EINVAL;
6964
6965         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6966                 return -EINVAL;
6967
6968         if (cnt > TRACE_BUF_SIZE)
6969                 cnt = TRACE_BUF_SIZE;
6970
6971         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6972
6973         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6974
6975         /* If less than "<faulted>", then make sure we can still add that */
6976         if (cnt < FAULTED_SIZE)
6977                 size += FAULTED_SIZE - cnt;
6978
6979         buffer = tr->array_buffer.buffer;
6980         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6981                                             tracing_gen_ctx());
6982         if (unlikely(!event))
6983                 /* Ring buffer disabled, return as if not open for write */
6984                 return -EBADF;
6985
6986         entry = ring_buffer_event_data(event);
6987         entry->ip = _THIS_IP_;
6988
6989         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6990         if (len) {
6991                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6992                 cnt = FAULTED_SIZE;
6993                 written = -EFAULT;
6994         } else
6995                 written = cnt;
6996
6997         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6998                 /* do not add \n before testing triggers, but add \0 */
6999                 entry->buf[cnt] = '\0';
7000                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7001         }
7002
7003         if (entry->buf[cnt - 1] != '\n') {
7004                 entry->buf[cnt] = '\n';
7005                 entry->buf[cnt + 1] = '\0';
7006         } else
7007                 entry->buf[cnt] = '\0';
7008
7009         if (static_branch_unlikely(&trace_marker_exports_enabled))
7010                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7011         __buffer_unlock_commit(buffer, event);
7012
7013         if (tt)
7014                 event_triggers_post_call(tr->trace_marker_file, tt);
7015
7016         if (written > 0)
7017                 *fpos += written;
7018
7019         return written;
7020 }
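
/*
 * Illustrative use of the trace_marker file backed by
 * tracing_mark_write() (assuming tracefs is mounted at
 * /sys/kernel/tracing). Each write is recorded as a TRACE_PRINT entry
 * in the ring buffer, with a '\n' appended if the writer did not
 * supply one:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string then shows up as a print entry when reading the trace
 * or trace_pipe files.
 */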
7021
7022 /* Limit it for now to 3K (including tag) */
7023 #define RAW_DATA_MAX_SIZE (1024*3)
7024
7025 static ssize_t
7026 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7027                                         size_t cnt, loff_t *fpos)
7028 {
7029         struct trace_array *tr = filp->private_data;
7030         struct ring_buffer_event *event;
7031         struct trace_buffer *buffer;
7032         struct raw_data_entry *entry;
7033         ssize_t written;
7034         int size;
7035         int len;
7036
7037 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7038
7039         if (tracing_disabled)
7040                 return -EINVAL;
7041
7042         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7043                 return -EINVAL;
7044
7045         /* The marker must at least have a tag id */
7046         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7047                 return -EINVAL;
7048
7049         if (cnt > TRACE_BUF_SIZE)
7050                 cnt = TRACE_BUF_SIZE;
7051
7052         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7053
7054         size = sizeof(*entry) + cnt;
7055         if (cnt < FAULT_SIZE_ID)
7056                 size += FAULT_SIZE_ID - cnt;
7057
7058         buffer = tr->array_buffer.buffer;
7059         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7060                                             tracing_gen_ctx());
7061         if (!event)
7062                 /* Ring buffer disabled, return as if not open for write */
7063                 return -EBADF;
7064
7065         entry = ring_buffer_event_data(event);
7066
7067         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7068         if (len) {
7069                 entry->id = -1;
7070                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7071                 written = -EFAULT;
7072         } else
7073                 written = cnt;
7074
7075         __buffer_unlock_commit(buffer, event);
7076
7077         if (written > 0)
7078                 *fpos += written;
7079
7080         return written;
7081 }
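
/*
 * Unlike trace_marker, the trace_marker_raw file expects binary data
 * that begins with an unsigned int tag id. A minimal user-space sketch
 * (hypothetical example program, not kernel code; path assumes tracefs
 * is mounted at /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	struct { unsigned int id; char payload[8]; } rec = { 42, "rawdata" };
 *
 *	write(fd, &rec, sizeof(rec));
 *	close(fd);
 *
 * Writes smaller than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL above.
 */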
7082
7083 static int tracing_clock_show(struct seq_file *m, void *v)
7084 {
7085         struct trace_array *tr = m->private;
7086         int i;
7087
7088         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7089                 seq_printf(m,
7090                         "%s%s%s%s", i ? " " : "",
7091                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7092                         i == tr->clock_id ? "]" : "");
7093         seq_putc(m, '\n');
7094
7095         return 0;
7096 }
7097
7098 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7099 {
7100         int i;
7101
7102         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7103                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7104                         break;
7105         }
7106         if (i == ARRAY_SIZE(trace_clocks))
7107                 return -EINVAL;
7108
7109         mutex_lock(&trace_types_lock);
7110
7111         tr->clock_id = i;
7112
7113         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7114
7115         /*
7116          * New clock may not be consistent with the previous clock.
7117          * Reset the buffer so that it doesn't have incomparable timestamps.
7118          */
7119         tracing_reset_online_cpus(&tr->array_buffer);
7120
7121 #ifdef CONFIG_TRACER_MAX_TRACE
7122         if (tr->max_buffer.buffer)
7123                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7124         tracing_reset_online_cpus(&tr->max_buffer);
7125 #endif
7126
7127         mutex_unlock(&trace_types_lock);
7128
7129         return 0;
7130 }
7131
7132 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7133                                    size_t cnt, loff_t *fpos)
7134 {
7135         struct seq_file *m = filp->private_data;
7136         struct trace_array *tr = m->private;
7137         char buf[64];
7138         const char *clockstr;
7139         int ret;
7140
7141         if (cnt >= sizeof(buf))
7142                 return -EINVAL;
7143
7144         if (copy_from_user(buf, ubuf, cnt))
7145                 return -EFAULT;
7146
7147         buf[cnt] = 0;
7148
7149         clockstr = strstrip(buf);
7150
7151         ret = tracing_set_clock(tr, clockstr);
7152         if (ret)
7153                 return ret;
7154
7155         *fpos += cnt;
7156
7157         return cnt;
7158 }
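
/*
 * Illustrative interaction with the trace_clock file (assuming tracefs
 * is mounted at /sys/kernel/tracing). Reading lists the available
 * clocks with the current one in brackets; writing one of the listed
 * names switches clocks and resets the buffers:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime ...
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * A name not found in trace_clocks[] makes tracing_set_clock() return
 * -EINVAL.
 */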
7159
7160 static int tracing_clock_open(struct inode *inode, struct file *file)
7161 {
7162         struct trace_array *tr = inode->i_private;
7163         int ret;
7164
7165         ret = tracing_check_open_get_tr(tr);
7166         if (ret)
7167                 return ret;
7168
7169         ret = single_open(file, tracing_clock_show, inode->i_private);
7170         if (ret < 0)
7171                 trace_array_put(tr);
7172
7173         return ret;
7174 }
7175
7176 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7177 {
7178         struct trace_array *tr = m->private;
7179
7180         mutex_lock(&trace_types_lock);
7181
7182         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7183                 seq_puts(m, "delta [absolute]\n");
7184         else
7185                 seq_puts(m, "[delta] absolute\n");
7186
7187         mutex_unlock(&trace_types_lock);
7188
7189         return 0;
7190 }
7191
7192 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7193 {
7194         struct trace_array *tr = inode->i_private;
7195         int ret;
7196
7197         ret = tracing_check_open_get_tr(tr);
7198         if (ret)
7199                 return ret;
7200
7201         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7202         if (ret < 0)
7203                 trace_array_put(tr);
7204
7205         return ret;
7206 }
7207
7208 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7209 {
7210         if (rbe == this_cpu_read(trace_buffered_event))
7211                 return ring_buffer_time_stamp(buffer);
7212
7213         return ring_buffer_event_time_stamp(buffer, rbe);
7214 }
7215
7216 /*
7217  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7218  */
7219 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7220 {
7221         int ret = 0;
7222
7223         mutex_lock(&trace_types_lock);
7224
7225         if (set && tr->no_filter_buffering_ref++)
7226                 goto out;
7227
7228         if (!set) {
7229                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7230                         ret = -EINVAL;
7231                         goto out;
7232                 }
7233
7234                 --tr->no_filter_buffering_ref;
7235         }
7236  out:
7237         mutex_unlock(&trace_types_lock);
7238
7239         return ret;
7240 }
7241
7242 struct ftrace_buffer_info {
7243         struct trace_iterator   iter;
7244         void                    *spare;
7245         unsigned int            spare_cpu;
7246         unsigned int            read;
7247 };
7248
7249 #ifdef CONFIG_TRACER_SNAPSHOT
7250 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7251 {
7252         struct trace_array *tr = inode->i_private;
7253         struct trace_iterator *iter;
7254         struct seq_file *m;
7255         int ret;
7256
7257         ret = tracing_check_open_get_tr(tr);
7258         if (ret)
7259                 return ret;
7260
7261         if (file->f_mode & FMODE_READ) {
7262                 iter = __tracing_open(inode, file, true);
7263                 if (IS_ERR(iter))
7264                         ret = PTR_ERR(iter);
7265         } else {
7266                 /* Writes still need the seq_file to hold the private data */
7267                 ret = -ENOMEM;
7268                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7269                 if (!m)
7270                         goto out;
7271                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7272                 if (!iter) {
7273                         kfree(m);
7274                         goto out;
7275                 }
7276                 ret = 0;
7277
7278                 iter->tr = tr;
7279                 iter->array_buffer = &tr->max_buffer;
7280                 iter->cpu_file = tracing_get_cpu(inode);
7281                 m->private = iter;
7282                 file->private_data = m;
7283         }
7284 out:
7285         if (ret < 0)
7286                 trace_array_put(tr);
7287
7288         return ret;
7289 }
7290
7291 static ssize_t
7292 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7293                        loff_t *ppos)
7294 {
7295         struct seq_file *m = filp->private_data;
7296         struct trace_iterator *iter = m->private;
7297         struct trace_array *tr = iter->tr;
7298         unsigned long val;
7299         int ret;
7300
7301         ret = tracing_update_buffers();
7302         if (ret < 0)
7303                 return ret;
7304
7305         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7306         if (ret)
7307                 return ret;
7308
7309         mutex_lock(&trace_types_lock);
7310
7311         if (tr->current_trace->use_max_tr) {
7312                 ret = -EBUSY;
7313                 goto out;
7314         }
7315
7316         arch_spin_lock(&tr->max_lock);
7317         if (tr->cond_snapshot)
7318                 ret = -EBUSY;
7319         arch_spin_unlock(&tr->max_lock);
7320         if (ret)
7321                 goto out;
7322
7323         switch (val) {
7324         case 0:
7325                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7326                         ret = -EINVAL;
7327                         break;
7328                 }
7329                 if (tr->allocated_snapshot)
7330                         free_snapshot(tr);
7331                 break;
7332         case 1:
7333 /* Only allow per-cpu swap if the ring buffer supports it */
7334 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7335                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7336                         ret = -EINVAL;
7337                         break;
7338                 }
7339 #endif
7340                 if (tr->allocated_snapshot)
7341                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7342                                         &tr->array_buffer, iter->cpu_file);
7343                 else
7344                         ret = tracing_alloc_snapshot_instance(tr);
7345                 if (ret < 0)
7346                         break;
7347                 local_irq_disable();
7348                 /* Now, we're going to swap */
7349                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7350                         update_max_tr(tr, current, smp_processor_id(), NULL);
7351                 else
7352                         update_max_tr_single(tr, current, iter->cpu_file);
7353                 local_irq_enable();
7354                 break;
7355         default:
7356                 if (tr->allocated_snapshot) {
7357                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7358                                 tracing_reset_online_cpus(&tr->max_buffer);
7359                         else
7360                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7361                 }
7362                 break;
7363         }
7364
7365         if (ret >= 0) {
7366                 *ppos += cnt;
7367                 ret = cnt;
7368         }
7369 out:
7370         mutex_unlock(&trace_types_lock);
7371         return ret;
7372 }
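
/*
 * Illustrative snapshot file usage matching the switch statement above
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot    allocate if needed and
 *	                                           swap in a snapshot
 *	# cat /sys/kernel/tracing/snapshot         read the snapshot
 *	# echo 2 > /sys/kernel/tracing/snapshot    clear the snapshot buffer
 *	                                           (any value > 1 clears)
 *	# echo 0 > /sys/kernel/tracing/snapshot    free the snapshot buffer
 */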
7373
7374 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7375 {
7376         struct seq_file *m = file->private_data;
7377         int ret;
7378
7379         ret = tracing_release(inode, file);
7380
7381         if (file->f_mode & FMODE_READ)
7382                 return ret;
7383
7384         /* If write only, the seq_file is just a stub */
7385         if (m)
7386                 kfree(m->private);
7387         kfree(m);
7388
7389         return 0;
7390 }
7391
7392 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7393 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7394                                     size_t count, loff_t *ppos);
7395 static int tracing_buffers_release(struct inode *inode, struct file *file);
7396 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7397                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7398
7399 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7400 {
7401         struct ftrace_buffer_info *info;
7402         int ret;
7403
7404         /* The following checks for tracefs lockdown */
7405         ret = tracing_buffers_open(inode, filp);
7406         if (ret < 0)
7407                 return ret;
7408
7409         info = filp->private_data;
7410
7411         if (info->iter.trace->use_max_tr) {
7412                 tracing_buffers_release(inode, filp);
7413                 return -EBUSY;
7414         }
7415
7416         info->iter.snapshot = true;
7417         info->iter.array_buffer = &info->iter.tr->max_buffer;
7418
7419         return ret;
7420 }
7421
7422 #endif /* CONFIG_TRACER_SNAPSHOT */
7423
7424
7425 static const struct file_operations tracing_thresh_fops = {
7426         .open           = tracing_open_generic,
7427         .read           = tracing_thresh_read,
7428         .write          = tracing_thresh_write,
7429         .llseek         = generic_file_llseek,
7430 };
7431
7432 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7433 static const struct file_operations tracing_max_lat_fops = {
7434         .open           = tracing_open_generic,
7435         .read           = tracing_max_lat_read,
7436         .write          = tracing_max_lat_write,
7437         .llseek         = generic_file_llseek,
7438 };
7439 #endif
7440
7441 static const struct file_operations set_tracer_fops = {
7442         .open           = tracing_open_generic,
7443         .read           = tracing_set_trace_read,
7444         .write          = tracing_set_trace_write,
7445         .llseek         = generic_file_llseek,
7446 };
7447
7448 static const struct file_operations tracing_pipe_fops = {
7449         .open           = tracing_open_pipe,
7450         .poll           = tracing_poll_pipe,
7451         .read           = tracing_read_pipe,
7452         .splice_read    = tracing_splice_read_pipe,
7453         .release        = tracing_release_pipe,
7454         .llseek         = no_llseek,
7455 };
7456
7457 static const struct file_operations tracing_entries_fops = {
7458         .open           = tracing_open_generic_tr,
7459         .read           = tracing_entries_read,
7460         .write          = tracing_entries_write,
7461         .llseek         = generic_file_llseek,
7462         .release        = tracing_release_generic_tr,
7463 };
7464
7465 static const struct file_operations tracing_total_entries_fops = {
7466         .open           = tracing_open_generic_tr,
7467         .read           = tracing_total_entries_read,
7468         .llseek         = generic_file_llseek,
7469         .release        = tracing_release_generic_tr,
7470 };
7471
7472 static const struct file_operations tracing_free_buffer_fops = {
7473         .open           = tracing_open_generic_tr,
7474         .write          = tracing_free_buffer_write,
7475         .release        = tracing_free_buffer_release,
7476 };
7477
7478 static const struct file_operations tracing_mark_fops = {
7479         .open           = tracing_open_generic_tr,
7480         .write          = tracing_mark_write,
7481         .llseek         = generic_file_llseek,
7482         .release        = tracing_release_generic_tr,
7483 };
7484
7485 static const struct file_operations tracing_mark_raw_fops = {
7486         .open           = tracing_open_generic_tr,
7487         .write          = tracing_mark_raw_write,
7488         .llseek         = generic_file_llseek,
7489         .release        = tracing_release_generic_tr,
7490 };
7491
7492 static const struct file_operations trace_clock_fops = {
7493         .open           = tracing_clock_open,
7494         .read           = seq_read,
7495         .llseek         = seq_lseek,
7496         .release        = tracing_single_release_tr,
7497         .write          = tracing_clock_write,
7498 };
7499
7500 static const struct file_operations trace_time_stamp_mode_fops = {
7501         .open           = tracing_time_stamp_mode_open,
7502         .read           = seq_read,
7503         .llseek         = seq_lseek,
7504         .release        = tracing_single_release_tr,
7505 };
7506
7507 #ifdef CONFIG_TRACER_SNAPSHOT
7508 static const struct file_operations snapshot_fops = {
7509         .open           = tracing_snapshot_open,
7510         .read           = seq_read,
7511         .write          = tracing_snapshot_write,
7512         .llseek         = tracing_lseek,
7513         .release        = tracing_snapshot_release,
7514 };
7515
7516 static const struct file_operations snapshot_raw_fops = {
7517         .open           = snapshot_raw_open,
7518         .read           = tracing_buffers_read,
7519         .release        = tracing_buffers_release,
7520         .splice_read    = tracing_buffers_splice_read,
7521         .llseek         = no_llseek,
7522 };
7523
7524 #endif /* CONFIG_TRACER_SNAPSHOT */
7525
7526 #define TRACING_LOG_ERRS_MAX    8
7527 #define TRACING_LOG_LOC_MAX     128
7528
7529 #define CMD_PREFIX "  Command: "
7530
7531 struct err_info {
7532         const char      **errs; /* ptr to loc-specific array of err strings */
7533         u8              type;   /* index into errs -> specific err string */
7534         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7535         u64             ts;
7536 };
7537
7538 struct tracing_log_err {
7539         struct list_head        list;
7540         struct err_info         info;
7541         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7542         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7543 };
7544
7545 static DEFINE_MUTEX(tracing_err_log_lock);
7546
7547 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7548 {
7549         struct tracing_log_err *err;
7550
7551         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7552                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7553                 if (!err)
7554                         err = ERR_PTR(-ENOMEM);
7555                 tr->n_err_log_entries++;
7556
7557                 return err;
7558         }
7559
7560         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7561         list_del(&err->list);
7562
7563         return err;
7564 }
7565
7566 /**
7567  * err_pos - find the position of a string within a command for error careting
7568  * @cmd: The tracing command that caused the error
7569  * @str: The string to position the caret at within @cmd
7570  *
7571  * Finds the position of the first occurrence of @str within @cmd.  The
7572  * return value can be passed to tracing_log_err() for caret placement
7573  * within @cmd.
7574  *
7575  * Returns the index within @cmd of the first occurrence of @str or 0
7576  * if @str was not found.
7577  */
7578 unsigned int err_pos(char *cmd, const char *str)
7579 {
7580         char *found;
7581
7582         if (WARN_ON(!strlen(cmd)))
7583                 return 0;
7584
7585         found = strstr(cmd, str);
7586         if (found)
7587                 return found - cmd;
7588
7589         return 0;
7590 }
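
/*
 * For example (illustrative), with a hist trigger command:
 *
 *	err_pos("hist:keys=common_pid", "common_pid") == 10
 *
 * and tracing_log_err() uses that value to place the caret under the
 * offending part of the command in the error_log output.
 */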
7591
7592 /**
7593  * tracing_log_err - write an error to the tracing error log
7594  * @tr: The associated trace array for the error (NULL for top level array)
7595  * @loc: A string describing where the error occurred
7596  * @cmd: The tracing command that caused the error
7597  * @errs: The array of loc-specific static error strings
7598  * @type: The index into errs[], which produces the specific static err string
7599  * @pos: The position the caret should be placed in the cmd
7600  *
7601  * Writes an error into tracing/error_log of the form:
7602  *
7603  * <loc>: error: <text>
7604  *   Command: <cmd>
7605  *              ^
7606  *
7607  * tracing/error_log is a small log file containing the last
7608  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7609  * unless there has been a tracing error, and the error log can be
7610  * cleared and have its memory freed by writing the empty string in
7611  * truncation mode to it i.e. echo > tracing/error_log.
7612  *
7613  * NOTE: the @errs array along with the @type param are used to
7614  * produce a static error string - this string is not copied and saved
7615  * when the error is logged - only a pointer to it is saved.  See
7616  * existing callers for examples of how static strings are typically
7617  * defined for use with tracing_log_err().
7618  */
7619 void tracing_log_err(struct trace_array *tr,
7620                      const char *loc, const char *cmd,
7621                      const char **errs, u8 type, u8 pos)
7622 {
7623         struct tracing_log_err *err;
7624
7625         if (!tr)
7626                 tr = &global_trace;
7627
7628         mutex_lock(&tracing_err_log_lock);
7629         err = get_tracing_log_err(tr);
7630         if (PTR_ERR(err) == -ENOMEM) {
7631                 mutex_unlock(&tracing_err_log_lock);
7632                 return;
7633         }
7634
7635         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7636         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7637
7638         err->info.errs = errs;
7639         err->info.type = type;
7640         err->info.pos = pos;
7641         err->info.ts = local_clock();
7642
7643         list_add_tail(&err->list, &tr->err_log);
7644         mutex_unlock(&tracing_err_log_lock);
7645 }
7646
7647 static void clear_tracing_err_log(struct trace_array *tr)
7648 {
7649         struct tracing_log_err *err, *next;
7650
7651         mutex_lock(&tracing_err_log_lock);
7652         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7653                 list_del(&err->list);
7654                 kfree(err);
7655         }
7656
7657         tr->n_err_log_entries = 0;
7658         mutex_unlock(&tracing_err_log_lock);
7659 }
7660
7661 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7662 {
7663         struct trace_array *tr = m->private;
7664
7665         mutex_lock(&tracing_err_log_lock);
7666
7667         return seq_list_start(&tr->err_log, *pos);
7668 }
7669
7670 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7671 {
7672         struct trace_array *tr = m->private;
7673
7674         return seq_list_next(v, &tr->err_log, pos);
7675 }
7676
7677 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7678 {
7679         mutex_unlock(&tracing_err_log_lock);
7680 }
7681
7682 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7683 {
7684         u8 i;
7685
7686         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7687                 seq_putc(m, ' ');
7688         for (i = 0; i < pos; i++)
7689                 seq_putc(m, ' ');
7690         seq_puts(m, "^\n");
7691 }
7692
7693 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7694 {
7695         struct tracing_log_err *err = v;
7696
7697         if (err) {
7698                 const char *err_text = err->info.errs[err->info.type];
7699                 u64 sec = err->info.ts;
7700                 u32 nsec;
7701
7702                 nsec = do_div(sec, NSEC_PER_SEC);
7703                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7704                            err->loc, err_text);
7705                 seq_printf(m, "%s", err->cmd);
7706                 tracing_err_log_show_pos(m, err->info.pos);
7707         }
7708
7709         return 0;
7710 }
7711
7712 static const struct seq_operations tracing_err_log_seq_ops = {
7713         .start  = tracing_err_log_seq_start,
7714         .next   = tracing_err_log_seq_next,
7715         .stop   = tracing_err_log_seq_stop,
7716         .show   = tracing_err_log_seq_show
7717 };
7718
7719 static int tracing_err_log_open(struct inode *inode, struct file *file)
7720 {
7721         struct trace_array *tr = inode->i_private;
7722         int ret = 0;
7723
7724         ret = tracing_check_open_get_tr(tr);
7725         if (ret)
7726                 return ret;
7727
7728         /* If this file was opened for write, then erase contents */
7729         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7730                 clear_tracing_err_log(tr);
7731
7732         if (file->f_mode & FMODE_READ) {
7733                 ret = seq_open(file, &tracing_err_log_seq_ops);
7734                 if (!ret) {
7735                         struct seq_file *m = file->private_data;
7736                         m->private = tr;
7737                 } else {
7738                         trace_array_put(tr);
7739                 }
7740         }
7741         return ret;
7742 }
7743
7744 static ssize_t tracing_err_log_write(struct file *file,
7745                                      const char __user *buffer,
7746                                      size_t count, loff_t *ppos)
7747 {
7748         return count;
7749 }
7750
7751 static int tracing_err_log_release(struct inode *inode, struct file *file)
7752 {
7753         struct trace_array *tr = inode->i_private;
7754
7755         trace_array_put(tr);
7756
7757         if (file->f_mode & FMODE_READ)
7758                 seq_release(inode, file);
7759
7760         return 0;
7761 }
7762
7763 static const struct file_operations tracing_err_log_fops = {
7764         .open           = tracing_err_log_open,
7765         .write          = tracing_err_log_write,
7766         .read           = seq_read,
7767         .llseek         = seq_lseek,
7768         .release        = tracing_err_log_release,
7769 };
7770
7771 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7772 {
7773         struct trace_array *tr = inode->i_private;
7774         struct ftrace_buffer_info *info;
7775         int ret;
7776
7777         ret = tracing_check_open_get_tr(tr);
7778         if (ret)
7779                 return ret;
7780
7781         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7782         if (!info) {
7783                 trace_array_put(tr);
7784                 return -ENOMEM;
7785         }
7786
7787         mutex_lock(&trace_types_lock);
7788
7789         info->iter.tr           = tr;
7790         info->iter.cpu_file     = tracing_get_cpu(inode);
7791         info->iter.trace        = tr->current_trace;
7792         info->iter.array_buffer = &tr->array_buffer;
7793         info->spare             = NULL;
7794         /* Force reading ring buffer for first read */
7795         info->read              = (unsigned int)-1;
7796
7797         filp->private_data = info;
7798
7799         tr->trace_ref++;
7800
7801         mutex_unlock(&trace_types_lock);
7802
7803         ret = nonseekable_open(inode, filp);
7804         if (ret < 0)
7805                 trace_array_put(tr);
7806
7807         return ret;
7808 }
7809
7810 static __poll_t
7811 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7812 {
7813         struct ftrace_buffer_info *info = filp->private_data;
7814         struct trace_iterator *iter = &info->iter;
7815
7816         return trace_poll(iter, filp, poll_table);
7817 }
7818
7819 static ssize_t
7820 tracing_buffers_read(struct file *filp, char __user *ubuf,
7821                      size_t count, loff_t *ppos)
7822 {
7823         struct ftrace_buffer_info *info = filp->private_data;
7824         struct trace_iterator *iter = &info->iter;
7825         ssize_t ret = 0;
7826         ssize_t size;
7827
7828         if (!count)
7829                 return 0;
7830
7831 #ifdef CONFIG_TRACER_MAX_TRACE
7832         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7833                 return -EBUSY;
7834 #endif
7835
7836         if (!info->spare) {
7837                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7838                                                           iter->cpu_file);
7839                 if (IS_ERR(info->spare)) {
7840                         ret = PTR_ERR(info->spare);
7841                         info->spare = NULL;
7842                 } else {
7843                         info->spare_cpu = iter->cpu_file;
7844                 }
7845         }
7846         if (!info->spare)
7847                 return ret;
7848
7849         /* Do we have previous read data to read? */
7850         if (info->read < PAGE_SIZE)
7851                 goto read;
7852
7853  again:
7854         trace_access_lock(iter->cpu_file);
7855         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7856                                     &info->spare,
7857                                     count,
7858                                     iter->cpu_file, 0);
7859         trace_access_unlock(iter->cpu_file);
7860
7861         if (ret < 0) {
7862                 if (trace_empty(iter)) {
7863                         if ((filp->f_flags & O_NONBLOCK))
7864                                 return -EAGAIN;
7865
7866                         ret = wait_on_pipe(iter, 0);
7867                         if (ret)
7868                                 return ret;
7869
7870                         goto again;
7871                 }
7872                 return 0;
7873         }
7874
7875         info->read = 0;
7876  read:
7877         size = PAGE_SIZE - info->read;
7878         if (size > count)
7879                 size = count;
7880
7881         ret = copy_to_user(ubuf, info->spare + info->read, size);
7882         if (ret == size)
7883                 return -EFAULT;
7884
7885         size -= ret;
7886
7887         *ppos += size;
7888         info->read += size;
7889
7890         return size;
7891 }
7892
7893 static int tracing_buffers_release(struct inode *inode, struct file *file)
7894 {
7895         struct ftrace_buffer_info *info = file->private_data;
7896         struct trace_iterator *iter = &info->iter;
7897
7898         mutex_lock(&trace_types_lock);
7899
7900         iter->tr->trace_ref--;
7901
7902         __trace_array_put(iter->tr);
7903
7904         if (info->spare)
7905                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7906                                            info->spare_cpu, info->spare);
7907         kvfree(info);
7908
7909         mutex_unlock(&trace_types_lock);
7910
7911         return 0;
7912 }
7913
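/*
 * A refcounted handle on one ring buffer read page that has been handed
 * to a pipe via splice; the page is returned to the ring buffer only
 * when the last reference is dropped.
 */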
7914 struct buffer_ref {
7915         struct trace_buffer     *buffer;
7916         void                    *page;
7917         int                     cpu;
7918         refcount_t              refcount;
7919 };
7920
7921 static void buffer_ref_release(struct buffer_ref *ref)
7922 {
7923         if (!refcount_dec_and_test(&ref->refcount))
7924                 return;
7925         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7926         kfree(ref);
7927 }
7928
7929 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7930                                     struct pipe_buffer *buf)
7931 {
7932         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7933
7934         buffer_ref_release(ref);
7935         buf->private = 0;
7936 }
7937
7938 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7939                                 struct pipe_buffer *buf)
7940 {
7941         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7942
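        /*
         * Refuse to take another reference if the count is already huge;
         * this guards against refcount overflow when the pipe buffer is
         * duplicated many times (e.g. by tee()).
         */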
7943         if (refcount_read(&ref->refcount) > INT_MAX/2)
7944                 return false;
7945
7946         refcount_inc(&ref->refcount);
7947         return true;
7948 }
7949
7950 /* Pipe buffer operations for a buffer. */
7951 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7952         .release                = buffer_pipe_buf_release,
7953         .get                    = buffer_pipe_buf_get,
7954 };
7955
7956 /*
7957  * Callback from splice_to_pipe(): release any pages still attached to
7958  * the spd if we errored out while filling the pipe.
7959  */
7960 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7961 {
7962         struct buffer_ref *ref =
7963                 (struct buffer_ref *)spd->partial[i].private;
7964
7965         buffer_ref_release(ref);
7966         spd->partial[i].private = 0;
7967 }
7968
7969 static ssize_t
7970 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7971                             struct pipe_inode_info *pipe, size_t len,
7972                             unsigned int flags)
7973 {
7974         struct ftrace_buffer_info *info = file->private_data;
7975         struct trace_iterator *iter = &info->iter;
7976         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7977         struct page *pages_def[PIPE_DEF_BUFFERS];
7978         struct splice_pipe_desc spd = {
7979                 .pages          = pages_def,
7980                 .partial        = partial_def,
7981                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7982                 .ops            = &buffer_pipe_buf_ops,
7983                 .spd_release    = buffer_spd_release,
7984         };
7985         struct buffer_ref *ref;
7986         int entries, i;
7987         ssize_t ret = 0;
7988
7989 #ifdef CONFIG_TRACER_MAX_TRACE
7990         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7991                 return -EBUSY;
7992 #endif
7993
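        /*
         * Splicing works in whole ring-buffer pages: the file offset must
         * be page aligned and the length is rounded down to full pages
         * (a request smaller than one page is rejected).
         */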
7994         if (*ppos & (PAGE_SIZE - 1))
7995                 return -EINVAL;
7996
7997         if (len & (PAGE_SIZE - 1)) {
7998                 if (len < PAGE_SIZE)
7999                         return -EINVAL;
8000                 len &= PAGE_MASK;
8001         }
8002
8003         if (splice_grow_spd(pipe, &spd))
8004                 return -ENOMEM;
8005
8006  again:
8007         trace_access_lock(iter->cpu_file);
8008         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8009
8010         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8011                 struct page *page;
8012                 int r;
8013
8014                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8015                 if (!ref) {
8016                         ret = -ENOMEM;
8017                         break;
8018                 }
8019
8020                 refcount_set(&ref->refcount, 1);
8021                 ref->buffer = iter->array_buffer->buffer;
8022                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8023                 if (IS_ERR(ref->page)) {
8024                         ret = PTR_ERR(ref->page);
8025                         ref->page = NULL;
8026                         kfree(ref);
8027                         break;
8028                 }
8029                 ref->cpu = iter->cpu_file;
8030
8031                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8032                                           len, iter->cpu_file, 1);
8033                 if (r < 0) {
8034                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8035                                                    ref->page);
8036                         kfree(ref);
8037                         break;
8038                 }
8039
8040                 page = virt_to_page(ref->page);
8041
8042                 spd.pages[i] = page;
8043                 spd.partial[i].len = PAGE_SIZE;
8044                 spd.partial[i].offset = 0;
8045                 spd.partial[i].private = (unsigned long)ref;
8046                 spd.nr_pages++;
8047                 *ppos += PAGE_SIZE;
8048
8049                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8050         }
8051
8052         trace_access_unlock(iter->cpu_file);
8053         spd.nr_pages = i;
8054
8055         /* did we read anything? */
8056         if (!spd.nr_pages) {
8057                 if (ret)
8058                         goto out;
8059
8060                 ret = -EAGAIN;
8061                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8062                         goto out;
8063
8064                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8065                 if (ret)
8066                         goto out;
8067
8068                 goto again;
8069         }
8070
8071         ret = splice_to_pipe(pipe, &spd);
8072 out:
8073         splice_shrink_spd(&spd);
8074
8075         return ret;
8076 }
8077
8078 static const struct file_operations tracing_buffers_fops = {
8079         .open           = tracing_buffers_open,
8080         .read           = tracing_buffers_read,
8081         .poll           = tracing_buffers_poll,
8082         .release        = tracing_buffers_release,
8083         .splice_read    = tracing_buffers_splice_read,
8084         .llseek         = no_llseek,
8085 };
8086
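/*
 * Read handler for the per-CPU "stats" file: dumps the ring buffer
 * counters for one CPU. A hypothetical example of the output (the
 * numbers are made up):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 45320
 *   oldest event ts:  1832.143879
 *   now ts:  1832.145210
 *   dropped events: 0
 *   read events: 512
 */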
8087 static ssize_t
8088 tracing_stats_read(struct file *filp, char __user *ubuf,
8089                    size_t count, loff_t *ppos)
8090 {
8091         struct inode *inode = file_inode(filp);
8092         struct trace_array *tr = inode->i_private;
8093         struct array_buffer *trace_buf = &tr->array_buffer;
8094         int cpu = tracing_get_cpu(inode);
8095         struct trace_seq *s;
8096         unsigned long cnt;
8097         unsigned long long t;
8098         unsigned long usec_rem;
8099
8100         s = kmalloc(sizeof(*s), GFP_KERNEL);
8101         if (!s)
8102                 return -ENOMEM;
8103
8104         trace_seq_init(s);
8105
8106         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8107         trace_seq_printf(s, "entries: %ld\n", cnt);
8108
8109         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8110         trace_seq_printf(s, "overrun: %ld\n", cnt);
8111
8112         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8113         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8114
8115         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8116         trace_seq_printf(s, "bytes: %ld\n", cnt);
8117
8118         if (trace_clocks[tr->clock_id].in_ns) {
8119                 /* local or global for trace_clock */
8120                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8121                 usec_rem = do_div(t, USEC_PER_SEC);
8122                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8123                                                                 t, usec_rem);
8124
8125                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8126                 usec_rem = do_div(t, USEC_PER_SEC);
8127                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8128         } else {
8129                 /* counter or tsc mode for trace_clock */
8130                 trace_seq_printf(s, "oldest event ts: %llu\n",
8131                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8132
8133                 trace_seq_printf(s, "now ts: %llu\n",
8134                                 ring_buffer_time_stamp(trace_buf->buffer));
8135         }
8136
8137         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8138         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8139
8140         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8141         trace_seq_printf(s, "read events: %ld\n", cnt);
8142
8143         count = simple_read_from_buffer(ubuf, count, ppos,
8144                                         s->buffer, trace_seq_used(s));
8145
8146         kfree(s);
8147
8148         return count;
8149 }
8150
8151 static const struct file_operations tracing_stats_fops = {
8152         .open           = tracing_open_generic_tr,
8153         .read           = tracing_stats_read,
8154         .llseek         = generic_file_llseek,
8155         .release        = tracing_release_generic_tr,
8156 };
8157
8158 #ifdef CONFIG_DYNAMIC_FTRACE
8159
8160 static ssize_t
8161 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8162                   size_t cnt, loff_t *ppos)
8163 {
8164         ssize_t ret;
8165         char *buf;
8166         int r;
8167
8168         /* 256 should be plenty to hold the amount needed */
8169         buf = kmalloc(256, GFP_KERNEL);
8170         if (!buf)
8171                 return -ENOMEM;
8172
8173         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8174                       ftrace_update_tot_cnt,
8175                       ftrace_number_of_pages,
8176                       ftrace_number_of_groups);
8177
8178         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8179         kfree(buf);
8180         return ret;
8181 }
8182
8183 static const struct file_operations tracing_dyn_info_fops = {
8184         .open           = tracing_open_generic,
8185         .read           = tracing_read_dyn_info,
8186         .llseek         = generic_file_llseek,
8187 };
8188 #endif /* CONFIG_DYNAMIC_FTRACE */
8189
8190 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8191 static void
8192 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8193                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8194                 void *data)
8195 {
8196         tracing_snapshot_instance(tr);
8197 }
8198
8199 static void
8200 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8201                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8202                       void *data)
8203 {
8204         struct ftrace_func_mapper *mapper = data;
8205         long *count = NULL;
8206
8207         if (mapper)
8208                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8209
8210         if (count) {
8211
8212                 if (*count <= 0)
8213                         return;
8214
8215                 (*count)--;
8216         }
8217
8218         tracing_snapshot_instance(tr);
8219 }
8220
8221 static int
8222 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8223                       struct ftrace_probe_ops *ops, void *data)
8224 {
8225         struct ftrace_func_mapper *mapper = data;
8226         long *count = NULL;
8227
8228         seq_printf(m, "%ps:", (void *)ip);
8229
8230         seq_puts(m, "snapshot");
8231
8232         if (mapper)
8233                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8234
8235         if (count)
8236                 seq_printf(m, ":count=%ld\n", *count);
8237         else
8238                 seq_puts(m, ":unlimited\n");
8239
8240         return 0;
8241 }
8242
8243 static int
8244 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8245                      unsigned long ip, void *init_data, void **data)
8246 {
8247         struct ftrace_func_mapper *mapper = *data;
8248
8249         if (!mapper) {
8250                 mapper = allocate_ftrace_func_mapper();
8251                 if (!mapper)
8252                         return -ENOMEM;
8253                 *data = mapper;
8254         }
8255
8256         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8257 }
8258
8259 static void
8260 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8261                      unsigned long ip, void *data)
8262 {
8263         struct ftrace_func_mapper *mapper = data;
8264
8265         if (!ip) {
8266                 if (!mapper)
8267                         return;
8268                 free_ftrace_func_mapper(mapper, NULL);
8269                 return;
8270         }
8271
8272         ftrace_func_mapper_remove_ip(mapper, ip);
8273 }
8274
8275 static struct ftrace_probe_ops snapshot_probe_ops = {
8276         .func                   = ftrace_snapshot,
8277         .print                  = ftrace_snapshot_print,
8278 };
8279
8280 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8281         .func                   = ftrace_count_snapshot,
8282         .print                  = ftrace_snapshot_print,
8283         .init                   = ftrace_snapshot_init,
8284         .free                   = ftrace_snapshot_free,
8285 };
8286
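/*
 * Handler for the "snapshot" command of set_ftrace_filter. A sketch of
 * the expected usage (the traced function is only an example):
 *
 *   # echo 'schedule:snapshot'   > set_ftrace_filter   (snapshot on every hit)
 *   # echo 'schedule:snapshot:3' > set_ftrace_filter   (only the first 3 hits)
 *   # echo '!schedule:snapshot'  > set_ftrace_filter   (remove the probe)
 */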
8287 static int
8288 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8289                                char *glob, char *cmd, char *param, int enable)
8290 {
8291         struct ftrace_probe_ops *ops;
8292         void *count = (void *)-1;
8293         char *number;
8294         int ret;
8295
8296         if (!tr)
8297                 return -ENODEV;
8298
8299         /* hash funcs only work with set_ftrace_filter */
8300         if (!enable)
8301                 return -EINVAL;
8302
8303         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8304
8305         if (glob[0] == '!')
8306                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8307
8308         if (!param)
8309                 goto out_reg;
8310
8311         number = strsep(&param, ":");
8312
8313         if (!strlen(number))
8314                 goto out_reg;
8315
8316         /*
8317          * We use the callback data field (which is a pointer)
8318          * as our counter.
8319          */
8320         ret = kstrtoul(number, 0, (unsigned long *)&count);
8321         if (ret)
8322                 return ret;
8323
8324  out_reg:
8325         ret = tracing_alloc_snapshot_instance(tr);
8326         if (ret < 0)
8327                 goto out;
8328
8329         ret = register_ftrace_function_probe(glob, tr, ops, count);
8330
8331  out:
8332         return ret < 0 ? ret : 0;
8333 }
8334
8335 static struct ftrace_func_command ftrace_snapshot_cmd = {
8336         .name                   = "snapshot",
8337         .func                   = ftrace_trace_snapshot_callback,
8338 };
8339
8340 static __init int register_snapshot_cmd(void)
8341 {
8342         return register_ftrace_command(&ftrace_snapshot_cmd);
8343 }
8344 #else
8345 static inline __init int register_snapshot_cmd(void) { return 0; }
8346 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8347
8348 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8349 {
8350         if (WARN_ON(!tr->dir))
8351                 return ERR_PTR(-ENODEV);
8352
8353         /* Top directory uses NULL as the parent */
8354         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8355                 return NULL;
8356
8357         /* All sub buffers have a descriptor */
8358         return tr->dir;
8359 }
8360
8361 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8362 {
8363         struct dentry *d_tracer;
8364
8365         if (tr->percpu_dir)
8366                 return tr->percpu_dir;
8367
8368         d_tracer = tracing_get_dentry(tr);
8369         if (IS_ERR(d_tracer))
8370                 return NULL;
8371
8372         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8373
8374         MEM_FAIL(!tr->percpu_dir,
8375                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8376
8377         return tr->percpu_dir;
8378 }
8379
8380 static struct dentry *
8381 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8382                       void *data, long cpu, const struct file_operations *fops)
8383 {
8384         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8385
8386         if (ret) /* See tracing_get_cpu() */
8387                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8388         return ret;
8389 }
8390
8391 static void
8392 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8393 {
8394         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8395         struct dentry *d_cpu;
8396         char cpu_dir[30]; /* 30 characters should be more than enough */
8397
8398         if (!d_percpu)
8399                 return;
8400
8401         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8402         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8403         if (!d_cpu) {
8404                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8405                 return;
8406         }
8407
8408         /* per cpu trace_pipe */
8409         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8410                                 tr, cpu, &tracing_pipe_fops);
8411
8412         /* per cpu trace */
8413         trace_create_cpu_file("trace", 0644, d_cpu,
8414                                 tr, cpu, &tracing_fops);
8415
8416         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8417                                 tr, cpu, &tracing_buffers_fops);
8418
8419         trace_create_cpu_file("stats", 0444, d_cpu,
8420                                 tr, cpu, &tracing_stats_fops);
8421
8422         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8423                                 tr, cpu, &tracing_entries_fops);
8424
8425 #ifdef CONFIG_TRACER_SNAPSHOT
8426         trace_create_cpu_file("snapshot", 0644, d_cpu,
8427                                 tr, cpu, &snapshot_fops);
8428
8429         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8430                                 tr, cpu, &snapshot_raw_fops);
8431 #endif
8432 }
8433
8434 #ifdef CONFIG_FTRACE_SELFTEST
8435 /* Let selftest have access to static functions in this file */
8436 #include "trace_selftest.c"
8437 #endif
8438
8439 static ssize_t
8440 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8441                         loff_t *ppos)
8442 {
8443         struct trace_option_dentry *topt = filp->private_data;
8444         char *buf;
8445
8446         if (topt->flags->val & topt->opt->bit)
8447                 buf = "1\n";
8448         else
8449                 buf = "0\n";
8450
8451         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8452 }
8453
8454 static ssize_t
8455 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8456                          loff_t *ppos)
8457 {
8458         struct trace_option_dentry *topt = filp->private_data;
8459         unsigned long val;
8460         int ret;
8461
8462         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8463         if (ret)
8464                 return ret;
8465
8466         if (val != 0 && val != 1)
8467                 return -EINVAL;
8468
8469         if (!!(topt->flags->val & topt->opt->bit) != val) {
8470                 mutex_lock(&trace_types_lock);
8471                 ret = __set_tracer_option(topt->tr, topt->flags,
8472                                           topt->opt, !val);
8473                 mutex_unlock(&trace_types_lock);
8474                 if (ret)
8475                         return ret;
8476         }
8477
8478         *ppos += cnt;
8479
8480         return cnt;
8481 }
8482
8483
8484 static const struct file_operations trace_options_fops = {
8485         .open = tracing_open_generic,
8486         .read = trace_options_read,
8487         .write = trace_options_write,
8488         .llseek = generic_file_llseek,
8489 };
8490
8491 /*
8492  * In order to pass in both the trace_array descriptor as well as the index
8493  * to the flag that the trace option file represents, the trace_array
8494  * has a character array of trace_flags_index[], which holds the index
8495  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8496  * The address of this character array is passed to the flag option file
8497  * read/write callbacks.
8498  *
8499  * In order to extract both the index and the trace_array descriptor,
8500  * get_tr_index() uses the following algorithm.
8501  *
8502  *   idx = *ptr;
8503  *
8504  * The pointer itself is the address of one entry in that index array,
8505  * and since index[i] == i, dereferencing it yields the flag's bit index.
8506  *
8507  * Then, to get the trace_array descriptor, subtracting that index from
8508  * the pointer brings us back to the start of the index array:
8509  *
8510  *   ptr - idx == &index[0]
8511  *
8512  * Then a simple container_of() from that pointer gets us to the
8513  * trace_array descriptor.
8514  */
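/*
 * A worked (hypothetical) example for the flag at bit 3:
 *
 *   data == &tr->trace_flags_index[3], so *data == 3
 *   data - 3 == &tr->trace_flags_index[0]
 *   container_of(data - 3, struct trace_array, trace_flags_index) == tr
 */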
8515 static void get_tr_index(void *data, struct trace_array **ptr,
8516                          unsigned int *pindex)
8517 {
8518         *pindex = *(unsigned char *)data;
8519
8520         *ptr = container_of(data - *pindex, struct trace_array,
8521                             trace_flags_index);
8522 }
8523
8524 static ssize_t
8525 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8526                         loff_t *ppos)
8527 {
8528         void *tr_index = filp->private_data;
8529         struct trace_array *tr;
8530         unsigned int index;
8531         char *buf;
8532
8533         get_tr_index(tr_index, &tr, &index);
8534
8535         if (tr->trace_flags & (1 << index))
8536                 buf = "1\n";
8537         else
8538                 buf = "0\n";
8539
8540         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8541 }
8542
8543 static ssize_t
8544 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8545                          loff_t *ppos)
8546 {
8547         void *tr_index = filp->private_data;
8548         struct trace_array *tr;
8549         unsigned int index;
8550         unsigned long val;
8551         int ret;
8552
8553         get_tr_index(tr_index, &tr, &index);
8554
8555         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8556         if (ret)
8557                 return ret;
8558
8559         if (val != 0 && val != 1)
8560                 return -EINVAL;
8561
8562         mutex_lock(&event_mutex);
8563         mutex_lock(&trace_types_lock);
8564         ret = set_tracer_flag(tr, 1 << index, val);
8565         mutex_unlock(&trace_types_lock);
8566         mutex_unlock(&event_mutex);
8567
8568         if (ret < 0)
8569                 return ret;
8570
8571         *ppos += cnt;
8572
8573         return cnt;
8574 }
8575
8576 static const struct file_operations trace_options_core_fops = {
8577         .open = tracing_open_generic,
8578         .read = trace_options_core_read,
8579         .write = trace_options_core_write,
8580         .llseek = generic_file_llseek,
8581 };
8582
8583 struct dentry *trace_create_file(const char *name,
8584                                  umode_t mode,
8585                                  struct dentry *parent,
8586                                  void *data,
8587                                  const struct file_operations *fops)
8588 {
8589         struct dentry *ret;
8590
8591         ret = tracefs_create_file(name, mode, parent, data, fops);
8592         if (!ret)
8593                 pr_warn("Could not create tracefs '%s' entry\n", name);
8594
8595         return ret;
8596 }
8597
8598
8599 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8600 {
8601         struct dentry *d_tracer;
8602
8603         if (tr->options)
8604                 return tr->options;
8605
8606         d_tracer = tracing_get_dentry(tr);
8607         if (IS_ERR(d_tracer))
8608                 return NULL;
8609
8610         tr->options = tracefs_create_dir("options", d_tracer);
8611         if (!tr->options) {
8612                 pr_warn("Could not create tracefs directory 'options'\n");
8613                 return NULL;
8614         }
8615
8616         return tr->options;
8617 }
8618
8619 static void
8620 create_trace_option_file(struct trace_array *tr,
8621                          struct trace_option_dentry *topt,
8622                          struct tracer_flags *flags,
8623                          struct tracer_opt *opt)
8624 {
8625         struct dentry *t_options;
8626
8627         t_options = trace_options_init_dentry(tr);
8628         if (!t_options)
8629                 return;
8630
8631         topt->flags = flags;
8632         topt->opt = opt;
8633         topt->tr = tr;
8634
8635         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8636                                     &trace_options_fops);
8637
8638 }
8639
8640 static void
8641 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8642 {
8643         struct trace_option_dentry *topts;
8644         struct trace_options *tr_topts;
8645         struct tracer_flags *flags;
8646         struct tracer_opt *opts;
8647         int cnt;
8648         int i;
8649
8650         if (!tracer)
8651                 return;
8652
8653         flags = tracer->flags;
8654
8655         if (!flags || !flags->opts)
8656                 return;
8657
8658         /*
8659          * If this is an instance, only create flags for tracers
8660          * the instance may have.
8661          */
8662         if (!trace_ok_for_array(tracer, tr))
8663                 return;
8664
8665         for (i = 0; i < tr->nr_topts; i++) {
8666                 /* Make sure there are no duplicate flags. */
8667                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8668                         return;
8669         }
8670
8671         opts = flags->opts;
8672
8673         for (cnt = 0; opts[cnt].name; cnt++)
8674                 ;
8675
8676         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8677         if (!topts)
8678                 return;
8679
8680         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8681                             GFP_KERNEL);
8682         if (!tr_topts) {
8683                 kfree(topts);
8684                 return;
8685         }
8686
8687         tr->topts = tr_topts;
8688         tr->topts[tr->nr_topts].tracer = tracer;
8689         tr->topts[tr->nr_topts].topts = topts;
8690         tr->nr_topts++;
8691
8692         for (cnt = 0; opts[cnt].name; cnt++) {
8693                 create_trace_option_file(tr, &topts[cnt], flags,
8694                                          &opts[cnt]);
8695                 MEM_FAIL(topts[cnt].entry == NULL,
8696                           "Failed to create trace option: %s",
8697                           opts[cnt].name);
8698         }
8699 }
8700
8701 static struct dentry *
8702 create_trace_option_core_file(struct trace_array *tr,
8703                               const char *option, long index)
8704 {
8705         struct dentry *t_options;
8706
8707         t_options = trace_options_init_dentry(tr);
8708         if (!t_options)
8709                 return NULL;
8710
8711         return trace_create_file(option, 0644, t_options,
8712                                  (void *)&tr->trace_flags_index[index],
8713                                  &trace_options_core_fops);
8714 }
8715
8716 static void create_trace_options_dir(struct trace_array *tr)
8717 {
8718         struct dentry *t_options;
8719         bool top_level = tr == &global_trace;
8720         int i;
8721
8722         t_options = trace_options_init_dentry(tr);
8723         if (!t_options)
8724                 return;
8725
8726         for (i = 0; trace_options[i]; i++) {
8727                 if (top_level ||
8728                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8729                         create_trace_option_core_file(tr, trace_options[i], i);
8730         }
8731 }
8732
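/*
 * "tracing_on" file: reading reports whether the ring buffer is currently
 * recording; writing 0 or 1 stops or (re)starts it and calls the current
 * tracer's stop()/start() callbacks as needed.
 */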
8733 static ssize_t
8734 rb_simple_read(struct file *filp, char __user *ubuf,
8735                size_t cnt, loff_t *ppos)
8736 {
8737         struct trace_array *tr = filp->private_data;
8738         char buf[64];
8739         int r;
8740
8741         r = tracer_tracing_is_on(tr);
8742         r = sprintf(buf, "%d\n", r);
8743
8744         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8745 }
8746
8747 static ssize_t
8748 rb_simple_write(struct file *filp, const char __user *ubuf,
8749                 size_t cnt, loff_t *ppos)
8750 {
8751         struct trace_array *tr = filp->private_data;
8752         struct trace_buffer *buffer = tr->array_buffer.buffer;
8753         unsigned long val;
8754         int ret;
8755
8756         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8757         if (ret)
8758                 return ret;
8759
8760         if (buffer) {
8761                 mutex_lock(&trace_types_lock);
8762                 if (!!val == tracer_tracing_is_on(tr)) {
8763                         val = 0; /* do nothing */
8764                 } else if (val) {
8765                         tracer_tracing_on(tr);
8766                         if (tr->current_trace->start)
8767                                 tr->current_trace->start(tr);
8768                 } else {
8769                         tracer_tracing_off(tr);
8770                         if (tr->current_trace->stop)
8771                                 tr->current_trace->stop(tr);
8772                 }
8773                 mutex_unlock(&trace_types_lock);
8774         }
8775
8776         (*ppos)++;
8777
8778         return cnt;
8779 }
8780
8781 static const struct file_operations rb_simple_fops = {
8782         .open           = tracing_open_generic_tr,
8783         .read           = rb_simple_read,
8784         .write          = rb_simple_write,
8785         .release        = tracing_release_generic_tr,
8786         .llseek         = default_llseek,
8787 };
8788
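/*
 * "buffer_percent" file: how full (in percent) the ring buffer must be
 * before a blocked splice reader of trace_pipe_raw is woken up (see the
 * wait_on_pipe() call in tracing_buffers_splice_read()).
 */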
8789 static ssize_t
8790 buffer_percent_read(struct file *filp, char __user *ubuf,
8791                     size_t cnt, loff_t *ppos)
8792 {
8793         struct trace_array *tr = filp->private_data;
8794         char buf[64];
8795         int r;
8796
8797         r = tr->buffer_percent;
8798         r = sprintf(buf, "%d\n", r);
8799
8800         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8801 }
8802
8803 static ssize_t
8804 buffer_percent_write(struct file *filp, const char __user *ubuf,
8805                      size_t cnt, loff_t *ppos)
8806 {
8807         struct trace_array *tr = filp->private_data;
8808         unsigned long val;
8809         int ret;
8810
8811         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8812         if (ret)
8813                 return ret;
8814
8815         if (val > 100)
8816                 return -EINVAL;
8817
8818         if (!val)
8819                 val = 1;
8820
8821         tr->buffer_percent = val;
8822
8823         (*ppos)++;
8824
8825         return cnt;
8826 }
8827
8828 static const struct file_operations buffer_percent_fops = {
8829         .open           = tracing_open_generic_tr,
8830         .read           = buffer_percent_read,
8831         .write          = buffer_percent_write,
8832         .release        = tracing_release_generic_tr,
8833         .llseek         = default_llseek,
8834 };
8835
8836 static struct dentry *trace_instance_dir;
8837
8838 static void
8839 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8840
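/*
 * Allocate the ring buffer and the per-CPU bookkeeping data for one
 * array_buffer of @tr.
 */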
8841 static int
8842 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8843 {
8844         enum ring_buffer_flags rb_flags;
8845
8846         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8847
8848         buf->tr = tr;
8849
8850         buf->buffer = ring_buffer_alloc(size, rb_flags);
8851         if (!buf->buffer)
8852                 return -ENOMEM;
8853
8854         buf->data = alloc_percpu(struct trace_array_cpu);
8855         if (!buf->data) {
8856                 ring_buffer_free(buf->buffer);
8857                 buf->buffer = NULL;
8858                 return -ENOMEM;
8859         }
8860
8861         /* Allocate the first page for all buffers */
8862         set_buffer_entries(&tr->array_buffer,
8863                            ring_buffer_size(tr->array_buffer.buffer, 0));
8864
8865         return 0;
8866 }
8867
8868 static int allocate_trace_buffers(struct trace_array *tr, int size)
8869 {
8870         int ret;
8871
8872         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8873         if (ret)
8874                 return ret;
8875
8876 #ifdef CONFIG_TRACER_MAX_TRACE
8877         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8878                                     allocate_snapshot ? size : 1);
8879         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8880                 ring_buffer_free(tr->array_buffer.buffer);
8881                 tr->array_buffer.buffer = NULL;
8882                 free_percpu(tr->array_buffer.data);
8883                 tr->array_buffer.data = NULL;
8884                 return -ENOMEM;
8885         }
8886         tr->allocated_snapshot = allocate_snapshot;
8887
8888         /*
8889          * Only the top level trace array gets its snapshot allocated
8890          * from the kernel command line.
8891          */
8892         allocate_snapshot = false;
8893 #endif
8894
8895         return 0;
8896 }
8897
8898 static void free_trace_buffer(struct array_buffer *buf)
8899 {
8900         if (buf->buffer) {
8901                 ring_buffer_free(buf->buffer);
8902                 buf->buffer = NULL;
8903                 free_percpu(buf->data);
8904                 buf->data = NULL;
8905         }
8906 }
8907
8908 static void free_trace_buffers(struct trace_array *tr)
8909 {
8910         if (!tr)
8911                 return;
8912
8913         free_trace_buffer(&tr->array_buffer);
8914
8915 #ifdef CONFIG_TRACER_MAX_TRACE
8916         free_trace_buffer(&tr->max_buffer);
8917 #endif
8918 }
8919
8920 static void init_trace_flags_index(struct trace_array *tr)
8921 {
8922         int i;
8923
8924         /* Used by the trace options files */
8925         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8926                 tr->trace_flags_index[i] = i;
8927 }
8928
8929 static void __update_tracer_options(struct trace_array *tr)
8930 {
8931         struct tracer *t;
8932
8933         for (t = trace_types; t; t = t->next)
8934                 add_tracer_options(tr, t);
8935 }
8936
8937 static void update_tracer_options(struct trace_array *tr)
8938 {
8939         mutex_lock(&trace_types_lock);
8940         __update_tracer_options(tr);
8941         mutex_unlock(&trace_types_lock);
8942 }
8943
8944 /* Must have trace_types_lock held */
8945 struct trace_array *trace_array_find(const char *instance)
8946 {
8947         struct trace_array *tr, *found = NULL;
8948
8949         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8950                 if (tr->name && strcmp(tr->name, instance) == 0) {
8951                         found = tr;
8952                         break;
8953                 }
8954         }
8955
8956         return found;
8957 }
8958
8959 struct trace_array *trace_array_find_get(const char *instance)
8960 {
8961         struct trace_array *tr;
8962
8963         mutex_lock(&trace_types_lock);
8964         tr = trace_array_find(instance);
8965         if (tr)
8966                 tr->ref++;
8967         mutex_unlock(&trace_types_lock);
8968
8969         return tr;
8970 }
8971
8972 static int trace_array_create_dir(struct trace_array *tr)
8973 {
8974         int ret;
8975
8976         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8977         if (!tr->dir)
8978                 return -EINVAL;
8979
8980         ret = event_trace_add_tracer(tr->dir, tr);
8981         if (ret)
8982                 tracefs_remove(tr->dir);
8983
8984         init_tracer_tracefs(tr, tr->dir);
8985         __update_tracer_options(tr);
8986
8987         return ret;
8988 }
8989
8990 static struct trace_array *trace_array_create(const char *name)
8991 {
8992         struct trace_array *tr;
8993         int ret;
8994
8995         ret = -ENOMEM;
8996         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8997         if (!tr)
8998                 return ERR_PTR(ret);
8999
9000         tr->name = kstrdup(name, GFP_KERNEL);
9001         if (!tr->name)
9002                 goto out_free_tr;
9003
9004         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9005                 goto out_free_tr;
9006
9007         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9008
9009         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9010
9011         raw_spin_lock_init(&tr->start_lock);
9012
9013         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9014
9015         tr->current_trace = &nop_trace;
9016
9017         INIT_LIST_HEAD(&tr->systems);
9018         INIT_LIST_HEAD(&tr->events);
9019         INIT_LIST_HEAD(&tr->hist_vars);
9020         INIT_LIST_HEAD(&tr->err_log);
9021
9022         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9023                 goto out_free_tr;
9024
9025         if (ftrace_allocate_ftrace_ops(tr) < 0)
9026                 goto out_free_tr;
9027
9028         ftrace_init_trace_array(tr);
9029
9030         init_trace_flags_index(tr);
9031
9032         if (trace_instance_dir) {
9033                 ret = trace_array_create_dir(tr);
9034                 if (ret)
9035                         goto out_free_tr;
9036         } else
9037                 __trace_early_add_events(tr);
9038
9039         list_add(&tr->list, &ftrace_trace_arrays);
9040
9041         tr->ref++;
9042
9043         return tr;
9044
9045  out_free_tr:
9046         ftrace_free_ftrace_ops(tr);
9047         free_trace_buffers(tr);
9048         free_cpumask_var(tr->tracing_cpumask);
9049         kfree(tr->name);
9050         kfree(tr);
9051
9052         return ERR_PTR(ret);
9053 }
9054
9055 static int instance_mkdir(const char *name)
9056 {
9057         struct trace_array *tr;
9058         int ret;
9059
9060         mutex_lock(&event_mutex);
9061         mutex_lock(&trace_types_lock);
9062
9063         ret = -EEXIST;
9064         if (trace_array_find(name))
9065                 goto out_unlock;
9066
9067         tr = trace_array_create(name);
9068
9069         ret = PTR_ERR_OR_ZERO(tr);
9070
9071 out_unlock:
9072         mutex_unlock(&trace_types_lock);
9073         mutex_unlock(&event_mutex);
9074         return ret;
9075 }
9076
9077 /**
9078  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9079  * @name: The name of the trace array to be looked up/created.
9080  *
9081  * Returns a pointer to the trace array with the given name, or NULL if
9082  * it cannot be created.
9083  *
9084  * NOTE: This function increments the reference counter associated with the
9085  * trace array returned. This makes sure it cannot be freed while in use.
9086  * Use trace_array_put() once the trace array is no longer needed.
9087  * If the trace_array is to be freed, trace_array_destroy() needs to
9088  * be called after the trace_array_put(), or simply let user space delete
9089  * it from the tracefs instances directory. But until the
9090  * trace_array_put() is called, user space cannot delete it.
9091  *
9092  */
9093 struct trace_array *trace_array_get_by_name(const char *name)
9094 {
9095         struct trace_array *tr;
9096
9097         mutex_lock(&event_mutex);
9098         mutex_lock(&trace_types_lock);
9099
9100         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9101                 if (tr->name && strcmp(tr->name, name) == 0)
9102                         goto out_unlock;
9103         }
9104
9105         tr = trace_array_create(name);
9106
9107         if (IS_ERR(tr))
9108                 tr = NULL;
9109 out_unlock:
9110         if (tr)
9111                 tr->ref++;
9112
9113         mutex_unlock(&trace_types_lock);
9114         mutex_unlock(&event_mutex);
9115         return tr;
9116 }
9117 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
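
/*
 * A minimal usage sketch for the instance API (the instance name is
 * hypothetical and error handling is trimmed):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (!tr)
 *           return -ENODEV;
 *   ...
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);  (only if the instance should be removed)
 */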
9118
9119 static int __remove_instance(struct trace_array *tr)
9120 {
9121         int i;
9122
9123         /* Reference counter for a newly created trace array = 1. */
9124         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9125                 return -EBUSY;
9126
9127         list_del(&tr->list);
9128
9129         /* Disable all the flags that were enabled coming in */
9130         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9131                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9132                         set_tracer_flag(tr, 1 << i, 0);
9133         }
9134
9135         tracing_set_nop(tr);
9136         clear_ftrace_function_probes(tr);
9137         event_trace_del_tracer(tr);
9138         ftrace_clear_pids(tr);
9139         ftrace_destroy_function_files(tr);
9140         tracefs_remove(tr->dir);
9141         free_percpu(tr->last_func_repeats);
9142         free_trace_buffers(tr);
9143
9144         for (i = 0; i < tr->nr_topts; i++) {
9145                 kfree(tr->topts[i].topts);
9146         }
9147         kfree(tr->topts);
9148
9149         free_cpumask_var(tr->tracing_cpumask);
9150         kfree(tr->name);
9151         kfree(tr);
9152
9153         return 0;
9154 }
9155
9156 int trace_array_destroy(struct trace_array *this_tr)
9157 {
9158         struct trace_array *tr;
9159         int ret;
9160
9161         if (!this_tr)
9162                 return -EINVAL;
9163
9164         mutex_lock(&event_mutex);
9165         mutex_lock(&trace_types_lock);
9166
9167         ret = -ENODEV;
9168
9169         /* Make sure the trace array exists before destroying it. */
9170         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9171                 if (tr == this_tr) {
9172                         ret = __remove_instance(tr);
9173                         break;
9174                 }
9175         }
9176
9177         mutex_unlock(&trace_types_lock);
9178         mutex_unlock(&event_mutex);
9179
9180         return ret;
9181 }
9182 EXPORT_SYMBOL_GPL(trace_array_destroy);
9183
9184 static int instance_rmdir(const char *name)
9185 {
9186         struct trace_array *tr;
9187         int ret;
9188
9189         mutex_lock(&event_mutex);
9190         mutex_lock(&trace_types_lock);
9191
9192         ret = -ENODEV;
9193         tr = trace_array_find(name);
9194         if (tr)
9195                 ret = __remove_instance(tr);
9196
9197         mutex_unlock(&trace_types_lock);
9198         mutex_unlock(&event_mutex);
9199
9200         return ret;
9201 }
9202
9203 static __init void create_trace_instances(struct dentry *d_tracer)
9204 {
9205         struct trace_array *tr;
9206
9207         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9208                                                          instance_mkdir,
9209                                                          instance_rmdir);
9210         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9211                 return;
9212
9213         mutex_lock(&event_mutex);
9214         mutex_lock(&trace_types_lock);
9215
9216         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9217                 if (!tr->name)
9218                         continue;
9219                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9220                              "Failed to create instance directory\n"))
9221                         break;
9222         }
9223
9224         mutex_unlock(&trace_types_lock);
9225         mutex_unlock(&event_mutex);
9226 }
9227
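/*
 * Create the standard set of tracefs control files for @tr under
 * @d_tracer. Used for both the top level tracing directory and each
 * instance directory.
 */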
9228 static void
9229 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9230 {
9231         struct trace_event_file *file;
9232         int cpu;
9233
9234         trace_create_file("available_tracers", 0444, d_tracer,
9235                         tr, &show_traces_fops);
9236
9237         trace_create_file("current_tracer", 0644, d_tracer,
9238                         tr, &set_tracer_fops);
9239
9240         trace_create_file("tracing_cpumask", 0644, d_tracer,
9241                           tr, &tracing_cpumask_fops);
9242
9243         trace_create_file("trace_options", 0644, d_tracer,
9244                           tr, &tracing_iter_fops);
9245
9246         trace_create_file("trace", 0644, d_tracer,
9247                           tr, &tracing_fops);
9248
9249         trace_create_file("trace_pipe", 0444, d_tracer,
9250                           tr, &tracing_pipe_fops);
9251
9252         trace_create_file("buffer_size_kb", 0644, d_tracer,
9253                           tr, &tracing_entries_fops);
9254
9255         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9256                           tr, &tracing_total_entries_fops);
9257
9258         trace_create_file("free_buffer", 0200, d_tracer,
9259                           tr, &tracing_free_buffer_fops);
9260
9261         trace_create_file("trace_marker", 0220, d_tracer,
9262                           tr, &tracing_mark_fops);
9263
9264         file = __find_event_file(tr, "ftrace", "print");
9265         if (file && file->dir)
9266                 trace_create_file("trigger", 0644, file->dir, file,
9267                                   &event_trigger_fops);
9268         tr->trace_marker_file = file;
9269
9270         trace_create_file("trace_marker_raw", 0220, d_tracer,
9271                           tr, &tracing_mark_raw_fops);
9272
9273         trace_create_file("trace_clock", 0644, d_tracer, tr,
9274                           &trace_clock_fops);
9275
9276         trace_create_file("tracing_on", 0644, d_tracer,
9277                           tr, &rb_simple_fops);
9278
9279         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9280                           &trace_time_stamp_mode_fops);
9281
9282         tr->buffer_percent = 50;
9283
9284         trace_create_file("buffer_percent", 0444, d_tracer,
9285                         tr, &buffer_percent_fops);
9286
9287         create_trace_options_dir(tr);
9288
9289 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9290         trace_create_maxlat_file(tr, d_tracer);
9291 #endif
9292
9293         if (ftrace_create_function_files(tr, d_tracer))
9294                 MEM_FAIL(1, "Could not allocate function filter files");
9295
9296 #ifdef CONFIG_TRACER_SNAPSHOT
9297         trace_create_file("snapshot", 0644, d_tracer,
9298                           tr, &snapshot_fops);
9299 #endif
9300
9301         trace_create_file("error_log", 0644, d_tracer,
9302                           tr, &tracing_err_log_fops);
9303
9304         for_each_tracing_cpu(cpu)
9305                 tracing_init_tracefs_percpu(tr, cpu);
9306
9307         ftrace_init_tracefs(tr, d_tracer);
9308 }
9309
9310 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9311 {
9312         struct vfsmount *mnt;
9313         struct file_system_type *type;
9314
9315         /*
9316          * To maintain backward compatibility for tools that mount
9317          * debugfs to get to the tracing facility, tracefs is automatically
9318          * mounted to the debugfs/tracing directory.
9319          */
9320         type = get_fs_type("tracefs");
9321         if (!type)
9322                 return NULL;
9323         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9324         put_filesystem(type);
9325         if (IS_ERR(mnt))
9326                 return NULL;
9327         mntget(mnt);
9328
9329         return mnt;
9330 }
9331
9332 /**
9333  * tracing_init_dentry - initialize top level trace array
9334  *
9335  * This is called when creating files or directories in the tracing
9336  * directory. It is also called via fs_initcall() by the boot up code
9337  * and returns 0 once the top level tracing directory has been set up.
9338  */
9339 int tracing_init_dentry(void)
9340 {
9341         struct trace_array *tr = &global_trace;
9342
9343         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9344                 pr_warn("Tracing disabled due to lockdown\n");
9345                 return -EPERM;
9346         }
9347
9348         /* The top level trace array uses NULL as parent */
9349         if (tr->dir)
9350                 return 0;
9351
9352         if (WARN_ON(!tracefs_initialized()))
9353                 return -ENODEV;
9354
9355         /*
9356          * As there may still be users that expect the tracing
9357          * files to exist in debugfs/tracing, we must automount
9358          * the tracefs file system there, so older tools still
9359          * work with the newer kernel.
9360          */
9361         tr->dir = debugfs_create_automount("tracing", NULL,
9362                                            trace_automount, NULL);
9363
9364         return 0;
9365 }
9366
9367 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9368 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9369
9370 static struct workqueue_struct *eval_map_wq __initdata;
9371 static struct work_struct eval_map_work __initdata;
9372
9373 static void __init eval_map_work_func(struct work_struct *work)
9374 {
9375         int len;
9376
9377         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9378         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9379 }
9380
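/*
 * Insert the core kernel eval maps from a workqueue rather than directly
 * on the init path; if the workqueue cannot be allocated, fall back to
 * doing the insertion synchronously.
 */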
9381 static int __init trace_eval_init(void)
9382 {
9383         INIT_WORK(&eval_map_work, eval_map_work_func);
9384
9385         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9386         if (!eval_map_wq) {
9387                 pr_err("Unable to allocate eval_map_wq\n");
9388                 /* Fall back to doing the work synchronously */
9389                 eval_map_work_func(&eval_map_work);
9390                 return -ENOMEM;
9391         }
9392
9393         queue_work(eval_map_wq, &eval_map_work);
9394         return 0;
9395 }
9396
9397 static int __init trace_eval_sync(void)
9398 {
9399         /* Make sure the eval map updates are finished */
9400         if (eval_map_wq)
9401                 destroy_workqueue(eval_map_wq);
9402         return 0;
9403 }
9404
9405 late_initcall_sync(trace_eval_sync);
9406
9407
9408 #ifdef CONFIG_MODULES
9409 static void trace_module_add_evals(struct module *mod)
9410 {
9411         if (!mod->num_trace_evals)
9412                 return;
9413
9414         /*
9415          * Modules with bad taint do not have events created; do not
9416          * bother with their eval maps (enums) either.
9417          */
9418         if (trace_module_has_bad_taint(mod))
9419                 return;
9420
9421         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9422 }
9423
9424 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9425 static void trace_module_remove_evals(struct module *mod)
9426 {
9427         union trace_eval_map_item *map;
9428         union trace_eval_map_item **last = &trace_eval_maps;
9429
9430         if (!mod->num_trace_evals)
9431                 return;
9432
9433         mutex_lock(&trace_eval_mutex);
9434
9435         map = trace_eval_maps;
9436
9437         while (map) {
9438                 if (map->head.mod == mod)
9439                         break;
9440                 map = trace_eval_jmp_to_tail(map);
9441                 last = &map->tail.next;
9442                 map = map->tail.next;
9443         }
9444         if (!map)
9445                 goto out;
9446
9447         *last = trace_eval_jmp_to_tail(map)->tail.next;
9448         kfree(map);
9449  out:
9450         mutex_unlock(&trace_eval_mutex);
9451 }
9452 #else
9453 static inline void trace_module_remove_evals(struct module *mod) { }
9454 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9455
9456 static int trace_module_notify(struct notifier_block *self,
9457                                unsigned long val, void *data)
9458 {
9459         struct module *mod = data;
9460
9461         switch (val) {
9462         case MODULE_STATE_COMING:
9463                 trace_module_add_evals(mod);
9464                 break;
9465         case MODULE_STATE_GOING:
9466                 trace_module_remove_evals(mod);
9467                 break;
9468         }
9469
9470         return NOTIFY_OK;
9471 }
9472
9473 static struct notifier_block trace_module_nb = {
9474         .notifier_call = trace_module_notify,
9475         .priority = 0,
9476 };
9477 #endif /* CONFIG_MODULES */
9478
9479 static __init int tracer_init_tracefs(void)
9480 {
9481         int ret;
9482
9483         trace_access_lock_init();
9484
9485         ret = tracing_init_dentry();
9486         if (ret)
9487                 return 0;
9488
9489         event_trace_init();
9490
9491         init_tracer_tracefs(&global_trace, NULL);
9492         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9493
9494         trace_create_file("tracing_thresh", 0644, NULL,
9495                         &global_trace, &tracing_thresh_fops);
9496
9497         trace_create_file("README", 0444, NULL,
9498                         NULL, &tracing_readme_fops);
9499
9500         trace_create_file("saved_cmdlines", 0444, NULL,
9501                         NULL, &tracing_saved_cmdlines_fops);
9502
9503         trace_create_file("saved_cmdlines_size", 0644, NULL,
9504                           NULL, &tracing_saved_cmdlines_size_fops);
9505
9506         trace_create_file("saved_tgids", 0444, NULL,
9507                         NULL, &tracing_saved_tgids_fops);
9508
9509         trace_eval_init();
9510
9511         trace_create_eval_file(NULL);
9512
9513 #ifdef CONFIG_MODULES
9514         register_module_notifier(&trace_module_nb);
9515 #endif
9516
9517 #ifdef CONFIG_DYNAMIC_FTRACE
9518         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9519                         NULL, &tracing_dyn_info_fops);
9520 #endif
9521
9522         create_trace_instances(NULL);
9523
9524         update_tracer_options(&global_trace);
9525
9526         return 0;
9527 }
9528
9529 static int trace_panic_handler(struct notifier_block *this,
9530                                unsigned long event, void *unused)
9531 {
9532         if (ftrace_dump_on_oops)
9533                 ftrace_dump(ftrace_dump_on_oops);
9534         return NOTIFY_OK;
9535 }
9536
9537 static struct notifier_block trace_panic_notifier = {
9538         .notifier_call  = trace_panic_handler,
9539         .next           = NULL,
9540         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9541 };
9542
9543 static int trace_die_handler(struct notifier_block *self,
9544                              unsigned long val,
9545                              void *data)
9546 {
9547         switch (val) {
9548         case DIE_OOPS:
9549                 if (ftrace_dump_on_oops)
9550                         ftrace_dump(ftrace_dump_on_oops);
9551                 break;
9552         default:
9553                 break;
9554         }
9555         return NOTIFY_OK;
9556 }
9557
9558 static struct notifier_block trace_die_notifier = {
9559         .notifier_call = trace_die_handler,
9560         .priority = 200
9561 };
9562
9563 /*
9564  * printk is set to a max of 1024; we really don't need it that big.
9565  * Nothing should be printing 1000 characters anyway.
9566  */
9567 #define TRACE_MAX_PRINT         1000
9568
9569 /*
9570  * Define KERN_TRACE here so that we have one place to modify
9571  * it if we decide to change what log level the ftrace dump
9572  * should be printed at.
9573  */
9574 #define KERN_TRACE              KERN_EMERG
9575
9576 void
9577 trace_printk_seq(struct trace_seq *s)
9578 {
9579         /* Probably should print a warning here. */
9580         if (s->seq.len >= TRACE_MAX_PRINT)
9581                 s->seq.len = TRACE_MAX_PRINT;
9582
9583         /*
9584          * More paranoid code. Although the buffer size is set to
9585          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9586          * an extra layer of protection.
9587          */
9588         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9589                 s->seq.len = s->seq.size - 1;
9590
9591         /* Should already be NUL-terminated, but we are paranoid. */
9592         s->buffer[s->seq.len] = 0;
9593
9594         printk(KERN_TRACE "%s", s->buffer);
9595
9596         trace_seq_init(s);
9597 }
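
/*
 * Usage sketch (illustrative only; example_dump_line() is not a real
 * kernel symbol): a caller fills a trace_seq and hands it to
 * trace_printk_seq(), which truncates it to TRACE_MAX_PRINT and prints
 * it at KERN_TRACE level.  The trace_seq is static because it embeds a
 * PAGE_SIZE buffer, which is too big for the stack.
 */
static void __maybe_unused example_dump_line(void)
{
        static struct trace_seq s;

        trace_seq_init(&s);
        trace_seq_printf(&s, "example dump from CPU %d\n",
                         raw_smp_processor_id());
        trace_printk_seq(&s);
}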
9598
9599 void trace_init_global_iter(struct trace_iterator *iter)
9600 {
9601         iter->tr = &global_trace;
9602         iter->trace = iter->tr->current_trace;
9603         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9604         iter->array_buffer = &global_trace.array_buffer;
9605
9606         if (iter->trace && iter->trace->open)
9607                 iter->trace->open(iter);
9608
9609         /* Annotate start of buffers if we had overruns */
9610         if (ring_buffer_overruns(iter->array_buffer->buffer))
9611                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9612
9613         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9614         if (trace_clocks[iter->tr->clock_id].in_ns)
9615                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9616 }
9617
9618 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9619 {
9620         /* use static because iter can be a bit big for the stack */
9621         static struct trace_iterator iter;
9622         static atomic_t dump_running;
9623         struct trace_array *tr = &global_trace;
9624         unsigned int old_userobj;
9625         unsigned long flags;
9626         int cnt = 0, cpu;
9627
9628         /* Only allow one dump user at a time. */
9629         if (atomic_inc_return(&dump_running) != 1) {
9630                 atomic_dec(&dump_running);
9631                 return;
9632         }
9633
9634         /*
9635          * Always turn off tracing when we dump.
9636          * We don't need to show trace output of what happens
9637          * between multiple crashes.
9638          *
9639          * If the user does a sysrq-z, then they can re-enable
9640          * tracing with echo 1 > tracing_on.
9641          */
9642         tracing_off();
9643
9644         local_irq_save(flags);
9645         printk_nmi_direct_enter();
9646
9647         /* Simulate the iterator */
9648         trace_init_global_iter(&iter);
9649         /* Cannot use kmalloc for iter.temp and iter.fmt (may run in an NMI/oops path) */
9650         iter.temp = static_temp_buf;
9651         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9652         iter.fmt = static_fmt_buf;
9653         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9654
9655         for_each_tracing_cpu(cpu) {
9656                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9657         }
9658
9659         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9660
9661         /* don't look at user memory in panic mode */
9662         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9663
9664         switch (oops_dump_mode) {
9665         case DUMP_ALL:
9666                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9667                 break;
9668         case DUMP_ORIG:
9669                 iter.cpu_file = raw_smp_processor_id();
9670                 break;
9671         case DUMP_NONE:
9672                 goto out_enable;
9673         default:
9674                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9675                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9676         }
9677
9678         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9679
9680         /* Did function tracer already get disabled? */
9681         if (ftrace_is_dead()) {
9682                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9683                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9684         }
9685
9686         /*
9687          * We need to stop all tracing on all CPUs to read
9688          * the next buffer. This is a bit expensive, but is
9689          * not done often. We print all that we can read,
9690          * and then release the locks again.
9691          */
9692
9693         while (!trace_empty(&iter)) {
9694
9695                 if (!cnt)
9696                         printk(KERN_TRACE "---------------------------------\n");
9697
9698                 cnt++;
9699
9700                 trace_iterator_reset(&iter);
9701                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9702
9703                 if (trace_find_next_entry_inc(&iter) != NULL) {
9704                         int ret;
9705
9706                         ret = print_trace_line(&iter);
9707                         if (ret != TRACE_TYPE_NO_CONSUME)
9708                                 trace_consume(&iter);
9709                 }
9710                 touch_nmi_watchdog();
9711
9712                 trace_printk_seq(&iter.seq);
9713         }
9714
9715         if (!cnt)
9716                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9717         else
9718                 printk(KERN_TRACE "---------------------------------\n");
9719
9720  out_enable:
9721         tr->trace_flags |= old_userobj;
9722
9723         for_each_tracing_cpu(cpu) {
9724                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9725         }
9726         atomic_dec(&dump_running);
9727         printk_nmi_direct_exit();
9728         local_irq_restore(flags);
9729 }
9730 EXPORT_SYMBOL_GPL(ftrace_dump);
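
/*
 * Usage sketch (illustrative only; example_report_failure() is not a
 * real kernel symbol): ftrace_dump() is exported, so a module that hits
 * an unrecoverable internal error can dump the ftrace ring buffers
 * before bailing out.  DUMP_ORIG restricts the dump to the current CPU,
 * while DUMP_ALL would dump every CPU's buffer.
 */
static void __maybe_unused example_report_failure(void)
{
        pr_err("example: unexpected state, dumping ftrace buffer\n");
        ftrace_dump(DUMP_ORIG);
}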
9731
9732 #define WRITE_BUFSIZE  4096
9733
9734 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9735                                 size_t count, loff_t *ppos,
9736                                 int (*createfn)(const char *))
9737 {
9738         char *kbuf, *buf, *tmp;
9739         int ret = 0;
9740         size_t done = 0;
9741         size_t size;
9742
9743         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9744         if (!kbuf)
9745                 return -ENOMEM;
9746
9747         while (done < count) {
9748                 size = count - done;
9749
9750                 if (size >= WRITE_BUFSIZE)
9751                         size = WRITE_BUFSIZE - 1;
9752
9753                 if (copy_from_user(kbuf, buffer + done, size)) {
9754                         ret = -EFAULT;
9755                         goto out;
9756                 }
9757                 kbuf[size] = '\0';
9758                 buf = kbuf;
9759                 do {
9760                         tmp = strchr(buf, '\n');
9761                         if (tmp) {
9762                                 *tmp = '\0';
9763                                 size = tmp - buf + 1;
9764                         } else {
9765                                 size = strlen(buf);
9766                                 if (done + size < count) {
9767                                         if (buf != kbuf)
9768                                                 break;
9769                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9770                         pr_warn("Line is too long: should be less than %d\n",
9771                                                 WRITE_BUFSIZE - 2);
9772                                         ret = -EINVAL;
9773                                         goto out;
9774                                 }
9775                         }
9776                         done += size;
9777
9778                         /* Remove comments */
9779                         tmp = strchr(buf, '#');
9780
9781                         if (tmp)
9782                                 *tmp = '\0';
9783
9784                         ret = createfn(buf);
9785                         if (ret)
9786                                 goto out;
9787                         buf += size;
9788
9789                 } while (done < count);
9790         }
9791         ret = done;
9792
9793 out:
9794         kfree(kbuf);
9795
9796         return ret;
9797 }
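
/*
 * Usage sketch (illustrative only; the example_* names are made up):
 * trace_parse_run_command() splits a user-space write into newline
 * terminated commands, strips '#' comments, and invokes the callback
 * once per command, so a control file only needs to supply the
 * per-command handler.
 */
static int __maybe_unused example_create_cmd(const char *cmd)
{
        if (!*cmd)
                return 0;       /* blank or comment-only line */

        pr_info("example: parsed command '%s'\n", cmd);
        return 0;               /* a non-zero return aborts the write */
}

static ssize_t __maybe_unused
example_write(struct file *filp, const char __user *ubuf,
              size_t cnt, loff_t *ppos)
{
        return trace_parse_run_command(filp, ubuf, cnt, ppos,
                                       example_create_cmd);
}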
9798
9799 __init static int tracer_alloc_buffers(void)
9800 {
9801         int ring_buf_size;
9802         int ret = -ENOMEM;
9803
9805         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9806                 pr_warn("Tracing disabled due to lockdown\n");
9807                 return -EPERM;
9808         }
9809
9810         /*
9811          * Make sure we don't accidentally add more trace options
9812          * than we have bits for.
9813          */
9814         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9815
9816         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9817                 goto out;
9818
9819         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9820                 goto out_free_buffer_mask;
9821
9822         /* Only allocate trace_printk buffers if a trace_printk exists */
9823         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9824                 /* Must be called before global_trace.buffer is allocated */
9825                 trace_printk_init_buffers();
9826
9827         /* To save memory, keep the ring buffer size at its minimum */
9828         if (ring_buffer_expanded)
9829                 ring_buf_size = trace_buf_size;
9830         else
9831                 ring_buf_size = 1;
9832
9833         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9834         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9835
9836         raw_spin_lock_init(&global_trace.start_lock);
9837
9838         /*
9839          * The prepare callback allocates some memory for the ring buffer. We
9840          * don't free the buffer if the CPU goes down. If we were to free
9841          * the buffer, then the user would lose any trace that was in the
9842          * buffer. The memory will be removed once the "instance" is removed.
9843          */
9844         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9845                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9846                                       NULL);
9847         if (ret < 0)
9848                 goto out_free_cpumask;
9849         /* Used for event triggers */
9850         ret = -ENOMEM;
9851         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9852         if (!temp_buffer)
9853                 goto out_rm_hp_state;
9854
9855         if (trace_create_savedcmd() < 0)
9856                 goto out_free_temp_buffer;
9857
9858         /* TODO: make the number of buffers hot pluggable with CPUs */
9859         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9860                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9861                 goto out_free_savedcmd;
9862         }
9863
9864         if (global_trace.buffer_disabled)
9865                 tracing_off();
9866
9867         if (trace_boot_clock) {
9868                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9869                 if (ret < 0)
9870                         pr_warn("Trace clock %s not defined, going back to default\n",
9871                                 trace_boot_clock);
9872         }
9873
9874         /*
9875          * register_tracer() might reference current_trace, so it
9876          * needs to be set before we register anything. This is
9877          * just a bootstrap of current_trace anyway.
9878          */
9879         global_trace.current_trace = &nop_trace;
9880
9881         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9882
9883         ftrace_init_global_array_ops(&global_trace);
9884
9885         init_trace_flags_index(&global_trace);
9886
9887         register_tracer(&nop_trace);
9888
9889         /* Function tracing may start here (via kernel command line) */
9890         init_function_trace();
9891
9892         /* All seems OK, enable tracing */
9893         tracing_disabled = 0;
9894
9895         atomic_notifier_chain_register(&panic_notifier_list,
9896                                        &trace_panic_notifier);
9897
9898         register_die_notifier(&trace_die_notifier);
9899
9900         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9901
9902         INIT_LIST_HEAD(&global_trace.systems);
9903         INIT_LIST_HEAD(&global_trace.events);
9904         INIT_LIST_HEAD(&global_trace.hist_vars);
9905         INIT_LIST_HEAD(&global_trace.err_log);
9906         list_add(&global_trace.list, &ftrace_trace_arrays);
9907
9908         apply_trace_boot_options();
9909
9910         register_snapshot_cmd();
9911
9912         test_can_verify();
9913
9914         return 0;
9915
9916 out_free_savedcmd:
9917         free_saved_cmdlines_buffer(savedcmd);
9918 out_free_temp_buffer:
9919         ring_buffer_free(temp_buffer);
9920 out_rm_hp_state:
9921         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9922 out_free_cpumask:
9923         free_cpumask_var(global_trace.tracing_cpumask);
9924 out_free_buffer_mask:
9925         free_cpumask_var(tracing_buffer_mask);
9926 out:
9927         return ret;
9928 }
9929
9930 void __init early_trace_init(void)
9931 {
9932         if (tracepoint_printk) {
9933                 tracepoint_print_iter =
9934                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9935                 if (MEM_FAIL(!tracepoint_print_iter,
9936                              "Failed to allocate trace iterator\n"))
9937                         tracepoint_printk = 0;
9938                 else
9939                         static_key_enable(&tracepoint_printk_key.key);
9940         }
9941         tracer_alloc_buffers();
9942 }
9943
9944 void __init trace_init(void)
9945 {
9946         trace_event_init();
9947 }
9948
9949 __init static int clear_boot_tracer(void)
9950 {
9951         /*
9952          * The default bootup tracer name points into an init
9953          * section buffer.  This function is called as a late
9954          * initcall.  If we did not find the boot tracer, then
9955          * clear it out, to prevent later registration from
9956          * accessing the buffer that is about to be freed.
9957          */
9958         if (!default_bootup_tracer)
9959                 return 0;
9960
9961         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9962                default_bootup_tracer);
9963         default_bootup_tracer = NULL;
9964
9965         return 0;
9966 }
9967
9968 fs_initcall(tracer_init_tracefs);
9969 late_initcall_sync(clear_boot_tracer);
9970
9971 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9972 __init static int tracing_set_default_clock(void)
9973 {
9974         /* sched_clock_stable() is determined in late_initcall */
9975         if (!trace_boot_clock && !sched_clock_stable()) {
9976                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9977                         pr_warn("Cannot set tracing clock due to lockdown\n");
9978                         return -EPERM;
9979                 }
9980
9981                 printk(KERN_WARNING
9982                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9983                        "If you want to keep using the local clock, then add:\n"
9984                        "  \"trace_clock=local\"\n"
9985                        "on the kernel command line\n");
9986                 tracing_set_clock(&global_trace, "global");
9987         }
9988
9989         return 0;
9990 }
9991 late_initcall_sync(tracing_set_default_clock);
9992 #endif