kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest peeks into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer (such as trace_printk()) could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and is cleared to zero only when the
112  * initialization of the tracer succeeds; nothing else ever
113  * clears it.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL, as it must be different
152          * from "mod" or "eval_string".
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
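
/*
 * Illustrative example (not part of the original file): the boot parameters
 * parsed by the __setup() handlers above could be combined on a kernel
 * command line roughly like this.  The particular option values shown are
 * only an assumed sample configuration:
 *
 *	ftrace=function_graph trace_options=sym-addr trace_clock=global
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning alloc_snapshot tp_printk
 *
 * "ftrace=" selects the boot-up tracer, "ftrace_dump_on_oops=orig_cpu" dumps
 * only the oops'ing CPU's buffer, and "tp_printk" pipes trace events to
 * printk as set up by set_tracepoint_printk() above.
 */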
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
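
/*
 * Worked example (illustrative, not part of the original file): ns2usecs()
 * rounds to the nearest microsecond, so ns2usecs(1499) == 1 while
 * ns2usecs(1500) == 2, because 500 is added before the divide by 1000.
 */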
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are entering export into the list but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included into the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
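
/*
 * Illustrative sketch (not part of the original file) of how a module might
 * hook into the export list managed above.  The callback name and what it
 * does with the data are assumptions; only register_ftrace_export(),
 * unregister_ftrace_export() and the TRACE_EXPORT_* flags come from this
 * file and <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// e.g. copy the raw trace entry to a device or firmware
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */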
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr : pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
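
/*
 * Illustrative sketch (not part of the original file): the three helpers
 * above are meant to back the seq_file operations of a pid-filter file.
 * How pid_list is fetched, and the p_stop() callback, are assumptions here;
 * real users look it up from their own trace_array under the proper locks.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = ...;	// from the owning file
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = ...;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,		// release locks taken in p_start
 *		.show	= trace_pid_show,
 *	};
 */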
667
668 /* 127 chars + terminating '\0' (128 bytes) should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always create a new array. The write is an all-or-nothing
689          * operation: new pids from the user are added to a fresh list,
690          * and if the operation fails, the current list is not
691          * modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled", which is meant for fast paths such
790  * as the irqsoff tracer, but it may be inaccurate due to races. If
791  * you need to know the accurate state, use tracing_is_on(), which is
792  * a little slower but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low value of 16384 entries:
811  * if the dump on oops happens, it is much appreciated not to have
812  * to wait for all that output. In any case, this is configurable
813  * at both boot time and run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * Serialize access to the ring buffer.
829  *
830  * The ring buffer serializes readers, but that is only low-level protection.
831  * The validity of events returned by ring_buffer_peek() and friends
832  * is not protected by the ring buffer itself.
833  *
834  * The content of events may become garbage if we allow other processes
835  * to consume these events concurrently:
836  *   A) the page holding the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the event producer.
839  *   B) the page holding the consumed events may become a page for
840  *      splice_read, and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different per-cpu
843  * ring buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
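
/*
 * Illustrative usage sketch (not part of the original file): a reader of a
 * single per-cpu buffer takes the per-cpu side of the lock, while an
 * operation that spans every CPU passes RING_BUFFER_ALL_CPUS and takes the
 * rwsem for writing.  "iter" here is an assumed struct trace_iterator.
 *
 *	trace_access_lock(iter->cpu_file);
 *	// consume or peek events for iter->cpu_file only
 *	trace_access_unlock(iter->cpu_file);
 *
 *	trace_access_lock(RING_BUFFER_ALL_CPUS);
 *	// walk every per-cpu buffer exclusively
 *	trace_access_unlock(RING_BUFFER_ALL_CPUS);
 */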
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races of where it gets disabled but we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
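
/*
 * Illustrative example (not part of the original file): callers normally
 * reach __trace_puts()/__trace_bputs() through the trace_puts() macro
 * (declared with the trace_printk() helpers), which picks the bputs variant
 * for string literals so only the pointer has to be recorded:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The message shown is only an assumed example.
 */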
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, the snapshot cannot be used while the tracer itself is using it */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, this will stop tracing,
1146  * basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it, because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) that
1231          * we want to preserve.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
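
/*
 * Illustrative sketch (not part of the original file): a typical in-kernel
 * user allocates the snapshot buffer once from a context that may sleep and
 * then takes snapshots wherever the interesting condition hits.  The
 * condition name below is an assumption.
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		if (something_went_wrong)
 *			tracing_snapshot();	// swap live buffer into the spare
 *	}
 *
 * tracing_snapshot_alloc() combines both steps when the call site is
 * allowed to sleep.
 */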
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
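
/*
 * Illustrative sketch (not part of the original file): a conditional
 * snapshot user supplies an update callback that decides, for each call to
 * tracing_snapshot_cond(), whether the swap should really happen.  The
 * threshold structure and measured_latency() helper are assumptions.
 *
 *	struct my_cond {
 *		u64 threshold;
 *	};
 *
 *	static struct my_cond my_cond_data = { .threshold = 500000 };
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return measured_latency() > c->threshold;	// snapshot only then
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */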
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races of where it gets disabled but we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show the real state of the ring buffer
1459  * @tr: the trace array whose ring buffer state is queried
1460  *
1461  * Shows the real state of the ring buffer, i.e. whether it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * If the parser is not finished with the last write,
1607          * continue reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got finished input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
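/*
 * Illustrative sketch only (compiled out): the pattern a tracefs ->write()
 * handler typically uses to pull one space-separated token per call. The
 * function name below is made up for the example.
 */
#if 0
static ssize_t example_token_write(struct trace_parser *parser,
				   const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	int read;

	read = trace_get_user(parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(parser)) {
		/* parser->buffer now holds one NUL-terminated token */
	}

	return read;
}
#endif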
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure, so that the maximum trace is permanently saved
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* record this task's comm */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: trace array whose buffers are snapshotted into its max buffer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
1836
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: trace array whose CPU buffer is swapped with the max buffer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record, but we reset
1868                  * the max trace buffer (no one writes directly to it)
1869                  * and flag that it failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static, they should be filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftests yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * let's make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /* allocate a dummy tracer_flags */
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* Disable other selftests, since this will break them. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
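/*
 * Illustrative sketch only (compiled out): the minimum a plugin tracer
 * provides before calling register_tracer() from an initcall. The "example"
 * name and the function names below are made up.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int register_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(register_example_tracer);
#endif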
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 static int *tgid_map;
2188
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194         unsigned *map_cmdline_to_pid;
2195         unsigned cmdline_num;
2196         int cmdline_idx;
2197         char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200
2201 /* temporarily disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215                                     struct saved_cmdlines_buffer *s)
2216 {
2217         s->map_cmdline_to_pid = kmalloc_array(val,
2218                                               sizeof(*s->map_cmdline_to_pid),
2219                                               GFP_KERNEL);
2220         if (!s->map_cmdline_to_pid)
2221                 return -ENOMEM;
2222
2223         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224         if (!s->saved_cmdlines) {
2225                 kfree(s->map_cmdline_to_pid);
2226                 return -ENOMEM;
2227         }
2228
2229         s->cmdline_idx = 0;
2230         s->cmdline_num = val;
2231         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232                sizeof(s->map_pid_to_cmdline));
2233         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234                val * sizeof(*s->map_cmdline_to_pid));
2235
2236         return 0;
2237 }
2238
2239 static int trace_create_savedcmd(void)
2240 {
2241         int ret;
2242
2243         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244         if (!savedcmd)
2245                 return -ENOMEM;
2246
2247         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248         if (ret < 0) {
2249                 kfree(savedcmd);
2250                 savedcmd = NULL;
2251                 return -ENOMEM;
2252         }
2253
2254         return 0;
2255 }
2256
2257 int is_tracing_stopped(void)
2258 {
2259         return global_trace.stop_count;
2260 }
2261
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270         struct trace_buffer *buffer;
2271         unsigned long flags;
2272
2273         if (tracing_disabled)
2274                 return;
2275
2276         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277         if (--global_trace.stop_count) {
2278                 if (global_trace.stop_count < 0) {
2279                         /* Someone screwed up their debugging */
2280                         WARN_ON_ONCE(1);
2281                         global_trace.stop_count = 0;
2282                 }
2283                 goto out;
2284         }
2285
2286         /* Prevent the buffers from switching */
2287         arch_spin_lock(&global_trace.max_lock);
2288
2289         buffer = global_trace.array_buffer.buffer;
2290         if (buffer)
2291                 ring_buffer_record_enable(buffer);
2292
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294         buffer = global_trace.max_buffer.buffer;
2295         if (buffer)
2296                 ring_buffer_record_enable(buffer);
2297 #endif
2298
2299         arch_spin_unlock(&global_trace.max_lock);
2300
2301  out:
2302         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307         struct trace_buffer *buffer;
2308         unsigned long flags;
2309
2310         if (tracing_disabled)
2311                 return;
2312
2313         /* If global, we need to also start the max tracer */
2314         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315                 return tracing_start();
2316
2317         raw_spin_lock_irqsave(&tr->start_lock, flags);
2318
2319         if (--tr->stop_count) {
2320                 if (tr->stop_count < 0) {
2321                         /* Someone screwed up their debugging */
2322                         WARN_ON_ONCE(1);
2323                         tr->stop_count = 0;
2324                 }
2325                 goto out;
2326         }
2327
2328         buffer = tr->array_buffer.buffer;
2329         if (buffer)
2330                 ring_buffer_record_enable(buffer);
2331
2332  out:
2333         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348         if (global_trace.stop_count++)
2349                 goto out;
2350
2351         /* Prevent the buffers from switching */
2352         arch_spin_lock(&global_trace.max_lock);
2353
2354         buffer = global_trace.array_buffer.buffer;
2355         if (buffer)
2356                 ring_buffer_record_disable(buffer);
2357
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359         buffer = global_trace.max_buffer.buffer;
2360         if (buffer)
2361                 ring_buffer_record_disable(buffer);
2362 #endif
2363
2364         arch_spin_unlock(&global_trace.max_lock);
2365
2366  out:
2367         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
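/*
 * Illustrative sketch only (compiled out): tracing_stop()/tracing_start()
 * are meant to bracket a region where nothing new should be recorded, for
 * example while inspecting the buffers. The function name is made up.
 */
#if 0
static void example_pause_tracing(void)
{
	tracing_stop();

	/* ... read or dump the trace buffers here ... */

	tracing_start();
}
#endif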
2369
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372         struct trace_buffer *buffer;
2373         unsigned long flags;
2374
2375         /* If global, we need to also stop the max tracer */
2376         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377                 return tracing_stop();
2378
2379         raw_spin_lock_irqsave(&tr->start_lock, flags);
2380         if (tr->stop_count++)
2381                 goto out;
2382
2383         buffer = tr->array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387  out:
2388         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393         unsigned tpid, idx;
2394
2395         /* treat recording of idle task as a success */
2396         if (!tsk->pid)
2397                 return 1;
2398
2399         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400
2401         /*
2402          * It's not the end of the world if we don't get
2403          * the lock, but we also don't want to spin
2404          * nor do we want to disable interrupts,
2405          * so if we miss here, then better luck next time.
2406          */
2407         if (!arch_spin_trylock(&trace_cmdline_lock))
2408                 return 0;
2409
2410         idx = savedcmd->map_pid_to_cmdline[tpid];
2411         if (idx == NO_CMDLINE_MAP) {
2412                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413
2414                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2415                 savedcmd->cmdline_idx = idx;
2416         }
2417
2418         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419         set_cmdline(idx, tsk->comm);
2420
2421         arch_spin_unlock(&trace_cmdline_lock);
2422
2423         return 1;
2424 }
2425
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428         unsigned map;
2429         int tpid;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         tpid = pid & (PID_MAX_DEFAULT - 1);
2442         map = savedcmd->map_pid_to_cmdline[tpid];
2443         if (map != NO_CMDLINE_MAP) {
2444                 tpid = savedcmd->map_cmdline_to_pid[map];
2445                 if (tpid == pid) {
2446                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447                         return;
2448                 }
2449         }
2450         strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry again soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry again soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record a specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
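/*
 * Illustrative sketch only (compiled out): an event's output callback
 * typically ends with trace_handle_return() so that an overflowed trace_seq
 * is reported as a partial line. The callback name is made up.
 */
#if 0
static enum print_line_t example_event_print(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}
#endif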
2578
2579 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2580 {
2581         unsigned int trace_flags = irqs_status;
2582         unsigned int pc;
2583
2584         pc = preempt_count();
2585
2586         if (pc & NMI_MASK)
2587                 trace_flags |= TRACE_FLAG_NMI;
2588         if (pc & HARDIRQ_MASK)
2589                 trace_flags |= TRACE_FLAG_HARDIRQ;
2590         if (in_serving_softirq())
2591                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2592
2593         if (tif_need_resched())
2594                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2595         if (test_preempt_need_resched())
2596                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2597         return (trace_flags << 16) | (pc & 0xff);
2598 }
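/*
 * Illustrative sketch only (compiled out): the returned trace_ctx packs the
 * trace flags in the upper 16 bits and the low byte of the preempt count in
 * the bottom byte, so a consumer splits it as shown. The function name is
 * made up.
 */
#if 0
static void example_unpack_trace_ctx(unsigned int trace_ctx)
{
	unsigned int flags = trace_ctx >> 16;
	unsigned int preempt_count = trace_ctx & 0xff;

	if (flags & TRACE_FLAG_HARDIRQ)
		pr_debug("hard irq context, preempt count %u\n", preempt_count);
}
#endif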
2599
2600 struct ring_buffer_event *
2601 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2602                           int type,
2603                           unsigned long len,
2604                           unsigned int trace_ctx)
2605 {
2606         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2607 }
2608
2609 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2610 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2611 static int trace_buffered_event_ref;
2612
2613 /**
2614  * trace_buffered_event_enable - enable buffering events
2615  *
2616  * When events are being filtered, it is quicker to use a temporary
2617  * buffer to write the event data into if there's a likely chance
2618  * that it will not be committed. Discarding an event from the ring
2619  * buffer is not as fast as committing it, and is much slower than
2620  * copying the data and then committing the copy.
2621  *
2622  * When an event is to be filtered, allocate per cpu buffers to
2623  * write the event data into, and if the event is filtered and discarded
2624  * it is simply dropped; otherwise, the entire event is committed
2625  * in one shot.
2626  */
2627 void trace_buffered_event_enable(void)
2628 {
2629         struct ring_buffer_event *event;
2630         struct page *page;
2631         int cpu;
2632
2633         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2634
2635         if (trace_buffered_event_ref++)
2636                 return;
2637
2638         for_each_tracing_cpu(cpu) {
2639                 page = alloc_pages_node(cpu_to_node(cpu),
2640                                         GFP_KERNEL | __GFP_NORETRY, 0);
2641                 if (!page)
2642                         goto failed;
2643
2644                 event = page_address(page);
2645                 memset(event, 0, sizeof(*event));
2646
2647                 per_cpu(trace_buffered_event, cpu) = event;
2648
2649                 preempt_disable();
2650                 if (cpu == smp_processor_id() &&
2651                     __this_cpu_read(trace_buffered_event) !=
2652                     per_cpu(trace_buffered_event, cpu))
2653                         WARN_ON_ONCE(1);
2654                 preempt_enable();
2655         }
2656
2657         return;
2658  failed:
2659         trace_buffered_event_disable();
2660 }
2661
2662 static void enable_trace_buffered_event(void *data)
2663 {
2664         /* Probably not needed, but do it anyway */
2665         smp_rmb();
2666         this_cpu_dec(trace_buffered_event_cnt);
2667 }
2668
2669 static void disable_trace_buffered_event(void *data)
2670 {
2671         this_cpu_inc(trace_buffered_event_cnt);
2672 }
2673
2674 /**
2675  * trace_buffered_event_disable - disable buffering events
2676  *
2677  * When a filter is removed, it is faster to not use the buffered
2678  * events, and to commit directly into the ring buffer. Free up
2679  * the temp buffers when there are no more users. This requires
2680  * special synchronization with current events.
2681  */
2682 void trace_buffered_event_disable(void)
2683 {
2684         int cpu;
2685
2686         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2687
2688         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2689                 return;
2690
2691         if (--trace_buffered_event_ref)
2692                 return;
2693
2694         preempt_disable();
2695         /* For each CPU, set the buffer as used. */
2696         smp_call_function_many(tracing_buffer_mask,
2697                                disable_trace_buffered_event, NULL, 1);
2698         preempt_enable();
2699
2700         /* Wait for all current users to finish */
2701         synchronize_rcu();
2702
2703         for_each_tracing_cpu(cpu) {
2704                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2705                 per_cpu(trace_buffered_event, cpu) = NULL;
2706         }
2707         /*
2708          * Make sure trace_buffered_event is NULL before clearing
2709          * trace_buffered_event_cnt.
2710          */
2711         smp_wmb();
2712
2713         preempt_disable();
2714         /* Do the work on each cpu */
2715         smp_call_function_many(tracing_buffer_mask,
2716                                enable_trace_buffered_event, NULL, 1);
2717         preempt_enable();
2718 }
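/*
 * Illustrative sketch only (compiled out): both enable and disable expect
 * event_mutex to be held, matching the WARN_ON_ONCE() checks above. The
 * function name is made up.
 */
#if 0
static void example_toggle_buffered_events(bool on)
{
	mutex_lock(&event_mutex);
	if (on)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif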
2719
2720 static struct trace_buffer *temp_buffer;
2721
2722 struct ring_buffer_event *
2723 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2724                           struct trace_event_file *trace_file,
2725                           int type, unsigned long len,
2726                           unsigned int trace_ctx)
2727 {
2728         struct ring_buffer_event *entry;
2729         struct trace_array *tr = trace_file->tr;
2730         int val;
2731
2732         *current_rb = tr->array_buffer.buffer;
2733
2734         if (!tr->no_filter_buffering_ref &&
2735             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2736             (entry = this_cpu_read(trace_buffered_event))) {
2737                 /* Try to use the per cpu buffer first */
2738                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2739                 if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2740                         trace_event_setup(entry, type, trace_ctx);
2741                         entry->array[0] = len;
2742                         return entry;
2743                 }
2744                 this_cpu_dec(trace_buffered_event_cnt);
2745         }
2746
2747         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2748                                             trace_ctx);
2749         /*
2750          * If tracing is off, but we have triggers enabled
2751          * we still need to look at the event data. Use the temp_buffer
2752          * to store the trace event for the trigger to use. It's recursion
2753          * safe and will not be recorded anywhere.
2754          */
2755         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2756                 *current_rb = temp_buffer;
2757                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2758                                                     trace_ctx);
2759         }
2760         return entry;
2761 }
2762 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2763
2764 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2765 static DEFINE_MUTEX(tracepoint_printk_mutex);
2766
2767 static void output_printk(struct trace_event_buffer *fbuffer)
2768 {
2769         struct trace_event_call *event_call;
2770         struct trace_event_file *file;
2771         struct trace_event *event;
2772         unsigned long flags;
2773         struct trace_iterator *iter = tracepoint_print_iter;
2774
2775         /* We should never get here if iter is NULL */
2776         if (WARN_ON_ONCE(!iter))
2777                 return;
2778
2779         event_call = fbuffer->trace_file->event_call;
2780         if (!event_call || !event_call->event.funcs ||
2781             !event_call->event.funcs->trace)
2782                 return;
2783
2784         file = fbuffer->trace_file;
2785         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2786             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2787              !filter_match_preds(file->filter, fbuffer->entry)))
2788                 return;
2789
2790         event = &fbuffer->trace_file->event_call->event;
2791
2792         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2793         trace_seq_init(&iter->seq);
2794         iter->ent = fbuffer->entry;
2795         event_call->event.funcs->trace(iter, 0, event);
2796         trace_seq_putc(&iter->seq, 0);
2797         printk("%s", iter->seq.buffer);
2798
2799         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2800 }
2801
2802 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2803                              void *buffer, size_t *lenp,
2804                              loff_t *ppos)
2805 {
2806         int save_tracepoint_printk;
2807         int ret;
2808
2809         mutex_lock(&tracepoint_printk_mutex);
2810         save_tracepoint_printk = tracepoint_printk;
2811
2812         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2813
2814         /*
2815          * This will force exiting early, as tracepoint_printk
2816          * is always zero when tracepoint_print_iter is not allocated.
2817          */
2818         if (!tracepoint_print_iter)
2819                 tracepoint_printk = 0;
2820
2821         if (save_tracepoint_printk == tracepoint_printk)
2822                 goto out;
2823
2824         if (tracepoint_printk)
2825                 static_key_enable(&tracepoint_printk_key.key);
2826         else
2827                 static_key_disable(&tracepoint_printk_key.key);
2828
2829  out:
2830         mutex_unlock(&tracepoint_printk_mutex);
2831
2832         return ret;
2833 }
2834
2835 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2836 {
2837         if (static_key_false(&tracepoint_printk_key.key))
2838                 output_printk(fbuffer);
2839
2840         if (static_branch_unlikely(&trace_event_exports_enabled))
2841                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2842         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2843                                     fbuffer->event, fbuffer->entry,
2844                                     fbuffer->trace_ctx, fbuffer->regs);
2845 }
2846 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2847
2848 /*
2849  * Skip 3:
2850  *
2851  *   trace_buffer_unlock_commit_regs()
2852  *   trace_event_buffer_commit()
2853  *   trace_event_raw_event_xxx()
2854  */
2855 # define STACK_SKIP 3
2856
2857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2858                                      struct trace_buffer *buffer,
2859                                      struct ring_buffer_event *event,
2860                                      unsigned int trace_ctx,
2861                                      struct pt_regs *regs)
2862 {
2863         __buffer_unlock_commit(buffer, event);
2864
2865         /*
2866          * If regs is not set, then skip the necessary functions.
2867          * Note, we can still get here via blktrace, wakeup tracer
2868          * and mmiotrace, but that's ok if they lose a function or
2869          * two. They are not that meaningful.
2870          */
2871         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2872         ftrace_trace_userstack(tr, buffer, trace_ctx);
2873 }
2874
2875 /*
2876  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2877  */
2878 void
2879 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2880                                    struct ring_buffer_event *event)
2881 {
2882         __buffer_unlock_commit(buffer, event);
2883 }
2884
2885 void
2886 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2887                parent_ip, unsigned int trace_ctx)
2888 {
2889         struct trace_event_call *call = &event_function;
2890         struct trace_buffer *buffer = tr->array_buffer.buffer;
2891         struct ring_buffer_event *event;
2892         struct ftrace_entry *entry;
2893
2894         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2895                                             trace_ctx);
2896         if (!event)
2897                 return;
2898         entry   = ring_buffer_event_data(event);
2899         entry->ip                       = ip;
2900         entry->parent_ip                = parent_ip;
2901
2902         if (!call_filter_check_discard(call, entry, buffer, event)) {
2903                 if (static_branch_unlikely(&trace_function_exports_enabled))
2904                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2905                 __buffer_unlock_commit(buffer, event);
2906         }
2907 }
2908
2909 #ifdef CONFIG_STACKTRACE
2910
2911 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2912 #define FTRACE_KSTACK_NESTING   4
2913
2914 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2915
2916 struct ftrace_stack {
2917         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2918 };
2919
2920
2921 struct ftrace_stacks {
2922         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2923 };
2924
2925 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2926 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2927
2928 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2929                                  unsigned int trace_ctx,
2930                                  int skip, struct pt_regs *regs)
2931 {
2932         struct trace_event_call *call = &event_kernel_stack;
2933         struct ring_buffer_event *event;
2934         unsigned int size, nr_entries;
2935         struct ftrace_stack *fstack;
2936         struct stack_entry *entry;
2937         int stackidx;
2938
2939         /*
2940          * Add one, for this function and the call to stack_trace_save().
2941          * If regs is set, then these functions will not be in the way.
2942          */
2943 #ifndef CONFIG_UNWINDER_ORC
2944         if (!regs)
2945                 skip++;
2946 #endif
2947
2948         preempt_disable_notrace();
2949
2950         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2951
2952         /* This should never happen. If it does, yell once and skip */
2953         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2954                 goto out;
2955
2956         /*
2957          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2958          * interrupt will either see the value pre increment or post
2959          * increment. If the interrupt happens pre increment it will have
2960          * restored the counter when it returns.  We just need a barrier to
2961          * keep gcc from moving things around.
2962          */
2963         barrier();
2964
2965         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2966         size = ARRAY_SIZE(fstack->calls);
2967
2968         if (regs) {
2969                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2970                                                    size, skip);
2971         } else {
2972                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2973         }
2974
2975         size = nr_entries * sizeof(unsigned long);
2976         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2977                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
2978                                     trace_ctx);
2979         if (!event)
2980                 goto out;
2981         entry = ring_buffer_event_data(event);
2982
2983         memcpy(&entry->caller, fstack->calls, size);
2984         entry->size = nr_entries;
2985
2986         if (!call_filter_check_discard(call, entry, buffer, event))
2987                 __buffer_unlock_commit(buffer, event);
2988
2989  out:
2990         /* Again, don't let gcc optimize things here */
2991         barrier();
2992         __this_cpu_dec(ftrace_stack_reserve);
2993         preempt_enable_notrace();
2994
2995 }
2996
2997 static inline void ftrace_trace_stack(struct trace_array *tr,
2998                                       struct trace_buffer *buffer,
2999                                       unsigned int trace_ctx,
3000                                       int skip, struct pt_regs *regs)
3001 {
3002         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3003                 return;
3004
3005         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3006 }
3007
3008 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3009                    int skip)
3010 {
3011         struct trace_buffer *buffer = tr->array_buffer.buffer;
3012
3013         if (rcu_is_watching()) {
3014                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3015                 return;
3016         }
3017
3018         /*
3019          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3020          * but if the above rcu_is_watching() failed, then the NMI
3021          * triggered someplace critical, and rcu_irq_enter() should
3022          * not be called from NMI.
3023          */
3024         if (unlikely(in_nmi()))
3025                 return;
3026
3027         rcu_irq_enter_irqson();
3028         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3029         rcu_irq_exit_irqson();
3030 }
3031
3032 /**
3033  * trace_dump_stack - record a stack back trace in the trace buffer
3034  * @skip: Number of functions to skip (helper handlers)
3035  */
3036 void trace_dump_stack(int skip)
3037 {
3038         if (tracing_disabled || tracing_selftest_running)
3039                 return;
3040
3041 #ifndef CONFIG_UNWINDER_ORC
3042         /* Skip 1 to skip this function. */
3043         skip++;
3044 #endif
3045         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3046                              tracing_gen_ctx(), skip, NULL);
3047 }
3048 EXPORT_SYMBOL_GPL(trace_dump_stack);
3049
3050 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3051 static DEFINE_PER_CPU(int, user_stack_count);
3052
3053 static void
3054 ftrace_trace_userstack(struct trace_array *tr,
3055                        struct trace_buffer *buffer, unsigned int trace_ctx)
3056 {
3057         struct trace_event_call *call = &event_user_stack;
3058         struct ring_buffer_event *event;
3059         struct userstack_entry *entry;
3060
3061         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3062                 return;
3063
3064         /*
3065          * NMIs cannot handle page faults, even with fixups.
3066          * Saving the user stack can (and often does) fault.
3067          */
3068         if (unlikely(in_nmi()))
3069                 return;
3070
3071         /*
3072          * prevent recursion, since the user stack tracing may
3073          * trigger other kernel events.
3074          */
3075         preempt_disable();
3076         if (__this_cpu_read(user_stack_count))
3077                 goto out;
3078
3079         __this_cpu_inc(user_stack_count);
3080
3081         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3082                                             sizeof(*entry), trace_ctx);
3083         if (!event)
3084                 goto out_drop_count;
3085         entry   = ring_buffer_event_data(event);
3086
3087         entry->tgid             = current->tgid;
3088         memset(&entry->caller, 0, sizeof(entry->caller));
3089
3090         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3091         if (!call_filter_check_discard(call, entry, buffer, event))
3092                 __buffer_unlock_commit(buffer, event);
3093
3094  out_drop_count:
3095         __this_cpu_dec(user_stack_count);
3096  out:
3097         preempt_enable();
3098 }
3099 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3100 static void ftrace_trace_userstack(struct trace_array *tr,
3101                                    struct trace_buffer *buffer,
3102                                    unsigned int trace_ctx)
3103 {
3104 }
3105 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3106
3107 #endif /* CONFIG_STACKTRACE */
3108
3109 static inline void
3110 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3111                           unsigned long long delta)
3112 {
3113         entry->bottom_delta_ts = delta & U32_MAX;
3114         entry->top_delta_ts = (delta >> 32);
3115 }
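/*
 * Illustrative sketch only (compiled out): the 64-bit delta is split across
 * the top_delta_ts and bottom_delta_ts fields, so the read side reassembles
 * it as shown. The function name is made up.
 */
#if 0
static u64 example_func_repeats_delta(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}
#endif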
3116
3117 void trace_last_func_repeats(struct trace_array *tr,
3118                              struct trace_func_repeats *last_info,
3119                              unsigned int trace_ctx)
3120 {
3121         struct trace_buffer *buffer = tr->array_buffer.buffer;
3122         struct func_repeats_entry *entry;
3123         struct ring_buffer_event *event;
3124         u64 delta;
3125
3126         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3127                                             sizeof(*entry), trace_ctx);
3128         if (!event)
3129                 return;
3130
3131         delta = ring_buffer_event_time_stamp(buffer, event) -
3132                 last_info->ts_last_call;
3133
3134         entry = ring_buffer_event_data(event);
3135         entry->ip = last_info->ip;
3136         entry->parent_ip = last_info->parent_ip;
3137         entry->count = last_info->count;
3138         func_repeats_set_delta_ts(entry, delta);
3139
3140         __buffer_unlock_commit(buffer, event);
3141 }
3142
3143 /* created for use with alloc_percpu */
3144 struct trace_buffer_struct {
3145         int nesting;
3146         char buffer[4][TRACE_BUF_SIZE];
3147 };
3148
3149 static struct trace_buffer_struct *trace_percpu_buffer;
3150
3151 /*
3152  * This allows for lockless recording.  If we're nested too deeply, then
3153  * this returns NULL.
3154  */
3155 static char *get_trace_buf(void)
3156 {
3157         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3158
3159         if (!buffer || buffer->nesting >= 4)
3160                 return NULL;
3161
3162         buffer->nesting++;
3163
3164         /* Interrupts must see nesting incremented before we use the buffer */
3165         barrier();
3166         return &buffer->buffer[buffer->nesting - 1][0];
3167 }
3168
3169 static void put_trace_buf(void)
3170 {
3171         /* Don't let the decrement of nesting leak before this */
3172         barrier();
3173         this_cpu_dec(trace_percpu_buffer->nesting);
3174 }
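/*
 * Illustrative sketch only (compiled out): the buffer returned by
 * get_trace_buf() is per CPU and per nesting level, so callers keep
 * preemption disabled and always pair it with put_trace_buf(), as
 * trace_vbprintk() below does. The function name is made up.
 */
#if 0
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* ... format up to TRACE_BUF_SIZE bytes into tbuffer ... */
		put_trace_buf();
	}

	preempt_enable_notrace();
}
#endif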
3175
3176 static int alloc_percpu_trace_buffer(void)
3177 {
3178         struct trace_buffer_struct *buffers;
3179
3180         if (trace_percpu_buffer)
3181                 return 0;
3182
3183         buffers = alloc_percpu(struct trace_buffer_struct);
3184         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3185                 return -ENOMEM;
3186
3187         trace_percpu_buffer = buffers;
3188         return 0;
3189 }
3190
3191 static int buffers_allocated;
3192
3193 void trace_printk_init_buffers(void)
3194 {
3195         if (buffers_allocated)
3196                 return;
3197
3198         if (alloc_percpu_trace_buffer())
3199                 return;
3200
3201         /* trace_printk() is for debug use only. Don't use it in production. */
3202
3203         pr_warn("\n");
3204         pr_warn("**********************************************************\n");
3205         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3206         pr_warn("**                                                      **\n");
3207         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3208         pr_warn("**                                                      **\n");
3209         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3210         pr_warn("** unsafe for production use.                           **\n");
3211         pr_warn("**                                                      **\n");
3212         pr_warn("** If you see this message and you are not debugging    **\n");
3213         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3214         pr_warn("**                                                      **\n");
3215         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3216         pr_warn("**********************************************************\n");
3217
3218         /* Expand the buffers to set size */
3219         tracing_update_buffers();
3220
3221         buffers_allocated = 1;
3222
3223         /*
3224          * trace_printk_init_buffers() can be called by modules.
3225          * If that happens, then we need to start cmdline recording
3226          * directly here. If global_trace.array_buffer.buffer is already
3227          * allocated here, then this was called by module code.
3228          */
3229         if (global_trace.array_buffer.buffer)
3230                 tracing_start_cmdline_record();
3231 }
3232 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3233
3234 void trace_printk_start_comm(void)
3235 {
3236         /* Start tracing comms if trace printk is set */
3237         if (!buffers_allocated)
3238                 return;
3239         tracing_start_cmdline_record();
3240 }
3241
3242 static void trace_printk_start_stop_comm(int enabled)
3243 {
3244         if (!buffers_allocated)
3245                 return;
3246
3247         if (enabled)
3248                 tracing_start_cmdline_record();
3249         else
3250                 tracing_stop_cmdline_record();
3251 }
3252
3253 /**
3254  * trace_vbprintk - write binary msg to tracing buffer
3255  * @ip:    The address of the caller
3256  * @fmt:   The string format to write to the buffer
3257  * @args:  Arguments for @fmt
3258  */
3259 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3260 {
3261         struct trace_event_call *call = &event_bprint;
3262         struct ring_buffer_event *event;
3263         struct trace_buffer *buffer;
3264         struct trace_array *tr = &global_trace;
3265         struct bprint_entry *entry;
3266         unsigned int trace_ctx;
3267         char *tbuffer;
3268         int len = 0, size;
3269
3270         if (unlikely(tracing_selftest_running || tracing_disabled))
3271                 return 0;
3272
3273         /* Don't pollute graph traces with trace_vprintk internals */
3274         pause_graph_tracing();
3275
3276         trace_ctx = tracing_gen_ctx();
3277         preempt_disable_notrace();
3278
3279         tbuffer = get_trace_buf();
3280         if (!tbuffer) {
3281                 len = 0;
3282                 goto out_nobuffer;
3283         }
3284
3285         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3286
3287         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3288                 goto out_put;
3289
3290         size = sizeof(*entry) + sizeof(u32) * len;
3291         buffer = tr->array_buffer.buffer;
3292         ring_buffer_nest_start(buffer);
3293         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3294                                             trace_ctx);
3295         if (!event)
3296                 goto out;
3297         entry = ring_buffer_event_data(event);
3298         entry->ip                       = ip;
3299         entry->fmt                      = fmt;
3300
3301         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3302         if (!call_filter_check_discard(call, entry, buffer, event)) {
3303                 __buffer_unlock_commit(buffer, event);
3304                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3305         }
3306
3307 out:
3308         ring_buffer_nest_end(buffer);
3309 out_put:
3310         put_trace_buf();
3311
3312 out_nobuffer:
3313         preempt_enable_notrace();
3314         unpause_graph_tracing();
3315
3316         return len;
3317 }
3318 EXPORT_SYMBOL_GPL(trace_vbprintk);
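
/*
 * For illustration, trace_vbprintk() is normally reached through a
 * varargs wrapper that builds the va_list. A minimal (hypothetical)
 * wrapper looks roughly like:
 *
 *	static int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */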
3319
3320 __printf(3, 0)
3321 static int
3322 __trace_array_vprintk(struct trace_buffer *buffer,
3323                       unsigned long ip, const char *fmt, va_list args)
3324 {
3325         struct trace_event_call *call = &event_print;
3326         struct ring_buffer_event *event;
3327         int len = 0, size;
3328         struct print_entry *entry;
3329         unsigned int trace_ctx;
3330         char *tbuffer;
3331
3332         if (tracing_disabled || tracing_selftest_running)
3333                 return 0;
3334
3335         /* Don't pollute graph traces with trace_vprintk internals */
3336         pause_graph_tracing();
3337
3338         trace_ctx = tracing_gen_ctx();
3339         preempt_disable_notrace();
3340
3342         tbuffer = get_trace_buf();
3343         if (!tbuffer) {
3344                 len = 0;
3345                 goto out_nobuffer;
3346         }
3347
3348         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3349
3350         size = sizeof(*entry) + len + 1;
3351         ring_buffer_nest_start(buffer);
3352         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3353                                             trace_ctx);
3354         if (!event)
3355                 goto out;
3356         entry = ring_buffer_event_data(event);
3357         entry->ip = ip;
3358
3359         memcpy(&entry->buf, tbuffer, len + 1);
3360         if (!call_filter_check_discard(call, entry, buffer, event)) {
3361                 __buffer_unlock_commit(buffer, event);
3362                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3363         }
3364
3365 out:
3366         ring_buffer_nest_end(buffer);
3367         put_trace_buf();
3368
3369 out_nobuffer:
3370         preempt_enable_notrace();
3371         unpause_graph_tracing();
3372
3373         return len;
3374 }
3375
3376 __printf(3, 0)
3377 int trace_array_vprintk(struct trace_array *tr,
3378                         unsigned long ip, const char *fmt, va_list args)
3379 {
3380         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3381 }
3382
3383 /**
3384  * trace_array_printk - Print a message to a specific instance
3385  * @tr: The instance trace_array descriptor
3386  * @ip: The instruction pointer that this is called from.
3387  * @fmt: The format to print (printf format)
3388  *
3389  * If a subsystem sets up its own instance, it has the right to
3390  * printk strings into its tracing instance buffer using this
3391  * function. Note, this function will not write into the top level
3392  * buffer (use trace_printk() for that), as the top level buffer
3393  * should only contain events that can be individually disabled.
3394  * trace_printk() is only used for debugging a kernel, and should
3395  * never be incorporated into normal use.
3396  *
3397  * trace_array_printk() can be used, as it will not add noise to the
3398  * top level tracing buffer.
3399  *
3400  * Note, trace_array_init_printk() must be called on @tr before this
3401  * can be used.
3402  */
3403 __printf(3, 0)
3404 int trace_array_printk(struct trace_array *tr,
3405                        unsigned long ip, const char *fmt, ...)
3406 {
3407         int ret;
3408         va_list ap;
3409
3410         if (!tr)
3411                 return -ENOENT;
3412
3413         /* This is only allowed for created instances */
3414         if (tr == &global_trace)
3415                 return 0;
3416
3417         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3418                 return 0;
3419
3420         va_start(ap, fmt);
3421         ret = trace_array_vprintk(tr, ip, fmt, ap);
3422         va_end(ap);
3423         return ret;
3424 }
3425 EXPORT_SYMBOL_GPL(trace_array_printk);
3426
3427 /**
3428  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3429  * @tr: The trace array to initialize the buffers for
3430  *
3431  * As trace_array_printk() only writes into instances, calls to it are
3432  * OK to leave in kernel code (unlike trace_printk()). This needs to be
3433  * called before trace_array_printk() can be used on a trace_array.
3434  */
3435 int trace_array_init_printk(struct trace_array *tr)
3436 {
3437         if (!tr)
3438                 return -ENOENT;
3439
3440         /* This is only allowed for created instances */
3441         if (tr == &global_trace)
3442                 return -EINVAL;
3443
3444         return alloc_percpu_trace_buffer();
3445 }
3446 EXPORT_SYMBOL_GPL(trace_array_init_printk);
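
/*
 * For illustration, a subsystem with its own instance would typically
 * wire these up roughly as below (instance name and error handling
 * are illustrative only):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from my_subsys\n");
 *
 * The message lands only in the "my_subsys" instance buffer, never in
 * the top level buffer.
 */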
3447
3448 __printf(3, 4)
3449 int trace_array_printk_buf(struct trace_buffer *buffer,
3450                            unsigned long ip, const char *fmt, ...)
3451 {
3452         int ret;
3453         va_list ap;
3454
3455         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3456                 return 0;
3457
3458         va_start(ap, fmt);
3459         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3460         va_end(ap);
3461         return ret;
3462 }
3463
3464 __printf(2, 0)
3465 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3466 {
3467         return trace_array_vprintk(&global_trace, ip, fmt, args);
3468 }
3469 EXPORT_SYMBOL_GPL(trace_vprintk);
3470
3471 static void trace_iterator_increment(struct trace_iterator *iter)
3472 {
3473         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3474
3475         iter->idx++;
3476         if (buf_iter)
3477                 ring_buffer_iter_advance(buf_iter);
3478 }
3479
3480 static struct trace_entry *
3481 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3482                 unsigned long *lost_events)
3483 {
3484         struct ring_buffer_event *event;
3485         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3486
3487         if (buf_iter) {
3488                 event = ring_buffer_iter_peek(buf_iter, ts);
3489                 if (lost_events)
3490                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3491                                 (unsigned long)-1 : 0;
3492         } else {
3493                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3494                                          lost_events);
3495         }
3496
3497         if (event) {
3498                 iter->ent_size = ring_buffer_event_length(event);
3499                 return ring_buffer_event_data(event);
3500         }
3501         iter->ent_size = 0;
3502         return NULL;
3503 }
3504
3505 static struct trace_entry *
3506 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3507                   unsigned long *missing_events, u64 *ent_ts)
3508 {
3509         struct trace_buffer *buffer = iter->array_buffer->buffer;
3510         struct trace_entry *ent, *next = NULL;
3511         unsigned long lost_events = 0, next_lost = 0;
3512         int cpu_file = iter->cpu_file;
3513         u64 next_ts = 0, ts;
3514         int next_cpu = -1;
3515         int next_size = 0;
3516         int cpu;
3517
3518         /*
3519          * If we are in a per_cpu trace file, don't bother iterating over
3520          * all CPUs; just peek at that CPU directly.
3521          */
3522         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3523                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3524                         return NULL;
3525                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3526                 if (ent_cpu)
3527                         *ent_cpu = cpu_file;
3528
3529                 return ent;
3530         }
3531
3532         for_each_tracing_cpu(cpu) {
3533
3534                 if (ring_buffer_empty_cpu(buffer, cpu))
3535                         continue;
3536
3537                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3538
3539                 /*
3540                  * Pick the entry with the smallest timestamp:
3541                  */
3542                 if (ent && (!next || ts < next_ts)) {
3543                         next = ent;
3544                         next_cpu = cpu;
3545                         next_ts = ts;
3546                         next_lost = lost_events;
3547                         next_size = iter->ent_size;
3548                 }
3549         }
3550
3551         iter->ent_size = next_size;
3552
3553         if (ent_cpu)
3554                 *ent_cpu = next_cpu;
3555
3556         if (ent_ts)
3557                 *ent_ts = next_ts;
3558
3559         if (missing_events)
3560                 *missing_events = next_lost;
3561
3562         return next;
3563 }
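
/*
 * Worked example: if the next pending entries per CPU have timestamps
 *
 *	cpu0: 1050	cpu1: 1020	cpu2: (empty)
 *
 * the loop above skips cpu2, compares 1050 with 1020, and returns the
 * cpu1 entry with *ent_cpu = 1 and *ent_ts = 1020. Repeated calls thus
 * produce a single stream ordered by timestamp across all CPUs.
 */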
3564
3565 #define STATIC_FMT_BUF_SIZE     128
3566 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3567
3568 static char *trace_iter_expand_format(struct trace_iterator *iter)
3569 {
3570         char *tmp;
3571
3572         /*
3573          * iter->tr is NULL when used with tp_printk, which makes
3574          * this get called where it is not safe to call krealloc().
3575          */
3576         if (!iter->tr || iter->fmt == static_fmt_buf)
3577                 return NULL;
3578
3579         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3580                        GFP_KERNEL);
3581         if (tmp) {
3582                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3583                 iter->fmt = tmp;
3584         }
3585
3586         return tmp;
3587 }
3588
3589 /* Returns true if the string is safe to dereference from an event */
3590 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3591 {
3592         unsigned long addr = (unsigned long)str;
3593         struct trace_event *trace_event;
3594         struct trace_event_call *event;
3595
3596         /* OK if part of the event data */
3597         if ((addr >= (unsigned long)iter->ent) &&
3598             (addr < (unsigned long)iter->ent + iter->ent_size))
3599                 return true;
3600
3601         /* OK if part of the temp seq buffer */
3602         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3603             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3604                 return true;
3605
3606         /* Core rodata can not be freed */
3607         if (is_kernel_rodata(addr))
3608                 return true;
3609
3610         if (trace_is_tracepoint_string(str))
3611                 return true;
3612
3613         /*
3614          * Now this could be a module event, referencing core module
3615          * data, which is OK.
3616          */
3617         if (!iter->ent)
3618                 return false;
3619
3620         trace_event = ftrace_find_event(iter->ent->type);
3621         if (!trace_event)
3622                 return false;
3623
3624         event = container_of(trace_event, struct trace_event_call, event);
3625         if (!event->mod)
3626                 return false;
3627
3628         /* Would rather have rodata, but this will suffice */
3629         if (within_module_core(addr, event->mod))
3630                 return true;
3631
3632         return false;
3633 }
3634
3635 static const char *show_buffer(struct trace_seq *s)
3636 {
3637         struct seq_buf *seq = &s->seq;
3638
3639         seq_buf_terminate(seq);
3640
3641         return seq->buffer;
3642 }
3643
3644 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3645
3646 static int test_can_verify_check(const char *fmt, ...)
3647 {
3648         char buf[16];
3649         va_list ap;
3650         int ret;
3651
3652         /*
3653          * The verifier depends on vsnprintf() modifying the va_list
3654          * passed to it, i.e. on the va_list being passed by reference.
3655          * Some architectures (like x86_32) pass it by value, which means
3656          * that vsnprintf() does not modify the caller's va_list, and the
3657          * verifier would then need to understand every conversion that
3658          * vsnprintf can consume. If the va_list is passed by value, the
3659          * verifier is disabled.
3660          */
3661         va_start(ap, fmt);
3662         vsnprintf(buf, 16, "%d", ap);
3663         ret = va_arg(ap, int);
3664         va_end(ap);
3665
3666         return ret;
3667 }
3668
3669 static void test_can_verify(void)
3670 {
3671         if (!test_can_verify_check("%d %d", 0, 1)) {
3672                 pr_info("trace event string verifier disabled\n");
3673                 static_branch_inc(&trace_no_verify);
3674         }
3675 }
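
/*
 * Note on the probe above: test_can_verify() passes the arguments
 * (0, 1). If the architecture passes va_list by reference, the
 * vsnprintf("%d") call consumes the leading 0 and the following
 * va_arg() returns 1, so the verifier stays enabled. If va_list is
 * passed by value, va_arg() still sees the first argument and
 * returns 0, which disables the verifier.
 */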
3676
3677 /**
3678  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3679  * @iter: The iterator that holds the seq buffer and the event being printed
3680  * @fmt: The format used to print the event
3681  * @ap: The va_list holding the data to print from @fmt.
3682  *
3683  * This writes the data into the @iter->seq buffer using the data from
3684  * @fmt and @ap. If the format has a %s, then the source of the string
3685  * is examined to make sure it is safe to print, otherwise it will
3686  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3687  * pointer.
3688  */
3689 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3690                          va_list ap)
3691 {
3692         const char *p = fmt;
3693         const char *str;
3694         int i, j;
3695
3696         if (WARN_ON_ONCE(!fmt))
3697                 return;
3698
3699         if (static_branch_unlikely(&trace_no_verify))
3700                 goto print;
3701
3702         /* Don't bother checking when doing a ftrace_dump() */
3703         if (iter->fmt == static_fmt_buf)
3704                 goto print;
3705
3706         while (*p) {
3707                 bool star = false;
3708                 int len = 0;
3709
3710                 j = 0;
3711
3712                 /* We only care about %s and variants */
3713                 for (i = 0; p[i]; i++) {
3714                         if (i + 1 >= iter->fmt_size) {
3715                                 /*
3716                                  * If we can't expand the copy buffer,
3717                                  * just print it.
3718                                  */
3719                                 if (!trace_iter_expand_format(iter))
3720                                         goto print;
3721                         }
3722
3723                         if (p[i] == '\\' && p[i+1]) {
3724                                 i++;
3725                                 continue;
3726                         }
3727                         if (p[i] == '%') {
3728                                 /* Need to test cases like %08.*s */
3729                                 for (j = 1; p[i+j]; j++) {
3730                                         if (isdigit(p[i+j]) ||
3731                                             p[i+j] == '.')
3732                                                 continue;
3733                                         if (p[i+j] == '*') {
3734                                                 star = true;
3735                                                 continue;
3736                                         }
3737                                         break;
3738                                 }
3739                                 if (p[i+j] == 's')
3740                                         break;
3741                                 star = false;
3742                         }
3743                         j = 0;
3744                 }
3745                 /* If no %s found then just print normally */
3746                 if (!p[i])
3747                         break;
3748
3749                 /* Copy up to the %s, and print that */
3750                 strncpy(iter->fmt, p, i);
3751                 iter->fmt[i] = '\0';
3752                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3753
3754                 if (star)
3755                         len = va_arg(ap, int);
3756
3757                 /* The ap now points to the string data of the %s */
3758                 str = va_arg(ap, const char *);
3759
3760                 /*
3761                  * If you hit this warning, it is likely that the
3762                  * trace event in question used %s on a string that
3763                  * was saved at the time of the event, but may not be
3764                  * around when the trace is read. Use __string(),
3765                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3766                  * instead. See samples/trace_events/trace-events-sample.h
3767                  * for reference.
3768                  */
3769                 if (WARN_ONCE(!trace_safe_str(iter, str),
3770                               "fmt: '%s' current_buffer: '%s'",
3771                               fmt, show_buffer(&iter->seq))) {
3772                         int ret;
3773
3774                         /* Try to safely read the string */
3775                         if (star) {
3776                                 if (len + 1 > iter->fmt_size)
3777                                         len = iter->fmt_size - 1;
3778                                 if (len < 0)
3779                                         len = 0;
3780                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3781                                 iter->fmt[len] = 0;
3782                                 star = false;
3783                         } else {
3784                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3785                                                                   iter->fmt_size);
3786                         }
3787                         if (ret < 0)
3788                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3789                         else
3790                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3791                                                  str, iter->fmt);
3792                         str = "[UNSAFE-MEMORY]";
3793                         strcpy(iter->fmt, "%s");
3794                 } else {
3795                         strncpy(iter->fmt, p + i, j + 1);
3796                         iter->fmt[j+1] = '\0';
3797                 }
3798                 if (star)
3799                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3800                 else
3801                         trace_seq_printf(&iter->seq, iter->fmt, str);
3802
3803                 p += i + j + 1;
3804         }
3805  print:
3806         if (*p)
3807                 trace_seq_vprintf(&iter->seq, p, ap);
3808 }
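
/*
 * For illustration, the warning in trace_check_vprintf() asks event
 * authors to copy strings into the event itself. In a TRACE_EVENT()
 * definition that is done with the __string()/__assign_str()/
 * __get_str() helpers, roughly (event name is illustrative only):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * See samples/trace_events/trace-events-sample.h for a complete,
 * buildable version of this pattern.
 */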
3809
3810 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3811 {
3812         const char *p, *new_fmt;
3813         char *q;
3814
3815         if (WARN_ON_ONCE(!fmt))
3816                 return fmt;
3817
3818         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3819                 return fmt;
3820
3821         p = fmt;
3822         new_fmt = q = iter->fmt;
3823         while (*p) {
3824                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3825                         if (!trace_iter_expand_format(iter))
3826                                 return fmt;
3827
3828                         q += iter->fmt - new_fmt;
3829                         new_fmt = iter->fmt;
3830                 }
3831
3832                 *q++ = *p++;
3833
3834                 /* Replace %p with %px */
3835                 if (p[-1] == '%') {
3836                         if (p[0] == '%') {
3837                                 *q++ = *p++;
3838                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3839                                 *q++ = *p++;
3840                                 *q++ = 'x';
3841                         }
3842                 }
3843         }
3844         *q = '\0';
3845
3846         return new_fmt;
3847 }
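
/*
 * Worked example: with TRACE_ITER_HASH_PTR cleared, a format such as
 *
 *	"ptr=%p pct=100%% sym=%pS"
 *
 * is rewritten above to
 *
 *	"ptr=%px pct=100%% sym=%pS"
 *
 * Only a bare %p is expanded: "%%" is copied through untouched, and
 * extended pointer formats like %pS are left alone because the
 * character following the 'p' is alphanumeric.
 */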
3848
3849 #define STATIC_TEMP_BUF_SIZE    128
3850 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3851
3852 /* Find the next real entry, without updating the iterator itself */
3853 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3854                                           int *ent_cpu, u64 *ent_ts)
3855 {
3856         /* __find_next_entry will reset ent_size */
3857         int ent_size = iter->ent_size;
3858         struct trace_entry *entry;
3859
3860         /*
3861          * If called from ftrace_dump(), then the iter->temp buffer
3862          * will be the static_temp_buf and not created from kmalloc.
3863          * If the entry size is greater than the buffer, we cannot
3864          * save it. Just return NULL in that case. This is only
3865          * used to add markers when two consecutive events' time
3866          * stamps have a large delta. See trace_print_lat_context().
3867          */
3868         if (iter->temp == static_temp_buf &&
3869             STATIC_TEMP_BUF_SIZE < ent_size)
3870                 return NULL;
3871
3872         /*
3873          * __find_next_entry() may call peek_next_entry(), which may call
3874          * ring_buffer_peek(), which can make the contents of iter->ent
3875          * undefined. We need to copy iter->ent now.
3876          */
3877         if (iter->ent && iter->ent != iter->temp) {
3878                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3879                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3880                         void *temp;
3881                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3882                         if (!temp)
3883                                 return NULL;
3884                         kfree(iter->temp);
3885                         iter->temp = temp;
3886                         iter->temp_size = iter->ent_size;
3887                 }
3888                 memcpy(iter->temp, iter->ent, iter->ent_size);
3889                 iter->ent = iter->temp;
3890         }
3891         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3892         /* Put back the original ent_size */
3893         iter->ent_size = ent_size;
3894
3895         return entry;
3896 }
3897
3898 /* Find the next real entry, and increment the iterator to the next entry */
3899 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3900 {
3901         iter->ent = __find_next_entry(iter, &iter->cpu,
3902                                       &iter->lost_events, &iter->ts);
3903
3904         if (iter->ent)
3905                 trace_iterator_increment(iter);
3906
3907         return iter->ent ? iter : NULL;
3908 }
3909
3910 static void trace_consume(struct trace_iterator *iter)
3911 {
3912         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3913                             &iter->lost_events);
3914 }
3915
3916 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3917 {
3918         struct trace_iterator *iter = m->private;
3919         int i = (int)*pos;
3920         void *ent;
3921
3922         WARN_ON_ONCE(iter->leftover);
3923
3924         (*pos)++;
3925
3926         /* can't go backwards */
3927         if (iter->idx > i)
3928                 return NULL;
3929
3930         if (iter->idx < 0)
3931                 ent = trace_find_next_entry_inc(iter);
3932         else
3933                 ent = iter;
3934
3935         while (ent && iter->idx < i)
3936                 ent = trace_find_next_entry_inc(iter);
3937
3938         iter->pos = *pos;
3939
3940         return ent;
3941 }
3942
3943 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3944 {
3945         struct ring_buffer_iter *buf_iter;
3946         unsigned long entries = 0;
3947         u64 ts;
3948
3949         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3950
3951         buf_iter = trace_buffer_iter(iter, cpu);
3952         if (!buf_iter)
3953                 return;
3954
3955         ring_buffer_iter_reset(buf_iter);
3956
3957         /*
3958          * With the max latency tracers, it is possible that a reset
3959          * never took place on a CPU. This is evidenced by the
3960          * timestamp being before the start of the buffer.
3961          */
3962         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3963                 if (ts >= iter->array_buffer->time_start)
3964                         break;
3965                 entries++;
3966                 ring_buffer_iter_advance(buf_iter);
3967         }
3968
3969         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3970 }
3971
3972 /*
3973  * The current tracer is copied to avoid taking a global lock
3974  * all around.
3975  */
3976 static void *s_start(struct seq_file *m, loff_t *pos)
3977 {
3978         struct trace_iterator *iter = m->private;
3979         struct trace_array *tr = iter->tr;
3980         int cpu_file = iter->cpu_file;
3981         void *p = NULL;
3982         loff_t l = 0;
3983         int cpu;
3984
3985         /*
3986          * Copy the tracer to avoid using a global lock all around.
3987          * iter->trace is a copy of current_trace; the name pointer can
3988          * be compared instead of calling strcmp(), as iter->trace->name
3989          * will point to the same string as current_trace->name.
3990          */
3991         mutex_lock(&trace_types_lock);
3992         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3993                 *iter->trace = *tr->current_trace;
3994         mutex_unlock(&trace_types_lock);
3995
3996 #ifdef CONFIG_TRACER_MAX_TRACE
3997         if (iter->snapshot && iter->trace->use_max_tr)
3998                 return ERR_PTR(-EBUSY);
3999 #endif
4000
4001         if (!iter->snapshot)
4002                 atomic_inc(&trace_record_taskinfo_disabled);
4003
4004         if (*pos != iter->pos) {
4005                 iter->ent = NULL;
4006                 iter->cpu = 0;
4007                 iter->idx = -1;
4008
4009                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4010                         for_each_tracing_cpu(cpu)
4011                                 tracing_iter_reset(iter, cpu);
4012                 } else
4013                         tracing_iter_reset(iter, cpu_file);
4014
4015                 iter->leftover = 0;
4016                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4017                         ;
4018
4019         } else {
4020                 /*
4021                  * If we overflowed the seq_file before, then we want
4022                  * to just reuse the trace_seq buffer.
4023                  */
4024                 if (iter->leftover)
4025                         p = iter;
4026                 else {
4027                         l = *pos - 1;
4028                         p = s_next(m, p, &l);
4029                 }
4030         }
4031
4032         trace_event_read_lock();
4033         trace_access_lock(cpu_file);
4034         return p;
4035 }
4036
4037 static void s_stop(struct seq_file *m, void *p)
4038 {
4039         struct trace_iterator *iter = m->private;
4040
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042         if (iter->snapshot && iter->trace->use_max_tr)
4043                 return;
4044 #endif
4045
4046         if (!iter->snapshot)
4047                 atomic_dec(&trace_record_taskinfo_disabled);
4048
4049         trace_access_unlock(iter->cpu_file);
4050         trace_event_read_unlock();
4051 }
4052
4053 static void
4054 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4055                       unsigned long *entries, int cpu)
4056 {
4057         unsigned long count;
4058
4059         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4060         /*
4061          * If this buffer has skipped entries, then we hold all
4062          * entries for the trace and we need to ignore the
4063          * ones before the time stamp.
4064          */
4065         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4066                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4067                 /* total is the same as the entries */
4068                 *total = count;
4069         } else
4070                 *total = count +
4071                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4072         *entries = count;
4073 }
4074
4075 static void
4076 get_total_entries(struct array_buffer *buf,
4077                   unsigned long *total, unsigned long *entries)
4078 {
4079         unsigned long t, e;
4080         int cpu;
4081
4082         *total = 0;
4083         *entries = 0;
4084
4085         for_each_tracing_cpu(cpu) {
4086                 get_total_entries_cpu(buf, &t, &e, cpu);
4087                 *total += t;
4088                 *entries += e;
4089         }
4090 }
4091
4092 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4093 {
4094         unsigned long total, entries;
4095
4096         if (!tr)
4097                 tr = &global_trace;
4098
4099         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4100
4101         return entries;
4102 }
4103
4104 unsigned long trace_total_entries(struct trace_array *tr)
4105 {
4106         unsigned long total, entries;
4107
4108         if (!tr)
4109                 tr = &global_trace;
4110
4111         get_total_entries(&tr->array_buffer, &total, &entries);
4112
4113         return entries;
4114 }
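
/*
 * Worked example: if a CPU buffer currently holds 1000 entries and the
 * ring buffer reports an overrun of 500 (older entries overwritten),
 * then *entries = 1000 while *total = 1500. When skipped_entries is
 * set (latency tracers), the skipped count is subtracted instead and
 * total equals entries.
 */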
4115
4116 static void print_lat_help_header(struct seq_file *m)
4117 {
4118         seq_puts(m, "#                    _------=> CPU#            \n"
4119                     "#                   / _-----=> irqs-off        \n"
4120                     "#                  | / _----=> need-resched    \n"
4121                     "#                  || / _---=> hardirq/softirq \n"
4122                     "#                  ||| / _--=> preempt-depth   \n"
4123                     "#                  |||| /     delay            \n"
4124                     "#  cmd     pid     ||||| time  |   caller      \n"
4125                     "#     \\   /        |||||  \\    |   /         \n");
4126 }
4127
4128 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4129 {
4130         unsigned long total;
4131         unsigned long entries;
4132
4133         get_total_entries(buf, &total, &entries);
4134         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4135                    entries, total, num_online_cpus());
4136         seq_puts(m, "#\n");
4137 }
4138
4139 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4140                                    unsigned int flags)
4141 {
4142         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4143
4144         print_event_info(buf, m);
4145
4146         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4147         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4148 }
4149
4150 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4151                                        unsigned int flags)
4152 {
4153         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4154         const char *space = "            ";
4155         int prec = tgid ? 12 : 2;
4156
4157         print_event_info(buf, m);
4158
4159         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4160         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4161         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4162         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4163         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4164         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4165         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4166 }
4167
4168 void
4169 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4170 {
4171         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4172         struct array_buffer *buf = iter->array_buffer;
4173         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4174         struct tracer *type = iter->trace;
4175         unsigned long entries;
4176         unsigned long total;
4177         const char *name = type->name;
4180
4181         get_total_entries(buf, &total, &entries);
4182
4183         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4184                    name, UTS_RELEASE);
4185         seq_puts(m, "# -----------------------------------"
4186                  "---------------------------------\n");
4187         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4188                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4189                    nsecs_to_usecs(data->saved_latency),
4190                    entries,
4191                    total,
4192                    buf->cpu,
4193 #if defined(CONFIG_PREEMPT_NONE)
4194                    "server",
4195 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4196                    "desktop",
4197 #elif defined(CONFIG_PREEMPT)
4198                    "preempt",
4199 #elif defined(CONFIG_PREEMPT_RT)
4200                    "preempt_rt",
4201 #else
4202                    "unknown",
4203 #endif
4204                    /* These are reserved for later use */
4205                    0, 0, 0, 0);
4206 #ifdef CONFIG_SMP
4207         seq_printf(m, " #P:%d)\n", num_online_cpus());
4208 #else
4209         seq_puts(m, ")\n");
4210 #endif
4211         seq_puts(m, "#    -----------------\n");
4212         seq_printf(m, "#    | task: %.16s-%d "
4213                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4214                    data->comm, data->pid,
4215                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4216                    data->policy, data->rt_priority);
4217         seq_puts(m, "#    -----------------\n");
4218
4219         if (data->critical_start) {
4220                 seq_puts(m, "#  => started at: ");
4221                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4222                 trace_print_seq(m, &iter->seq);
4223                 seq_puts(m, "\n#  => ended at:   ");
4224                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4225                 trace_print_seq(m, &iter->seq);
4226                 seq_puts(m, "\n#\n");
4227         }
4228
4229         seq_puts(m, "#\n");
4230 }
4231
4232 static void test_cpu_buff_start(struct trace_iterator *iter)
4233 {
4234         struct trace_seq *s = &iter->seq;
4235         struct trace_array *tr = iter->tr;
4236
4237         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4238                 return;
4239
4240         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4241                 return;
4242
4243         if (cpumask_available(iter->started) &&
4244             cpumask_test_cpu(iter->cpu, iter->started))
4245                 return;
4246
4247         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4248                 return;
4249
4250         if (cpumask_available(iter->started))
4251                 cpumask_set_cpu(iter->cpu, iter->started);
4252
4253         /* Don't print started cpu buffer for the first entry of the trace */
4254         if (iter->idx > 1)
4255                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4256                                 iter->cpu);
4257 }
4258
4259 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4260 {
4261         struct trace_array *tr = iter->tr;
4262         struct trace_seq *s = &iter->seq;
4263         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4264         struct trace_entry *entry;
4265         struct trace_event *event;
4266
4267         entry = iter->ent;
4268
4269         test_cpu_buff_start(iter);
4270
4271         event = ftrace_find_event(entry->type);
4272
4273         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4274                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4275                         trace_print_lat_context(iter);
4276                 else
4277                         trace_print_context(iter);
4278         }
4279
4280         if (trace_seq_has_overflowed(s))
4281                 return TRACE_TYPE_PARTIAL_LINE;
4282
4283         if (event)
4284                 return event->funcs->trace(iter, sym_flags, event);
4285
4286         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4287
4288         return trace_handle_return(s);
4289 }
4290
4291 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4292 {
4293         struct trace_array *tr = iter->tr;
4294         struct trace_seq *s = &iter->seq;
4295         struct trace_entry *entry;
4296         struct trace_event *event;
4297
4298         entry = iter->ent;
4299
4300         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4301                 trace_seq_printf(s, "%d %d %llu ",
4302                                  entry->pid, iter->cpu, iter->ts);
4303
4304         if (trace_seq_has_overflowed(s))
4305                 return TRACE_TYPE_PARTIAL_LINE;
4306
4307         event = ftrace_find_event(entry->type);
4308         if (event)
4309                 return event->funcs->raw(iter, 0, event);
4310
4311         trace_seq_printf(s, "%d ?\n", entry->type);
4312
4313         return trace_handle_return(s);
4314 }
4315
4316 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4317 {
4318         struct trace_array *tr = iter->tr;
4319         struct trace_seq *s = &iter->seq;
4320         unsigned char newline = '\n';
4321         struct trace_entry *entry;
4322         struct trace_event *event;
4323
4324         entry = iter->ent;
4325
4326         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4327                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4328                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4329                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4330                 if (trace_seq_has_overflowed(s))
4331                         return TRACE_TYPE_PARTIAL_LINE;
4332         }
4333
4334         event = ftrace_find_event(entry->type);
4335         if (event) {
4336                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4337                 if (ret != TRACE_TYPE_HANDLED)
4338                         return ret;
4339         }
4340
4341         SEQ_PUT_FIELD(s, newline);
4342
4343         return trace_handle_return(s);
4344 }
4345
4346 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4347 {
4348         struct trace_array *tr = iter->tr;
4349         struct trace_seq *s = &iter->seq;
4350         struct trace_entry *entry;
4351         struct trace_event *event;
4352
4353         entry = iter->ent;
4354
4355         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4356                 SEQ_PUT_FIELD(s, entry->pid);
4357                 SEQ_PUT_FIELD(s, iter->cpu);
4358                 SEQ_PUT_FIELD(s, iter->ts);
4359                 if (trace_seq_has_overflowed(s))
4360                         return TRACE_TYPE_PARTIAL_LINE;
4361         }
4362
4363         event = ftrace_find_event(entry->type);
4364         return event ? event->funcs->binary(iter, 0, event) :
4365                 TRACE_TYPE_HANDLED;
4366 }
4367
4368 int trace_empty(struct trace_iterator *iter)
4369 {
4370         struct ring_buffer_iter *buf_iter;
4371         int cpu;
4372
4373         /* If we are looking at one CPU buffer, only check that one */
4374         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4375                 cpu = iter->cpu_file;
4376                 buf_iter = trace_buffer_iter(iter, cpu);
4377                 if (buf_iter) {
4378                         if (!ring_buffer_iter_empty(buf_iter))
4379                                 return 0;
4380                 } else {
4381                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4382                                 return 0;
4383                 }
4384                 return 1;
4385         }
4386
4387         for_each_tracing_cpu(cpu) {
4388                 buf_iter = trace_buffer_iter(iter, cpu);
4389                 if (buf_iter) {
4390                         if (!ring_buffer_iter_empty(buf_iter))
4391                                 return 0;
4392                 } else {
4393                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4394                                 return 0;
4395                 }
4396         }
4397
4398         return 1;
4399 }
4400
4401 /*  Called with trace_event_read_lock() held. */
4402 enum print_line_t print_trace_line(struct trace_iterator *iter)
4403 {
4404         struct trace_array *tr = iter->tr;
4405         unsigned long trace_flags = tr->trace_flags;
4406         enum print_line_t ret;
4407
4408         if (iter->lost_events) {
4409                 if (iter->lost_events == (unsigned long)-1)
4410                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4411                                          iter->cpu);
4412                 else
4413                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4414                                          iter->cpu, iter->lost_events);
4415                 if (trace_seq_has_overflowed(&iter->seq))
4416                         return TRACE_TYPE_PARTIAL_LINE;
4417         }
4418
4419         if (iter->trace && iter->trace->print_line) {
4420                 ret = iter->trace->print_line(iter);
4421                 if (ret != TRACE_TYPE_UNHANDLED)
4422                         return ret;
4423         }
4424
4425         if (iter->ent->type == TRACE_BPUTS &&
4426                         trace_flags & TRACE_ITER_PRINTK &&
4427                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4428                 return trace_print_bputs_msg_only(iter);
4429
4430         if (iter->ent->type == TRACE_BPRINT &&
4431                         trace_flags & TRACE_ITER_PRINTK &&
4432                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4433                 return trace_print_bprintk_msg_only(iter);
4434
4435         if (iter->ent->type == TRACE_PRINT &&
4436                         trace_flags & TRACE_ITER_PRINTK &&
4437                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4438                 return trace_print_printk_msg_only(iter);
4439
4440         if (trace_flags & TRACE_ITER_BIN)
4441                 return print_bin_fmt(iter);
4442
4443         if (trace_flags & TRACE_ITER_HEX)
4444                 return print_hex_fmt(iter);
4445
4446         if (trace_flags & TRACE_ITER_RAW)
4447                 return print_raw_fmt(iter);
4448
4449         return print_trace_fmt(iter);
4450 }
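
/*
 * Note: the dispatch above corresponds to the "bin", "hex" and "raw"
 * trace options. For example, "echo 1 > options/raw" in tracefs sets
 * TRACE_ITER_RAW and routes output through print_raw_fmt(); with none
 * of the three set, the default print_trace_fmt() path is used.
 */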
4451
4452 void trace_latency_header(struct seq_file *m)
4453 {
4454         struct trace_iterator *iter = m->private;
4455         struct trace_array *tr = iter->tr;
4456
4457         /* print nothing if the buffers are empty */
4458         if (trace_empty(iter))
4459                 return;
4460
4461         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4462                 print_trace_header(m, iter);
4463
4464         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4465                 print_lat_help_header(m);
4466 }
4467
4468 void trace_default_header(struct seq_file *m)
4469 {
4470         struct trace_iterator *iter = m->private;
4471         struct trace_array *tr = iter->tr;
4472         unsigned long trace_flags = tr->trace_flags;
4473
4474         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4475                 return;
4476
4477         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4478                 /* print nothing if the buffers are empty */
4479                 if (trace_empty(iter))
4480                         return;
4481                 print_trace_header(m, iter);
4482                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4483                         print_lat_help_header(m);
4484         } else {
4485                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4486                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4487                                 print_func_help_header_irq(iter->array_buffer,
4488                                                            m, trace_flags);
4489                         else
4490                                 print_func_help_header(iter->array_buffer, m,
4491                                                        trace_flags);
4492                 }
4493         }
4494 }
4495
4496 static void test_ftrace_alive(struct seq_file *m)
4497 {
4498         if (!ftrace_is_dead())
4499                 return;
4500         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4501                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4502 }
4503
4504 #ifdef CONFIG_TRACER_MAX_TRACE
4505 static void show_snapshot_main_help(struct seq_file *m)
4506 {
4507         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4508                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4509                     "#                      Takes a snapshot of the main buffer.\n"
4510                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4511                     "#                      (Doesn't have to be '2'; works with any number that\n"
4512                     "#                       is not a '0' or '1')\n");
4513 }
4514
4515 static void show_snapshot_percpu_help(struct seq_file *m)
4516 {
4517         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4518 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4519         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4520                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4521 #else
4522         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4523                     "#                     Must use main snapshot file to allocate.\n");
4524 #endif
4525         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4526                     "#                      (Doesn't have to be '2'; works with any number that\n"
4527                     "#                       is not a '0' or '1')\n");
4528 }
4529
4530 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4531 {
4532         if (iter->tr->allocated_snapshot)
4533                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4534         else
4535                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4536
4537         seq_puts(m, "# Snapshot commands:\n");
4538         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4539                 show_snapshot_main_help(m);
4540         else
4541                 show_snapshot_percpu_help(m);
4542 }
4543 #else
4544 /* Should never be called */
4545 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4546 #endif
4547
4548 static int s_show(struct seq_file *m, void *v)
4549 {
4550         struct trace_iterator *iter = v;
4551         int ret;
4552
4553         if (iter->ent == NULL) {
4554                 if (iter->tr) {
4555                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4556                         seq_puts(m, "#\n");
4557                         test_ftrace_alive(m);
4558                 }
4559                 if (iter->snapshot && trace_empty(iter))
4560                         print_snapshot_help(m, iter);
4561                 else if (iter->trace && iter->trace->print_header)
4562                         iter->trace->print_header(m);
4563                 else
4564                         trace_default_header(m);
4565
4566         } else if (iter->leftover) {
4567                 /*
4568                  * If we filled the seq_file buffer earlier, we
4569                  * want to just show it now.
4570                  */
4571                 ret = trace_print_seq(m, &iter->seq);
4572
4573                 /* ret should this time be zero, but you never know */
4574                 iter->leftover = ret;
4575
4576         } else {
4577                 print_trace_line(iter);
4578                 ret = trace_print_seq(m, &iter->seq);
4579                 /*
4580                  * If we overflow the seq_file buffer, then it will
4581                  * ask us for this data again at start up.
4582                  * Use that instead.
4583                  *  ret is 0 if seq_file write succeeded.
4584                  *        -1 otherwise.
4585                  */
4586                 iter->leftover = ret;
4587         }
4588
4589         return 0;
4590 }
4591
4592 /*
4593  * Should be used after trace_array_get(); trace_types_lock
4594  * ensures that i_cdev was already initialized.
4595  */
4596 static inline int tracing_get_cpu(struct inode *inode)
4597 {
4598         if (inode->i_cdev) /* See trace_create_cpu_file() */
4599                 return (long)inode->i_cdev - 1;
4600         return RING_BUFFER_ALL_CPUS;
4601 }
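
/*
 * Note: the per-cpu trace files store (cpu + 1) in inode->i_cdev when
 * they are created, so a NULL i_cdev (value 0) unambiguously means
 * "no specific CPU" and decodes to RING_BUFFER_ALL_CPUS, while any
 * non-zero value decodes back to its CPU number via the -1 above.
 */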
4602
4603 static const struct seq_operations tracer_seq_ops = {
4604         .start          = s_start,
4605         .next           = s_next,
4606         .stop           = s_stop,
4607         .show           = s_show,
4608 };
4609
4610 static struct trace_iterator *
4611 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4612 {
4613         struct trace_array *tr = inode->i_private;
4614         struct trace_iterator *iter;
4615         int cpu;
4616
4617         if (tracing_disabled)
4618                 return ERR_PTR(-ENODEV);
4619
4620         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4621         if (!iter)
4622                 return ERR_PTR(-ENOMEM);
4623
4624         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4625                                     GFP_KERNEL);
4626         if (!iter->buffer_iter)
4627                 goto release;
4628
4629         /*
4630          * trace_find_next_entry() may need to save off iter->ent.
4631          * It will place it into the iter->temp buffer. As most
4632          * events are less than 128 bytes, allocate a buffer of that size.
4633          * If one is greater, then trace_find_next_entry() will
4634          * allocate a new buffer to adjust for the bigger iter->ent.
4635          * It's not critical if it fails to get allocated here.
4636          */
4637         iter->temp = kmalloc(128, GFP_KERNEL);
4638         if (iter->temp)
4639                 iter->temp_size = 128;
4640
4641         /*
4642          * trace_event_printf() may need to modify the given format
4643          * string to replace %p with %px so that it shows the real address
4644          * instead of a hashed value. However, that is only needed for
4645          * event tracing; other tracers may not need it. Defer the
4646          * allocation until it is needed.
4647          */
4648         iter->fmt = NULL;
4649         iter->fmt_size = 0;
4650
4651         /*
4652          * We make a copy of the current tracer to avoid concurrent
4653          * changes on it while we are reading.
4654          */
4655         mutex_lock(&trace_types_lock);
4656         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4657         if (!iter->trace)
4658                 goto fail;
4659
4660         *iter->trace = *tr->current_trace;
4661
4662         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4663                 goto fail;
4664
4665         iter->tr = tr;
4666
4667 #ifdef CONFIG_TRACER_MAX_TRACE
4668         /* Currently only the top directory has a snapshot */
4669         if (tr->current_trace->print_max || snapshot)
4670                 iter->array_buffer = &tr->max_buffer;
4671         else
4672 #endif
4673                 iter->array_buffer = &tr->array_buffer;
4674         iter->snapshot = snapshot;
4675         iter->pos = -1;
4676         iter->cpu_file = tracing_get_cpu(inode);
4677         mutex_init(&iter->mutex);
4678
4679         /* Notify the tracer early; before we stop tracing. */
4680         if (iter->trace->open)
4681                 iter->trace->open(iter);
4682
4683         /* Annotate start of buffers if we had overruns */
4684         if (ring_buffer_overruns(iter->array_buffer->buffer))
4685                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4686
4687         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4688         if (trace_clocks[tr->clock_id].in_ns)
4689                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4690
4691         /*
4692          * If pause-on-trace is enabled, then stop the trace while
4693          * dumping, unless this is the "snapshot" file.
4694          */
4695         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4696                 tracing_stop_tr(tr);
4697
4698         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4699                 for_each_tracing_cpu(cpu) {
4700                         iter->buffer_iter[cpu] =
4701                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4702                                                          cpu, GFP_KERNEL);
4703                 }
4704                 ring_buffer_read_prepare_sync();
4705                 for_each_tracing_cpu(cpu) {
4706                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4707                         tracing_iter_reset(iter, cpu);
4708                 }
4709         } else {
4710                 cpu = iter->cpu_file;
4711                 iter->buffer_iter[cpu] =
4712                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4713                                                  cpu, GFP_KERNEL);
4714                 ring_buffer_read_prepare_sync();
4715                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4716                 tracing_iter_reset(iter, cpu);
4717         }
4718
4719         mutex_unlock(&trace_types_lock);
4720
4721         return iter;
4722
4723  fail:
4724         mutex_unlock(&trace_types_lock);
4725         kfree(iter->trace);
4726         kfree(iter->temp);
4727         kfree(iter->buffer_iter);
4728 release:
4729         seq_release_private(inode, file);
4730         return ERR_PTR(-ENOMEM);
4731 }
4732
4733 int tracing_open_generic(struct inode *inode, struct file *filp)
4734 {
4735         int ret;
4736
4737         ret = tracing_check_open_get_tr(NULL);
4738         if (ret)
4739                 return ret;
4740
4741         filp->private_data = inode->i_private;
4742         return 0;
4743 }
4744
4745 bool tracing_is_disabled(void)
4746 {
4747         return (tracing_disabled) ? true : false;
4748 }
4749
4750 /*
4751  * Open and update trace_array ref count.
4752  * Must have the current trace_array passed to it.
4753  */
4754 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4755 {
4756         struct trace_array *tr = inode->i_private;
4757         int ret;
4758
4759         ret = tracing_check_open_get_tr(tr);
4760         if (ret)
4761                 return ret;
4762
4763         filp->private_data = inode->i_private;
4764
4765         return 0;
4766 }
4767
4768 static int tracing_release(struct inode *inode, struct file *file)
4769 {
4770         struct trace_array *tr = inode->i_private;
4771         struct seq_file *m = file->private_data;
4772         struct trace_iterator *iter;
4773         int cpu;
4774
4775         if (!(file->f_mode & FMODE_READ)) {
4776                 trace_array_put(tr);
4777                 return 0;
4778         }
4779
4780         /* Writes do not use seq_file */
4781         iter = m->private;
4782         mutex_lock(&trace_types_lock);
4783
4784         for_each_tracing_cpu(cpu) {
4785                 if (iter->buffer_iter[cpu])
4786                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4787         }
4788
4789         if (iter->trace && iter->trace->close)
4790                 iter->trace->close(iter);
4791
4792         if (!iter->snapshot && tr->stop_count)
4793                 /* reenable tracing if it was previously enabled */
4794                 tracing_start_tr(tr);
4795
4796         __trace_array_put(tr);
4797
4798         mutex_unlock(&trace_types_lock);
4799
4800         mutex_destroy(&iter->mutex);
4801         free_cpumask_var(iter->started);
4802         kfree(iter->fmt);
4803         kfree(iter->temp);
4804         kfree(iter->trace);
4805         kfree(iter->buffer_iter);
4806         seq_release_private(inode, file);
4807
4808         return 0;
4809 }
4810
4811 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4812 {
4813         struct trace_array *tr = inode->i_private;
4814
4815         trace_array_put(tr);
4816         return 0;
4817 }
4818
4819 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4820 {
4821         struct trace_array *tr = inode->i_private;
4822
4823         trace_array_put(tr);
4824
4825         return single_release(inode, file);
4826 }
4827
4828 static int tracing_open(struct inode *inode, struct file *file)
4829 {
4830         struct trace_array *tr = inode->i_private;
4831         struct trace_iterator *iter;
4832         int ret;
4833
4834         ret = tracing_check_open_get_tr(tr);
4835         if (ret)
4836                 return ret;
4837
4838         /* If this file was open for write, then erase contents */
4839         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4840                 int cpu = tracing_get_cpu(inode);
4841                 struct array_buffer *trace_buf = &tr->array_buffer;
4842
4843 #ifdef CONFIG_TRACER_MAX_TRACE
4844                 if (tr->current_trace->print_max)
4845                         trace_buf = &tr->max_buffer;
4846 #endif
4847
4848                 if (cpu == RING_BUFFER_ALL_CPUS)
4849                         tracing_reset_online_cpus(trace_buf);
4850                 else
4851                         tracing_reset_cpu(trace_buf, cpu);
4852         }
4853
4854         if (file->f_mode & FMODE_READ) {
4855                 iter = __tracing_open(inode, file, false);
4856                 if (IS_ERR(iter))
4857                         ret = PTR_ERR(iter);
4858                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4859                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4860         }
4861
4862         if (ret < 0)
4863                 trace_array_put(tr);
4864
4865         return ret;
4866 }
4867
4868 /*
4869  * Some tracers are not suitable for instance buffers.
4870  * A tracer is always available for the global array (toplevel),
4871  * and is available for an instance only if it explicitly allows it.
4872  */
4873 static bool
4874 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4875 {
4876         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4877 }
4878
4879 /* Find the next tracer that this trace array may use */
4880 static struct tracer *
4881 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4882 {
4883         while (t && !trace_ok_for_array(t, tr))
4884                 t = t->next;
4885
4886         return t;
4887 }
4888
4889 static void *
4890 t_next(struct seq_file *m, void *v, loff_t *pos)
4891 {
4892         struct trace_array *tr = m->private;
4893         struct tracer *t = v;
4894
4895         (*pos)++;
4896
4897         if (t)
4898                 t = get_tracer_for_array(tr, t->next);
4899
4900         return t;
4901 }
4902
4903 static void *t_start(struct seq_file *m, loff_t *pos)
4904 {
4905         struct trace_array *tr = m->private;
4906         struct tracer *t;
4907         loff_t l = 0;
4908
4909         mutex_lock(&trace_types_lock);
4910
4911         t = get_tracer_for_array(tr, trace_types);
4912         for (; t && l < *pos; t = t_next(m, t, &l))
4913                         ;
4914
4915         return t;
4916 }
4917
4918 static void t_stop(struct seq_file *m, void *p)
4919 {
4920         mutex_unlock(&trace_types_lock);
4921 }
4922
4923 static int t_show(struct seq_file *m, void *v)
4924 {
4925         struct tracer *t = v;
4926
4927         if (!t)
4928                 return 0;
4929
4930         seq_puts(m, t->name);
4931         if (t->next)
4932                 seq_putc(m, ' ');
4933         else
4934                 seq_putc(m, '\n');
4935
4936         return 0;
4937 }
4938
4939 static const struct seq_operations show_traces_seq_ops = {
4940         .start          = t_start,
4941         .next           = t_next,
4942         .stop           = t_stop,
4943         .show           = t_show,
4944 };
4945
4946 static int show_traces_open(struct inode *inode, struct file *file)
4947 {
4948         struct trace_array *tr = inode->i_private;
4949         struct seq_file *m;
4950         int ret;
4951
4952         ret = tracing_check_open_get_tr(tr);
4953         if (ret)
4954                 return ret;
4955
4956         ret = seq_open(file, &show_traces_seq_ops);
4957         if (ret) {
4958                 trace_array_put(tr);
4959                 return ret;
4960         }
4961
4962         m = file->private_data;
4963         m->private = tr;
4964
4965         return 0;
4966 }
4967
4968 static int show_traces_release(struct inode *inode, struct file *file)
4969 {
4970         struct trace_array *tr = inode->i_private;
4971
4972         trace_array_put(tr);
4973         return seq_release(inode, file);
4974 }
4975
4976 static ssize_t
4977 tracing_write_stub(struct file *filp, const char __user *ubuf,
4978                    size_t count, loff_t *ppos)
4979 {
4980         return count;
4981 }
4982
4983 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4984 {
4985         int ret;
4986
4987         if (file->f_mode & FMODE_READ)
4988                 ret = seq_lseek(file, offset, whence);
4989         else
4990                 file->f_pos = ret = 0;
4991
4992         return ret;
4993 }
4994
4995 static const struct file_operations tracing_fops = {
4996         .open           = tracing_open,
4997         .read           = seq_read,
4998         .write          = tracing_write_stub,
4999         .llseek         = tracing_lseek,
5000         .release        = tracing_release,
5001 };
5002
5003 static const struct file_operations show_traces_fops = {
5004         .open           = show_traces_open,
5005         .read           = seq_read,
5006         .llseek         = seq_lseek,
5007         .release        = show_traces_release,
5008 };
5009
5010 static ssize_t
5011 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5012                      size_t count, loff_t *ppos)
5013 {
5014         struct trace_array *tr = file_inode(filp)->i_private;
5015         char *mask_str;
5016         int len;
5017
5018         len = snprintf(NULL, 0, "%*pb\n",
5019                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5020         mask_str = kmalloc(len, GFP_KERNEL);
5021         if (!mask_str)
5022                 return -ENOMEM;
5023
5024         len = snprintf(mask_str, len, "%*pb\n",
5025                        cpumask_pr_args(tr->tracing_cpumask));
5026         if (len >= count) {
5027                 count = -EINVAL;
5028                 goto out_err;
5029         }
5030         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5031
5032 out_err:
5033         kfree(mask_str);
5034
5035         return count;
5036 }
5037
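/*
 * Example (illustrative): this is the path taken when user space writes a
 * CPU mask to the tracefs "tracing_cpumask" file, e.g. with tracefs mounted
 * at /sys/kernel/tracing:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits tracing to CPUs 0 and 1. CPUs dropped from the mask get their
 * per-cpu disabled counter bumped and ring buffer recording disabled;
 * CPUs added back to the mask get the reverse.
 */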
5038 int tracing_set_cpumask(struct trace_array *tr,
5039                         cpumask_var_t tracing_cpumask_new)
5040 {
5041         int cpu;
5042
5043         if (!tr)
5044                 return -EINVAL;
5045
5046         local_irq_disable();
5047         arch_spin_lock(&tr->max_lock);
5048         for_each_tracing_cpu(cpu) {
5049                 /*
5050                  * Increase/decrease the disabled counter if we are
5051                  * about to flip a bit in the cpumask:
5052                  */
5053                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5054                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5055                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5056                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5057                 }
5058                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5059                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5060                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5061                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5062                 }
5063         }
5064         arch_spin_unlock(&tr->max_lock);
5065         local_irq_enable();
5066
5067         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5068
5069         return 0;
5070 }
5071
5072 static ssize_t
5073 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5074                       size_t count, loff_t *ppos)
5075 {
5076         struct trace_array *tr = file_inode(filp)->i_private;
5077         cpumask_var_t tracing_cpumask_new;
5078         int err;
5079
5080         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5081                 return -ENOMEM;
5082
5083         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5084         if (err)
5085                 goto err_free;
5086
5087         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5088         if (err)
5089                 goto err_free;
5090
5091         free_cpumask_var(tracing_cpumask_new);
5092
5093         return count;
5094
5095 err_free:
5096         free_cpumask_var(tracing_cpumask_new);
5097
5098         return err;
5099 }
5100
5101 static const struct file_operations tracing_cpumask_fops = {
5102         .open           = tracing_open_generic_tr,
5103         .read           = tracing_cpumask_read,
5104         .write          = tracing_cpumask_write,
5105         .release        = tracing_release_generic_tr,
5106         .llseek         = generic_file_llseek,
5107 };
5108
5109 static int tracing_trace_options_show(struct seq_file *m, void *v)
5110 {
5111         struct tracer_opt *trace_opts;
5112         struct trace_array *tr = m->private;
5113         u32 tracer_flags;
5114         int i;
5115
5116         mutex_lock(&trace_types_lock);
5117         tracer_flags = tr->current_trace->flags->val;
5118         trace_opts = tr->current_trace->flags->opts;
5119
5120         for (i = 0; trace_options[i]; i++) {
5121                 if (tr->trace_flags & (1 << i))
5122                         seq_printf(m, "%s\n", trace_options[i]);
5123                 else
5124                         seq_printf(m, "no%s\n", trace_options[i]);
5125         }
5126
5127         for (i = 0; trace_opts[i].name; i++) {
5128                 if (tracer_flags & trace_opts[i].bit)
5129                         seq_printf(m, "%s\n", trace_opts[i].name);
5130                 else
5131                         seq_printf(m, "no%s\n", trace_opts[i].name);
5132         }
5133         mutex_unlock(&trace_types_lock);
5134
5135         return 0;
5136 }
5137
5138 static int __set_tracer_option(struct trace_array *tr,
5139                                struct tracer_flags *tracer_flags,
5140                                struct tracer_opt *opts, int neg)
5141 {
5142         struct tracer *trace = tracer_flags->trace;
5143         int ret;
5144
5145         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5146         if (ret)
5147                 return ret;
5148
5149         if (neg)
5150                 tracer_flags->val &= ~opts->bit;
5151         else
5152                 tracer_flags->val |= opts->bit;
5153         return 0;
5154 }
5155
5156 /* Try to assign a tracer specific option */
5157 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5158 {
5159         struct tracer *trace = tr->current_trace;
5160         struct tracer_flags *tracer_flags = trace->flags;
5161         struct tracer_opt *opts = NULL;
5162         int i;
5163
5164         for (i = 0; tracer_flags->opts[i].name; i++) {
5165                 opts = &tracer_flags->opts[i];
5166
5167                 if (strcmp(cmp, opts->name) == 0)
5168                         return __set_tracer_option(tr, trace->flags, opts, neg);
5169         }
5170
5171         return -EINVAL;
5172 }
5173
5174 /* Some tracers require overwrite to stay enabled */
5175 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5176 {
5177         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5178                 return -1;
5179
5180         return 0;
5181 }
5182
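/*
 * Example (illustrative): the generic trace option interfaces end up here,
 * e.g. "echo 1 > options/record-tgid" or "echo record-tgid > trace_options"
 * toggles the corresponding TRACE_ITER_* bit, with the side effects handled
 * below (tgid map allocation, overwrite mode, printk control, ...).
 */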
5183 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5184 {
5185         if ((mask == TRACE_ITER_RECORD_TGID) ||
5186             (mask == TRACE_ITER_RECORD_CMD))
5187                 lockdep_assert_held(&event_mutex);
5188
5189         /* do nothing if flag is already set */
5190         if (!!(tr->trace_flags & mask) == !!enabled)
5191                 return 0;
5192
5193         /* Give the tracer a chance to approve the change */
5194         if (tr->current_trace->flag_changed)
5195                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5196                         return -EINVAL;
5197
5198         if (enabled)
5199                 tr->trace_flags |= mask;
5200         else
5201                 tr->trace_flags &= ~mask;
5202
5203         if (mask == TRACE_ITER_RECORD_CMD)
5204                 trace_event_enable_cmd_record(enabled);
5205
5206         if (mask == TRACE_ITER_RECORD_TGID) {
5207                 if (!tgid_map)
5208                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5209                                            sizeof(*tgid_map),
5210                                            GFP_KERNEL);
5211                 if (!tgid_map) {
5212                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5213                         return -ENOMEM;
5214                 }
5215
5216                 trace_event_enable_tgid_record(enabled);
5217         }
5218
5219         if (mask == TRACE_ITER_EVENT_FORK)
5220                 trace_event_follow_fork(tr, enabled);
5221
5222         if (mask == TRACE_ITER_FUNC_FORK)
5223                 ftrace_pid_follow_fork(tr, enabled);
5224
5225         if (mask == TRACE_ITER_OVERWRITE) {
5226                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5227 #ifdef CONFIG_TRACER_MAX_TRACE
5228                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5229 #endif
5230         }
5231
5232         if (mask == TRACE_ITER_PRINTK) {
5233                 trace_printk_start_stop_comm(enabled);
5234                 trace_printk_control(enabled);
5235         }
5236
5237         return 0;
5238 }
5239
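/*
 * Example (illustrative): handles one option token written to the tracefs
 * "trace_options" file, e.g.:
 *
 *   # echo print-parent > trace_options          (set a generic flag)
 *   # echo nofuncgraph-overhead > trace_options  (clear a tracer option)
 *
 * A "no" prefix clears the option; names not found in the generic
 * trace_options[] list fall through to the current tracer's own options.
 */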
5240 int trace_set_options(struct trace_array *tr, char *option)
5241 {
5242         char *cmp;
5243         int neg = 0;
5244         int ret;
5245         size_t orig_len = strlen(option);
5246         int len;
5247
5248         cmp = strstrip(option);
5249
5250         len = str_has_prefix(cmp, "no");
5251         if (len)
5252                 neg = 1;
5253
5254         cmp += len;
5255
5256         mutex_lock(&event_mutex);
5257         mutex_lock(&trace_types_lock);
5258
5259         ret = match_string(trace_options, -1, cmp);
5260         /* If no option could be set, test the specific tracer options */
5261         if (ret < 0)
5262                 ret = set_tracer_option(tr, cmp, neg);
5263         else
5264                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5265
5266         mutex_unlock(&trace_types_lock);
5267         mutex_unlock(&event_mutex);
5268
5269         /*
5270          * If the first trailing whitespace is replaced with '\0' by strstrip,
5271          * turn it back into a space.
5272          */
5273         if (orig_len > strlen(option))
5274                 option[strlen(option)] = ' ';
5275
5276         return ret;
5277 }
5278
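/*
 * Example (illustrative): applies the comma-separated option list captured
 * from the kernel command line, e.g. booting with
 * "trace_options=sym-addr,nooverwrite" sets each listed option on the
 * global trace array, one token at a time.
 */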
5279 static void __init apply_trace_boot_options(void)
5280 {
5281         char *buf = trace_boot_options_buf;
5282         char *option;
5283
5284         while (true) {
5285                 option = strsep(&buf, ",");
5286
5287                 if (!option)
5288                         break;
5289
5290                 if (*option)
5291                         trace_set_options(&global_trace, option);
5292
5293                 /* Put back the comma to allow this to be called again */
5294                 if (buf)
5295                         *(buf - 1) = ',';
5296         }
5297 }
5298
5299 static ssize_t
5300 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5301                         size_t cnt, loff_t *ppos)
5302 {
5303         struct seq_file *m = filp->private_data;
5304         struct trace_array *tr = m->private;
5305         char buf[64];
5306         int ret;
5307
5308         if (cnt >= sizeof(buf))
5309                 return -EINVAL;
5310
5311         if (copy_from_user(buf, ubuf, cnt))
5312                 return -EFAULT;
5313
5314         buf[cnt] = 0;
5315
5316         ret = trace_set_options(tr, buf);
5317         if (ret < 0)
5318                 return ret;
5319
5320         *ppos += cnt;
5321
5322         return cnt;
5323 }
5324
5325 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5326 {
5327         struct trace_array *tr = inode->i_private;
5328         int ret;
5329
5330         ret = tracing_check_open_get_tr(tr);
5331         if (ret)
5332                 return ret;
5333
5334         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5335         if (ret < 0)
5336                 trace_array_put(tr);
5337
5338         return ret;
5339 }
5340
5341 static const struct file_operations tracing_iter_fops = {
5342         .open           = tracing_trace_options_open,
5343         .read           = seq_read,
5344         .llseek         = seq_lseek,
5345         .release        = tracing_single_release_tr,
5346         .write          = tracing_trace_options_write,
5347 };
5348
5349 static const char readme_msg[] =
5350         "tracing mini-HOWTO:\n\n"
5351         "# echo 0 > tracing_on : quick way to disable tracing\n"
5352         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5353         " Important files:\n"
5354         "  trace\t\t\t- The static contents of the buffer\n"
5355         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5356         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5357         "  current_tracer\t- function and latency tracers\n"
5358         "  available_tracers\t- list of configured tracers for current_tracer\n"
5359         "  error_log\t- error log for failed commands (that support it)\n"
5360         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5361         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5362         "  trace_clock\t\t- change the clock used to order events\n"
5363         "       local:   Per cpu clock but may not be synced across CPUs\n"
5364         "      global:   Synced across CPUs but slows tracing down.\n"
5365         "     counter:   Not a clock, but just an increment\n"
5366         "      uptime:   Jiffy counter from time of boot\n"
5367         "        perf:   Same clock that perf events use\n"
5368 #ifdef CONFIG_X86_64
5369         "     x86-tsc:   TSC cycle counter\n"
5370 #endif
5371         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5372         "       delta:   Delta difference against a buffer-wide timestamp\n"
5373         "    absolute:   Absolute (standalone) timestamp\n"
5374         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5375         "\n  trace_marker_raw\t\t- Writes into this file are inserted into the kernel buffer as binary data\n"
5376         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5377         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5378         "\t\t\t  Remove sub-buffer with rmdir\n"
5379         "  trace_options\t\t- Set format or modify how tracing happens\n"
5380         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5381         "\t\t\t  option name\n"
5382         "  saved_cmdlines_size\t- echo the number of entries to keep in the comm-pid list\n"
5383 #ifdef CONFIG_DYNAMIC_FTRACE
5384         "\n  available_filter_functions - list of functions that can be filtered on\n"
5385         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5386         "\t\t\t  functions\n"
5387         "\t     accepts: func_full_name or glob-matching-pattern\n"
5388         "\t     modules: Can select a group via module\n"
5389         "\t      Format: :mod:<module-name>\n"
5390         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5391         "\t    triggers: a command to perform when function is hit\n"
5392         "\t      Format: <function>:<trigger>[:count]\n"
5393         "\t     trigger: traceon, traceoff\n"
5394         "\t\t      enable_event:<system>:<event>\n"
5395         "\t\t      disable_event:<system>:<event>\n"
5396 #ifdef CONFIG_STACKTRACE
5397         "\t\t      stacktrace\n"
5398 #endif
5399 #ifdef CONFIG_TRACER_SNAPSHOT
5400         "\t\t      snapshot\n"
5401 #endif
5402         "\t\t      dump\n"
5403         "\t\t      cpudump\n"
5404         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5405         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5406         "\t     The first one will disable tracing every time do_fault is hit\n"
5407         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5408         "\t       The first time do_trap is hit and it disables tracing, the\n"
5409         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5410         "\t       the counter will not decrement. It only decrements when the\n"
5411         "\t       trigger did work\n"
5412         "\t     To remove a trigger without a count:\n"
5413         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5414         "\t     To remove a trigger with a count:\n"
5415         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5416         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5417         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5418         "\t    modules: Can select a group via module command :mod:\n"
5419         "\t    Does not accept triggers\n"
5420 #endif /* CONFIG_DYNAMIC_FTRACE */
5421 #ifdef CONFIG_FUNCTION_TRACER
5422         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5423         "\t\t    (function)\n"
5424         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5425         "\t\t    (function)\n"
5426 #endif
5427 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5428         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5429         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5430         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5431 #endif
5432 #ifdef CONFIG_TRACER_SNAPSHOT
5433         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5434         "\t\t\t  snapshot buffer. Read the contents for more\n"
5435         "\t\t\t  information\n"
5436 #endif
5437 #ifdef CONFIG_STACK_TRACER
5438         "  stack_trace\t\t- Shows the max stack trace when active\n"
5439         "  stack_max_size\t- Shows current max stack size that was traced\n"
5440         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5441         "\t\t\t  new trace)\n"
5442 #ifdef CONFIG_DYNAMIC_FTRACE
5443         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5444         "\t\t\t  traces\n"
5445 #endif
5446 #endif /* CONFIG_STACK_TRACER */
5447 #ifdef CONFIG_DYNAMIC_EVENTS
5448         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5449         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5450 #endif
5451 #ifdef CONFIG_KPROBE_EVENTS
5452         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5453         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5454 #endif
5455 #ifdef CONFIG_UPROBE_EVENTS
5456         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5457         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5458 #endif
5459 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5460         "\t  accepts: event-definitions (one definition per line)\n"
5461         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5462         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5463 #ifdef CONFIG_HIST_TRIGGERS
5464         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5465 #endif
5466         "\t           -:[<group>/]<event>\n"
5467 #ifdef CONFIG_KPROBE_EVENTS
5468         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5469   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5470 #endif
5471 #ifdef CONFIG_UPROBE_EVENTS
5472   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5473 #endif
5474         "\t     args: <name>=fetcharg[:type]\n"
5475         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5476 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5477         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5478 #else
5479         "\t           $stack<index>, $stack, $retval, $comm,\n"
5480 #endif
5481         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5482         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5483         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5484         "\t           <type>\\[<array-size>\\]\n"
5485 #ifdef CONFIG_HIST_TRIGGERS
5486         "\t    field: <stype> <name>;\n"
5487         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5488         "\t           [unsigned] char/int/long\n"
5489 #endif
5490 #endif
5491         "  events/\t\t- Directory containing all trace event subsystems:\n"
5492         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5493         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5494         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5495         "\t\t\t  events\n"
5496         "      filter\t\t- If set, only events passing filter are traced\n"
5497         "  events/<system>/<event>/\t- Directory containing control files for\n"
5498         "\t\t\t  <event>:\n"
5499         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5500         "      filter\t\t- If set, only events passing filter are traced\n"
5501         "      trigger\t\t- If set, a command to perform when event is hit\n"
5502         "\t    Format: <trigger>[:count][if <filter>]\n"
5503         "\t   trigger: traceon, traceoff\n"
5504         "\t            enable_event:<system>:<event>\n"
5505         "\t            disable_event:<system>:<event>\n"
5506 #ifdef CONFIG_HIST_TRIGGERS
5507         "\t            enable_hist:<system>:<event>\n"
5508         "\t            disable_hist:<system>:<event>\n"
5509 #endif
5510 #ifdef CONFIG_STACKTRACE
5511         "\t\t    stacktrace\n"
5512 #endif
5513 #ifdef CONFIG_TRACER_SNAPSHOT
5514         "\t\t    snapshot\n"
5515 #endif
5516 #ifdef CONFIG_HIST_TRIGGERS
5517         "\t\t    hist (see below)\n"
5518 #endif
5519         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5520         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5521         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5522         "\t                  events/block/block_unplug/trigger\n"
5523         "\t   The first disables tracing every time block_unplug is hit.\n"
5524         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5525         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5526         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5527         "\t   Like function triggers, the counter is only decremented if it\n"
5528         "\t    enabled or disabled tracing.\n"
5529         "\t   To remove a trigger without a count:\n"
5530         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5531         "\t   To remove a trigger with a count:\n"
5532         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5533         "\t   Filters can be ignored when removing a trigger.\n"
5534 #ifdef CONFIG_HIST_TRIGGERS
5535         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5536         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5537         "\t            [:values=<field1[,field2,...]>]\n"
5538         "\t            [:sort=<field1[,field2,...]>]\n"
5539         "\t            [:size=#entries]\n"
5540         "\t            [:pause][:continue][:clear]\n"
5541         "\t            [:name=histname1]\n"
5542         "\t            [:<handler>.<action>]\n"
5543         "\t            [if <filter>]\n\n"
5544         "\t    When a matching event is hit, an entry is added to a hash\n"
5545         "\t    table using the key(s) and value(s) named, and the value of a\n"
5546         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5547         "\t    correspond to fields in the event's format description.  Keys\n"
5548         "\t    can be any field, or the special string 'stacktrace'.\n"
5549         "\t    Compound keys consisting of up to two fields can be specified\n"
5550         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5551         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5552         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5553         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5554         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5555         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5556         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5557         "\t    its histogram data will be shared with other triggers of the\n"
5558         "\t    same name, and trigger hits will update this common data.\n\n"
5559         "\t    Reading the 'hist' file for the event will dump the hash\n"
5560         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5561         "\t    triggers attached to an event, there will be a table for each\n"
5562         "\t    trigger in the output.  The table displayed for a named\n"
5563         "\t    trigger will be the same as any other instance having the\n"
5564         "\t    same name.  The default format used to display a given field\n"
5565         "\t    can be modified by appending any of the following modifiers\n"
5566         "\t    to the field name, as applicable:\n\n"
5567         "\t            .hex        display a number as a hex value\n"
5568         "\t            .sym        display an address as a symbol\n"
5569         "\t            .sym-offset display an address as a symbol and offset\n"
5570         "\t            .execname   display a common_pid as a program name\n"
5571         "\t            .syscall    display a syscall id as a syscall name\n"
5572         "\t            .log2       display log2 value rather than raw number\n"
5573         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5574         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5575         "\t    trigger or to start a hist trigger but not log any events\n"
5576         "\t    until told to do so.  'continue' can be used to start or\n"
5577         "\t    restart a paused hist trigger.\n\n"
5578         "\t    The 'clear' parameter will clear the contents of a running\n"
5579         "\t    hist trigger and leave its current paused/active state\n"
5580         "\t    unchanged.\n\n"
5581         "\t    The enable_hist and disable_hist triggers can be used to\n"
5582         "\t    have one event conditionally start and stop another event's\n"
5583         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5584         "\t    the enable_event and disable_event triggers.\n\n"
5585         "\t    Hist trigger handlers and actions are executed whenever a\n"
5586         "\t    histogram entry is added or updated.  They take the form:\n\n"
5587         "\t        <handler>.<action>\n\n"
5588         "\t    The available handlers are:\n\n"
5589         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5590         "\t        onmax(var)               - invoke if var exceeds current max\n"
5591         "\t        onchange(var)            - invoke action if var changes\n\n"
5592         "\t    The available actions are:\n\n"
5593         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5594         "\t        save(field,...)                      - save current event fields\n"
5595 #ifdef CONFIG_TRACER_SNAPSHOT
5596         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5597 #endif
5598 #ifdef CONFIG_SYNTH_EVENTS
5599         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5600         "\t  Write into this file to define/undefine new synthetic events.\n"
5601         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5602 #endif
5603 #endif
5604 ;
5605
5606 static ssize_t
5607 tracing_readme_read(struct file *filp, char __user *ubuf,
5608                        size_t cnt, loff_t *ppos)
5609 {
5610         return simple_read_from_buffer(ubuf, cnt, ppos,
5611                                         readme_msg, strlen(readme_msg));
5612 }
5613
5614 static const struct file_operations tracing_readme_fops = {
5615         .open           = tracing_open_generic,
5616         .read           = tracing_readme_read,
5617         .llseek         = generic_file_llseek,
5618 };
5619
5620 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5621 {
5622         int *ptr = v;
5623
5624         if (*pos || m->count)
5625                 ptr++;
5626
5627         (*pos)++;
5628
5629         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5630                 if (trace_find_tgid(*ptr))
5631                         return ptr;
5632         }
5633
5634         return NULL;
5635 }
5636
5637 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5638 {
5639         void *v;
5640         loff_t l = 0;
5641
5642         if (!tgid_map)
5643                 return NULL;
5644
5645         v = &tgid_map[0];
5646         while (l <= *pos) {
5647                 v = saved_tgids_next(m, v, &l);
5648                 if (!v)
5649                         return NULL;
5650         }
5651
5652         return v;
5653 }
5654
5655 static void saved_tgids_stop(struct seq_file *m, void *v)
5656 {
5657 }
5658
5659 static int saved_tgids_show(struct seq_file *m, void *v)
5660 {
5661         int pid = (int *)v - tgid_map;
5662
5663         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5664         return 0;
5665 }
5666
5667 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5668         .start          = saved_tgids_start,
5669         .stop           = saved_tgids_stop,
5670         .next           = saved_tgids_next,
5671         .show           = saved_tgids_show,
5672 };
5673
5674 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5675 {
5676         int ret;
5677
5678         ret = tracing_check_open_get_tr(NULL);
5679         if (ret)
5680                 return ret;
5681
5682         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5683 }
5684
5685
5686 static const struct file_operations tracing_saved_tgids_fops = {
5687         .open           = tracing_saved_tgids_open,
5688         .read           = seq_read,
5689         .llseek         = seq_lseek,
5690         .release        = seq_release,
5691 };
5692
5693 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5694 {
5695         unsigned int *ptr = v;
5696
5697         if (*pos || m->count)
5698                 ptr++;
5699
5700         (*pos)++;
5701
5702         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5703              ptr++) {
5704                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5705                         continue;
5706
5707                 return ptr;
5708         }
5709
5710         return NULL;
5711 }
5712
5713 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5714 {
5715         void *v;
5716         loff_t l = 0;
5717
5718         preempt_disable();
5719         arch_spin_lock(&trace_cmdline_lock);
5720
5721         v = &savedcmd->map_cmdline_to_pid[0];
5722         while (l <= *pos) {
5723                 v = saved_cmdlines_next(m, v, &l);
5724                 if (!v)
5725                         return NULL;
5726         }
5727
5728         return v;
5729 }
5730
5731 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5732 {
5733         arch_spin_unlock(&trace_cmdline_lock);
5734         preempt_enable();
5735 }
5736
5737 static int saved_cmdlines_show(struct seq_file *m, void *v)
5738 {
5739         char buf[TASK_COMM_LEN];
5740         unsigned int *pid = v;
5741
5742         __trace_find_cmdline(*pid, buf);
5743         seq_printf(m, "%d %s\n", *pid, buf);
5744         return 0;
5745 }
5746
5747 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5748         .start          = saved_cmdlines_start,
5749         .next           = saved_cmdlines_next,
5750         .stop           = saved_cmdlines_stop,
5751         .show           = saved_cmdlines_show,
5752 };
5753
5754 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5755 {
5756         int ret;
5757
5758         ret = tracing_check_open_get_tr(NULL);
5759         if (ret)
5760                 return ret;
5761
5762         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5763 }
5764
5765 static const struct file_operations tracing_saved_cmdlines_fops = {
5766         .open           = tracing_saved_cmdlines_open,
5767         .read           = seq_read,
5768         .llseek         = seq_lseek,
5769         .release        = seq_release,
5770 };
5771
5772 static ssize_t
5773 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5774                                  size_t cnt, loff_t *ppos)
5775 {
5776         char buf[64];
5777         int r;
5778
5779         arch_spin_lock(&trace_cmdline_lock);
5780         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5781         arch_spin_unlock(&trace_cmdline_lock);
5782
5783         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5784 }
5785
5786 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5787 {
5788         kfree(s->saved_cmdlines);
5789         kfree(s->map_cmdline_to_pid);
5790         kfree(s);
5791 }
5792
5793 static int tracing_resize_saved_cmdlines(unsigned int val)
5794 {
5795         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5796
5797         s = kmalloc(sizeof(*s), GFP_KERNEL);
5798         if (!s)
5799                 return -ENOMEM;
5800
5801         if (allocate_cmdlines_buffer(val, s) < 0) {
5802                 kfree(s);
5803                 return -ENOMEM;
5804         }
5805
5806         arch_spin_lock(&trace_cmdline_lock);
5807         savedcmd_temp = savedcmd;
5808         savedcmd = s;
5809         arch_spin_unlock(&trace_cmdline_lock);
5810         free_saved_cmdlines_buffer(savedcmd_temp);
5811
5812         return 0;
5813 }
5814
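/*
 * Example (illustrative): user space resizes the saved comm cache through
 * the tracefs "saved_cmdlines_size" file, e.g.:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * which allocates a buffer of that many entries (1..PID_MAX_DEFAULT) and
 * swaps it in under trace_cmdline_lock via tracing_resize_saved_cmdlines().
 */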
5815 static ssize_t
5816 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5817                                   size_t cnt, loff_t *ppos)
5818 {
5819         unsigned long val;
5820         int ret;
5821
5822         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5823         if (ret)
5824                 return ret;
5825
5826         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5827         if (!val || val > PID_MAX_DEFAULT)
5828                 return -EINVAL;
5829
5830         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5831         if (ret < 0)
5832                 return ret;
5833
5834         *ppos += cnt;
5835
5836         return cnt;
5837 }
5838
5839 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5840         .open           = tracing_open_generic,
5841         .read           = tracing_saved_cmdlines_size_read,
5842         .write          = tracing_saved_cmdlines_size_write,
5843 };
5844
5845 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5846 static union trace_eval_map_item *
5847 update_eval_map(union trace_eval_map_item *ptr)
5848 {
5849         if (!ptr->map.eval_string) {
5850                 if (ptr->tail.next) {
5851                         ptr = ptr->tail.next;
5852                         /* Set ptr to the next real item (skip head) */
5853                         ptr++;
5854                 } else
5855                         return NULL;
5856         }
5857         return ptr;
5858 }
5859
5860 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5861 {
5862         union trace_eval_map_item *ptr = v;
5863
5864         /*
5865          * Paranoid! If ptr points to end, we don't want to increment past it.
5866          * This really should never happen.
5867          */
5868         (*pos)++;
5869         ptr = update_eval_map(ptr);
5870         if (WARN_ON_ONCE(!ptr))
5871                 return NULL;
5872
5873         ptr++;
5874         ptr = update_eval_map(ptr);
5875
5876         return ptr;
5877 }
5878
5879 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5880 {
5881         union trace_eval_map_item *v;
5882         loff_t l = 0;
5883
5884         mutex_lock(&trace_eval_mutex);
5885
5886         v = trace_eval_maps;
5887         if (v)
5888                 v++;
5889
5890         while (v && l < *pos) {
5891                 v = eval_map_next(m, v, &l);
5892         }
5893
5894         return v;
5895 }
5896
5897 static void eval_map_stop(struct seq_file *m, void *v)
5898 {
5899         mutex_unlock(&trace_eval_mutex);
5900 }
5901
5902 static int eval_map_show(struct seq_file *m, void *v)
5903 {
5904         union trace_eval_map_item *ptr = v;
5905
5906         seq_printf(m, "%s %ld (%s)\n",
5907                    ptr->map.eval_string, ptr->map.eval_value,
5908                    ptr->map.system);
5909
5910         return 0;
5911 }
5912
5913 static const struct seq_operations tracing_eval_map_seq_ops = {
5914         .start          = eval_map_start,
5915         .next           = eval_map_next,
5916         .stop           = eval_map_stop,
5917         .show           = eval_map_show,
5918 };
5919
5920 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5921 {
5922         int ret;
5923
5924         ret = tracing_check_open_get_tr(NULL);
5925         if (ret)
5926                 return ret;
5927
5928         return seq_open(filp, &tracing_eval_map_seq_ops);
5929 }
5930
5931 static const struct file_operations tracing_eval_map_fops = {
5932         .open           = tracing_eval_map_open,
5933         .read           = seq_read,
5934         .llseek         = seq_lseek,
5935         .release        = seq_release,
5936 };
5937
5938 static inline union trace_eval_map_item *
5939 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5940 {
5941         /* Return tail of array given the head */
5942         return ptr + ptr->head.length + 1;
5943 }
5944
5945 static void
5946 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5947                            int len)
5948 {
5949         struct trace_eval_map **stop;
5950         struct trace_eval_map **map;
5951         union trace_eval_map_item *map_array;
5952         union trace_eval_map_item *ptr;
5953
5954         stop = start + len;
5955
5956         /*
5957          * The trace_eval_maps contains the map plus a head and tail item,
5958          * where the head holds the module and length of array, and the
5959          * tail holds a pointer to the next list.
5960          */
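        /*
         * Rough sketch of the resulting layout (illustrative):
         *
         *   [ head: mod, length=len ][ map[0] ] ... [ map[len-1] ][ tail: next ]
         *
         * The tail item is zeroed below, so tail.next serves as the link to
         * the next module's array (or NULL at the end of the list).
         */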
5961         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5962         if (!map_array) {
5963                 pr_warn("Unable to allocate trace eval mapping\n");
5964                 return;
5965         }
5966
5967         mutex_lock(&trace_eval_mutex);
5968
5969         if (!trace_eval_maps)
5970                 trace_eval_maps = map_array;
5971         else {
5972                 ptr = trace_eval_maps;
5973                 for (;;) {
5974                         ptr = trace_eval_jmp_to_tail(ptr);
5975                         if (!ptr->tail.next)
5976                                 break;
5977                         ptr = ptr->tail.next;
5978
5979                 }
5980                 ptr->tail.next = map_array;
5981         }
5982         map_array->head.mod = mod;
5983         map_array->head.length = len;
5984         map_array++;
5985
5986         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5987                 map_array->map = **map;
5988                 map_array++;
5989         }
5990         memset(map_array, 0, sizeof(*map_array));
5991
5992         mutex_unlock(&trace_eval_mutex);
5993 }
5994
5995 static void trace_create_eval_file(struct dentry *d_tracer)
5996 {
5997         trace_create_file("eval_map", 0444, d_tracer,
5998                           NULL, &tracing_eval_map_fops);
5999 }
6000
6001 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6002 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6003 static inline void trace_insert_eval_map_file(struct module *mod,
6004                               struct trace_eval_map **start, int len) { }
6005 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6006
6007 static void trace_insert_eval_map(struct module *mod,
6008                                   struct trace_eval_map **start, int len)
6009 {
6010         struct trace_eval_map **map;
6011
6012         if (len <= 0)
6013                 return;
6014
6015         map = start;
6016
6017         trace_event_eval_update(map, len);
6018
6019         trace_insert_eval_map_file(mod, start, len);
6020 }
6021
6022 static ssize_t
6023 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6024                        size_t cnt, loff_t *ppos)
6025 {
6026         struct trace_array *tr = filp->private_data;
6027         char buf[MAX_TRACER_SIZE+2];
6028         int r;
6029
6030         mutex_lock(&trace_types_lock);
6031         r = sprintf(buf, "%s\n", tr->current_trace->name);
6032         mutex_unlock(&trace_types_lock);
6033
6034         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6035 }
6036
6037 int tracer_init(struct tracer *t, struct trace_array *tr)
6038 {
6039         tracing_reset_online_cpus(&tr->array_buffer);
6040         return t->init(tr);
6041 }
6042
6043 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6044 {
6045         int cpu;
6046
6047         for_each_tracing_cpu(cpu)
6048                 per_cpu_ptr(buf->data, cpu)->entries = val;
6049 }
6050
6051 #ifdef CONFIG_TRACER_MAX_TRACE
6052 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6053 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6054                                         struct array_buffer *size_buf, int cpu_id)
6055 {
6056         int cpu, ret = 0;
6057
6058         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6059                 for_each_tracing_cpu(cpu) {
6060                         ret = ring_buffer_resize(trace_buf->buffer,
6061                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6062                         if (ret < 0)
6063                                 break;
6064                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6065                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6066                 }
6067         } else {
6068                 ret = ring_buffer_resize(trace_buf->buffer,
6069                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6070                 if (ret == 0)
6071                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6072                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6073         }
6074
6075         return ret;
6076 }
6077 #endif /* CONFIG_TRACER_MAX_TRACE */
6078
6079 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6080                                         unsigned long size, int cpu)
6081 {
6082         int ret;
6083
6084         /*
6085          * If kernel or user changes the size of the ring buffer
6086          * we use the size that was given, and we can forget about
6087          * expanding it later.
6088          */
6089         ring_buffer_expanded = true;
6090
6091         /* May be called before buffers are initialized */
6092         if (!tr->array_buffer.buffer)
6093                 return 0;
6094
6095         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6096         if (ret < 0)
6097                 return ret;
6098
6099 #ifdef CONFIG_TRACER_MAX_TRACE
6100         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6101             !tr->current_trace->use_max_tr)
6102                 goto out;
6103
6104         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6105         if (ret < 0) {
6106                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6107                                                      &tr->array_buffer, cpu);
6108                 if (r < 0) {
6109                         /*
6110                          * AARGH! We are left with different
6111                          * size max buffer!!!!
6112                          * The max buffer is our "snapshot" buffer.
6113                          * When a tracer needs a snapshot (one of the
6114                          * latency tracers), it swaps the max buffer
6115                          * with the saved snapshot. We succeeded in updating
6116                          * the size of the main buffer, but failed to
6117                          * update the size of the max buffer. But when we tried
6118                          * to reset the main buffer to the original size, we
6119                          * failed there too. This is very unlikely to
6120                          * happen, but if it does, warn and kill all
6121                          * tracing.
6122                          */
6123                         WARN_ON(1);
6124                         tracing_disabled = 1;
6125                 }
6126                 return ret;
6127         }
6128
6129         if (cpu == RING_BUFFER_ALL_CPUS)
6130                 set_buffer_entries(&tr->max_buffer, size);
6131         else
6132                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6133
6134  out:
6135 #endif /* CONFIG_TRACER_MAX_TRACE */
6136
6137         if (cpu == RING_BUFFER_ALL_CPUS)
6138                 set_buffer_entries(&tr->array_buffer, size);
6139         else
6140                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6141
6142         return ret;
6143 }
6144
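/*
 * Example (illustrative): resize requests from the tracefs "buffer_size_kb"
 * files (top level or per_cpu/cpu<N>/) come through here, e.g.:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * with cpu_id set to RING_BUFFER_ALL_CPUS or to the specific CPU.
 */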
6145 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6146                                   unsigned long size, int cpu_id)
6147 {
6148         int ret = size;
6149
6150         mutex_lock(&trace_types_lock);
6151
6152         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6153                 /* make sure this cpu is enabled in the mask */
6154                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6155                         ret = -EINVAL;
6156                         goto out;
6157                 }
6158         }
6159
6160         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6161         if (ret < 0)
6162                 ret = -ENOMEM;
6163
6164 out:
6165         mutex_unlock(&trace_types_lock);
6166
6167         return ret;
6168 }
6169
6170
6171 /**
6172  * tracing_update_buffers - used by tracing facility to expand ring buffers
6173  *
6174  * To save memory when tracing is never used on a system that has it
6175  * configured in, the ring buffers are set to a minimum size. Once
6176  * a user starts to use the tracing facility, they need to grow
6177  * to their default size.
6178  *
6179  * This function is to be called when a tracer is about to be used.
6180  */
6181 int tracing_update_buffers(void)
6182 {
6183         int ret = 0;
6184
6185         mutex_lock(&trace_types_lock);
6186         if (!ring_buffer_expanded)
6187                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6188                                                 RING_BUFFER_ALL_CPUS);
6189         mutex_unlock(&trace_types_lock);
6190
6191         return ret;
6192 }
6193
6194 struct trace_option_dentry;
6195
6196 static void
6197 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6198
6199 /*
6200  * Used to clear out the tracer before deletion of an instance.
6201  * Must have trace_types_lock held.
6202  */
6203 static void tracing_set_nop(struct trace_array *tr)
6204 {
6205         if (tr->current_trace == &nop_trace)
6206                 return;
6207
6208         tr->current_trace->enabled--;
6209
6210         if (tr->current_trace->reset)
6211                 tr->current_trace->reset(tr);
6212
6213         tr->current_trace = &nop_trace;
6214 }
6215
6216 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6217 {
6218         /* Only enable if the directory has been created already. */
6219         if (!tr->dir)
6220                 return;
6221
6222         create_trace_option_files(tr, t);
6223 }
6224
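/*
 * Example (illustrative): switching tracers via the tracefs "current_tracer"
 * file lands here, e.g.:
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * The previous tracer is torn down (current_trace is set to nop_trace
 * first), snapshot buffers are allocated or freed as the new tracer
 * requires, and the new tracer's init() is called.
 */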
6225 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6226 {
6227         struct tracer *t;
6228 #ifdef CONFIG_TRACER_MAX_TRACE
6229         bool had_max_tr;
6230 #endif
6231         int ret = 0;
6232
6233         mutex_lock(&trace_types_lock);
6234
6235         if (!ring_buffer_expanded) {
6236                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6237                                                 RING_BUFFER_ALL_CPUS);
6238                 if (ret < 0)
6239                         goto out;
6240                 ret = 0;
6241         }
6242
6243         for (t = trace_types; t; t = t->next) {
6244                 if (strcmp(t->name, buf) == 0)
6245                         break;
6246         }
6247         if (!t) {
6248                 ret = -EINVAL;
6249                 goto out;
6250         }
6251         if (t == tr->current_trace)
6252                 goto out;
6253
6254 #ifdef CONFIG_TRACER_SNAPSHOT
6255         if (t->use_max_tr) {
6256                 arch_spin_lock(&tr->max_lock);
6257                 if (tr->cond_snapshot)
6258                         ret = -EBUSY;
6259                 arch_spin_unlock(&tr->max_lock);
6260                 if (ret)
6261                         goto out;
6262         }
6263 #endif
6264         /* Some tracers won't work from the kernel command line */
6265         if (system_state < SYSTEM_RUNNING && t->noboot) {
6266                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6267                         t->name);
6268                 goto out;
6269         }
6270
6271         /* Some tracers are only allowed for the top level buffer */
6272         if (!trace_ok_for_array(t, tr)) {
6273                 ret = -EINVAL;
6274                 goto out;
6275         }
6276
6277         /* If trace pipe files are being read, we can't change the tracer */
6278         if (tr->trace_ref) {
6279                 ret = -EBUSY;
6280                 goto out;
6281         }
6282
6283         trace_branch_disable();
6284
6285         tr->current_trace->enabled--;
6286
6287         if (tr->current_trace->reset)
6288                 tr->current_trace->reset(tr);
6289
6290         /* Current trace needs to be nop_trace before synchronize_rcu */
6291         tr->current_trace = &nop_trace;
6292
6293 #ifdef CONFIG_TRACER_MAX_TRACE
6294         had_max_tr = tr->allocated_snapshot;
6295
6296         if (had_max_tr && !t->use_max_tr) {
6297                 /*
6298                  * We need to make sure that update_max_tr() sees that
6299                  * current_trace changed to nop_trace to keep it from
6300                  * swapping the buffers after we resize it.
6301                  * update_max_tr() is called with interrupts disabled,
6302                  * so a synchronize_rcu() is sufficient.
6303                  */
6304                 synchronize_rcu();
6305                 free_snapshot(tr);
6306         }
6307 #endif
6308
6309 #ifdef CONFIG_TRACER_MAX_TRACE
6310         if (t->use_max_tr && !had_max_tr) {
6311                 ret = tracing_alloc_snapshot_instance(tr);
6312                 if (ret < 0)
6313                         goto out;
6314         }
6315 #endif
6316
6317         if (t->init) {
6318                 ret = tracer_init(t, tr);
6319                 if (ret)
6320                         goto out;
6321         }
6322
6323         tr->current_trace = t;
6324         tr->current_trace->enabled++;
6325         trace_branch_enable(tr);
6326  out:
6327         mutex_unlock(&trace_types_lock);
6328
6329         return ret;
6330 }
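
/*
 * Illustrative sketch (not part of the original file): switching an
 * instance to a named tracer and back from kernel code.  The "function"
 * tracer is only available with CONFIG_FUNCTION_TRACER, and the helper
 * name example_toggle_tracer() is an assumption for this example; the
 * caller must not already hold trace_types_lock.
 *
 *	static int example_toggle_tracer(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = tracing_set_tracer(tr, "function");
 *		if (ret)
 *			return ret;
 *
 *		return tracing_set_tracer(tr, "nop");
 *	}
 */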
6331
6332 static ssize_t
6333 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6334                         size_t cnt, loff_t *ppos)
6335 {
6336         struct trace_array *tr = filp->private_data;
6337         char buf[MAX_TRACER_SIZE+1];
6338         int i;
6339         size_t ret;
6340         int err;
6341
6342         ret = cnt;
6343
6344         if (cnt > MAX_TRACER_SIZE)
6345                 cnt = MAX_TRACER_SIZE;
6346
6347         if (copy_from_user(buf, ubuf, cnt))
6348                 return -EFAULT;
6349
6350         buf[cnt] = 0;
6351
6352         /* strip trailing whitespace. */
6353         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6354                 buf[i] = 0;
6355
6356         err = tracing_set_tracer(tr, buf);
6357         if (err)
6358                 return err;
6359
6360         *ppos += ret;
6361
6362         return ret;
6363 }
6364
6365 static ssize_t
6366 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6367                    size_t cnt, loff_t *ppos)
6368 {
6369         char buf[64];
6370         int r;
6371
6372         r = snprintf(buf, sizeof(buf), "%ld\n",
6373                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6374         if (r > sizeof(buf))
6375                 r = sizeof(buf);
6376         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6377 }
6378
6379 static ssize_t
6380 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6381                     size_t cnt, loff_t *ppos)
6382 {
6383         unsigned long val;
6384         int ret;
6385
6386         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6387         if (ret)
6388                 return ret;
6389
6390         *ptr = val * 1000;
6391
6392         return cnt;
6393 }
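
/*
 * Worked example (illustrative): the files backed by these helpers are
 * written in microseconds but stored in nanoseconds.  Writing the string
 * "500" stores 500 * 1000 = 500000 in *ptr, and the read side above
 * prints it back as "500" via nsecs_to_usecs().  A stored value of
 * (unsigned long)-1 reads back as "-1".
 */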
6394
6395 static ssize_t
6396 tracing_thresh_read(struct file *filp, char __user *ubuf,
6397                     size_t cnt, loff_t *ppos)
6398 {
6399         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6400 }
6401
6402 static ssize_t
6403 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6404                      size_t cnt, loff_t *ppos)
6405 {
6406         struct trace_array *tr = filp->private_data;
6407         int ret;
6408
6409         mutex_lock(&trace_types_lock);
6410         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6411         if (ret < 0)
6412                 goto out;
6413
6414         if (tr->current_trace->update_thresh) {
6415                 ret = tr->current_trace->update_thresh(tr);
6416                 if (ret < 0)
6417                         goto out;
6418         }
6419
6420         ret = cnt;
6421 out:
6422         mutex_unlock(&trace_types_lock);
6423
6424         return ret;
6425 }
6426
6427 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6428
6429 static ssize_t
6430 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6431                      size_t cnt, loff_t *ppos)
6432 {
6433         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6434 }
6435
6436 static ssize_t
6437 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6438                       size_t cnt, loff_t *ppos)
6439 {
6440         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6441 }
6442
6443 #endif
6444
6445 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6446 {
6447         struct trace_array *tr = inode->i_private;
6448         struct trace_iterator *iter;
6449         int ret;
6450
6451         ret = tracing_check_open_get_tr(tr);
6452         if (ret)
6453                 return ret;
6454
6455         mutex_lock(&trace_types_lock);
6456
6457         /* create a buffer to store the information to pass to userspace */
6458         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6459         if (!iter) {
6460                 ret = -ENOMEM;
6461                 __trace_array_put(tr);
6462                 goto out;
6463         }
6464
6465         trace_seq_init(&iter->seq);
6466         iter->trace = tr->current_trace;
6467
6468         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6469                 ret = -ENOMEM;
6470                 goto fail;
6471         }
6472
6473         /* trace pipe does not show start of buffer */
6474         cpumask_setall(iter->started);
6475
6476         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6477                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6478
6479         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6480         if (trace_clocks[tr->clock_id].in_ns)
6481                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6482
6483         iter->tr = tr;
6484         iter->array_buffer = &tr->array_buffer;
6485         iter->cpu_file = tracing_get_cpu(inode);
6486         mutex_init(&iter->mutex);
6487         filp->private_data = iter;
6488
6489         if (iter->trace->pipe_open)
6490                 iter->trace->pipe_open(iter);
6491
6492         nonseekable_open(inode, filp);
6493
6494         tr->trace_ref++;
6495 out:
6496         mutex_unlock(&trace_types_lock);
6497         return ret;
6498
6499 fail:
6500         kfree(iter);
6501         __trace_array_put(tr);
6502         mutex_unlock(&trace_types_lock);
6503         return ret;
6504 }
6505
6506 static int tracing_release_pipe(struct inode *inode, struct file *file)
6507 {
6508         struct trace_iterator *iter = file->private_data;
6509         struct trace_array *tr = inode->i_private;
6510
6511         mutex_lock(&trace_types_lock);
6512
6513         tr->trace_ref--;
6514
6515         if (iter->trace->pipe_close)
6516                 iter->trace->pipe_close(iter);
6517
6518         mutex_unlock(&trace_types_lock);
6519
6520         free_cpumask_var(iter->started);
6521         mutex_destroy(&iter->mutex);
6522         kfree(iter);
6523
6524         trace_array_put(tr);
6525
6526         return 0;
6527 }
6528
6529 static __poll_t
6530 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6531 {
6532         struct trace_array *tr = iter->tr;
6533
6534         /* Iterators are static, they should be filled or empty */
6535         if (trace_buffer_iter(iter, iter->cpu_file))
6536                 return EPOLLIN | EPOLLRDNORM;
6537
6538         if (tr->trace_flags & TRACE_ITER_BLOCK)
6539                 /*
6540                  * Always select as readable when in blocking mode
6541                  */
6542                 return EPOLLIN | EPOLLRDNORM;
6543         else
6544                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6545                                              filp, poll_table);
6546 }
6547
6548 static __poll_t
6549 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6550 {
6551         struct trace_iterator *iter = filp->private_data;
6552
6553         return trace_poll(iter, filp, poll_table);
6554 }
6555
6556 /* Must be called with iter->mutex held. */
6557 static int tracing_wait_pipe(struct file *filp)
6558 {
6559         struct trace_iterator *iter = filp->private_data;
6560         int ret;
6561
6562         while (trace_empty(iter)) {
6563
6564                 if ((filp->f_flags & O_NONBLOCK)) {
6565                         return -EAGAIN;
6566                 }
6567
6568                 /*
6569                  * We block until we read something and tracing is disabled.
6570                  * We still block if tracing is disabled, but we have never
6571                  * read anything. This allows a user to cat this file, and
6572                  * then enable tracing. But after we have read something,
6573                  * we give an EOF when tracing is again disabled.
6574                  *
6575                  * iter->pos will be 0 if we haven't read anything.
6576                  */
6577                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6578                         break;
6579
6580                 mutex_unlock(&iter->mutex);
6581
6582                 ret = wait_on_pipe(iter, 0);
6583
6584                 mutex_lock(&iter->mutex);
6585
6586                 if (ret)
6587                         return ret;
6588         }
6589
6590         return 1;
6591 }
6592
6593 /*
6594  * Consumer reader.
6595  */
6596 static ssize_t
6597 tracing_read_pipe(struct file *filp, char __user *ubuf,
6598                   size_t cnt, loff_t *ppos)
6599 {
6600         struct trace_iterator *iter = filp->private_data;
6601         ssize_t sret;
6602
6603         /*
6604          * Avoid more than one consumer on a single file descriptor.
6605          * This is just a matter of trace coherency: the ring buffer itself
6606          * is protected.
6607          */
6608         mutex_lock(&iter->mutex);
6609
6610         /* return any leftover data */
6611         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6612         if (sret != -EBUSY)
6613                 goto out;
6614
6615         trace_seq_init(&iter->seq);
6616
6617         if (iter->trace->read) {
6618                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6619                 if (sret)
6620                         goto out;
6621         }
6622
6623 waitagain:
6624         sret = tracing_wait_pipe(filp);
6625         if (sret <= 0)
6626                 goto out;
6627
6628         /* stop when tracing is finished */
6629         if (trace_empty(iter)) {
6630                 sret = 0;
6631                 goto out;
6632         }
6633
6634         if (cnt >= PAGE_SIZE)
6635                 cnt = PAGE_SIZE - 1;
6636
6637         /* reset all but tr, trace, and overruns */
6638         memset(&iter->seq, 0,
6639                sizeof(struct trace_iterator) -
6640                offsetof(struct trace_iterator, seq));
6641         cpumask_clear(iter->started);
6642         trace_seq_init(&iter->seq);
6643         iter->pos = -1;
6644
6645         trace_event_read_lock();
6646         trace_access_lock(iter->cpu_file);
6647         while (trace_find_next_entry_inc(iter) != NULL) {
6648                 enum print_line_t ret;
6649                 int save_len = iter->seq.seq.len;
6650
6651                 ret = print_trace_line(iter);
6652                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6653                         /* don't print partial lines */
6654                         iter->seq.seq.len = save_len;
6655                         break;
6656                 }
6657                 if (ret != TRACE_TYPE_NO_CONSUME)
6658                         trace_consume(iter);
6659
6660                 if (trace_seq_used(&iter->seq) >= cnt)
6661                         break;
6662
6663                 /*
6664                  * The full flag being set means we reached the trace_seq buffer
6665                  * size and should have left via the partial output condition above.
6666                  * One of the trace_seq_* functions is not being used properly.
6667                  */
6668                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6669                           iter->ent->type);
6670         }
6671         trace_access_unlock(iter->cpu_file);
6672         trace_event_read_unlock();
6673
6674         /* Now copy what we have to the user */
6675         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6676         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6677                 trace_seq_init(&iter->seq);
6678
6679         /*
6680          * If there was nothing to send to user, in spite of consuming trace
6681          * entries, go back to wait for more entries.
6682          */
6683         if (sret == -EBUSY)
6684                 goto waitagain;
6685
6686 out:
6687         mutex_unlock(&iter->mutex);
6688
6689         return sret;
6690 }
6691
6692 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6693                                      unsigned int idx)
6694 {
6695         __free_page(spd->pages[idx]);
6696 }
6697
6698 static size_t
6699 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6700 {
6701         size_t count;
6702         int save_len;
6703         int ret;
6704
6705         /* Seq buffer is page-sized, exactly what we need. */
6706         for (;;) {
6707                 save_len = iter->seq.seq.len;
6708                 ret = print_trace_line(iter);
6709
6710                 if (trace_seq_has_overflowed(&iter->seq)) {
6711                         iter->seq.seq.len = save_len;
6712                         break;
6713                 }
6714
6715                 /*
6716                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6717                  * should only be returned if iter->seq overflowed. But check
6718                  * it anyway to be safe.
6719                  */
6720                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721                         iter->seq.seq.len = save_len;
6722                         break;
6723                 }
6724
6725                 count = trace_seq_used(&iter->seq) - save_len;
6726                 if (rem < count) {
6727                         rem = 0;
6728                         iter->seq.seq.len = save_len;
6729                         break;
6730                 }
6731
6732                 if (ret != TRACE_TYPE_NO_CONSUME)
6733                         trace_consume(iter);
6734                 rem -= count;
6735                 if (!trace_find_next_entry_inc(iter))   {
6736                         rem = 0;
6737                         iter->ent = NULL;
6738                         break;
6739                 }
6740         }
6741
6742         return rem;
6743 }
6744
6745 static ssize_t tracing_splice_read_pipe(struct file *filp,
6746                                         loff_t *ppos,
6747                                         struct pipe_inode_info *pipe,
6748                                         size_t len,
6749                                         unsigned int flags)
6750 {
6751         struct page *pages_def[PIPE_DEF_BUFFERS];
6752         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6753         struct trace_iterator *iter = filp->private_data;
6754         struct splice_pipe_desc spd = {
6755                 .pages          = pages_def,
6756                 .partial        = partial_def,
6757                 .nr_pages       = 0, /* This gets updated below. */
6758                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6759                 .ops            = &default_pipe_buf_ops,
6760                 .spd_release    = tracing_spd_release_pipe,
6761         };
6762         ssize_t ret;
6763         size_t rem;
6764         unsigned int i;
6765
6766         if (splice_grow_spd(pipe, &spd))
6767                 return -ENOMEM;
6768
6769         mutex_lock(&iter->mutex);
6770
6771         if (iter->trace->splice_read) {
6772                 ret = iter->trace->splice_read(iter, filp,
6773                                                ppos, pipe, len, flags);
6774                 if (ret)
6775                         goto out_err;
6776         }
6777
6778         ret = tracing_wait_pipe(filp);
6779         if (ret <= 0)
6780                 goto out_err;
6781
6782         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6783                 ret = -EFAULT;
6784                 goto out_err;
6785         }
6786
6787         trace_event_read_lock();
6788         trace_access_lock(iter->cpu_file);
6789
6790         /* Fill as many pages as possible. */
6791         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6792                 spd.pages[i] = alloc_page(GFP_KERNEL);
6793                 if (!spd.pages[i])
6794                         break;
6795
6796                 rem = tracing_fill_pipe_page(rem, iter);
6797
6798                 /* Copy the data into the page, so we can start over. */
6799                 ret = trace_seq_to_buffer(&iter->seq,
6800                                           page_address(spd.pages[i]),
6801                                           trace_seq_used(&iter->seq));
6802                 if (ret < 0) {
6803                         __free_page(spd.pages[i]);
6804                         break;
6805                 }
6806                 spd.partial[i].offset = 0;
6807                 spd.partial[i].len = trace_seq_used(&iter->seq);
6808
6809                 trace_seq_init(&iter->seq);
6810         }
6811
6812         trace_access_unlock(iter->cpu_file);
6813         trace_event_read_unlock();
6814         mutex_unlock(&iter->mutex);
6815
6816         spd.nr_pages = i;
6817
6818         if (i)
6819                 ret = splice_to_pipe(pipe, &spd);
6820         else
6821                 ret = 0;
6822 out:
6823         splice_shrink_spd(&spd);
6824         return ret;
6825
6826 out_err:
6827         mutex_unlock(&iter->mutex);
6828         goto out;
6829 }
6830
6831 static ssize_t
6832 tracing_entries_read(struct file *filp, char __user *ubuf,
6833                      size_t cnt, loff_t *ppos)
6834 {
6835         struct inode *inode = file_inode(filp);
6836         struct trace_array *tr = inode->i_private;
6837         int cpu = tracing_get_cpu(inode);
6838         char buf[64];
6839         int r = 0;
6840         ssize_t ret;
6841
6842         mutex_lock(&trace_types_lock);
6843
6844         if (cpu == RING_BUFFER_ALL_CPUS) {
6845                 int cpu, buf_size_same;
6846                 unsigned long size;
6847
6848                 size = 0;
6849                 buf_size_same = 1;
6850                 /* check if all cpu sizes are the same */
6851                 for_each_tracing_cpu(cpu) {
6852                         /* fill in the size from the first enabled cpu */
6853                         if (size == 0)
6854                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6855                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6856                                 buf_size_same = 0;
6857                                 break;
6858                         }
6859                 }
6860
6861                 if (buf_size_same) {
6862                         if (!ring_buffer_expanded)
6863                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6864                                             size >> 10,
6865                                             trace_buf_size >> 10);
6866                         else
6867                                 r = sprintf(buf, "%lu\n", size >> 10);
6868                 } else
6869                         r = sprintf(buf, "X\n");
6870         } else
6871                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6872
6873         mutex_unlock(&trace_types_lock);
6874
6875         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6876         return ret;
6877 }
6878
6879 static ssize_t
6880 tracing_entries_write(struct file *filp, const char __user *ubuf,
6881                       size_t cnt, loff_t *ppos)
6882 {
6883         struct inode *inode = file_inode(filp);
6884         struct trace_array *tr = inode->i_private;
6885         unsigned long val;
6886         int ret;
6887
6888         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6889         if (ret)
6890                 return ret;
6891
6892         /* must have at least 1 entry */
6893         if (!val)
6894                 return -EINVAL;
6895
6896         /* value is in KB */
6897         val <<= 10;
6898         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6899         if (ret < 0)
6900                 return ret;
6901
6902         *ppos += cnt;
6903
6904         return cnt;
6905 }
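
/*
 * Worked example (illustrative): the value written to "buffer_size_kb"
 * is in kilobytes.  Writing "1408" requests 1408 << 10 = 1441792 bytes
 * per CPU (or only for the CPU selected by a per_cpu/cpuN file), and the
 * read side above prints the per-CPU buffer size shifted back down by 10,
 * i.e. in KB again.
 */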
6906
6907 static ssize_t
6908 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6909                                 size_t cnt, loff_t *ppos)
6910 {
6911         struct trace_array *tr = filp->private_data;
6912         char buf[64];
6913         int r, cpu;
6914         unsigned long size = 0, expanded_size = 0;
6915
6916         mutex_lock(&trace_types_lock);
6917         for_each_tracing_cpu(cpu) {
6918                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6919                 if (!ring_buffer_expanded)
6920                         expanded_size += trace_buf_size >> 10;
6921         }
6922         if (ring_buffer_expanded)
6923                 r = sprintf(buf, "%lu\n", size);
6924         else
6925                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6926         mutex_unlock(&trace_types_lock);
6927
6928         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6929 }
6930
6931 static ssize_t
6932 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6933                           size_t cnt, loff_t *ppos)
6934 {
6935         /*
6936          * There is no need to read what the user has written; this function
6937          * exists just to make sure that there is no error when "echo" is used.
6938          */
6939
6940         *ppos += cnt;
6941
6942         return cnt;
6943 }
6944
6945 static int
6946 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6947 {
6948         struct trace_array *tr = inode->i_private;
6949
6950         /* disable tracing? */
6951         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6952                 tracer_tracing_off(tr);
6953         /* resize the ring buffer to 0 */
6954         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6955
6956         trace_array_put(tr);
6957
6958         return 0;
6959 }
6960
6961 static ssize_t
6962 tracing_mark_write(struct file *filp, const char __user *ubuf,
6963                                         size_t cnt, loff_t *fpos)
6964 {
6965         struct trace_array *tr = filp->private_data;
6966         struct ring_buffer_event *event;
6967         enum event_trigger_type tt = ETT_NONE;
6968         struct trace_buffer *buffer;
6969         struct print_entry *entry;
6970         ssize_t written;
6971         int size;
6972         int len;
6973
6974 /* Used in tracing_mark_raw_write() as well */
6975 #define FAULTED_STR "<faulted>"
6976 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6977
6978         if (tracing_disabled)
6979                 return -EINVAL;
6980
6981         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6982                 return -EINVAL;
6983
6984         if (cnt > TRACE_BUF_SIZE)
6985                 cnt = TRACE_BUF_SIZE;
6986
6987         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6988
6989         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6990
6991         /* If less than "<faulted>", then make sure we can still add that */
6992         if (cnt < FAULTED_SIZE)
6993                 size += FAULTED_SIZE - cnt;
6994
6995         buffer = tr->array_buffer.buffer;
6996         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6997                                             tracing_gen_ctx());
6998         if (unlikely(!event))
6999                 /* Ring buffer disabled, return as if not open for write */
7000                 return -EBADF;
7001
7002         entry = ring_buffer_event_data(event);
7003         entry->ip = _THIS_IP_;
7004
7005         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7006         if (len) {
7007                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7008                 cnt = FAULTED_SIZE;
7009                 written = -EFAULT;
7010         } else
7011                 written = cnt;
7012
7013         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7014                 /* do not add \n before testing triggers, but add \0 */
7015                 entry->buf[cnt] = '\0';
7016                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7017         }
7018
7019         if (entry->buf[cnt - 1] != '\n') {
7020                 entry->buf[cnt] = '\n';
7021                 entry->buf[cnt + 1] = '\0';
7022         } else
7023                 entry->buf[cnt] = '\0';
7024
7025         if (static_branch_unlikely(&trace_marker_exports_enabled))
7026                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7027         __buffer_unlock_commit(buffer, event);
7028
7029         if (tt)
7030                 event_triggers_post_call(tr->trace_marker_file, tt);
7031
7032         if (written > 0)
7033                 *fpos += written;
7034
7035         return written;
7036 }
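
/*
 * Illustrative userspace sketch (not part of the original file): writing
 * a message into the trace via the trace_marker file.  The tracefs mount
 * point /sys/kernel/tracing is an assumption; writes longer than
 * TRACE_BUF_SIZE are truncated, and a trailing newline is appended if the
 * message does not already end with one.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "hello from userspace", 20);
 *		close(fd);
 *		return 0;
 *	}
 */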
7037
7038 /* Limit it for now to 3K (including tag) */
7039 #define RAW_DATA_MAX_SIZE (1024*3)
7040
7041 static ssize_t
7042 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7043                                         size_t cnt, loff_t *fpos)
7044 {
7045         struct trace_array *tr = filp->private_data;
7046         struct ring_buffer_event *event;
7047         struct trace_buffer *buffer;
7048         struct raw_data_entry *entry;
7049         ssize_t written;
7050         int size;
7051         int len;
7052
7053 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7054
7055         if (tracing_disabled)
7056                 return -EINVAL;
7057
7058         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7059                 return -EINVAL;
7060
7061         /* The marker must at least have a tag id */
7062         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7063                 return -EINVAL;
7064
7065         if (cnt > TRACE_BUF_SIZE)
7066                 cnt = TRACE_BUF_SIZE;
7067
7068         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7069
7070         size = sizeof(*entry) + cnt;
7071         if (cnt < FAULT_SIZE_ID)
7072                 size += FAULT_SIZE_ID - cnt;
7073
7074         buffer = tr->array_buffer.buffer;
7075         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7076                                             tracing_gen_ctx());
7077         if (!event)
7078                 /* Ring buffer disabled, return as if not open for write */
7079                 return -EBADF;
7080
7081         entry = ring_buffer_event_data(event);
7082
7083         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7084         if (len) {
7085                 entry->id = -1;
7086                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7087                 written = -EFAULT;
7088         } else
7089                 written = cnt;
7090
7091         __buffer_unlock_commit(buffer, event);
7092
7093         if (written > 0)
7094                 *fpos += written;
7095
7096         return written;
7097 }
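
/*
 * Illustrative sketch (not part of the original file): the payload
 * written to trace_marker_raw must start with an int tag id, followed by
 * opaque data, and must fit within RAW_DATA_MAX_SIZE.  The struct layout,
 * the id value 42 and the file descriptor fd below are assumptions made
 * for this example.
 *
 *	struct example_raw_marker {
 *		int	id;
 *		char	payload[8];
 *	} data = { .id = 42, .payload = "rawdata" };
 *
 *	write(fd, &data, sizeof(data));
 */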
7098
7099 static int tracing_clock_show(struct seq_file *m, void *v)
7100 {
7101         struct trace_array *tr = m->private;
7102         int i;
7103
7104         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7105                 seq_printf(m,
7106                         "%s%s%s%s", i ? " " : "",
7107                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7108                         i == tr->clock_id ? "]" : "");
7109         seq_putc(m, '\n');
7110
7111         return 0;
7112 }
7113
7114 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7115 {
7116         int i;
7117
7118         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7119                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7120                         break;
7121         }
7122         if (i == ARRAY_SIZE(trace_clocks))
7123                 return -EINVAL;
7124
7125         mutex_lock(&trace_types_lock);
7126
7127         tr->clock_id = i;
7128
7129         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7130
7131         /*
7132          * New clock may not be consistent with the previous clock.
7133          * Reset the buffer so that it doesn't have incomparable timestamps.
7134          */
7135         tracing_reset_online_cpus(&tr->array_buffer);
7136
7137 #ifdef CONFIG_TRACER_MAX_TRACE
7138         if (tr->max_buffer.buffer)
7139                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7140         tracing_reset_online_cpus(&tr->max_buffer);
7141 #endif
7142
7143         mutex_unlock(&trace_types_lock);
7144
7145         return 0;
7146 }
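
/*
 * Illustrative sketch (not part of the original file): selecting a
 * different trace clock for an instance from kernel code, where tr and
 * ret are assumed to be in scope.  "global" is one of the entries in
 * trace_clocks[]; an unknown name returns -EINVAL.  Note that both the
 * live buffer and the max buffer are reset as a side effect, so
 * previously recorded events are lost.
 *
 *	ret = tracing_set_clock(tr, "global");
 *	if (ret)
 *		pr_warn("failed to set trace clock: %d\n", ret);
 */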
7147
7148 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7149                                    size_t cnt, loff_t *fpos)
7150 {
7151         struct seq_file *m = filp->private_data;
7152         struct trace_array *tr = m->private;
7153         char buf[64];
7154         const char *clockstr;
7155         int ret;
7156
7157         if (cnt >= sizeof(buf))
7158                 return -EINVAL;
7159
7160         if (copy_from_user(buf, ubuf, cnt))
7161                 return -EFAULT;
7162
7163         buf[cnt] = 0;
7164
7165         clockstr = strstrip(buf);
7166
7167         ret = tracing_set_clock(tr, clockstr);
7168         if (ret)
7169                 return ret;
7170
7171         *fpos += cnt;
7172
7173         return cnt;
7174 }
7175
7176 static int tracing_clock_open(struct inode *inode, struct file *file)
7177 {
7178         struct trace_array *tr = inode->i_private;
7179         int ret;
7180
7181         ret = tracing_check_open_get_tr(tr);
7182         if (ret)
7183                 return ret;
7184
7185         ret = single_open(file, tracing_clock_show, inode->i_private);
7186         if (ret < 0)
7187                 trace_array_put(tr);
7188
7189         return ret;
7190 }
7191
7192 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7193 {
7194         struct trace_array *tr = m->private;
7195
7196         mutex_lock(&trace_types_lock);
7197
7198         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7199                 seq_puts(m, "delta [absolute]\n");
7200         else
7201                 seq_puts(m, "[delta] absolute\n");
7202
7203         mutex_unlock(&trace_types_lock);
7204
7205         return 0;
7206 }
7207
7208 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7209 {
7210         struct trace_array *tr = inode->i_private;
7211         int ret;
7212
7213         ret = tracing_check_open_get_tr(tr);
7214         if (ret)
7215                 return ret;
7216
7217         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7218         if (ret < 0)
7219                 trace_array_put(tr);
7220
7221         return ret;
7222 }
7223
7224 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7225 {
7226         if (rbe == this_cpu_read(trace_buffered_event))
7227                 return ring_buffer_time_stamp(buffer);
7228
7229         return ring_buffer_event_time_stamp(buffer, rbe);
7230 }
7231
7232 /*
7233  * Set or disable using the per CPU trace_buffered_event when possible.
7234  */
7235 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7236 {
7237         int ret = 0;
7238
7239         mutex_lock(&trace_types_lock);
7240
7241         if (set && tr->no_filter_buffering_ref++)
7242                 goto out;
7243
7244         if (!set) {
7245                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7246                         ret = -EINVAL;
7247                         goto out;
7248                 }
7249
7250                 --tr->no_filter_buffering_ref;
7251         }
7252  out:
7253         mutex_unlock(&trace_types_lock);
7254
7255         return ret;
7256 }
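
/*
 * Illustrative sketch (not part of the original file): calls must be
 * paired, since the function keeps a reference count per trace_array.
 * The helper example_with_filter_buffering() and do_something() are
 * hypothetical placeholders.
 *
 *	static int example_with_filter_buffering(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = tracing_set_filter_buffering(tr, true);
 *		if (ret)
 *			return ret;
 *
 *		do_something();
 *
 *		return tracing_set_filter_buffering(tr, false);
 *	}
 */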
7257
7258 struct ftrace_buffer_info {
7259         struct trace_iterator   iter;
7260         void                    *spare;
7261         unsigned int            spare_cpu;
7262         unsigned int            read;
7263 };
7264
7265 #ifdef CONFIG_TRACER_SNAPSHOT
7266 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7267 {
7268         struct trace_array *tr = inode->i_private;
7269         struct trace_iterator *iter;
7270         struct seq_file *m;
7271         int ret;
7272
7273         ret = tracing_check_open_get_tr(tr);
7274         if (ret)
7275                 return ret;
7276
7277         if (file->f_mode & FMODE_READ) {
7278                 iter = __tracing_open(inode, file, true);
7279                 if (IS_ERR(iter))
7280                         ret = PTR_ERR(iter);
7281         } else {
7282                 /* Writes still need the seq_file to hold the private data */
7283                 ret = -ENOMEM;
7284                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7285                 if (!m)
7286                         goto out;
7287                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7288                 if (!iter) {
7289                         kfree(m);
7290                         goto out;
7291                 }
7292                 ret = 0;
7293
7294                 iter->tr = tr;
7295                 iter->array_buffer = &tr->max_buffer;
7296                 iter->cpu_file = tracing_get_cpu(inode);
7297                 m->private = iter;
7298                 file->private_data = m;
7299         }
7300 out:
7301         if (ret < 0)
7302                 trace_array_put(tr);
7303
7304         return ret;
7305 }
7306
7307 static ssize_t
7308 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7309                        loff_t *ppos)
7310 {
7311         struct seq_file *m = filp->private_data;
7312         struct trace_iterator *iter = m->private;
7313         struct trace_array *tr = iter->tr;
7314         unsigned long val;
7315         int ret;
7316
7317         ret = tracing_update_buffers();
7318         if (ret < 0)
7319                 return ret;
7320
7321         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7322         if (ret)
7323                 return ret;
7324
7325         mutex_lock(&trace_types_lock);
7326
7327         if (tr->current_trace->use_max_tr) {
7328                 ret = -EBUSY;
7329                 goto out;
7330         }
7331
7332         arch_spin_lock(&tr->max_lock);
7333         if (tr->cond_snapshot)
7334                 ret = -EBUSY;
7335         arch_spin_unlock(&tr->max_lock);
7336         if (ret)
7337                 goto out;
7338
7339         switch (val) {
7340         case 0:
7341                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7342                         ret = -EINVAL;
7343                         break;
7344                 }
7345                 if (tr->allocated_snapshot)
7346                         free_snapshot(tr);
7347                 break;
7348         case 1:
7349 /* Only allow per-cpu swap if the ring buffer supports it */
7350 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7351                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7352                         ret = -EINVAL;
7353                         break;
7354                 }
7355 #endif
7356                 if (tr->allocated_snapshot)
7357                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7358                                         &tr->array_buffer, iter->cpu_file);
7359                 else
7360                         ret = tracing_alloc_snapshot_instance(tr);
7361                 if (ret < 0)
7362                         break;
7363                 local_irq_disable();
7364                 /* Now, we're going to swap */
7365                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7366                         update_max_tr(tr, current, smp_processor_id(), NULL);
7367                 else
7368                         update_max_tr_single(tr, current, iter->cpu_file);
7369                 local_irq_enable();
7370                 break;
7371         default:
7372                 if (tr->allocated_snapshot) {
7373                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7374                                 tracing_reset_online_cpus(&tr->max_buffer);
7375                         else
7376                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7377                 }
7378                 break;
7379         }
7380
7381         if (ret >= 0) {
7382                 *ppos += cnt;
7383                 ret = cnt;
7384         }
7385 out:
7386         mutex_unlock(&trace_types_lock);
7387         return ret;
7388 }
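
/*
 * Illustrative sketch (not part of the original file): the value written
 * to the snapshot file selects the action taken above.  "0" frees the
 * snapshot buffer, "1" allocates it if necessary and swaps it with the
 * live buffer, and any other number just clears the snapshot buffer.
 * From userspace, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0)
 *		write(fd, "1", 1);
 */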
7389
7390 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7391 {
7392         struct seq_file *m = file->private_data;
7393         int ret;
7394
7395         ret = tracing_release(inode, file);
7396
7397         if (file->f_mode & FMODE_READ)
7398                 return ret;
7399
7400         /* If write only, the seq_file is just a stub */
7401         if (m)
7402                 kfree(m->private);
7403         kfree(m);
7404
7405         return 0;
7406 }
7407
7408 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7409 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7410                                     size_t count, loff_t *ppos);
7411 static int tracing_buffers_release(struct inode *inode, struct file *file);
7412 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7413                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7414
7415 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7416 {
7417         struct ftrace_buffer_info *info;
7418         int ret;
7419
7420         /* The following checks for tracefs lockdown */
7421         ret = tracing_buffers_open(inode, filp);
7422         if (ret < 0)
7423                 return ret;
7424
7425         info = filp->private_data;
7426
7427         if (info->iter.trace->use_max_tr) {
7428                 tracing_buffers_release(inode, filp);
7429                 return -EBUSY;
7430         }
7431
7432         info->iter.snapshot = true;
7433         info->iter.array_buffer = &info->iter.tr->max_buffer;
7434
7435         return ret;
7436 }
7437
7438 #endif /* CONFIG_TRACER_SNAPSHOT */
7439
7440
7441 static const struct file_operations tracing_thresh_fops = {
7442         .open           = tracing_open_generic,
7443         .read           = tracing_thresh_read,
7444         .write          = tracing_thresh_write,
7445         .llseek         = generic_file_llseek,
7446 };
7447
7448 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7449 static const struct file_operations tracing_max_lat_fops = {
7450         .open           = tracing_open_generic,
7451         .read           = tracing_max_lat_read,
7452         .write          = tracing_max_lat_write,
7453         .llseek         = generic_file_llseek,
7454 };
7455 #endif
7456
7457 static const struct file_operations set_tracer_fops = {
7458         .open           = tracing_open_generic,
7459         .read           = tracing_set_trace_read,
7460         .write          = tracing_set_trace_write,
7461         .llseek         = generic_file_llseek,
7462 };
7463
7464 static const struct file_operations tracing_pipe_fops = {
7465         .open           = tracing_open_pipe,
7466         .poll           = tracing_poll_pipe,
7467         .read           = tracing_read_pipe,
7468         .splice_read    = tracing_splice_read_pipe,
7469         .release        = tracing_release_pipe,
7470         .llseek         = no_llseek,
7471 };
7472
7473 static const struct file_operations tracing_entries_fops = {
7474         .open           = tracing_open_generic_tr,
7475         .read           = tracing_entries_read,
7476         .write          = tracing_entries_write,
7477         .llseek         = generic_file_llseek,
7478         .release        = tracing_release_generic_tr,
7479 };
7480
7481 static const struct file_operations tracing_total_entries_fops = {
7482         .open           = tracing_open_generic_tr,
7483         .read           = tracing_total_entries_read,
7484         .llseek         = generic_file_llseek,
7485         .release        = tracing_release_generic_tr,
7486 };
7487
7488 static const struct file_operations tracing_free_buffer_fops = {
7489         .open           = tracing_open_generic_tr,
7490         .write          = tracing_free_buffer_write,
7491         .release        = tracing_free_buffer_release,
7492 };
7493
7494 static const struct file_operations tracing_mark_fops = {
7495         .open           = tracing_open_generic_tr,
7496         .write          = tracing_mark_write,
7497         .llseek         = generic_file_llseek,
7498         .release        = tracing_release_generic_tr,
7499 };
7500
7501 static const struct file_operations tracing_mark_raw_fops = {
7502         .open           = tracing_open_generic_tr,
7503         .write          = tracing_mark_raw_write,
7504         .llseek         = generic_file_llseek,
7505         .release        = tracing_release_generic_tr,
7506 };
7507
7508 static const struct file_operations trace_clock_fops = {
7509         .open           = tracing_clock_open,
7510         .read           = seq_read,
7511         .llseek         = seq_lseek,
7512         .release        = tracing_single_release_tr,
7513         .write          = tracing_clock_write,
7514 };
7515
7516 static const struct file_operations trace_time_stamp_mode_fops = {
7517         .open           = tracing_time_stamp_mode_open,
7518         .read           = seq_read,
7519         .llseek         = seq_lseek,
7520         .release        = tracing_single_release_tr,
7521 };
7522
7523 #ifdef CONFIG_TRACER_SNAPSHOT
7524 static const struct file_operations snapshot_fops = {
7525         .open           = tracing_snapshot_open,
7526         .read           = seq_read,
7527         .write          = tracing_snapshot_write,
7528         .llseek         = tracing_lseek,
7529         .release        = tracing_snapshot_release,
7530 };
7531
7532 static const struct file_operations snapshot_raw_fops = {
7533         .open           = snapshot_raw_open,
7534         .read           = tracing_buffers_read,
7535         .release        = tracing_buffers_release,
7536         .splice_read    = tracing_buffers_splice_read,
7537         .llseek         = no_llseek,
7538 };
7539
7540 #endif /* CONFIG_TRACER_SNAPSHOT */
7541
7542 #define TRACING_LOG_ERRS_MAX    8
7543 #define TRACING_LOG_LOC_MAX     128
7544
7545 #define CMD_PREFIX "  Command: "
7546
7547 struct err_info {
7548         const char      **errs; /* ptr to loc-specific array of err strings */
7549         u8              type;   /* index into errs -> specific err string */
7550         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7551         u64             ts;
7552 };
7553
7554 struct tracing_log_err {
7555         struct list_head        list;
7556         struct err_info         info;
7557         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7558         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7559 };
7560
7561 static DEFINE_MUTEX(tracing_err_log_lock);
7562
7563 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7564 {
7565         struct tracing_log_err *err;
7566
7567         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7568                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7569                 if (!err)
7570                         err = ERR_PTR(-ENOMEM);
7571                 tr->n_err_log_entries++;
7572
7573                 return err;
7574         }
7575
7576         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7577         list_del(&err->list);
7578
7579         return err;
7580 }
7581
7582 /**
7583  * err_pos - find the position of a string within a command for error careting
7584  * @cmd: The tracing command that caused the error
7585  * @str: The string to position the caret at within @cmd
7586  *
7587  * Finds the position of the first occurrence of @str within @cmd.  The
7588  * return value can be passed to tracing_log_err() for caret placement
7589  * within @cmd.
7590  *
7591  * Returns the index within @cmd of the first occurrence of @str or 0
7592  * if @str was not found.
7593  */
7594 unsigned int err_pos(char *cmd, const char *str)
7595 {
7596         char *found;
7597
7598         if (WARN_ON(!strlen(cmd)))
7599                 return 0;
7600
7601         found = strstr(cmd, str);
7602         if (found)
7603                 return found - cmd;
7604
7605         return 0;
7606 }
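
/*
 * Worked example (illustrative): for the command "hist:keys=foo",
 * err_pos(cmd, "foo") returns 10, the offset of the 'f', so the caret
 * printed in the error log lines up with the start of "foo".
 * err_pos(cmd, "bar") returns 0 because "bar" does not occur in the
 * command.
 */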
7607
7608 /**
7609  * tracing_log_err - write an error to the tracing error log
7610  * @tr: The associated trace array for the error (NULL for top level array)
7611  * @loc: A string describing where the error occurred
7612  * @cmd: The tracing command that caused the error
7613  * @errs: The array of loc-specific static error strings
7614  * @type: The index into errs[], which produces the specific static err string
7615  * @pos: The position the caret should be placed in the cmd
7616  *
7617  * Writes an error into tracing/error_log of the form:
7618  *
7619  * <loc>: error: <text>
7620  *   Command: <cmd>
7621  *              ^
7622  *
7623  * tracing/error_log is a small log file containing the last
7624  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7625  * unless there has been a tracing error, and the error log can be
7626  * cleared and have its memory freed by writing the empty string in
7627  * truncation mode to it i.e. echo > tracing/error_log.
7628  *
7629  * NOTE: the @errs array along with the @type param are used to
7630  * produce a static error string - this string is not copied and saved
7631  * when the error is logged - only a pointer to it is saved.  See
7632  * existing callers for examples of how static strings are typically
7633  * defined for use with tracing_log_err().
7634  */
7635 void tracing_log_err(struct trace_array *tr,
7636                      const char *loc, const char *cmd,
7637                      const char **errs, u8 type, u8 pos)
7638 {
7639         struct tracing_log_err *err;
7640
7641         if (!tr)
7642                 tr = &global_trace;
7643
7644         mutex_lock(&tracing_err_log_lock);
7645         err = get_tracing_log_err(tr);
7646         if (PTR_ERR(err) == -ENOMEM) {
7647                 mutex_unlock(&tracing_err_log_lock);
7648                 return;
7649         }
7650
7651         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7652         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7653
7654         err->info.errs = errs;
7655         err->info.type = type;
7656         err->info.pos = pos;
7657         err->info.ts = local_clock();
7658
7659         list_add_tail(&err->list, &tr->err_log);
7660         mutex_unlock(&tracing_err_log_lock);
7661 }
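
/*
 * Illustrative sketch (not part of the original file): the typical shape
 * of a caller.  The error array example_errs[], the location string
 * "example_parser" and the function example_parse() are hypothetical;
 * real callers follow the same pattern with their own static strings.
 *
 *	static const char *example_errs[] = {
 *		"Unknown field name",
 *		"Duplicate field name",
 *	};
 *
 *	static int example_parse(struct trace_array *tr, char *cmd,
 *				 const char *field)
 *	{
 *		tracing_log_err(tr, "example_parser", cmd, example_errs,
 *				0, err_pos(cmd, field));
 *		return -EINVAL;
 *	}
 */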
7662
7663 static void clear_tracing_err_log(struct trace_array *tr)
7664 {
7665         struct tracing_log_err *err, *next;
7666
7667         mutex_lock(&tracing_err_log_lock);
7668         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7669                 list_del(&err->list);
7670                 kfree(err);
7671         }
7672
7673         tr->n_err_log_entries = 0;
7674         mutex_unlock(&tracing_err_log_lock);
7675 }
7676
7677 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7678 {
7679         struct trace_array *tr = m->private;
7680
7681         mutex_lock(&tracing_err_log_lock);
7682
7683         return seq_list_start(&tr->err_log, *pos);
7684 }
7685
7686 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7687 {
7688         struct trace_array *tr = m->private;
7689
7690         return seq_list_next(v, &tr->err_log, pos);
7691 }
7692
7693 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7694 {
7695         mutex_unlock(&tracing_err_log_lock);
7696 }
7697
7698 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7699 {
7700         u8 i;
7701
7702         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7703                 seq_putc(m, ' ');
7704         for (i = 0; i < pos; i++)
7705                 seq_putc(m, ' ');
7706         seq_puts(m, "^\n");
7707 }
7708
7709 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7710 {
7711         struct tracing_log_err *err = v;
7712
7713         if (err) {
7714                 const char *err_text = err->info.errs[err->info.type];
7715                 u64 sec = err->info.ts;
7716                 u32 nsec;
7717
7718                 nsec = do_div(sec, NSEC_PER_SEC);
7719                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7720                            err->loc, err_text);
7721                 seq_printf(m, "%s", err->cmd);
7722                 tracing_err_log_show_pos(m, err->info.pos);
7723         }
7724
7725         return 0;
7726 }
7727
7728 static const struct seq_operations tracing_err_log_seq_ops = {
7729         .start  = tracing_err_log_seq_start,
7730         .next   = tracing_err_log_seq_next,
7731         .stop   = tracing_err_log_seq_stop,
7732         .show   = tracing_err_log_seq_show
7733 };
7734
7735 static int tracing_err_log_open(struct inode *inode, struct file *file)
7736 {
7737         struct trace_array *tr = inode->i_private;
7738         int ret = 0;
7739
7740         ret = tracing_check_open_get_tr(tr);
7741         if (ret)
7742                 return ret;
7743
7744         /* If this file was opened for write, then erase contents */
7745         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7746                 clear_tracing_err_log(tr);
7747
7748         if (file->f_mode & FMODE_READ) {
7749                 ret = seq_open(file, &tracing_err_log_seq_ops);
7750                 if (!ret) {
7751                         struct seq_file *m = file->private_data;
7752                         m->private = tr;
7753                 } else {
7754                         trace_array_put(tr);
7755                 }
7756         }
7757         return ret;
7758 }
7759
7760 static ssize_t tracing_err_log_write(struct file *file,
7761                                      const char __user *buffer,
7762                                      size_t count, loff_t *ppos)
7763 {
7764         return count;
7765 }
7766
7767 static int tracing_err_log_release(struct inode *inode, struct file *file)
7768 {
7769         struct trace_array *tr = inode->i_private;
7770
7771         trace_array_put(tr);
7772
7773         if (file->f_mode & FMODE_READ)
7774                 seq_release(inode, file);
7775
7776         return 0;
7777 }
7778
7779 static const struct file_operations tracing_err_log_fops = {
7780         .open           = tracing_err_log_open,
7781         .write          = tracing_err_log_write,
7782         .read           = seq_read,
7783         .llseek         = seq_lseek,
7784         .release        = tracing_err_log_release,
7785 };
7786
7787 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7788 {
7789         struct trace_array *tr = inode->i_private;
7790         struct ftrace_buffer_info *info;
7791         int ret;
7792
7793         ret = tracing_check_open_get_tr(tr);
7794         if (ret)
7795                 return ret;
7796
7797         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7798         if (!info) {
7799                 trace_array_put(tr);
7800                 return -ENOMEM;
7801         }
7802
7803         mutex_lock(&trace_types_lock);
7804
7805         info->iter.tr           = tr;
7806         info->iter.cpu_file     = tracing_get_cpu(inode);
7807         info->iter.trace        = tr->current_trace;
7808         info->iter.array_buffer = &tr->array_buffer;
7809         info->spare             = NULL;
7810         /* Force reading ring buffer for first read */
7811         info->read              = (unsigned int)-1;
7812
7813         filp->private_data = info;
7814
7815         tr->trace_ref++;
7816
7817         mutex_unlock(&trace_types_lock);
7818
7819         ret = nonseekable_open(inode, filp);
7820         if (ret < 0)
7821                 trace_array_put(tr);
7822
7823         return ret;
7824 }
7825
7826 static __poll_t
7827 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7828 {
7829         struct ftrace_buffer_info *info = filp->private_data;
7830         struct trace_iterator *iter = &info->iter;
7831
7832         return trace_poll(iter, filp, poll_table);
7833 }
7834
7835 static ssize_t
7836 tracing_buffers_read(struct file *filp, char __user *ubuf,
7837                      size_t count, loff_t *ppos)
7838 {
7839         struct ftrace_buffer_info *info = filp->private_data;
7840         struct trace_iterator *iter = &info->iter;
7841         ssize_t ret = 0;
7842         ssize_t size;
7843
7844         if (!count)
7845                 return 0;
7846
7847 #ifdef CONFIG_TRACER_MAX_TRACE
7848         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7849                 return -EBUSY;
7850 #endif
7851
7852         if (!info->spare) {
7853                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7854                                                           iter->cpu_file);
7855                 if (IS_ERR(info->spare)) {
7856                         ret = PTR_ERR(info->spare);
7857                         info->spare = NULL;
7858                 } else {
7859                         info->spare_cpu = iter->cpu_file;
7860                 }
7861         }
7862         if (!info->spare)
7863                 return ret;
7864
7865         /* Do we have previous read data to read? */
7866         if (info->read < PAGE_SIZE)
7867                 goto read;
7868
7869  again:
7870         trace_access_lock(iter->cpu_file);
7871         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7872                                     &info->spare,
7873                                     count,
7874                                     iter->cpu_file, 0);
7875         trace_access_unlock(iter->cpu_file);
7876
7877         if (ret < 0) {
7878                 if (trace_empty(iter)) {
7879                         if ((filp->f_flags & O_NONBLOCK))
7880                                 return -EAGAIN;
7881
7882                         ret = wait_on_pipe(iter, 0);
7883                         if (ret)
7884                                 return ret;
7885
7886                         goto again;
7887                 }
7888                 return 0;
7889         }
7890
7891         info->read = 0;
7892  read:
7893         size = PAGE_SIZE - info->read;
7894         if (size > count)
7895                 size = count;
7896
7897         ret = copy_to_user(ubuf, info->spare + info->read, size);
7898         if (ret == size)
7899                 return -EFAULT;
7900
7901         size -= ret;
7902
7903         *ppos += size;
7904         info->read += size;
7905
7906         return size;
7907 }
7908
7909 static int tracing_buffers_release(struct inode *inode, struct file *file)
7910 {
7911         struct ftrace_buffer_info *info = file->private_data;
7912         struct trace_iterator *iter = &info->iter;
7913
7914         mutex_lock(&trace_types_lock);
7915
7916         iter->tr->trace_ref--;
7917
7918         __trace_array_put(iter->tr);
7919
7920         if (info->spare)
7921                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7922                                            info->spare_cpu, info->spare);
7923         kvfree(info);
7924
7925         mutex_unlock(&trace_types_lock);
7926
7927         return 0;
7928 }
7929
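/*
 * A reference-counted ring buffer page handed out to a pipe by
 * splice_read. The page goes back to the ring buffer once the last
 * reference is dropped.
 */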
7930 struct buffer_ref {
7931         struct trace_buffer     *buffer;
7932         void                    *page;
7933         int                     cpu;
7934         refcount_t              refcount;
7935 };
7936
7937 static void buffer_ref_release(struct buffer_ref *ref)
7938 {
7939         if (!refcount_dec_and_test(&ref->refcount))
7940                 return;
7941         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7942         kfree(ref);
7943 }
7944
7945 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7946                                     struct pipe_buffer *buf)
7947 {
7948         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7949
7950         buffer_ref_release(ref);
7951         buf->private = 0;
7952 }
7953
7954 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7955                                 struct pipe_buffer *buf)
7956 {
7957         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7958
7959         if (refcount_read(&ref->refcount) > INT_MAX/2)
7960                 return false;
7961
7962         refcount_inc(&ref->refcount);
7963         return true;
7964 }
7965
7966 /* Pipe buffer operations for a buffer. */
7967 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7968         .release                = buffer_pipe_buf_release,
7969         .get                    = buffer_pipe_buf_get,
7970 };
7971
7972 /*
7973  * Callback from splice_to_pipe(): release the pages still held in the
7974  * spd if we errored out while filling the pipe.
7975  */
7976 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7977 {
7978         struct buffer_ref *ref =
7979                 (struct buffer_ref *)spd->partial[i].private;
7980
7981         buffer_ref_release(ref);
7982         spd->partial[i].private = 0;
7983 }
7984
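/*
 * Zero-copy read of trace_pipe_raw: ring buffer pages are wrapped in
 * buffer_refs and linked into the pipe directly instead of being copied.
 */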
7985 static ssize_t
7986 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7987                             struct pipe_inode_info *pipe, size_t len,
7988                             unsigned int flags)
7989 {
7990         struct ftrace_buffer_info *info = file->private_data;
7991         struct trace_iterator *iter = &info->iter;
7992         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7993         struct page *pages_def[PIPE_DEF_BUFFERS];
7994         struct splice_pipe_desc spd = {
7995                 .pages          = pages_def,
7996                 .partial        = partial_def,
7997                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7998                 .ops            = &buffer_pipe_buf_ops,
7999                 .spd_release    = buffer_spd_release,
8000         };
8001         struct buffer_ref *ref;
8002         int entries, i;
8003         ssize_t ret = 0;
8004
8005 #ifdef CONFIG_TRACER_MAX_TRACE
8006         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8007                 return -EBUSY;
8008 #endif
8009
8010         if (*ppos & (PAGE_SIZE - 1))
8011                 return -EINVAL;
8012
8013         if (len & (PAGE_SIZE - 1)) {
8014                 if (len < PAGE_SIZE)
8015                         return -EINVAL;
8016                 len &= PAGE_MASK;
8017         }
8018
8019         if (splice_grow_spd(pipe, &spd))
8020                 return -ENOMEM;
8021
8022  again:
8023         trace_access_lock(iter->cpu_file);
8024         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8025
8026         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8027                 struct page *page;
8028                 int r;
8029
8030                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8031                 if (!ref) {
8032                         ret = -ENOMEM;
8033                         break;
8034                 }
8035
8036                 refcount_set(&ref->refcount, 1);
8037                 ref->buffer = iter->array_buffer->buffer;
8038                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8039                 if (IS_ERR(ref->page)) {
8040                         ret = PTR_ERR(ref->page);
8041                         ref->page = NULL;
8042                         kfree(ref);
8043                         break;
8044                 }
8045                 ref->cpu = iter->cpu_file;
8046
8047                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8048                                           len, iter->cpu_file, 1);
8049                 if (r < 0) {
8050                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8051                                                    ref->page);
8052                         kfree(ref);
8053                         break;
8054                 }
8055
8056                 page = virt_to_page(ref->page);
8057
8058                 spd.pages[i] = page;
8059                 spd.partial[i].len = PAGE_SIZE;
8060                 spd.partial[i].offset = 0;
8061                 spd.partial[i].private = (unsigned long)ref;
8062                 spd.nr_pages++;
8063                 *ppos += PAGE_SIZE;
8064
8065                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8066         }
8067
8068         trace_access_unlock(iter->cpu_file);
8069         spd.nr_pages = i;
8070
8071         /* did we read anything? */
8072         if (!spd.nr_pages) {
8073                 if (ret)
8074                         goto out;
8075
8076                 ret = -EAGAIN;
8077                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8078                         goto out;
8079
8080                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8081                 if (ret)
8082                         goto out;
8083
8084                 goto again;
8085         }
8086
8087         ret = splice_to_pipe(pipe, &spd);
8088 out:
8089         splice_shrink_spd(&spd);
8090
8091         return ret;
8092 }
8093
8094 static const struct file_operations tracing_buffers_fops = {
8095         .open           = tracing_buffers_open,
8096         .read           = tracing_buffers_read,
8097         .poll           = tracing_buffers_poll,
8098         .release        = tracing_buffers_release,
8099         .splice_read    = tracing_buffers_splice_read,
8100         .llseek         = no_llseek,
8101 };
8102
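/*
 * Backs the per_cpu/cpuN/stats file: entry counts, overruns, byte counts
 * and timestamps for a single CPU's ring buffer.
 */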
8103 static ssize_t
8104 tracing_stats_read(struct file *filp, char __user *ubuf,
8105                    size_t count, loff_t *ppos)
8106 {
8107         struct inode *inode = file_inode(filp);
8108         struct trace_array *tr = inode->i_private;
8109         struct array_buffer *trace_buf = &tr->array_buffer;
8110         int cpu = tracing_get_cpu(inode);
8111         struct trace_seq *s;
8112         unsigned long cnt;
8113         unsigned long long t;
8114         unsigned long usec_rem;
8115
8116         s = kmalloc(sizeof(*s), GFP_KERNEL);
8117         if (!s)
8118                 return -ENOMEM;
8119
8120         trace_seq_init(s);
8121
8122         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8123         trace_seq_printf(s, "entries: %ld\n", cnt);
8124
8125         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8126         trace_seq_printf(s, "overrun: %ld\n", cnt);
8127
8128         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8129         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8130
8131         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8132         trace_seq_printf(s, "bytes: %ld\n", cnt);
8133
8134         if (trace_clocks[tr->clock_id].in_ns) {
8135                 /* local or global for trace_clock */
8136                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8137                 usec_rem = do_div(t, USEC_PER_SEC);
8138                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8139                                                                 t, usec_rem);
8140
8141                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8142                 usec_rem = do_div(t, USEC_PER_SEC);
8143                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8144         } else {
8145                 /* counter or tsc mode for trace_clock */
8146                 trace_seq_printf(s, "oldest event ts: %llu\n",
8147                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8148
8149                 trace_seq_printf(s, "now ts: %llu\n",
8150                                 ring_buffer_time_stamp(trace_buf->buffer));
8151         }
8152
8153         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8154         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8155
8156         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8157         trace_seq_printf(s, "read events: %ld\n", cnt);
8158
8159         count = simple_read_from_buffer(ubuf, count, ppos,
8160                                         s->buffer, trace_seq_used(s));
8161
8162         kfree(s);
8163
8164         return count;
8165 }
8166
8167 static const struct file_operations tracing_stats_fops = {
8168         .open           = tracing_open_generic_tr,
8169         .read           = tracing_stats_read,
8170         .llseek         = generic_file_llseek,
8171         .release        = tracing_release_generic_tr,
8172 };
8173
8174 #ifdef CONFIG_DYNAMIC_FTRACE
8175
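/*
 * Backs the dyn_ftrace_total_info file: the number of functions available
 * to be traced, plus the pages and groups used to hold their records.
 */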
8176 static ssize_t
8177 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8178                   size_t cnt, loff_t *ppos)
8179 {
8180         ssize_t ret;
8181         char *buf;
8182         int r;
8183
8184         /* 256 should be plenty to hold the amount needed */
8185         buf = kmalloc(256, GFP_KERNEL);
8186         if (!buf)
8187                 return -ENOMEM;
8188
8189         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8190                       ftrace_update_tot_cnt,
8191                       ftrace_number_of_pages,
8192                       ftrace_number_of_groups);
8193
8194         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8195         kfree(buf);
8196         return ret;
8197 }
8198
8199 static const struct file_operations tracing_dyn_info_fops = {
8200         .open           = tracing_open_generic,
8201         .read           = tracing_read_dyn_info,
8202         .llseek         = generic_file_llseek,
8203 };
8204 #endif /* CONFIG_DYNAMIC_FTRACE */
8205
8206 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
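/*
 * Function probe callbacks for the "snapshot" command: take a snapshot of
 * the trace instance each time the probed function is hit (the _count
 * variant stops after a given number of hits).
 */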
8207 static void
8208 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8209                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8210                 void *data)
8211 {
8212         tracing_snapshot_instance(tr);
8213 }
8214
8215 static void
8216 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8217                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8218                       void *data)
8219 {
8220         struct ftrace_func_mapper *mapper = data;
8221         long *count = NULL;
8222
8223         if (mapper)
8224                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8225
8226         if (count) {
8227
8228                 if (*count <= 0)
8229                         return;
8230
8231                 (*count)--;
8232         }
8233
8234         tracing_snapshot_instance(tr);
8235 }
8236
8237 static int
8238 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8239                       struct ftrace_probe_ops *ops, void *data)
8240 {
8241         struct ftrace_func_mapper *mapper = data;
8242         long *count = NULL;
8243
8244         seq_printf(m, "%ps:", (void *)ip);
8245
8246         seq_puts(m, "snapshot");
8247
8248         if (mapper)
8249                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8250
8251         if (count)
8252                 seq_printf(m, ":count=%ld\n", *count);
8253         else
8254                 seq_puts(m, ":unlimited\n");
8255
8256         return 0;
8257 }
8258
8259 static int
8260 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8261                      unsigned long ip, void *init_data, void **data)
8262 {
8263         struct ftrace_func_mapper *mapper = *data;
8264
8265         if (!mapper) {
8266                 mapper = allocate_ftrace_func_mapper();
8267                 if (!mapper)
8268                         return -ENOMEM;
8269                 *data = mapper;
8270         }
8271
8272         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8273 }
8274
8275 static void
8276 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8277                      unsigned long ip, void *data)
8278 {
8279         struct ftrace_func_mapper *mapper = data;
8280
8281         if (!ip) {
8282                 if (!mapper)
8283                         return;
8284                 free_ftrace_func_mapper(mapper, NULL);
8285                 return;
8286         }
8287
8288         ftrace_func_mapper_remove_ip(mapper, ip);
8289 }
8290
8291 static struct ftrace_probe_ops snapshot_probe_ops = {
8292         .func                   = ftrace_snapshot,
8293         .print                  = ftrace_snapshot_print,
8294 };
8295
8296 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8297         .func                   = ftrace_count_snapshot,
8298         .print                  = ftrace_snapshot_print,
8299         .init                   = ftrace_snapshot_init,
8300         .free                   = ftrace_snapshot_free,
8301 };
8302
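/*
 * Parse the "snapshot" command written to set_ftrace_filter, e.g.:
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter
 *   echo 'schedule:snapshot:5' > set_ftrace_filter
 *   echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The optional count limits how many snapshots will be taken, and a
 * leading '!' removes a previously registered probe.
 */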
8303 static int
8304 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8305                                char *glob, char *cmd, char *param, int enable)
8306 {
8307         struct ftrace_probe_ops *ops;
8308         void *count = (void *)-1;
8309         char *number;
8310         int ret;
8311
8312         if (!tr)
8313                 return -ENODEV;
8314
8315         /* hash funcs only work with set_ftrace_filter */
8316         if (!enable)
8317                 return -EINVAL;
8318
8319         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8320
8321         if (glob[0] == '!')
8322                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8323
8324         if (!param)
8325                 goto out_reg;
8326
8327         number = strsep(&param, ":");
8328
8329         if (!strlen(number))
8330                 goto out_reg;
8331
8332         /*
8333          * We use the callback data field (which is a pointer)
8334          * as our counter.
8335          */
8336         ret = kstrtoul(number, 0, (unsigned long *)&count);
8337         if (ret)
8338                 return ret;
8339
8340  out_reg:
8341         ret = tracing_alloc_snapshot_instance(tr);
8342         if (ret < 0)
8343                 goto out;
8344
8345         ret = register_ftrace_function_probe(glob, tr, ops, count);
8346
8347  out:
8348         return ret < 0 ? ret : 0;
8349 }
8350
8351 static struct ftrace_func_command ftrace_snapshot_cmd = {
8352         .name                   = "snapshot",
8353         .func                   = ftrace_trace_snapshot_callback,
8354 };
8355
8356 static __init int register_snapshot_cmd(void)
8357 {
8358         return register_ftrace_command(&ftrace_snapshot_cmd);
8359 }
8360 #else
8361 static inline __init int register_snapshot_cmd(void) { return 0; }
8362 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8363
8364 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8365 {
8366         if (WARN_ON(!tr->dir))
8367                 return ERR_PTR(-ENODEV);
8368
8369         /* Top directory uses NULL as the parent */
8370         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8371                 return NULL;
8372
8373         /* All sub buffers have a descriptor */
8374         return tr->dir;
8375 }
8376
8377 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8378 {
8379         struct dentry *d_tracer;
8380
8381         if (tr->percpu_dir)
8382                 return tr->percpu_dir;
8383
8384         d_tracer = tracing_get_dentry(tr);
8385         if (IS_ERR(d_tracer))
8386                 return NULL;
8387
8388         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8389
8390         MEM_FAIL(!tr->percpu_dir,
8391                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8392
8393         return tr->percpu_dir;
8394 }
8395
8396 static struct dentry *
8397 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8398                       void *data, long cpu, const struct file_operations *fops)
8399 {
8400         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8401
8402         if (ret) /* See tracing_get_cpu() */
8403                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8404         return ret;
8405 }
8406
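/*
 * Create the per_cpu/cpuN directory and the per-CPU variants of the
 * trace, trace_pipe, stats and related files.
 */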
8407 static void
8408 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8409 {
8410         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8411         struct dentry *d_cpu;
8412         char cpu_dir[30]; /* 30 characters should be more than enough */
8413
8414         if (!d_percpu)
8415                 return;
8416
8417         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8418         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8419         if (!d_cpu) {
8420                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8421                 return;
8422         }
8423
8424         /* per cpu trace_pipe */
8425         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8426                                 tr, cpu, &tracing_pipe_fops);
8427
8428         /* per cpu trace */
8429         trace_create_cpu_file("trace", 0644, d_cpu,
8430                                 tr, cpu, &tracing_fops);
8431
8432         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8433                                 tr, cpu, &tracing_buffers_fops);
8434
8435         trace_create_cpu_file("stats", 0444, d_cpu,
8436                                 tr, cpu, &tracing_stats_fops);
8437
8438         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8439                                 tr, cpu, &tracing_entries_fops);
8440
8441 #ifdef CONFIG_TRACER_SNAPSHOT
8442         trace_create_cpu_file("snapshot", 0644, d_cpu,
8443                                 tr, cpu, &snapshot_fops);
8444
8445         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8446                                 tr, cpu, &snapshot_raw_fops);
8447 #endif
8448 }
8449
8450 #ifdef CONFIG_FTRACE_SELFTEST
8451 /* Let selftest have access to static functions in this file */
8452 #include "trace_selftest.c"
8453 #endif
8454
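/*
 * Read/write handlers for the tracer-specific option files created under
 * options/ (one file per tracer_opt flag).
 */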
8455 static ssize_t
8456 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8457                         loff_t *ppos)
8458 {
8459         struct trace_option_dentry *topt = filp->private_data;
8460         char *buf;
8461
8462         if (topt->flags->val & topt->opt->bit)
8463                 buf = "1\n";
8464         else
8465                 buf = "0\n";
8466
8467         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8468 }
8469
8470 static ssize_t
8471 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8472                          loff_t *ppos)
8473 {
8474         struct trace_option_dentry *topt = filp->private_data;
8475         unsigned long val;
8476         int ret;
8477
8478         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8479         if (ret)
8480                 return ret;
8481
8482         if (val != 0 && val != 1)
8483                 return -EINVAL;
8484
8485         if (!!(topt->flags->val & topt->opt->bit) != val) {
8486                 mutex_lock(&trace_types_lock);
8487                 ret = __set_tracer_option(topt->tr, topt->flags,
8488                                           topt->opt, !val);
8489                 mutex_unlock(&trace_types_lock);
8490                 if (ret)
8491                         return ret;
8492         }
8493
8494         *ppos += cnt;
8495
8496         return cnt;
8497 }
8498
8499
8500 static const struct file_operations trace_options_fops = {
8501         .open = tracing_open_generic,
8502         .read = trace_options_read,
8503         .write = trace_options_write,
8504         .llseek = generic_file_llseek,
8505 };
8506
8507 /*
8508  * In order to pass in both the trace_array descriptor as well as the index
8509  * to the flag that the trace option file represents, the trace_array
8510  * has a character array, trace_flags_index[], which holds the index of
8511  * the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
8512  * The address of the element for a given flag is passed to that flag's
8513  * option file read/write callbacks.
8514  *
8515  * To recover both the index and the trace_array descriptor,
8516  * get_tr_index() uses the following algorithm.
8517  *
8518  *   idx = *ptr;
8519  *
8520  * Because each element's value equals its own position in the array,
8521  * dereferencing the pointer yields the flag's bit index.
8522  *
8523  * Then, to get the trace_array descriptor, subtracting that index
8524  * from the pointer gets us to the start of the array itself:
8525  *
8526  *   ptr - idx == &index[0]
8527  *
8528  * Then a simple container_of() from that pointer gets us to the
8529  * trace_array descriptor.
8530  */
8531 static void get_tr_index(void *data, struct trace_array **ptr,
8532                          unsigned int *pindex)
8533 {
8534         *pindex = *(unsigned char *)data;
8535
8536         *ptr = container_of(data - *pindex, struct trace_array,
8537                             trace_flags_index);
8538 }
8539
8540 static ssize_t
8541 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8542                         loff_t *ppos)
8543 {
8544         void *tr_index = filp->private_data;
8545         struct trace_array *tr;
8546         unsigned int index;
8547         char *buf;
8548
8549         get_tr_index(tr_index, &tr, &index);
8550
8551         if (tr->trace_flags & (1 << index))
8552                 buf = "1\n";
8553         else
8554                 buf = "0\n";
8555
8556         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8557 }
8558
8559 static ssize_t
8560 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8561                          loff_t *ppos)
8562 {
8563         void *tr_index = filp->private_data;
8564         struct trace_array *tr;
8565         unsigned int index;
8566         unsigned long val;
8567         int ret;
8568
8569         get_tr_index(tr_index, &tr, &index);
8570
8571         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8572         if (ret)
8573                 return ret;
8574
8575         if (val != 0 && val != 1)
8576                 return -EINVAL;
8577
8578         mutex_lock(&event_mutex);
8579         mutex_lock(&trace_types_lock);
8580         ret = set_tracer_flag(tr, 1 << index, val);
8581         mutex_unlock(&trace_types_lock);
8582         mutex_unlock(&event_mutex);
8583
8584         if (ret < 0)
8585                 return ret;
8586
8587         *ppos += cnt;
8588
8589         return cnt;
8590 }
8591
8592 static const struct file_operations trace_options_core_fops = {
8593         .open = tracing_open_generic,
8594         .read = trace_options_core_read,
8595         .write = trace_options_core_write,
8596         .llseek = generic_file_llseek,
8597 };
8598
8599 struct dentry *trace_create_file(const char *name,
8600                                  umode_t mode,
8601                                  struct dentry *parent,
8602                                  void *data,
8603                                  const struct file_operations *fops)
8604 {
8605         struct dentry *ret;
8606
8607         ret = tracefs_create_file(name, mode, parent, data, fops);
8608         if (!ret)
8609                 pr_warn("Could not create tracefs '%s' entry\n", name);
8610
8611         return ret;
8612 }
8613
8614
8615 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8616 {
8617         struct dentry *d_tracer;
8618
8619         if (tr->options)
8620                 return tr->options;
8621
8622         d_tracer = tracing_get_dentry(tr);
8623         if (IS_ERR(d_tracer))
8624                 return NULL;
8625
8626         tr->options = tracefs_create_dir("options", d_tracer);
8627         if (!tr->options) {
8628                 pr_warn("Could not create tracefs directory 'options'\n");
8629                 return NULL;
8630         }
8631
8632         return tr->options;
8633 }
8634
8635 static void
8636 create_trace_option_file(struct trace_array *tr,
8637                          struct trace_option_dentry *topt,
8638                          struct tracer_flags *flags,
8639                          struct tracer_opt *opt)
8640 {
8641         struct dentry *t_options;
8642
8643         t_options = trace_options_init_dentry(tr);
8644         if (!t_options)
8645                 return;
8646
8647         topt->flags = flags;
8648         topt->opt = opt;
8649         topt->tr = tr;
8650
8651         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8652                                     &trace_options_fops);
8653
8654 }
8655
8656 static void
8657 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8658 {
8659         struct trace_option_dentry *topts;
8660         struct trace_options *tr_topts;
8661         struct tracer_flags *flags;
8662         struct tracer_opt *opts;
8663         int cnt;
8664         int i;
8665
8666         if (!tracer)
8667                 return;
8668
8669         flags = tracer->flags;
8670
8671         if (!flags || !flags->opts)
8672                 return;
8673
8674         /*
8675          * If this is an instance, only create flags for tracers
8676          * the instance may have.
8677          */
8678         if (!trace_ok_for_array(tracer, tr))
8679                 return;
8680
8681         for (i = 0; i < tr->nr_topts; i++) {
8682                 /* Make sure there are no duplicate flags. */
8683                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8684                         return;
8685         }
8686
8687         opts = flags->opts;
8688
8689         for (cnt = 0; opts[cnt].name; cnt++)
8690                 ;
8691
8692         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8693         if (!topts)
8694                 return;
8695
8696         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8697                             GFP_KERNEL);
8698         if (!tr_topts) {
8699                 kfree(topts);
8700                 return;
8701         }
8702
8703         tr->topts = tr_topts;
8704         tr->topts[tr->nr_topts].tracer = tracer;
8705         tr->topts[tr->nr_topts].topts = topts;
8706         tr->nr_topts++;
8707
8708         for (cnt = 0; opts[cnt].name; cnt++) {
8709                 create_trace_option_file(tr, &topts[cnt], flags,
8710                                          &opts[cnt]);
8711                 MEM_FAIL(topts[cnt].entry == NULL,
8712                           "Failed to create trace option: %s",
8713                           opts[cnt].name);
8714         }
8715 }
8716
8717 static struct dentry *
8718 create_trace_option_core_file(struct trace_array *tr,
8719                               const char *option, long index)
8720 {
8721         struct dentry *t_options;
8722
8723         t_options = trace_options_init_dentry(tr);
8724         if (!t_options)
8725                 return NULL;
8726
8727         return trace_create_file(option, 0644, t_options,
8728                                  (void *)&tr->trace_flags_index[index],
8729                                  &trace_options_core_fops);
8730 }
8731
8732 static void create_trace_options_dir(struct trace_array *tr)
8733 {
8734         struct dentry *t_options;
8735         bool top_level = tr == &global_trace;
8736         int i;
8737
8738         t_options = trace_options_init_dentry(tr);
8739         if (!t_options)
8740                 return;
8741
8742         for (i = 0; trace_options[i]; i++) {
8743                 if (top_level ||
8744                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8745                         create_trace_option_core_file(tr, trace_options[i], i);
8746         }
8747 }
8748
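/*
 * Backs the tracing_on file: reading reports whether the ring buffer is
 * recording; writing zero stops recording and non-zero starts it, calling
 * the current tracer's stop/start hooks.
 */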
8749 static ssize_t
8750 rb_simple_read(struct file *filp, char __user *ubuf,
8751                size_t cnt, loff_t *ppos)
8752 {
8753         struct trace_array *tr = filp->private_data;
8754         char buf[64];
8755         int r;
8756
8757         r = tracer_tracing_is_on(tr);
8758         r = sprintf(buf, "%d\n", r);
8759
8760         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8761 }
8762
8763 static ssize_t
8764 rb_simple_write(struct file *filp, const char __user *ubuf,
8765                 size_t cnt, loff_t *ppos)
8766 {
8767         struct trace_array *tr = filp->private_data;
8768         struct trace_buffer *buffer = tr->array_buffer.buffer;
8769         unsigned long val;
8770         int ret;
8771
8772         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8773         if (ret)
8774                 return ret;
8775
8776         if (buffer) {
8777                 mutex_lock(&trace_types_lock);
8778                 if (!!val == tracer_tracing_is_on(tr)) {
8779                         val = 0; /* do nothing */
8780                 } else if (val) {
8781                         tracer_tracing_on(tr);
8782                         if (tr->current_trace->start)
8783                                 tr->current_trace->start(tr);
8784                 } else {
8785                         tracer_tracing_off(tr);
8786                         if (tr->current_trace->stop)
8787                                 tr->current_trace->stop(tr);
8788                 }
8789                 mutex_unlock(&trace_types_lock);
8790         }
8791
8792         (*ppos)++;
8793
8794         return cnt;
8795 }
8796
8797 static const struct file_operations rb_simple_fops = {
8798         .open           = tracing_open_generic_tr,
8799         .read           = rb_simple_read,
8800         .write          = rb_simple_write,
8801         .release        = tracing_release_generic_tr,
8802         .llseek         = default_llseek,
8803 };
8804
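/*
 * Backs the buffer_percent file: how full the ring buffer must be before
 * readers blocked in poll or splice are woken up. A write of 0 is stored
 * as 1.
 */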
8805 static ssize_t
8806 buffer_percent_read(struct file *filp, char __user *ubuf,
8807                     size_t cnt, loff_t *ppos)
8808 {
8809         struct trace_array *tr = filp->private_data;
8810         char buf[64];
8811         int r;
8812
8813         r = tr->buffer_percent;
8814         r = sprintf(buf, "%d\n", r);
8815
8816         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8817 }
8818
8819 static ssize_t
8820 buffer_percent_write(struct file *filp, const char __user *ubuf,
8821                      size_t cnt, loff_t *ppos)
8822 {
8823         struct trace_array *tr = filp->private_data;
8824         unsigned long val;
8825         int ret;
8826
8827         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8828         if (ret)
8829                 return ret;
8830
8831         if (val > 100)
8832                 return -EINVAL;
8833
8834         if (!val)
8835                 val = 1;
8836
8837         tr->buffer_percent = val;
8838
8839         (*ppos)++;
8840
8841         return cnt;
8842 }
8843
8844 static const struct file_operations buffer_percent_fops = {
8845         .open           = tracing_open_generic_tr,
8846         .read           = buffer_percent_read,
8847         .write          = buffer_percent_write,
8848         .release        = tracing_release_generic_tr,
8849         .llseek         = default_llseek,
8850 };
8851
8852 static struct dentry *trace_instance_dir;
8853
8854 static void
8855 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8856
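/*
 * Allocate the ring buffer and per-CPU data for one array_buffer,
 * honoring the instance's overwrite setting.
 */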
8857 static int
8858 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8859 {
8860         enum ring_buffer_flags rb_flags;
8861
8862         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8863
8864         buf->tr = tr;
8865
8866         buf->buffer = ring_buffer_alloc(size, rb_flags);
8867         if (!buf->buffer)
8868                 return -ENOMEM;
8869
8870         buf->data = alloc_percpu(struct trace_array_cpu);
8871         if (!buf->data) {
8872                 ring_buffer_free(buf->buffer);
8873                 buf->buffer = NULL;
8874                 return -ENOMEM;
8875         }
8876
8877         /* Allocate the first page for all buffers */
8878         set_buffer_entries(&tr->array_buffer,
8879                            ring_buffer_size(tr->array_buffer.buffer, 0));
8880
8881         return 0;
8882 }
8883
8884 static int allocate_trace_buffers(struct trace_array *tr, int size)
8885 {
8886         int ret;
8887
8888         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8889         if (ret)
8890                 return ret;
8891
8892 #ifdef CONFIG_TRACER_MAX_TRACE
8893         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8894                                     allocate_snapshot ? size : 1);
8895         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8896                 ring_buffer_free(tr->array_buffer.buffer);
8897                 tr->array_buffer.buffer = NULL;
8898                 free_percpu(tr->array_buffer.data);
8899                 tr->array_buffer.data = NULL;
8900                 return -ENOMEM;
8901         }
8902         tr->allocated_snapshot = allocate_snapshot;
8903
8904         /*
8905          * Only the top level trace array gets its snapshot allocated
8906          * from the kernel command line.
8907          */
8908         allocate_snapshot = false;
8909 #endif
8910
8911         return 0;
8912 }
8913
8914 static void free_trace_buffer(struct array_buffer *buf)
8915 {
8916         if (buf->buffer) {
8917                 ring_buffer_free(buf->buffer);
8918                 buf->buffer = NULL;
8919                 free_percpu(buf->data);
8920                 buf->data = NULL;
8921         }
8922 }
8923
8924 static void free_trace_buffers(struct trace_array *tr)
8925 {
8926         if (!tr)
8927                 return;
8928
8929         free_trace_buffer(&tr->array_buffer);
8930
8931 #ifdef CONFIG_TRACER_MAX_TRACE
8932         free_trace_buffer(&tr->max_buffer);
8933 #endif
8934 }
8935
8936 static void init_trace_flags_index(struct trace_array *tr)
8937 {
8938         int i;
8939
8940         /* Used by the trace options files */
8941         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8942                 tr->trace_flags_index[i] = i;
8943 }
8944
8945 static void __update_tracer_options(struct trace_array *tr)
8946 {
8947         struct tracer *t;
8948
8949         for (t = trace_types; t; t = t->next)
8950                 add_tracer_options(tr, t);
8951 }
8952
8953 static void update_tracer_options(struct trace_array *tr)
8954 {
8955         mutex_lock(&trace_types_lock);
8956         __update_tracer_options(tr);
8957         mutex_unlock(&trace_types_lock);
8958 }
8959
8960 /* Must have trace_types_lock held */
8961 struct trace_array *trace_array_find(const char *instance)
8962 {
8963         struct trace_array *tr, *found = NULL;
8964
8965         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8966                 if (tr->name && strcmp(tr->name, instance) == 0) {
8967                         found = tr;
8968                         break;
8969                 }
8970         }
8971
8972         return found;
8973 }
8974
8975 struct trace_array *trace_array_find_get(const char *instance)
8976 {
8977         struct trace_array *tr;
8978
8979         mutex_lock(&trace_types_lock);
8980         tr = trace_array_find(instance);
8981         if (tr)
8982                 tr->ref++;
8983         mutex_unlock(&trace_types_lock);
8984
8985         return tr;
8986 }
8987
8988 static int trace_array_create_dir(struct trace_array *tr)
8989 {
8990         int ret;
8991
8992         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8993         if (!tr->dir)
8994                 return -EINVAL;
8995
8996         ret = event_trace_add_tracer(tr->dir, tr);
8997         if (ret)
8998                 tracefs_remove(tr->dir);
8999
9000         init_tracer_tracefs(tr, tr->dir);
9001         __update_tracer_options(tr);
9002
9003         return ret;
9004 }
9005
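/*
 * Allocate and initialize a new trace instance. Called with
 * trace_types_lock held; the new array starts with a reference count of 1.
 */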
9006 static struct trace_array *trace_array_create(const char *name)
9007 {
9008         struct trace_array *tr;
9009         int ret;
9010
9011         ret = -ENOMEM;
9012         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9013         if (!tr)
9014                 return ERR_PTR(ret);
9015
9016         tr->name = kstrdup(name, GFP_KERNEL);
9017         if (!tr->name)
9018                 goto out_free_tr;
9019
9020         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9021                 goto out_free_tr;
9022
9023         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9024
9025         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9026
9027         raw_spin_lock_init(&tr->start_lock);
9028
9029         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9030
9031         tr->current_trace = &nop_trace;
9032
9033         INIT_LIST_HEAD(&tr->systems);
9034         INIT_LIST_HEAD(&tr->events);
9035         INIT_LIST_HEAD(&tr->hist_vars);
9036         INIT_LIST_HEAD(&tr->err_log);
9037
9038         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9039                 goto out_free_tr;
9040
9041         if (ftrace_allocate_ftrace_ops(tr) < 0)
9042                 goto out_free_tr;
9043
9044         ftrace_init_trace_array(tr);
9045
9046         init_trace_flags_index(tr);
9047
9048         if (trace_instance_dir) {
9049                 ret = trace_array_create_dir(tr);
9050                 if (ret)
9051                         goto out_free_tr;
9052         } else
9053                 __trace_early_add_events(tr);
9054
9055         list_add(&tr->list, &ftrace_trace_arrays);
9056
9057         tr->ref++;
9058
9059         return tr;
9060
9061  out_free_tr:
9062         ftrace_free_ftrace_ops(tr);
9063         free_trace_buffers(tr);
9064         free_cpumask_var(tr->tracing_cpumask);
9065         kfree(tr->name);
9066         kfree(tr);
9067
9068         return ERR_PTR(ret);
9069 }
9070
9071 static int instance_mkdir(const char *name)
9072 {
9073         struct trace_array *tr;
9074         int ret;
9075
9076         mutex_lock(&event_mutex);
9077         mutex_lock(&trace_types_lock);
9078
9079         ret = -EEXIST;
9080         if (trace_array_find(name))
9081                 goto out_unlock;
9082
9083         tr = trace_array_create(name);
9084
9085         ret = PTR_ERR_OR_ZERO(tr);
9086
9087 out_unlock:
9088         mutex_unlock(&trace_types_lock);
9089         mutex_unlock(&event_mutex);
9090         return ret;
9091 }
9092
9093 /**
9094  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9095  * @name: The name of the trace array to be looked up/created.
9096  *
9097  * Returns a pointer to the trace array with the given name, or NULL if
9098  * it cannot be created.
9099  *
9100  * NOTE: This function increments the reference counter associated with the
9101  * trace array returned. This makes sure it cannot be freed while in use.
9102  * Use trace_array_put() once the trace array is no longer needed.
9103  * If the trace_array is to be freed, trace_array_destroy() needs to
9104  * be called after the trace_array_put(), or simply let user space delete
9105  * it from the tracefs instances directory. But until trace_array_put()
9106  * is called, user space cannot delete it.
9107  *
9108  */
9109 struct trace_array *trace_array_get_by_name(const char *name)
9110 {
9111         struct trace_array *tr;
9112
9113         mutex_lock(&event_mutex);
9114         mutex_lock(&trace_types_lock);
9115
9116         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9117                 if (tr->name && strcmp(tr->name, name) == 0)
9118                         goto out_unlock;
9119         }
9120
9121         tr = trace_array_create(name);
9122
9123         if (IS_ERR(tr))
9124                 tr = NULL;
9125 out_unlock:
9126         if (tr)
9127                 tr->ref++;
9128
9129         mutex_unlock(&trace_types_lock);
9130         mutex_unlock(&event_mutex);
9131         return tr;
9132 }
9133 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9134
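/*
 * Tear down a trace instance. Fails with -EBUSY if the instance still has
 * extra references or open ring buffer readers.
 */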
9135 static int __remove_instance(struct trace_array *tr)
9136 {
9137         int i;
9138
9139         /* Reference counter for a newly created trace array = 1. */
9140         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9141                 return -EBUSY;
9142
9143         list_del(&tr->list);
9144
9145         /* Disable all the flags that were enabled coming in */
9146         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9147                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9148                         set_tracer_flag(tr, 1 << i, 0);
9149         }
9150
9151         tracing_set_nop(tr);
9152         clear_ftrace_function_probes(tr);
9153         event_trace_del_tracer(tr);
9154         ftrace_clear_pids(tr);
9155         ftrace_destroy_function_files(tr);
9156         tracefs_remove(tr->dir);
9157         free_percpu(tr->last_func_repeats);
9158         free_trace_buffers(tr);
9159
9160         for (i = 0; i < tr->nr_topts; i++) {
9161                 kfree(tr->topts[i].topts);
9162         }
9163         kfree(tr->topts);
9164
9165         free_cpumask_var(tr->tracing_cpumask);
9166         kfree(tr->name);
9167         kfree(tr);
9168
9169         return 0;
9170 }
9171
9172 int trace_array_destroy(struct trace_array *this_tr)
9173 {
9174         struct trace_array *tr;
9175         int ret;
9176
9177         if (!this_tr)
9178                 return -EINVAL;
9179
9180         mutex_lock(&event_mutex);
9181         mutex_lock(&trace_types_lock);
9182
9183         ret = -ENODEV;
9184
9185         /* Make sure the trace array exists before destroying it. */
9186         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9187                 if (tr == this_tr) {
9188                         ret = __remove_instance(tr);
9189                         break;
9190                 }
9191         }
9192
9193         mutex_unlock(&trace_types_lock);
9194         mutex_unlock(&event_mutex);
9195
9196         return ret;
9197 }
9198 EXPORT_SYMBOL_GPL(trace_array_destroy);
9199
9200 static int instance_rmdir(const char *name)
9201 {
9202         struct trace_array *tr;
9203         int ret;
9204
9205         mutex_lock(&event_mutex);
9206         mutex_lock(&trace_types_lock);
9207
9208         ret = -ENODEV;
9209         tr = trace_array_find(name);
9210         if (tr)
9211                 ret = __remove_instance(tr);
9212
9213         mutex_unlock(&trace_types_lock);
9214         mutex_unlock(&event_mutex);
9215
9216         return ret;
9217 }
9218
9219 static __init void create_trace_instances(struct dentry *d_tracer)
9220 {
9221         struct trace_array *tr;
9222
9223         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9224                                                          instance_mkdir,
9225                                                          instance_rmdir);
9226         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9227                 return;
9228
9229         mutex_lock(&event_mutex);
9230         mutex_lock(&trace_types_lock);
9231
9232         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9233                 if (!tr->name)
9234                         continue;
9235                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9236                              "Failed to create instance directory\n"))
9237                         break;
9238         }
9239
9240         mutex_unlock(&trace_types_lock);
9241         mutex_unlock(&event_mutex);
9242 }
9243
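/*
 * Create the standard set of tracefs control files for a trace array.
 * Used for both the top-level tracing directory and instances.
 */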
9244 static void
9245 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9246 {
9247         struct trace_event_file *file;
9248         int cpu;
9249
9250         trace_create_file("available_tracers", 0444, d_tracer,
9251                         tr, &show_traces_fops);
9252
9253         trace_create_file("current_tracer", 0644, d_tracer,
9254                         tr, &set_tracer_fops);
9255
9256         trace_create_file("tracing_cpumask", 0644, d_tracer,
9257                           tr, &tracing_cpumask_fops);
9258
9259         trace_create_file("trace_options", 0644, d_tracer,
9260                           tr, &tracing_iter_fops);
9261
9262         trace_create_file("trace", 0644, d_tracer,
9263                           tr, &tracing_fops);
9264
9265         trace_create_file("trace_pipe", 0444, d_tracer,
9266                           tr, &tracing_pipe_fops);
9267
9268         trace_create_file("buffer_size_kb", 0644, d_tracer,
9269                           tr, &tracing_entries_fops);
9270
9271         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9272                           tr, &tracing_total_entries_fops);
9273
9274         trace_create_file("free_buffer", 0200, d_tracer,
9275                           tr, &tracing_free_buffer_fops);
9276
9277         trace_create_file("trace_marker", 0220, d_tracer,
9278                           tr, &tracing_mark_fops);
9279
9280         file = __find_event_file(tr, "ftrace", "print");
9281         if (file && file->dir)
9282                 trace_create_file("trigger", 0644, file->dir, file,
9283                                   &event_trigger_fops);
9284         tr->trace_marker_file = file;
9285
9286         trace_create_file("trace_marker_raw", 0220, d_tracer,
9287                           tr, &tracing_mark_raw_fops);
9288
9289         trace_create_file("trace_clock", 0644, d_tracer, tr,
9290                           &trace_clock_fops);
9291
9292         trace_create_file("tracing_on", 0644, d_tracer,
9293                           tr, &rb_simple_fops);
9294
9295         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9296                           &trace_time_stamp_mode_fops);
9297
9298         tr->buffer_percent = 50;
9299
9300         trace_create_file("buffer_percent", 0444, d_tracer,
9301                         tr, &buffer_percent_fops);
9302
9303         create_trace_options_dir(tr);
9304
9305 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9306         trace_create_maxlat_file(tr, d_tracer);
9307 #endif
9308
9309         if (ftrace_create_function_files(tr, d_tracer))
9310                 MEM_FAIL(1, "Could not allocate function filter files");
9311
9312 #ifdef CONFIG_TRACER_SNAPSHOT
9313         trace_create_file("snapshot", 0644, d_tracer,
9314                           tr, &snapshot_fops);
9315 #endif
9316
9317         trace_create_file("error_log", 0644, d_tracer,
9318                           tr, &tracing_err_log_fops);
9319
9320         for_each_tracing_cpu(cpu)
9321                 tracing_init_tracefs_percpu(tr, cpu);
9322
9323         ftrace_init_tracefs(tr, d_tracer);
9324 }
9325
9326 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9327 {
9328         struct vfsmount *mnt;
9329         struct file_system_type *type;
9330
9331         /*
9332          * To maintain backward compatibility for tools that mount
9333          * debugfs to get to the tracing facility, tracefs is automatically
9334          * mounted to the debugfs/tracing directory.
9335          */
9336         type = get_fs_type("tracefs");
9337         if (!type)
9338                 return NULL;
9339         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9340         put_filesystem(type);
9341         if (IS_ERR(mnt))
9342                 return NULL;
9343         mntget(mnt);
9344
9345         return mnt;
9346 }
9347
9348 /**
9349  * tracing_init_dentry - initialize top level trace array
9350  *
9351  * This is called when creating files or directories in the tracing
9352  * directory. It is called via fs_initcall() by any of the boot up code,
9353  * and returns 0 on success or a negative error code on failure.
9354  */
9355 int tracing_init_dentry(void)
9356 {
9357         struct trace_array *tr = &global_trace;
9358
9359         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9360                 pr_warn("Tracing disabled due to lockdown\n");
9361                 return -EPERM;
9362         }
9363
9364         /* The top level trace array uses NULL as parent */
9365         if (tr->dir)
9366                 return 0;
9367
9368         if (WARN_ON(!tracefs_initialized()))
9369                 return -ENODEV;
9370
9371         /*
9372          * As there may still be users that expect the tracing
9373          * files to exist in debugfs/tracing, we must automount
9374          * the tracefs file system there, so older tools still
9375          * work with the newer kernel.
9376          */
9377         tr->dir = debugfs_create_automount("tracing", NULL,
9378                                            trace_automount, NULL);
9379
9380         return 0;
9381 }
9382
9383 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9384 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9385
9386 static struct workqueue_struct *eval_map_wq __initdata;
9387 static struct work_struct eval_map_work __initdata;
9388
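/*
 * The built-in eval maps are registered from a workqueue queued at boot;
 * trace_eval_sync() below waits for that update to complete.
 */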
9389 static void __init eval_map_work_func(struct work_struct *work)
9390 {
9391         int len;
9392
9393         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9394         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9395 }
9396
9397 static int __init trace_eval_init(void)
9398 {
9399         INIT_WORK(&eval_map_work, eval_map_work_func);
9400
9401         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9402         if (!eval_map_wq) {
9403                 pr_err("Unable to allocate eval_map_wq\n");
9404                 /* Do work here */
9405                 eval_map_work_func(&eval_map_work);
9406                 return -ENOMEM;
9407         }
9408
9409         queue_work(eval_map_wq, &eval_map_work);
9410         return 0;
9411 }
9412
9413 static int __init trace_eval_sync(void)
9414 {
9415         /* Make sure the eval map updates are finished */
9416         if (eval_map_wq)
9417                 destroy_workqueue(eval_map_wq);
9418         return 0;
9419 }
9420
9421 late_initcall_sync(trace_eval_sync);
9422
9423
9424 #ifdef CONFIG_MODULES
9425 static void trace_module_add_evals(struct module *mod)
9426 {
9427         if (!mod->num_trace_evals)
9428                 return;
9429
9430         /*
9431          * Modules with bad taint do not have events created; do not
9432          * bother with their eval maps either.
9433          */
9434         if (trace_module_has_bad_taint(mod))
9435                 return;
9436
9437         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9438 }
9439
9440 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9441 static void trace_module_remove_evals(struct module *mod)
9442 {
9443         union trace_eval_map_item *map;
9444         union trace_eval_map_item **last = &trace_eval_maps;
9445
9446         if (!mod->num_trace_evals)
9447                 return;
9448
9449         mutex_lock(&trace_eval_mutex);
9450
9451         map = trace_eval_maps;
9452
9453         while (map) {
9454                 if (map->head.mod == mod)
9455                         break;
9456                 map = trace_eval_jmp_to_tail(map);
9457                 last = &map->tail.next;
9458                 map = map->tail.next;
9459         }
9460         if (!map)
9461                 goto out;
9462
9463         *last = trace_eval_jmp_to_tail(map)->tail.next;
9464         kfree(map);
9465  out:
9466         mutex_unlock(&trace_eval_mutex);
9467 }
9468 #else
9469 static inline void trace_module_remove_evals(struct module *mod) { }
9470 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9471
9472 static int trace_module_notify(struct notifier_block *self,
9473                                unsigned long val, void *data)
9474 {
9475         struct module *mod = data;
9476
9477         switch (val) {
9478         case MODULE_STATE_COMING:
9479                 trace_module_add_evals(mod);
9480                 break;
9481         case MODULE_STATE_GOING:
9482                 trace_module_remove_evals(mod);
9483                 break;
9484         }
9485
9486         return NOTIFY_OK;
9487 }
9488
9489 static struct notifier_block trace_module_nb = {
9490         .notifier_call = trace_module_notify,
9491         .priority = 0,
9492 };
9493 #endif /* CONFIG_MODULES */
9494
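/*
 * Create the top-level tracefs files and the instances directory once the
 * tracefs infrastructure is available.
 */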
9495 static __init int tracer_init_tracefs(void)
9496 {
9497         int ret;
9498
9499         trace_access_lock_init();
9500
9501         ret = tracing_init_dentry();
9502         if (ret)
9503                 return 0;
9504
9505         event_trace_init();
9506
9507         init_tracer_tracefs(&global_trace, NULL);
9508         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9509
9510         trace_create_file("tracing_thresh", 0644, NULL,
9511                         &global_trace, &tracing_thresh_fops);
9512
9513         trace_create_file("README", 0444, NULL,
9514                         NULL, &tracing_readme_fops);
9515
9516         trace_create_file("saved_cmdlines", 0444, NULL,
9517                         NULL, &tracing_saved_cmdlines_fops);
9518
9519         trace_create_file("saved_cmdlines_size", 0644, NULL,
9520                           NULL, &tracing_saved_cmdlines_size_fops);
9521
9522         trace_create_file("saved_tgids", 0444, NULL,
9523                         NULL, &tracing_saved_tgids_fops);
9524
9525         trace_eval_init();
9526
9527         trace_create_eval_file(NULL);
9528
9529 #ifdef CONFIG_MODULES
9530         register_module_notifier(&trace_module_nb);
9531 #endif
9532
9533 #ifdef CONFIG_DYNAMIC_FTRACE
9534         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9535                         NULL, &tracing_dyn_info_fops);
9536 #endif
9537
9538         create_trace_instances(NULL);
9539
9540         update_tracer_options(&global_trace);
9541
9542         return 0;
9543 }
9544
9545 static int trace_panic_handler(struct notifier_block *this,
9546                                unsigned long event, void *unused)
9547 {
9548         if (ftrace_dump_on_oops)
9549                 ftrace_dump(ftrace_dump_on_oops);
9550         return NOTIFY_OK;
9551 }
9552
9553 static struct notifier_block trace_panic_notifier = {
9554         .notifier_call  = trace_panic_handler,
9555         .next           = NULL,
9556         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9557 };
9558
9559 static int trace_die_handler(struct notifier_block *self,
9560                              unsigned long val,
9561                              void *data)
9562 {
9563         switch (val) {
9564         case DIE_OOPS:
9565                 if (ftrace_dump_on_oops)
9566                         ftrace_dump(ftrace_dump_on_oops);
9567                 break;
9568         default:
9569                 break;
9570         }
9571         return NOTIFY_OK;
9572 }
9573
9574 static struct notifier_block trace_die_notifier = {
9575         .notifier_call = trace_die_handler,
9576         .priority = 200
9577 };
9578
9579 /*
9580  * The printk line limit is 1024 characters; we really don't need it
9581  * that big. Nothing should be printing 1000 characters anyway.
9582  */
9583 #define TRACE_MAX_PRINT         1000
9584
9585 /*
9586  * Define KERN_TRACE here so that we have one place to modify
9587  * it if we decide to change what log level the ftrace dump
9588  * should be at.
9589  */
9590 #define KERN_TRACE              KERN_EMERG
9591
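/*
 * Print the contents of a trace_seq to the console with printk() at
 * KERN_TRACE level, clamping the length to TRACE_MAX_PRINT, and then
 * reinitialize the sequence for the next chunk.
 */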
9592 void
9593 trace_printk_seq(struct trace_seq *s)
9594 {
9595         /* Probably should print a warning here. */
9596         if (s->seq.len >= TRACE_MAX_PRINT)
9597                 s->seq.len = TRACE_MAX_PRINT;
9598
9599         /*
9600          * More paranoid code. Although the buffer size is set to
9601          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9602          * an extra layer of protection.
9603          */
9604         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9605                 s->seq.len = s->seq.size - 1;
9606
9607         /* Should already be NUL-terminated, but we are paranoid. */
9608         s->buffer[s->seq.len] = 0;
9609
9610         printk(KERN_TRACE "%s", s->buffer);
9611
9612         trace_seq_init(s);
9613 }
9614
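/*
 * Set up an iterator over the global trace array, covering all CPUs.
 * Used by ftrace_dump() and other in-kernel dump paths that cannot
 * allocate a fresh iterator.
 */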
9615 void trace_init_global_iter(struct trace_iterator *iter)
9616 {
9617         iter->tr = &global_trace;
9618         iter->trace = iter->tr->current_trace;
9619         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9620         iter->array_buffer = &global_trace.array_buffer;
9621
9622         if (iter->trace && iter->trace->open)
9623                 iter->trace->open(iter);
9624
9625         /* Annotate start of buffers if we had overruns */
9626         if (ring_buffer_overruns(iter->array_buffer->buffer))
9627                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9628
9629         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9630         if (trace_clocks[iter->tr->clock_id].in_ns)
9631                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9632 }
9633
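/*
 * Dump the ftrace ring buffer to the console. Intended for oops/panic
 * context: tracing is turned off, IRQs are disabled and per-CPU recording
 * is suspended while the buffer is printed, so only one dumper may run at
 * a time. The dump mode selects all CPUs or just the current one.
 */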
9634 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9635 {
9636         /* use static because iter can be a bit big for the stack */
9637         static struct trace_iterator iter;
9638         static atomic_t dump_running;
9639         struct trace_array *tr = &global_trace;
9640         unsigned int old_userobj;
9641         unsigned long flags;
9642         int cnt = 0, cpu;
9643
9644         /* Only allow one dump user at a time. */
9645         if (atomic_inc_return(&dump_running) != 1) {
9646                 atomic_dec(&dump_running);
9647                 return;
9648         }
9649
9650         /*
9651          * Always turn off tracing when we dump.
9652          * We don't need to show trace output of what happens
9653          * between multiple crashes.
9654          *
9655          * If the user does a sysrq-z, then they can re-enable
9656          * tracing with echo 1 > tracing_on.
9657          */
9658         tracing_off();
9659
9660         local_irq_save(flags);
9661         printk_nmi_direct_enter();
9662
9663         /* Simulate the iterator */
9664         trace_init_global_iter(&iter);
9665         /* Cannot use kmalloc for iter.temp and iter.fmt; use the static buffers */
9666         iter.temp = static_temp_buf;
9667         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9668         iter.fmt = static_fmt_buf;
9669         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9670
9671         for_each_tracing_cpu(cpu) {
9672                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9673         }
9674
9675         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9676
9677         /* don't look at user memory in panic mode */
9678         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9679
9680         switch (oops_dump_mode) {
9681         case DUMP_ALL:
9682                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9683                 break;
9684         case DUMP_ORIG:
9685                 iter.cpu_file = raw_smp_processor_id();
9686                 break;
9687         case DUMP_NONE:
9688                 goto out_enable;
9689         default:
9690                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9691                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9692         }
9693
9694         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9695
9696         /* Did function tracer already get disabled? */
9697         if (ftrace_is_dead()) {
9698                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9699                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9700         }
9701
9702         /*
9703          * We need to stop all tracing on all CPUs to read
9704          * the next buffer. This is a bit expensive, but it is
9705          * not done often. We print everything we can read,
9706          * and then release the locks again.
9707          */
9708
9709         while (!trace_empty(&iter)) {
9710
9711                 if (!cnt)
9712                         printk(KERN_TRACE "---------------------------------\n");
9713
9714                 cnt++;
9715
9716                 trace_iterator_reset(&iter);
9717                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9718
9719                 if (trace_find_next_entry_inc(&iter) != NULL) {
9720                         int ret;
9721
9722                         ret = print_trace_line(&iter);
9723                         if (ret != TRACE_TYPE_NO_CONSUME)
9724                                 trace_consume(&iter);
9725                 }
9726                 touch_nmi_watchdog();
9727
9728                 trace_printk_seq(&iter.seq);
9729         }
9730
9731         if (!cnt)
9732                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9733         else
9734                 printk(KERN_TRACE "---------------------------------\n");
9735
9736  out_enable:
9737         tr->trace_flags |= old_userobj;
9738
9739         for_each_tracing_cpu(cpu) {
9740                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9741         }
9742         atomic_dec(&dump_running);
9743         printk_nmi_direct_exit();
9744         local_irq_restore(flags);
9745 }
9746 EXPORT_SYMBOL_GPL(ftrace_dump);
9747
9748 #define WRITE_BUFSIZE  4096
9749
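/*
 * Helper for tracefs "command" files (e.g. kprobe_events): copy the user
 * buffer in WRITE_BUFSIZE chunks, split it on newlines, strip anything
 * after a '#' comment marker, and hand each resulting line to @createfn.
 *
 * A write handler would typically wrap it roughly like this (a sketch,
 * with a hypothetical my_create_cmd() callback):
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */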
9750 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9751                                 size_t count, loff_t *ppos,
9752                                 int (*createfn)(const char *))
9753 {
9754         char *kbuf, *buf, *tmp;
9755         int ret = 0;
9756         size_t done = 0;
9757         size_t size;
9758
9759         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9760         if (!kbuf)
9761                 return -ENOMEM;
9762
9763         while (done < count) {
9764                 size = count - done;
9765
9766                 if (size >= WRITE_BUFSIZE)
9767                         size = WRITE_BUFSIZE - 1;
9768
9769                 if (copy_from_user(kbuf, buffer + done, size)) {
9770                         ret = -EFAULT;
9771                         goto out;
9772                 }
9773                 kbuf[size] = '\0';
9774                 buf = kbuf;
9775                 do {
9776                         tmp = strchr(buf, '\n');
9777                         if (tmp) {
9778                                 *tmp = '\0';
9779                                 size = tmp - buf + 1;
9780                         } else {
9781                                 size = strlen(buf);
9782                                 if (done + size < count) {
9783                                         if (buf != kbuf)
9784                                                 break;
9785                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9786                                         pr_warn("Line length is too long: Should be less than %d\n",
9787                                                 WRITE_BUFSIZE - 2);
9788                                         ret = -EINVAL;
9789                                         goto out;
9790                                 }
9791                         }
9792                         done += size;
9793
9794                         /* Remove comments */
9795                         tmp = strchr(buf, '#');
9796
9797                         if (tmp)
9798                                 *tmp = '\0';
9799
9800                         ret = createfn(buf);
9801                         if (ret)
9802                                 goto out;
9803                         buf += size;
9804
9805                 } while (done < count);
9806         }
9807         ret = done;
9808
9809 out:
9810         kfree(kbuf);
9811
9812         return ret;
9813 }
9814
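/*
 * Main boot-time setup of the tracing core: allocate the cpumasks and
 * ring buffers for the global trace array, register the nop tracer, and
 * hook up the panic/die notifiers. Called from early_trace_init().
 */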
9815 __init static int tracer_alloc_buffers(void)
9816 {
9817         int ring_buf_size;
9818         int ret = -ENOMEM;
9819
9820
9821         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9822                 pr_warn("Tracing disabled due to lockdown\n");
9823                 return -EPERM;
9824         }
9825
9826         /*
9827          * Make sure we don't accidentally add more trace options
9828          * than we have bits for.
9829          */
9830         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9831
9832         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9833                 goto out;
9834
9835         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9836                 goto out_free_buffer_mask;
9837
9838         /* Only allocate trace_printk buffers if a trace_printk exists */
9839         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9840                 /* Must be called before global_trace.buffer is allocated */
9841                 trace_printk_init_buffers();
9842
9843         /* To save memory, keep the ring buffer size to its minimum */
9844         if (ring_buffer_expanded)
9845                 ring_buf_size = trace_buf_size;
9846         else
9847                 ring_buf_size = 1;
9848
9849         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9850         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9851
9852         raw_spin_lock_init(&global_trace.start_lock);
9853
9854         /*
9855          * The prepare callback allocates some memory for the ring buffer. We
9856          * don't free the buffer if the CPU goes down. If we were to free
9857          * the buffer, then the user would lose any trace that was in the
9858          * buffer. The memory will be removed once the "instance" is removed.
9859          */
9860         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9861                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9862                                       NULL);
9863         if (ret < 0)
9864                 goto out_free_cpumask;
9865         /* Used for event triggers */
9866         ret = -ENOMEM;
9867         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9868         if (!temp_buffer)
9869                 goto out_rm_hp_state;
9870
9871         if (trace_create_savedcmd() < 0)
9872                 goto out_free_temp_buffer;
9873
9874         /* TODO: make the number of buffers hot pluggable with CPUs */
9875         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9876                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9877                 goto out_free_savedcmd;
9878         }
9879
9880         if (global_trace.buffer_disabled)
9881                 tracing_off();
9882
9883         if (trace_boot_clock) {
9884                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9885                 if (ret < 0)
9886                         pr_warn("Trace clock %s not defined, going back to default\n",
9887                                 trace_boot_clock);
9888         }
9889
9890         /*
9891          * register_tracer() might reference current_trace, so it
9892          * needs to be set before we register anything. This is
9893          * just a bootstrap of current_trace anyway.
9894          */
9895         global_trace.current_trace = &nop_trace;
9896
9897         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9898
9899         ftrace_init_global_array_ops(&global_trace);
9900
9901         init_trace_flags_index(&global_trace);
9902
9903         register_tracer(&nop_trace);
9904
9905         /* Function tracing may start here (via kernel command line) */
9906         init_function_trace();
9907
9908         /* All seems OK, enable tracing */
9909         tracing_disabled = 0;
9910
9911         atomic_notifier_chain_register(&panic_notifier_list,
9912                                        &trace_panic_notifier);
9913
9914         register_die_notifier(&trace_die_notifier);
9915
9916         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9917
9918         INIT_LIST_HEAD(&global_trace.systems);
9919         INIT_LIST_HEAD(&global_trace.events);
9920         INIT_LIST_HEAD(&global_trace.hist_vars);
9921         INIT_LIST_HEAD(&global_trace.err_log);
9922         list_add(&global_trace.list, &ftrace_trace_arrays);
9923
9924         apply_trace_boot_options();
9925
9926         register_snapshot_cmd();
9927
9928         test_can_verify();
9929
9930         return 0;
9931
9932 out_free_savedcmd:
9933         free_saved_cmdlines_buffer(savedcmd);
9934 out_free_temp_buffer:
9935         ring_buffer_free(temp_buffer);
9936 out_rm_hp_state:
9937         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9938 out_free_cpumask:
9939         free_cpumask_var(global_trace.tracing_cpumask);
9940 out_free_buffer_mask:
9941         free_cpumask_var(tracing_buffer_mask);
9942 out:
9943         return ret;
9944 }
9945
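/*
 * Called very early from start_kernel(): set up the tracepoint_printk
 * iterator if requested and allocate the tracing buffers so that tracing
 * can be used before the normal initcalls run.
 */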
9946 void __init early_trace_init(void)
9947 {
9948         if (tracepoint_printk) {
9949                 tracepoint_print_iter =
9950                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9951                 if (MEM_FAIL(!tracepoint_print_iter,
9952                              "Failed to allocate trace iterator\n"))
9953                         tracepoint_printk = 0;
9954                 else
9955                         static_key_enable(&tracepoint_printk_key.key);
9956         }
9957         tracer_alloc_buffers();
9958 }
9959
9960 void __init trace_init(void)
9961 {
9962         trace_event_init();
9963 }
9964
9965 __init static int clear_boot_tracer(void)
9966 {
9967         /*
9968          * The default bootup tracer name points into an init section
9969          * (the boot command-line buffer), which is freed after init.
9970          * This function runs as a late initcall; if the boot tracer
9971          * was never registered, clear the pointer so that a later
9972          * registration does not access memory about to be freed.
9973          */
9974         if (!default_bootup_tracer)
9975                 return 0;
9976
9977         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9978                default_bootup_tracer);
9979         default_bootup_tracer = NULL;
9980
9981         return 0;
9982 }
9983
9984 fs_initcall(tracer_init_tracefs);
9985 late_initcall_sync(clear_boot_tracer);
9986
9987 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
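/*
 * If the scheduler clock turns out to be unstable, switch the default
 * trace clock to "global" so that timestamps stay comparable across
 * CPUs, unless the user already picked a clock on the command line.
 */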
9988 __init static int tracing_set_default_clock(void)
9989 {
9990         /* sched_clock_stable() is determined in late_initcall */
9991         if (!trace_boot_clock && !sched_clock_stable()) {
9992                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9993                         pr_warn("Can not set tracing clock due to lockdown\n");
9994                         return -EPERM;
9995                 }
9996
9997                 printk(KERN_WARNING
9998                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9999                        "If you want to keep using the local clock, then add:\n"
10000                        "  \"trace_clock=local\"\n"
10001                        "on the kernel command line\n");
10002                 tracing_set_clock(&global_trace, "global");
10003         }
10004
10005         return 0;
10006 }
10007 late_initcall_sync(tracing_set_default_clock);
10008 #endif