tracing: Add better comments for the filtering temp buffer use case
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
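/*
 * Convert nanoseconds to microseconds, rounding to the nearest
 * microsecond (e.g. 1499 ns -> 1 us, 1500 ns -> 2 us).
 */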
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are adding the export to the list, but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer inserted into the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
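
/*
 * A minimal sketch of a trace_export user (illustrative only, not part
 * of this file; the example_* names are made up). The write() callback
 * receives the raw trace entry and its length, as passed by
 * trace_process_export() above.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward or log the exported entry; here we only note its size */
	pr_info("ftrace export: %u bytes\n", size);
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * Registered with register_ftrace_export(&example_export) and removed
 * with unregister_ftrace_export(&example_export).
 */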
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
412          TRACE_ITER_HASH_PTR)
413
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
416                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
421
422 /*
423  * The global_trace is the descriptor that holds the top-level tracing
424  * buffers for the live tracing.
425  */
426 static struct trace_array global_trace = {
427         .trace_flags = TRACE_DEFAULT_FLAGS,
428 };
429
430 LIST_HEAD(ftrace_trace_arrays);
431
432 int trace_array_get(struct trace_array *this_tr)
433 {
434         struct trace_array *tr;
435         int ret = -ENODEV;
436
437         mutex_lock(&trace_types_lock);
438         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
439                 if (tr == this_tr) {
440                         tr->ref++;
441                         ret = 0;
442                         break;
443                 }
444         }
445         mutex_unlock(&trace_types_lock);
446
447         return ret;
448 }
449
450 static void __trace_array_put(struct trace_array *this_tr)
451 {
452         WARN_ON(!this_tr->ref);
453         this_tr->ref--;
454 }
455
456 /**
457  * trace_array_put - Decrement the reference counter for this trace array.
458  * @this_tr: pointer to the trace array
459  *
460  * NOTE: Use this when we no longer need the trace array returned by
461  * trace_array_get_by_name(). This ensures the trace array can be later
462  * destroyed.
463  *
464  */
465 void trace_array_put(struct trace_array *this_tr)
466 {
467         if (!this_tr)
468                 return;
469
470         mutex_lock(&trace_types_lock);
471         __trace_array_put(this_tr);
472         mutex_unlock(&trace_types_lock);
473 }
474 EXPORT_SYMBOL_GPL(trace_array_put);
475
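/*
 * A minimal sketch of the get/put pairing described above (illustrative
 * only, not part of this file). trace_array_get_by_name() is defined
 * later in this file; the instance name "example" is made up.
 */
static void __maybe_unused example_use_trace_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");	/* takes a reference */
	if (!tr)
		return;

	/* ... use the instance (write events, tweak options, ...) ... */

	trace_array_put(tr);				/* drop the reference */
}
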
476 int tracing_check_open_get_tr(struct trace_array *tr)
477 {
478         int ret;
479
480         ret = security_locked_down(LOCKDOWN_TRACEFS);
481         if (ret)
482                 return ret;
483
484         if (tracing_disabled)
485                 return -ENODEV;
486
487         if (tr && trace_array_get(tr) < 0)
488                 return -ENODEV;
489
490         return 0;
491 }
492
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494                               struct trace_buffer *buffer,
495                               struct ring_buffer_event *event)
496 {
497         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498             !filter_match_preds(call->filter, rec)) {
499                 __trace_event_discard_commit(buffer, event);
500                 return 1;
501         }
502
503         return 0;
504 }
505
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 {
508         vfree(pid_list->pids);
509         kfree(pid_list);
510 }
511
512 /**
513  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514  * @filtered_pids: The list of pids to check
515  * @search_pid: The PID to find in @filtered_pids
516  *
517  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
518  */
519 bool
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
521 {
522         /*
523          * If pid_max changed after filtered_pids was created, we
524          * by default ignore all pids greater than the previous pid_max.
525          */
526         if (search_pid >= filtered_pids->pid_max)
527                 return false;
528
529         return test_bit(search_pid, filtered_pids->pids);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
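
/*
 * For illustration: with filtered_pids empty (NULL) and filtered_no_pids
 * containing pid 42, only pid 42 is ignored; with filtered_pids
 * containing pid 42 and filtered_no_pids empty, only pid 42 is traced.
 */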
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* Sorry, but we don't support pid_max changing after setting */
587         if (task->pid >= pid_list->pid_max)
588                 return;
589
590         /* "self" is set for forks, and NULL for exits */
591         if (self)
592                 set_bit(task->pid, pid_list->pids);
593         else
594                 clear_bit(task->pid, pid_list->pids);
595 }
596
597 /**
598  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599  * @pid_list: The pid list to show
600  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
601  * @pos: The position of the file
602  *
603  * This is used by the seq_file "next" operation to iterate the pids
604  * listed in a trace_pid_list structure.
605  *
606  * Returns the pid+1 as we want to display pid of zero, but NULL would
607  * stop the iteration.
608  */
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 {
611         unsigned long pid = (unsigned long)v;
612
613         (*pos)++;
614
615         /* pid already is +1 of the actual previous bit */
616         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617
618         /* Return pid + 1 to allow zero to be represented */
619         if (pid < pid_list->pid_max)
620                 return (void *)(pid + 1);
621
622         return NULL;
623 }
624
625 /**
626  * trace_pid_start - Used for seq_file to start reading pid lists
627  * @pid_list: The pid list to show
628  * @pos: The position of the file
629  *
630  * This is used by seq_file "start" operation to start the iteration
631  * of listing pids.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
637 {
638         unsigned long pid;
639         loff_t l = 0;
640
641         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642         if (pid >= pid_list->pid_max)
643                 return NULL;
644
645         /* Return pid + 1 so that zero can be the exit value */
646         for (pid++; pid && l < *pos;
647              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
648                 ;
649         return (void *)pid;
650 }
651
652 /**
653  * trace_pid_show - show the current pid in seq_file processing
654  * @m: The seq_file structure to write into
655  * @v: A void pointer of the pid (+1) value to display
656  *
657  * Can be directly used by seq_file operations to display the current
658  * pid value.
659  */
660 int trace_pid_show(struct seq_file *m, void *v)
661 {
662         unsigned long pid = (unsigned long)v - 1;
663
664         seq_printf(m, "%lu\n", pid);
665         return 0;
666 }
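
/*
 * A minimal sketch (illustrative only, not part of this file) of how the
 * three helpers above are typically wired into a seq_file; the example_*
 * names are made up and m->private is assumed to hold the pid list.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops __maybe_unused = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};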
667
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE            127
670
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672                     struct trace_pid_list **new_pid_list,
673                     const char __user *ubuf, size_t cnt)
674 {
675         struct trace_pid_list *pid_list;
676         struct trace_parser parser;
677         unsigned long val;
678         int nr_pids = 0;
679         ssize_t read = 0;
680         ssize_t ret = 0;
681         loff_t pos;
682         pid_t pid;
683
684         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
685                 return -ENOMEM;
686
687         /*
688          * Always create a new array. The write is an all-or-nothing
689          * operation: a new array is built from the user's input and only
690          * replaces the current list if everything succeeds. If the
691          * operation fails, the current list is not modified.
692          */
693         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694         if (!pid_list) {
695                 trace_parser_put(&parser);
696                 return -ENOMEM;
697         }
698
699         pid_list->pid_max = READ_ONCE(pid_max);
700
701         /* Only truncating will shrink pid_max */
702         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703                 pid_list->pid_max = filtered_pids->pid_max;
704
705         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706         if (!pid_list->pids) {
707                 trace_parser_put(&parser);
708                 kfree(pid_list);
709                 return -ENOMEM;
710         }
711
712         if (filtered_pids) {
713                 /* copy the current bits to the new max */
714                 for_each_set_bit(pid, filtered_pids->pids,
715                                  filtered_pids->pid_max) {
716                         set_bit(pid, pid_list->pids);
717                         nr_pids++;
718                 }
719         }
720
721         while (cnt > 0) {
722
723                 pos = 0;
724
725                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726                 if (ret < 0 || !trace_parser_loaded(&parser))
727                         break;
728
729                 read += ret;
730                 ubuf += ret;
731                 cnt -= ret;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736                 if (val >= pid_list->pid_max)
737                         break;
738
739                 pid = (pid_t)val;
740
741                 set_bit(pid, pid_list->pids);
742                 nr_pids++;
743
744                 trace_parser_clear(&parser);
745                 ret = 0;
746         }
747         trace_parser_put(&parser);
748
749         if (ret < 0) {
750                 trace_free_pid_list(pid_list);
751                 return ret;
752         }
753
754         if (!nr_pids) {
755                 /* Cleared the list of pids */
756                 trace_free_pid_list(pid_list);
757                 read = ret;
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" to be used in fast paths such as for
790  * the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low number of 16384.
811  * If a dump on oops happens, it is much appreciated not to have
812  * to wait for all that output. In any case, this can be
813  * configured both at boot time and at run time.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * Serialize access to the ring buffer.
829  *
830  * The ring buffer serializes readers, but that is only low-level protection.
831  * The validity of the events (returned by ring_buffer_peek() etc.)
832  * is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow other processes
835  * to consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the events producer.
839  *   B) the page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different CPU
843  * ring buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
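
/*
 * For illustration (not part of this file): readers pair the calls above
 * around consuming a single CPU's buffer, or pass RING_BUFFER_ALL_CPUS
 * to exclude all per-CPU readers at once, e.g.:
 *
 *	trace_access_lock(cpu);
 *	... consume events from the @cpu ring buffer ...
 *	trace_access_unlock(cpu);
 */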
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff), that just want to
964          * know if the ring buffer has been disabled, but it can handle
965          * races of where it gets disabled but we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
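	/*
	 * When event filtering is in use, events are first built in a
	 * per-CPU temporary buffer (trace_buffered_event) so that events
	 * failing the filter never need to be discarded from the ring
	 * buffer itself. An event that passed the filter must therefore
	 * be copied into the ring buffer here rather than committed in
	 * place.
	 */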
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998         } else
999                 ring_buffer_unlock_commit(buffer, event);
1000 }
1001
1002 /**
1003  * __trace_puts - write a constant string into the trace buffer.
1004  * @ip:    The address of the caller
1005  * @str:   The constant string to write
1006  * @size:  The size of the string.
1007  */
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1009 {
1010         struct ring_buffer_event *event;
1011         struct trace_buffer *buffer;
1012         struct print_entry *entry;
1013         unsigned int trace_ctx;
1014         int alloc;
1015
1016         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1017                 return 0;
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         trace_ctx = tracing_gen_ctx();
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             trace_ctx);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string whose address is written into the buffer
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned int trace_ctx;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067
1068         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069                 return 0;
1070
1071         if (unlikely(tracing_selftest_running || tracing_disabled))
1072                 return 0;
1073
1074         trace_ctx = tracing_gen_ctx();
1075         buffer = global_trace.array_buffer.buffer;
1076
1077         ring_buffer_nest_start(buffer);
1078         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079                                             trace_ctx);
1080         if (!event)
1081                 goto out;
1082
1083         entry = ring_buffer_event_data(event);
1084         entry->ip                       = ip;
1085         entry->str                      = str;
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089
1090         ret = 1;
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
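
/*
 * Note: callers normally use the trace_puts() macro rather than calling
 * __trace_puts()/__trace_bputs() directly; the macro picks __trace_bputs()
 * for strings known constant at build time and __trace_puts() otherwise.
 */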
1096
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099                                            void *cond_data)
1100 {
1101         struct tracer *tracer = tr->current_trace;
1102         unsigned long flags;
1103
1104         if (in_nmi()) {
1105                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1107                 return;
1108         }
1109
1110         if (!tr->allocated_snapshot) {
1111                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112                 internal_trace_puts("*** stopping trace here!   ***\n");
1113                 tracing_off();
1114                 return;
1115         }
1116
1117         /* Note, the snapshot cannot be used when the current tracer is already using the max buffer */
1118         if (tracer->use_max_tr) {
1119                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1121                 return;
1122         }
1123
1124         local_irq_save(flags);
1125         update_max_tr(tr, current, smp_processor_id(), cond_data);
1126         local_irq_restore(flags);
1127 }
1128
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131         tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150         struct trace_array *tr = &global_trace;
1151
1152         tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:         The tracing instance to snapshot
1159  * @cond_data:  The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171         tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:         The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191         void *cond_data = NULL;
1192
1193         arch_spin_lock(&tr->max_lock);
1194
1195         if (tr->cond_snapshot)
1196                 cond_data = tr->cond_snapshot->cond_data;
1197
1198         arch_spin_unlock(&tr->max_lock);
1199
1200         return cond_data;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205                                         struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1209 {
1210         int ret;
1211
1212         if (!tr->allocated_snapshot) {
1213
1214                 /* allocate spare buffer */
1215                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1217                 if (ret < 0)
1218                         return ret;
1219
1220                 tr->allocated_snapshot = true;
1221         }
1222
1223         return 0;
1224 }
1225
1226 static void free_snapshot(struct trace_array *tr)
1227 {
1228         /*
1229          * We don't free the ring buffer; instead, we resize it because
1230          * the max_tr ring buffer has some state (e.g. ring->clock) and
1231          * we want to preserve it.
1232          */
1233         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234         set_buffer_entries(&tr->max_buffer, 1);
1235         tracing_reset_online_cpus(&tr->max_buffer);
1236         tr->allocated_snapshot = false;
1237 }
1238
1239 /**
1240  * tracing_alloc_snapshot - allocate snapshot buffer.
1241  *
1242  * This only allocates the snapshot buffer if it isn't already
1243  * allocated - it doesn't also take a snapshot.
1244  *
1245  * This is meant to be used in cases where the snapshot buffer needs
1246  * to be set up for events that can't sleep but need to be able to
1247  * trigger a snapshot.
1248  */
1249 int tracing_alloc_snapshot(void)
1250 {
1251         struct trace_array *tr = &global_trace;
1252         int ret;
1253
1254         ret = tracing_alloc_snapshot_instance(tr);
1255         WARN_ON(ret < 0);
1256
1257         return ret;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1260
1261 /**
1262  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263  *
1264  * This is similar to tracing_snapshot(), but it will allocate the
1265  * snapshot buffer if it isn't already allocated. Use this only
1266  * where it is safe to sleep, as the allocation may sleep.
1267  *
1268  * This causes a swap between the snapshot buffer and the current live
1269  * tracing buffer. You can use this to take snapshots of the live
1270  * trace when some condition is triggered, but continue to trace.
1271  */
1272 void tracing_snapshot_alloc(void)
1273 {
1274         int ret;
1275
1276         ret = tracing_alloc_snapshot();
1277         if (ret < 0)
1278                 return;
1279
1280         tracing_snapshot();
1281 }
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
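
/*
 * A minimal sketch of in-kernel usage (illustrative only, not part of
 * this file; the example_* names are made up): take a snapshot when some
 * rare condition is seen. tracing_snapshot_alloc() may sleep; once the
 * buffer is allocated, tracing_snapshot() can be used from atomic
 * (non-NMI) context.
 */
static bool example_rare_condition;	/* made-up trigger for the example */

static void __maybe_unused example_snapshot_on_condition(void)
{
	if (READ_ONCE(example_rare_condition))
		tracing_snapshot_alloc();
}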
1283
1284 /**
1285  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286  * @tr:         The tracing instance
1287  * @cond_data:  User data to associate with the snapshot
1288  * @update:     Implementation of the cond_snapshot update function
1289  *
1290  * Check whether the conditional snapshot for the given instance has
1291  * already been enabled, or if the current tracer is already using a
1292  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293  * save the cond_data and update function inside.
1294  *
1295  * Returns 0 if successful, error otherwise.
1296  */
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298                                  cond_update_fn_t update)
1299 {
1300         struct cond_snapshot *cond_snapshot;
1301         int ret = 0;
1302
1303         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1304         if (!cond_snapshot)
1305                 return -ENOMEM;
1306
1307         cond_snapshot->cond_data = cond_data;
1308         cond_snapshot->update = update;
1309
1310         mutex_lock(&trace_types_lock);
1311
1312         ret = tracing_alloc_snapshot_instance(tr);
1313         if (ret)
1314                 goto fail_unlock;
1315
1316         if (tr->current_trace->use_max_tr) {
1317                 ret = -EBUSY;
1318                 goto fail_unlock;
1319         }
1320
1321         /*
1322          * The cond_snapshot can only change to NULL without the
1323          * trace_types_lock. We don't care if we race with it going
1324          * to NULL, but we want to make sure that it's not set to
1325          * something other than NULL when we get here, which we can
1326          * do safely with only holding the trace_types_lock and not
1327          * having to take the max_lock.
1328          */
1329         if (tr->cond_snapshot) {
1330                 ret = -EBUSY;
1331                 goto fail_unlock;
1332         }
1333
1334         arch_spin_lock(&tr->max_lock);
1335         tr->cond_snapshot = cond_snapshot;
1336         arch_spin_unlock(&tr->max_lock);
1337
1338         mutex_unlock(&trace_types_lock);
1339
1340         return ret;
1341
1342  fail_unlock:
1343         mutex_unlock(&trace_types_lock);
1344         kfree(cond_snapshot);
1345         return ret;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1348
1349 /**
1350  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  *
1353  * Check whether the conditional snapshot for the given instance is
1354  * enabled; if so, free the cond_snapshot associated with it,
1355  * otherwise return -EINVAL.
1356  *
1357  * Returns 0 if successful, error otherwise.
1358  */
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1360 {
1361         int ret = 0;
1362
1363         arch_spin_lock(&tr->max_lock);
1364
1365         if (!tr->cond_snapshot)
1366                 ret = -EINVAL;
1367         else {
1368                 kfree(tr->cond_snapshot);
1369                 tr->cond_snapshot = NULL;
1370         }
1371
1372         arch_spin_unlock(&tr->max_lock);
1373
1374         return ret;
1375 }
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
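
/*
 * A minimal sketch of the conditional-snapshot API above (illustrative
 * only, not part of this file; example_cond_update() is made up and the
 * callback signature is assumed to be bool (*)(struct trace_array *,
 * void *)). The update() callback decides, from the cond_data registered
 * at enable time, whether a requested snapshot should actually be taken.
 */
static bool __maybe_unused example_cond_update(struct trace_array *tr,
					       void *cond_data)
{
	bool *armed = cond_data;

	return armed && *armed;		/* snapshot only while "armed" */
}

/*
 * Paired as:
 *	tracing_snapshot_cond_enable(tr, &armed_flag, example_cond_update);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */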
1377 #else
1378 void tracing_snapshot(void)
1379 {
1380         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 {
1385         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 }
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1389 {
1390         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1391         return -ENODEV;
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1395 {
1396         /* Give warning */
1397         tracing_snapshot();
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1401 {
1402         return NULL;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1406 {
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1411 {
1412         return false;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1416
1417 void tracer_tracing_off(struct trace_array *tr)
1418 {
1419         if (tr->array_buffer.buffer)
1420                 ring_buffer_record_off(tr->array_buffer.buffer);
1421         /*
1422          * This flag is looked at when buffers haven't been allocated
1423          * yet, or by some tracers (like irqsoff), that just want to
1424          * know if the ring buffer has been disabled, but it can handle
1425          * races of where it gets disabled but we still do a record.
1426          * As the check is in the fast path of the tracers, it is more
1427          * important to be fast than accurate.
1428          */
1429         tr->buffer_disabled = 1;
1430         /* Make the flag seen by readers */
1431         smp_wmb();
1432 }
1433
1434 /**
1435  * tracing_off - turn off tracing buffers
1436  *
1437  * This function stops the tracing buffers from recording data.
1438  * It does not disable any overhead the tracers themselves may
1439  * be causing. This function simply causes all recording to
1440  * the ring buffers to fail.
1441  */
1442 void tracing_off(void)
1443 {
1444         tracer_tracing_off(&global_trace);
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_off);
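
/*
 * For illustration (not part of this file): a common debugging pattern is
 * to freeze the buffers when a problem is detected, so the events leading
 * up to it are preserved for inspection, e.g.:
 *
 *	if (problem_detected)
 *		tracing_off();
 */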
1447
1448 void disable_trace_on_warning(void)
1449 {
1450         if (__disable_trace_on_warning) {
1451                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452                         "Disabling tracing due to warning\n");
1453                 tracing_off();
1454         }
1455 }
1456
1457 /**
1458  * tracer_tracing_is_on - show real state of ring buffer enabled
1459  * @tr : the trace array to know if ring buffer is enabled
1460  *
1461  * Shows real state of the ring buffer if it is enabled or not.
1462  */
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1464 {
1465         if (tr->array_buffer.buffer)
1466                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467         return !tr->buffer_disabled;
1468 }
1469
1470 /**
1471  * tracing_is_on - show state of ring buffers enabled
1472  */
1473 int tracing_is_on(void)
1474 {
1475         return tracer_tracing_is_on(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
1478
1479 static int __init set_buf_size(char *str)
1480 {
1481         unsigned long buf_size;
1482
1483         if (!str)
1484                 return 0;
1485         buf_size = memparse(str, &str);
1486         /* nr_entries can not be zero */
1487         if (buf_size == 0)
1488                 return 0;
1489         trace_buf_size = buf_size;
1490         return 1;
1491 }
1492 __setup("trace_buf_size=", set_buf_size);
1493
1494 static int __init set_tracing_thresh(char *str)
1495 {
1496         unsigned long threshold;
1497         int ret;
1498
1499         if (!str)
1500                 return 0;
1501         ret = kstrtoul(str, 0, &threshold);
1502         if (ret < 0)
1503                 return 0;
1504         tracing_thresh = threshold * 1000;
1505         return 1;
1506 }
1507 __setup("tracing_thresh=", set_tracing_thresh);
1508
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 {
1511         return nsecs / 1000;
1512 }
1513
1514 /*
1515  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518  * of strings in the order that the evals (enum) were defined.
1519  */
1520 #undef C
1521 #define C(a, b) b
1522
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1525         TRACE_FLAGS
1526         NULL
1527 };
1528
1529 static struct {
1530         u64 (*func)(void);
1531         const char *name;
1532         int in_ns;              /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534         { trace_clock_local,            "local",        1 },
1535         { trace_clock_global,           "global",       1 },
1536         { trace_clock_counter,          "counter",      0 },
1537         { trace_clock_jiffies,          "uptime",       0 },
1538         { trace_clock,                  "perf",         1 },
1539         { ktime_get_mono_fast_ns,       "mono",         1 },
1540         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1541         { ktime_get_boot_fast_ns,       "boot",         1 },
1542         ARCH_TRACE_CLOCKS
1543 };
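
/*
 * The clock used by an instance is selected from the table above, either
 * at boot with the trace_clock= parameter (see set_trace_boot_clock()
 * earlier in this file) or at run time through the instance's trace_clock
 * file in tracefs, e.g. "echo global > trace_clock".
 */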
1544
1545 bool trace_clock_in_ns(struct trace_array *tr)
1546 {
1547         if (trace_clocks[tr->clock_id].in_ns)
1548                 return true;
1549
1550         return false;
1551 }
1552
1553 /*
1554  * trace_parser_get_init - gets the buffer for trace parser
1555  */
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 {
1558         memset(parser, 0, sizeof(*parser));
1559
1560         parser->buffer = kmalloc(size, GFP_KERNEL);
1561         if (!parser->buffer)
1562                 return 1;
1563
1564         parser->size = size;
1565         return 0;
1566 }
1567
1568 /*
1569  * trace_parser_put - frees the buffer for trace parser
1570  */
1571 void trace_parser_put(struct trace_parser *parser)
1572 {
1573         kfree(parser->buffer);
1574         parser->buffer = NULL;
1575 }
1576
1577 /*
1578  * trace_get_user - reads the user input string separated by  space
1579  * (matched by isspace(ch))
1580  *
1581  * For each string found the 'struct trace_parser' is updated,
1582  * and the function returns.
1583  *
1584  * Returns number of bytes read.
1585  *
1586  * See kernel/trace/trace.h for 'struct trace_parser' details.
1587  */
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589         size_t cnt, loff_t *ppos)
1590 {
1591         char ch;
1592         size_t read = 0;
1593         ssize_t ret;
1594
1595         if (!*ppos)
1596                 trace_parser_clear(parser);
1597
1598         ret = get_user(ch, ubuf++);
1599         if (ret)
1600                 goto out;
1601
1602         read++;
1603         cnt--;
1604
1605         /*
1606          * If the parser is not finished with the last write, continue
1607          * reading the user input without skipping spaces.
1608          */
1609         if (!parser->cont) {
1610                 /* skip white space */
1611                 while (cnt && isspace(ch)) {
1612                         ret = get_user(ch, ubuf++);
1613                         if (ret)
1614                                 goto out;
1615                         read++;
1616                         cnt--;
1617                 }
1618
1619                 parser->idx = 0;
1620
1621                 /* only spaces were written */
1622                 if (isspace(ch) || !ch) {
1623                         *ppos += read;
1624                         ret = read;
1625                         goto out;
1626                 }
1627         }
1628
1629         /* read the non-space input */
1630         while (cnt && !isspace(ch) && ch) {
1631                 if (parser->idx < parser->size - 1)
1632                         parser->buffer[parser->idx++] = ch;
1633                 else {
1634                         ret = -EINVAL;
1635                         goto out;
1636                 }
1637                 ret = get_user(ch, ubuf++);
1638                 if (ret)
1639                         goto out;
1640                 read++;
1641                 cnt--;
1642         }
1643
1644         /* We either got complete input or we have to wait for another call. */
1645         if (isspace(ch) || !ch) {
1646                 parser->buffer[parser->idx] = 0;
1647                 parser->cont = false;
1648         } else if (parser->idx < parser->size - 1) {
1649                 parser->cont = true;
1650                 parser->buffer[parser->idx++] = ch;
1651                 /* Make sure the parsed string always terminates with '\0'. */
1652                 parser->buffer[parser->idx] = 0;
1653         } else {
1654                 ret = -EINVAL;
1655                 goto out;
1656         }
1657
1658         *ppos += read;
1659         ret = read;
1660
1661 out:
1662         return ret;
1663 }
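/*
 * A minimal, hypothetical sketch of how a write handler might drive the
 * parser above: allocate, pull one token per call, and free. The handler
 * name and process_token() are made up for this sketch and are not kernel
 * APIs.
 */
#if 0	/* illustrative sketch only, not compiled */
static ssize_t example_token_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	int ret;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret > 0 && !parser.cont)
		process_token(parser.buffer);	/* hypothetical consumer */

	trace_parser_put(&parser);
	return ret;
}
#endif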
1664
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1667 {
1668         int len;
1669
1670         if (trace_seq_used(s) <= s->seq.readpos)
1671                 return -EBUSY;
1672
1673         len = trace_seq_used(s) - s->seq.readpos;
1674         if (cnt > len)
1675                 cnt = len;
1676         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677
1678         s->seq.readpos += cnt;
1679         return cnt;
1680 }
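/*
 * An illustrative loop draining a trace_seq through the helper above in
 * fixed-size chunks; it stops when -EBUSY signals that readpos has caught
 * up with the data. consume_chunk() is made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static void example_drain_seq(struct trace_seq *s)
{
	char chunk[64];
	ssize_t got;

	while ((got = trace_seq_to_buffer(s, chunk, sizeof(chunk))) > 0)
		consume_chunk(chunk, got);	/* hypothetical consumer */
}
#endif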
1681
1682 unsigned long __read_mostly     tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1684
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686         defined(CONFIG_FSNOTIFY)
1687
1688 static struct workqueue_struct *fsnotify_wq;
1689
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1691 {
1692         struct trace_array *tr = container_of(work, struct trace_array,
1693                                               fsnotify_work);
1694         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1695 }
1696
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 {
1699         struct trace_array *tr = container_of(iwork, struct trace_array,
1700                                               fsnotify_irqwork);
1701         queue_work(fsnotify_wq, &tr->fsnotify_work);
1702 }
1703
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705                                      struct dentry *d_tracer)
1706 {
1707         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710                                               d_tracer, &tr->max_latency,
1711                                               &tracing_max_lat_fops);
1712 }
1713
1714 __init static int latency_fsnotify_init(void)
1715 {
1716         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1718         if (!fsnotify_wq) {
1719                 pr_err("Unable to allocate tr_max_lat_wq\n");
1720                 return -ENOMEM;
1721         }
1722         return 0;
1723 }
1724
1725 late_initcall_sync(latency_fsnotify_init);
1726
1727 void latency_fsnotify(struct trace_array *tr)
1728 {
1729         if (!fsnotify_wq)
1730                 return;
1731         /*
1732          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733          * possible that we are called from __schedule() or do_idle(), which
1734          * could cause a deadlock.
1735          */
1736         irq_work_queue(&tr->fsnotify_irqwork);
1737 }
1738
1739 /*
1740  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741  *  defined(CONFIG_FSNOTIFY)
1742  */
1743 #else
1744
1745 #define trace_create_maxlat_file(tr, d_tracer)                          \
1746         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1747                           &tr->max_latency, &tracing_max_lat_fops)
1748
1749 #endif
1750
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1752 /*
1753  * Copy the new maximum trace into the separate maximum-trace
1754  * structure. (this way the maximum trace is permanently saved,
1755  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1756  */
1757 static void
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 {
1760         struct array_buffer *trace_buf = &tr->array_buffer;
1761         struct array_buffer *max_buf = &tr->max_buffer;
1762         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1764
1765         max_buf->cpu = cpu;
1766         max_buf->time_start = data->preempt_timestamp;
1767
1768         max_data->saved_latency = tr->max_latency;
1769         max_data->critical_start = data->critical_start;
1770         max_data->critical_end = data->critical_end;
1771
1772         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773         max_data->pid = tsk->pid;
1774         /*
1775          * If tsk == current, then use current_uid(), as that does not use
1776          * RCU. The irq tracer can be called out of RCU scope.
1777          */
1778         if (tsk == current)
1779                 max_data->uid = current_uid();
1780         else
1781                 max_data->uid = task_uid(tsk);
1782
1783         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784         max_data->policy = tsk->policy;
1785         max_data->rt_priority = tsk->rt_priority;
1786
1787         /* Record this task's comm. */
1788         tracing_record_cmdline(tsk);
1789         latency_fsnotify(tr);
1790 }
1791
1792 /**
1793  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794  * @tr: tracer
1795  * @tsk: the task with the latency
1796  * @cpu: The cpu that initiated the trace.
1797  * @cond_data: User data associated with a conditional snapshot
1798  *
1799  * Flip the buffers between the @tr and the max_tr and record information
1800  * about which task was the cause of this latency.
1801  */
1802 void
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1804               void *cond_data)
1805 {
1806         if (tr->stop_count)
1807                 return;
1808
1809         WARN_ON_ONCE(!irqs_disabled());
1810
1811         if (!tr->allocated_snapshot) {
1812                 /* Only the nop tracer should hit this when disabling */
1813                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1814                 return;
1815         }
1816
1817         arch_spin_lock(&tr->max_lock);
1818
1819         /* Inherit the recordable setting from array_buffer */
1820         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821                 ring_buffer_record_on(tr->max_buffer.buffer);
1822         else
1823                 ring_buffer_record_off(tr->max_buffer.buffer);
1824
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1827                 goto out_unlock;
1828 #endif
1829         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830
1831         __update_max_tr(tr, tsk, cpu);
1832
1833  out_unlock:
1834         arch_spin_unlock(&tr->max_lock);
1835 }
1836
1837 /**
1838  * update_max_tr_single - only copy one trace over, and reset the rest
1839  * @tr: tracer
1840  * @tsk: task with the latency
1841  * @cpu: the cpu of the buffer to copy.
1842  *
1843  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1844  */
1845 void
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1847 {
1848         int ret;
1849
1850         if (tr->stop_count)
1851                 return;
1852
1853         WARN_ON_ONCE(!irqs_disabled());
1854         if (!tr->allocated_snapshot) {
1855                 /* Only the nop tracer should hit this when disabling */
1856                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1857                 return;
1858         }
1859
1860         arch_spin_lock(&tr->max_lock);
1861
1862         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863
1864         if (ret == -EBUSY) {
1865                 /*
1866                  * We failed to swap the buffer due to a commit taking
1867                  * place on this CPU. We fail to record, but we reset
1868                  * the max trace buffer (no one writes directly to it)
1869                  * and flag that it failed.
1870                  */
1871                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872                         "Failed to swap buffers due to commit in progress\n");
1873         }
1874
1875         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876
1877         __update_max_tr(tr, tsk, cpu);
1878         arch_spin_unlock(&tr->max_lock);
1879 }
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1881
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 {
1884         /* Iterators are static, they should be filled or empty */
1885         if (trace_buffer_iter(iter, iter->cpu_file))
1886                 return 0;
1887
1888         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1889                                 full);
1890 }
1891
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1894
1895 struct trace_selftests {
1896         struct list_head                list;
1897         struct tracer                   *type;
1898 };
1899
1900 static LIST_HEAD(postponed_selftests);
1901
1902 static int save_selftest(struct tracer *type)
1903 {
1904         struct trace_selftests *selftest;
1905
1906         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1907         if (!selftest)
1908                 return -ENOMEM;
1909
1910         selftest->type = type;
1911         list_add(&selftest->list, &postponed_selftests);
1912         return 0;
1913 }
1914
1915 static int run_tracer_selftest(struct tracer *type)
1916 {
1917         struct trace_array *tr = &global_trace;
1918         struct tracer *saved_tracer = tr->current_trace;
1919         int ret;
1920
1921         if (!type->selftest || tracing_selftest_disabled)
1922                 return 0;
1923
1924         /*
1925          * If a tracer registers early in boot up (before scheduling is
1926          * initialized and such), then do not run its selftest yet.
1927          * Instead, run it a little later in the boot process.
1928          */
1929         if (!selftests_can_run)
1930                 return save_selftest(type);
1931
1932         if (!tracing_is_on()) {
1933                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1934                         type->name);
1935                 return 0;
1936         }
1937
1938         /*
1939          * Run a selftest on this tracer.
1940          * Here we reset the trace buffer, and set the current
1941          * tracer to be this tracer. The tracer can then run some
1942          * internal tracing to verify that everything is in order.
1943          * If we fail, we do not register this tracer.
1944          */
1945         tracing_reset_online_cpus(&tr->array_buffer);
1946
1947         tr->current_trace = type;
1948
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950         if (type->use_max_tr) {
1951                 /* If we expanded the buffers, make sure the max is expanded too */
1952                 if (ring_buffer_expanded)
1953                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954                                            RING_BUFFER_ALL_CPUS);
1955                 tr->allocated_snapshot = true;
1956         }
1957 #endif
1958
1959         /* the test is responsible for initializing and enabling */
1960         pr_info("Testing tracer %s: ", type->name);
1961         ret = type->selftest(type, tr);
1962         /* the test is responsible for resetting too */
1963         tr->current_trace = saved_tracer;
1964         if (ret) {
1965                 printk(KERN_CONT "FAILED!\n");
1966                 /* Add the warning after printing 'FAILED' */
1967                 WARN_ON(1);
1968                 return -1;
1969         }
1970         /* Only reset on passing, to avoid touching corrupted buffers */
1971         tracing_reset_online_cpus(&tr->array_buffer);
1972
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974         if (type->use_max_tr) {
1975                 tr->allocated_snapshot = false;
1976
1977                 /* Shrink the max buffer again */
1978                 if (ring_buffer_expanded)
1979                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1980                                            RING_BUFFER_ALL_CPUS);
1981         }
1982 #endif
1983
1984         printk(KERN_CONT "PASSED\n");
1985         return 0;
1986 }
1987
1988 static __init int init_trace_selftests(void)
1989 {
1990         struct trace_selftests *p, *n;
1991         struct tracer *t, **last;
1992         int ret;
1993
1994         selftests_can_run = true;
1995
1996         mutex_lock(&trace_types_lock);
1997
1998         if (list_empty(&postponed_selftests))
1999                 goto out;
2000
2001         pr_info("Running postponed tracer tests:\n");
2002
2003         tracing_selftest_running = true;
2004         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2005                 /* This loop can take minutes when sanitizers are enabled, so
2006                  * let's make sure we allow RCU processing.
2007                  */
2008                 cond_resched();
2009                 ret = run_tracer_selftest(p->type);
2010                 /* If the test fails, then warn and remove from available_tracers */
2011                 if (ret < 0) {
2012                         WARN(1, "tracer: %s failed selftest, disabling\n",
2013                              p->type->name);
2014                         last = &trace_types;
2015                         for (t = trace_types; t; t = t->next) {
2016                                 if (t == p->type) {
2017                                         *last = t->next;
2018                                         break;
2019                                 }
2020                                 last = &t->next;
2021                         }
2022                 }
2023                 list_del(&p->list);
2024                 kfree(p);
2025         }
2026         tracing_selftest_running = false;
2027
2028  out:
2029         mutex_unlock(&trace_types_lock);
2030
2031         return 0;
2032 }
2033 core_initcall(init_trace_selftests);
2034 #else
2035 static inline int run_tracer_selftest(struct tracer *type)
2036 {
2037         return 0;
2038 }
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2040
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2042
2043 static void __init apply_trace_boot_options(void);
2044
2045 /**
2046  * register_tracer - register a tracer with the ftrace system.
2047  * @type: the plugin for the tracer
2048  *
2049  * Register a new plugin tracer.
2050  */
2051 int __init register_tracer(struct tracer *type)
2052 {
2053         struct tracer *t;
2054         int ret = 0;
2055
2056         if (!type->name) {
2057                 pr_info("Tracer must have a name\n");
2058                 return -1;
2059         }
2060
2061         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2063                 return -1;
2064         }
2065
2066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067                 pr_warn("Can not register tracer %s due to lockdown\n",
2068                            type->name);
2069                 return -EPERM;
2070         }
2071
2072         mutex_lock(&trace_types_lock);
2073
2074         tracing_selftest_running = true;
2075
2076         for (t = trace_types; t; t = t->next) {
2077                 if (strcmp(type->name, t->name) == 0) {
2078                         /* already found */
2079                         pr_info("Tracer %s already registered\n",
2080                                 type->name);
2081                         ret = -1;
2082                         goto out;
2083                 }
2084         }
2085
2086         if (!type->set_flag)
2087                 type->set_flag = &dummy_set_flag;
2088         if (!type->flags) {
2089                 /* Allocate a dummy tracer_flags. */
2090                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2091                 if (!type->flags) {
2092                         ret = -ENOMEM;
2093                         goto out;
2094                 }
2095                 type->flags->val = 0;
2096                 type->flags->opts = dummy_tracer_opt;
2097         } else
2098                 if (!type->flags->opts)
2099                         type->flags->opts = dummy_tracer_opt;
2100
2101         /* store the tracer for __set_tracer_option */
2102         type->flags->trace = type;
2103
2104         ret = run_tracer_selftest(type);
2105         if (ret < 0)
2106                 goto out;
2107
2108         type->next = trace_types;
2109         trace_types = type;
2110         add_tracer_options(&global_trace, type);
2111
2112  out:
2113         tracing_selftest_running = false;
2114         mutex_unlock(&trace_types_lock);
2115
2116         if (ret || !default_bootup_tracer)
2117                 goto out_unlock;
2118
2119         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2120                 goto out_unlock;
2121
2122         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123         /* Do we want this tracer to start on bootup? */
2124         tracing_set_tracer(&global_trace, type->name);
2125         default_bootup_tracer = NULL;
2126
2127         apply_trace_boot_options();
2128
2129         /* Disable other selftests, since this tracer will break them. */
2130         disable_tracing_selftest("running a tracer");
2131
2132  out_unlock:
2133         return ret;
2134 }
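/*
 * A minimal, hypothetical example of registering a tracer with the function
 * above. Only fields touched by register_tracer() are shown; the "example"
 * tracer and example_selftest() are made up, and the missing set_flag/flags
 * members are filled in with the dummies defined earlier in this file.
 */
#if 0	/* illustrative sketch only, not compiled */
static struct tracer example_tracer __read_mostly = {
	.name		= "example",
#ifdef CONFIG_FTRACE_STARTUP_TEST
	.selftest	= example_selftest,	/* hypothetical selftest */
#endif
};

static __init int example_tracer_init(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_init);
#endif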
2135
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147         ring_buffer_reset_cpu(buffer, cpu);
2148
2149         ring_buffer_record_enable(buffer);
2150 }
2151
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2153 {
2154         struct trace_buffer *buffer = buf->buffer;
2155
2156         if (!buffer)
2157                 return;
2158
2159         ring_buffer_record_disable(buffer);
2160
2161         /* Make sure all commits have finished */
2162         synchronize_rcu();
2163
2164         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2165
2166         ring_buffer_reset_online_cpus(buffer);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2173 {
2174         struct trace_array *tr;
2175
2176         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177                 if (!tr->clear_trace)
2178                         continue;
2179                 tr->clear_trace = false;
2180                 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182                 tracing_reset_online_cpus(&tr->max_buffer);
2183 #endif
2184         }
2185 }
2186
2187 static int *tgid_map;
2188
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194         unsigned *map_cmdline_to_pid;
2195         unsigned cmdline_num;
2196         int cmdline_idx;
2197         char *saved_cmdlines;
2198 };
2199 static struct saved_cmdlines_buffer *savedcmd;
2200
2201 /* temporarily disable recording */
2202 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2203
2204 static inline char *get_saved_cmdlines(int idx)
2205 {
2206         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2207 }
2208
2209 static inline void set_cmdline(int idx, const char *cmdline)
2210 {
2211         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2212 }
2213
2214 static int allocate_cmdlines_buffer(unsigned int val,
2215                                     struct saved_cmdlines_buffer *s)
2216 {
2217         s->map_cmdline_to_pid = kmalloc_array(val,
2218                                               sizeof(*s->map_cmdline_to_pid),
2219                                               GFP_KERNEL);
2220         if (!s->map_cmdline_to_pid)
2221                 return -ENOMEM;
2222
2223         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2224         if (!s->saved_cmdlines) {
2225                 kfree(s->map_cmdline_to_pid);
2226                 return -ENOMEM;
2227         }
2228
2229         s->cmdline_idx = 0;
2230         s->cmdline_num = val;
2231         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2232                sizeof(s->map_pid_to_cmdline));
2233         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2234                val * sizeof(*s->map_cmdline_to_pid));
2235
2236         return 0;
2237 }
2238
2239 static int trace_create_savedcmd(void)
2240 {
2241         int ret;
2242
2243         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244         if (!savedcmd)
2245                 return -ENOMEM;
2246
2247         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2248         if (ret < 0) {
2249                 kfree(savedcmd);
2250                 savedcmd = NULL;
2251                 return -ENOMEM;
2252         }
2253
2254         return 0;
2255 }
2256
2257 int is_tracing_stopped(void)
2258 {
2259         return global_trace.stop_count;
2260 }
2261
2262 /**
2263  * tracing_start - quick start of the tracer
2264  *
2265  * If tracing is enabled but was stopped by tracing_stop,
2266  * this will start the tracer back up.
2267  */
2268 void tracing_start(void)
2269 {
2270         struct trace_buffer *buffer;
2271         unsigned long flags;
2272
2273         if (tracing_disabled)
2274                 return;
2275
2276         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2277         if (--global_trace.stop_count) {
2278                 if (global_trace.stop_count < 0) {
2279                         /* Someone screwed up their debugging */
2280                         WARN_ON_ONCE(1);
2281                         global_trace.stop_count = 0;
2282                 }
2283                 goto out;
2284         }
2285
2286         /* Prevent the buffers from switching */
2287         arch_spin_lock(&global_trace.max_lock);
2288
2289         buffer = global_trace.array_buffer.buffer;
2290         if (buffer)
2291                 ring_buffer_record_enable(buffer);
2292
2293 #ifdef CONFIG_TRACER_MAX_TRACE
2294         buffer = global_trace.max_buffer.buffer;
2295         if (buffer)
2296                 ring_buffer_record_enable(buffer);
2297 #endif
2298
2299         arch_spin_unlock(&global_trace.max_lock);
2300
2301  out:
2302         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2303 }
2304
2305 static void tracing_start_tr(struct trace_array *tr)
2306 {
2307         struct trace_buffer *buffer;
2308         unsigned long flags;
2309
2310         if (tracing_disabled)
2311                 return;
2312
2313         /* If global, we need to also start the max tracer */
2314         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2315                 return tracing_start();
2316
2317         raw_spin_lock_irqsave(&tr->start_lock, flags);
2318
2319         if (--tr->stop_count) {
2320                 if (tr->stop_count < 0) {
2321                         /* Someone screwed up their debugging */
2322                         WARN_ON_ONCE(1);
2323                         tr->stop_count = 0;
2324                 }
2325                 goto out;
2326         }
2327
2328         buffer = tr->array_buffer.buffer;
2329         if (buffer)
2330                 ring_buffer_record_enable(buffer);
2331
2332  out:
2333         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335
2336 /**
2337  * tracing_stop - quick stop of the tracer
2338  *
2339  * Light weight way to stop tracing. Use in conjunction with
2340  * tracing_start.
2341  */
2342 void tracing_stop(void)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2348         if (global_trace.stop_count++)
2349                 goto out;
2350
2351         /* Prevent the buffers from switching */
2352         arch_spin_lock(&global_trace.max_lock);
2353
2354         buffer = global_trace.array_buffer.buffer;
2355         if (buffer)
2356                 ring_buffer_record_disable(buffer);
2357
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359         buffer = global_trace.max_buffer.buffer;
2360         if (buffer)
2361                 ring_buffer_record_disable(buffer);
2362 #endif
2363
2364         arch_spin_unlock(&global_trace.max_lock);
2365
2366  out:
2367         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2368 }
2369
2370 static void tracing_stop_tr(struct trace_array *tr)
2371 {
2372         struct trace_buffer *buffer;
2373         unsigned long flags;
2374
2375         /* If global, we need to also stop the max tracer */
2376         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2377                 return tracing_stop();
2378
2379         raw_spin_lock_irqsave(&tr->start_lock, flags);
2380         if (tr->stop_count++)
2381                 goto out;
2382
2383         buffer = tr->array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387  out:
2388         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2389 }
2390
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393         unsigned tpid, idx;
2394
2395         /* treat recording of idle task as a success */
2396         if (!tsk->pid)
2397                 return 1;
2398
2399         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400
2401         /*
2402          * It's not the end of the world if we don't get
2403          * the lock, but we also don't want to spin
2404          * nor do we want to disable interrupts,
2405          * so if we miss here, then better luck next time.
2406          */
2407         if (!arch_spin_trylock(&trace_cmdline_lock))
2408                 return 0;
2409
2410         idx = savedcmd->map_pid_to_cmdline[tpid];
2411         if (idx == NO_CMDLINE_MAP) {
2412                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413
2414                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2415                 savedcmd->cmdline_idx = idx;
2416         }
2417
2418         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419         set_cmdline(idx, tsk->comm);
2420
2421         arch_spin_unlock(&trace_cmdline_lock);
2422
2423         return 1;
2424 }
2425
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428         unsigned map;
2429         int tpid;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         tpid = pid & (PID_MAX_DEFAULT - 1);
2442         map = savedcmd->map_pid_to_cmdline[tpid];
2443         if (map != NO_CMDLINE_MAP) {
2444                 tpid = savedcmd->map_cmdline_to_pid[map];
2445                 if (tpid == pid) {
2446                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447                         return;
2448                 }
2449         }
2450         strcpy(comm, "<...>");
2451 }
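/*
 * An illustrative helper restating the check done above. Because pids are
 * masked with (PID_MAX_DEFAULT - 1), two pids can share a
 * map_pid_to_cmdline[] slot, so map_cmdline_to_pid[] must confirm that the
 * slot still belongs to the pid being looked up. A caller would need to
 * hold trace_cmdline_lock; the helper name is made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static bool example_cmdline_slot_is_valid(int pid)
{
	unsigned map = savedcmd->map_pid_to_cmdline[pid & (PID_MAX_DEFAULT - 1)];

	return map != NO_CMDLINE_MAP &&
	       savedcmd->map_cmdline_to_pid[map] == pid;
}
#endif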
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578
2579 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2580 {
2581         unsigned int trace_flags = irqs_status;
2582         unsigned int pc;
2583
2584         pc = preempt_count();
2585
2586         if (pc & NMI_MASK)
2587                 trace_flags |= TRACE_FLAG_NMI;
2588         if (pc & HARDIRQ_MASK)
2589                 trace_flags |= TRACE_FLAG_HARDIRQ;
2590         if (in_serving_softirq())
2591                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2592
2593         if (tif_need_resched())
2594                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2595         if (test_preempt_need_resched())
2596                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2597         return (trace_flags << 16) | (pc & 0xff);
2598 }
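/*
 * An illustrative decode of the value packed above: the trace flags occupy
 * bits 16 and up, and the preempt count snapshot sits in the low byte. The
 * helper name is made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static void example_decode_trace_ctx(unsigned int trace_ctx)
{
	unsigned int flags = trace_ctx >> 16;	/* TRACE_FLAG_* bits */
	unsigned int pc = trace_ctx & 0xff;	/* preempt count snapshot */

	if (flags & TRACE_FLAG_HARDIRQ)
		pr_debug("event recorded in hard irq context, pc=%u\n", pc);
}
#endif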
2599
2600 struct ring_buffer_event *
2601 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2602                           int type,
2603                           unsigned long len,
2604                           unsigned int trace_ctx)
2605 {
2606         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2607 }
2608
2609 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2610 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2611 static int trace_buffered_event_ref;
2612
2613 /**
2614  * trace_buffered_event_enable - enable buffering events
2615  *
2616  * When events are being filtered, it is quicker to use a temporary
2617  * buffer to write the event data into if there's a likely chance
2618  * that it will not be committed. Discarding an event from the ring
2619  * buffer is not as fast as committing it, and is much slower than
2620  * copying the data and committing the copy.
2621  *
2622  * When an event is to be filtered, per-CPU buffers are allocated to
2623  * write the event data into. If the event is filtered and discarded,
2624  * it is simply dropped; otherwise, the entire data is committed in
2625  * one shot.
2626  */
2627 void trace_buffered_event_enable(void)
2628 {
2629         struct ring_buffer_event *event;
2630         struct page *page;
2631         int cpu;
2632
2633         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2634
2635         if (trace_buffered_event_ref++)
2636                 return;
2637
2638         for_each_tracing_cpu(cpu) {
2639                 page = alloc_pages_node(cpu_to_node(cpu),
2640                                         GFP_KERNEL | __GFP_NORETRY, 0);
2641                 if (!page)
2642                         goto failed;
2643
2644                 event = page_address(page);
2645                 memset(event, 0, sizeof(*event));
2646
2647                 per_cpu(trace_buffered_event, cpu) = event;
2648
2649                 preempt_disable();
2650                 if (cpu == smp_processor_id() &&
2651                     __this_cpu_read(trace_buffered_event) !=
2652                     per_cpu(trace_buffered_event, cpu))
2653                         WARN_ON_ONCE(1);
2654                 preempt_enable();
2655         }
2656
2657         return;
2658  failed:
2659         trace_buffered_event_disable();
2660 }
2661
2662 static void enable_trace_buffered_event(void *data)
2663 {
2664         /* Probably not needed, but do it anyway */
2665         smp_rmb();
2666         this_cpu_dec(trace_buffered_event_cnt);
2667 }
2668
2669 static void disable_trace_buffered_event(void *data)
2670 {
2671         this_cpu_inc(trace_buffered_event_cnt);
2672 }
2673
2674 /**
2675  * trace_buffered_event_disable - disable buffering events
2676  *
2677  * When a filter is removed, it is faster to not use the buffered
2678  * events, and to commit directly into the ring buffer. Free up
2679  * the temp buffers when there are no more users. This requires
2680  * special synchronization with current events.
2681  */
2682 void trace_buffered_event_disable(void)
2683 {
2684         int cpu;
2685
2686         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2687
2688         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2689                 return;
2690
2691         if (--trace_buffered_event_ref)
2692                 return;
2693
2694         preempt_disable();
2695         /* For each CPU, mark the temp buffer as busy so new events stop using it. */
2696         smp_call_function_many(tracing_buffer_mask,
2697                                disable_trace_buffered_event, NULL, 1);
2698         preempt_enable();
2699
2700         /* Wait for all current users to finish */
2701         synchronize_rcu();
2702
2703         for_each_tracing_cpu(cpu) {
2704                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2705                 per_cpu(trace_buffered_event, cpu) = NULL;
2706         }
2707         /*
2708          * Make sure trace_buffered_event is NULL before clearing
2709          * trace_buffered_event_cnt.
2710          */
2711         smp_wmb();
2712
2713         preempt_disable();
2714         /* Undo the earlier increment on each CPU. */
2715         smp_call_function_many(tracing_buffer_mask,
2716                                enable_trace_buffered_event, NULL, 1);
2717         preempt_enable();
2718 }
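/*
 * A hypothetical sketch of how a filter setup/teardown path would pair the
 * two calls above. Both must be made under event_mutex, and they nest via
 * trace_buffered_event_ref; the example_* names are made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static void example_filter_added(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* first user allocates the pages */
	mutex_unlock(&event_mutex);
}

static void example_filter_removed(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();	/* last user frees the pages */
	mutex_unlock(&event_mutex);
}
#endif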
2719
2720 static struct trace_buffer *temp_buffer;
2721
2722 struct ring_buffer_event *
2723 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2724                           struct trace_event_file *trace_file,
2725                           int type, unsigned long len,
2726                           unsigned int trace_ctx)
2727 {
2728         struct ring_buffer_event *entry;
2729         struct trace_array *tr = trace_file->tr;
2730         int val;
2731
2732         *current_rb = tr->array_buffer.buffer;
2733
2734         if (!tr->no_filter_buffering_ref &&
2735             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2736             (entry = this_cpu_read(trace_buffered_event))) {
2737                 /*
2738                  * Filtering is on, so try to use the per cpu buffer first.
2739                  * This buffer will simulate a ring_buffer_event,
2740                  * where the type_len is zero and the array[0] will
2741                  * hold the full length.
2742                  * (see include/linux/ring_buffer.h for details on
2743                  *  how the ring_buffer_event is structured).
2744                  *
2745                  * Using a temp buffer during filtering and copying it
2746                  * on a matched filter is quicker than writing directly
2747                  * into the ring buffer and then discarding it when
2748                  * it doesn't match. That is because the discard
2749                  * requires several atomic operations to get right.
2750                  * Copying on a match and doing nothing on a failed match
2751                  * is still quicker than writing directly into the ring
2752                  * buffer and having to discard the event on a failed match.
2753                  */
2754                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2755
2756                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2757
2758                 /*
2759                  * Preemption is disabled, but interrupts and NMIs
2760                  * can still come in now. If that happens after
2761                  * the above increment, then it will have to go
2762                  * back to the old method of allocating the event
2763                  * on the ring buffer, and if the filter fails, it
2764                  * will have to call ring_buffer_discard_commit()
2765                  * to remove it.
2766                  *
2767                  * Need to also check the unlikely case that the
2768                  * length is bigger than the temp buffer size.
2769                  * If that happens, then the reserve is pretty much
2770                  * guaranteed to fail, as the ring buffer currently
2771                  * only allows events less than a page. But that may
2772                  * change in the future, so let the ring buffer reserve
2773                  * handle the failure in that case.
2774                  */
2775                 if (val == 1 && likely(len <= max_len)) {
2776                         trace_event_setup(entry, type, trace_ctx);
2777                         entry->array[0] = len;
2778                         return entry;
2779                 }
2780                 this_cpu_dec(trace_buffered_event_cnt);
2781         }
2782
2783         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2784                                             trace_ctx);
2785         /*
2786          * If tracing is off, but we have triggers enabled
2787          * we still need to look at the event data. Use the temp_buffer
2788          * to store the trace event for the trigger to use. It's recursion
2789          * safe and will not be recorded anywhere.
2790          */
2791         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2792                 *current_rb = temp_buffer;
2793                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2794                                                     trace_ctx);
2795         }
2796         return entry;
2797 }
2798 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
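/*
 * For illustration only: how the simulated event described in the comment
 * above could be inspected, assuming the type_len == 0 layout where
 * array[0] holds the full length and the payload follows it. The helper
 * name is made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static void example_peek_buffered_event(struct ring_buffer_event *entry)
{
	unsigned int len = entry->array[0];		/* full data length */
	void *data = ring_buffer_event_data(entry);	/* start of payload */

	pr_debug("buffered event holds %u bytes at %p\n", len, data);
}
#endif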
2799
2800 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2801 static DEFINE_MUTEX(tracepoint_printk_mutex);
2802
2803 static void output_printk(struct trace_event_buffer *fbuffer)
2804 {
2805         struct trace_event_call *event_call;
2806         struct trace_event_file *file;
2807         struct trace_event *event;
2808         unsigned long flags;
2809         struct trace_iterator *iter = tracepoint_print_iter;
2810
2811         /* We should never get here if iter is NULL */
2812         if (WARN_ON_ONCE(!iter))
2813                 return;
2814
2815         event_call = fbuffer->trace_file->event_call;
2816         if (!event_call || !event_call->event.funcs ||
2817             !event_call->event.funcs->trace)
2818                 return;
2819
2820         file = fbuffer->trace_file;
2821         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2822             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2823              !filter_match_preds(file->filter, fbuffer->entry)))
2824                 return;
2825
2826         event = &fbuffer->trace_file->event_call->event;
2827
2828         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2829         trace_seq_init(&iter->seq);
2830         iter->ent = fbuffer->entry;
2831         event_call->event.funcs->trace(iter, 0, event);
2832         trace_seq_putc(&iter->seq, 0);
2833         printk("%s", iter->seq.buffer);
2834
2835         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2836 }
2837
2838 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2839                              void *buffer, size_t *lenp,
2840                              loff_t *ppos)
2841 {
2842         int save_tracepoint_printk;
2843         int ret;
2844
2845         mutex_lock(&tracepoint_printk_mutex);
2846         save_tracepoint_printk = tracepoint_printk;
2847
2848         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2849
2850         /*
2851          * This will force exiting early, as tracepoint_printk
2852          * is always zero when tracepoint_print_iter is not allocated.
2853          */
2854         if (!tracepoint_print_iter)
2855                 tracepoint_printk = 0;
2856
2857         if (save_tracepoint_printk == tracepoint_printk)
2858                 goto out;
2859
2860         if (tracepoint_printk)
2861                 static_key_enable(&tracepoint_printk_key.key);
2862         else
2863                 static_key_disable(&tracepoint_printk_key.key);
2864
2865  out:
2866         mutex_unlock(&tracepoint_printk_mutex);
2867
2868         return ret;
2869 }
2870
2871 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2872 {
2873         if (static_key_false(&tracepoint_printk_key.key))
2874                 output_printk(fbuffer);
2875
2876         if (static_branch_unlikely(&trace_event_exports_enabled))
2877                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2878         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2879                                     fbuffer->event, fbuffer->entry,
2880                                     fbuffer->trace_ctx, fbuffer->regs);
2881 }
2882 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2883
2884 /*
2885  * Skip 3:
2886  *
2887  *   trace_buffer_unlock_commit_regs()
2888  *   trace_event_buffer_commit()
2889  *   trace_event_raw_event_xxx()
2890  */
2891 # define STACK_SKIP 3
2892
2893 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2894                                      struct trace_buffer *buffer,
2895                                      struct ring_buffer_event *event,
2896                                      unsigned int trace_ctx,
2897                                      struct pt_regs *regs)
2898 {
2899         __buffer_unlock_commit(buffer, event);
2900
2901         /*
2902          * If regs is not set, then skip the necessary functions.
2903          * Note, we can still get here via blktrace, wakeup tracer
2904          * and mmiotrace, but that's ok if they lose a function or
2905          * two. They are not that meaningful.
2906          */
2907         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2908         ftrace_trace_userstack(tr, buffer, trace_ctx);
2909 }
2910
2911 /*
2912  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2913  */
2914 void
2915 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2916                                    struct ring_buffer_event *event)
2917 {
2918         __buffer_unlock_commit(buffer, event);
2919 }
2920
2921 void
2922 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2923                parent_ip, unsigned int trace_ctx)
2924 {
2925         struct trace_event_call *call = &event_function;
2926         struct trace_buffer *buffer = tr->array_buffer.buffer;
2927         struct ring_buffer_event *event;
2928         struct ftrace_entry *entry;
2929
2930         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2931                                             trace_ctx);
2932         if (!event)
2933                 return;
2934         entry   = ring_buffer_event_data(event);
2935         entry->ip                       = ip;
2936         entry->parent_ip                = parent_ip;
2937
2938         if (!call_filter_check_discard(call, entry, buffer, event)) {
2939                 if (static_branch_unlikely(&trace_function_exports_enabled))
2940                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2941                 __buffer_unlock_commit(buffer, event);
2942         }
2943 }
2944
2945 #ifdef CONFIG_STACKTRACE
2946
2947 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2948 #define FTRACE_KSTACK_NESTING   4
2949
2950 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2951
2952 struct ftrace_stack {
2953         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2954 };
2955
2956
2957 struct ftrace_stacks {
2958         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2959 };
2960
2961 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2962 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2963
2964 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2965                                  unsigned int trace_ctx,
2966                                  int skip, struct pt_regs *regs)
2967 {
2968         struct trace_event_call *call = &event_kernel_stack;
2969         struct ring_buffer_event *event;
2970         unsigned int size, nr_entries;
2971         struct ftrace_stack *fstack;
2972         struct stack_entry *entry;
2973         int stackidx;
2974
2975         /*
2976          * Add one, for this function and the call to stack_trace_save().
2977          * If regs is set, then these functions will not be in the way.
2978          */
2979 #ifndef CONFIG_UNWINDER_ORC
2980         if (!regs)
2981                 skip++;
2982 #endif
2983
2984         preempt_disable_notrace();
2985
2986         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2987
2988         /* This should never happen. If it does, yell once and skip */
2989         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2990                 goto out;
2991
2992         /*
2993          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2994          * interrupt will either see the value pre increment or post
2995          * increment. If the interrupt happens pre increment it will have
2996          * restored the counter when it returns.  We just need a barrier to
2997          * keep gcc from moving things around.
2998          */
2999         barrier();
3000
3001         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3002         size = ARRAY_SIZE(fstack->calls);
3003
3004         if (regs) {
3005                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3006                                                    size, skip);
3007         } else {
3008                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3009         }
3010
3011         size = nr_entries * sizeof(unsigned long);
3012         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3013                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3014                                     trace_ctx);
3015         if (!event)
3016                 goto out;
3017         entry = ring_buffer_event_data(event);
3018
3019         memcpy(&entry->caller, fstack->calls, size);
3020         entry->size = nr_entries;
3021
3022         if (!call_filter_check_discard(call, entry, buffer, event))
3023                 __buffer_unlock_commit(buffer, event);
3024
3025  out:
3026         /* Again, don't let gcc optimize things here */
3027         barrier();
3028         __this_cpu_dec(ftrace_stack_reserve);
3029         preempt_enable_notrace();
3030
3031 }
3032
3033 static inline void ftrace_trace_stack(struct trace_array *tr,
3034                                       struct trace_buffer *buffer,
3035                                       unsigned int trace_ctx,
3036                                       int skip, struct pt_regs *regs)
3037 {
3038         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3039                 return;
3040
3041         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3042 }
3043
3044 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3045                    int skip)
3046 {
3047         struct trace_buffer *buffer = tr->array_buffer.buffer;
3048
3049         if (rcu_is_watching()) {
3050                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3051                 return;
3052         }
3053
3054         /*
3055          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3056          * but if the above rcu_is_watching() failed, then the NMI
3057          * triggered someplace critical, and rcu_irq_enter() should
3058          * not be called from NMI.
3059          */
3060         if (unlikely(in_nmi()))
3061                 return;
3062
3063         rcu_irq_enter_irqson();
3064         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3065         rcu_irq_exit_irqson();
3066 }
3067
3068 /**
3069  * trace_dump_stack - record a stack back trace in the trace buffer
3070  * @skip: Number of functions to skip (helper handlers)
3071  */
3072 void trace_dump_stack(int skip)
3073 {
3074         if (tracing_disabled || tracing_selftest_running)
3075                 return;
3076
3077 #ifndef CONFIG_UNWINDER_ORC
3078         /* Skip 1 to skip this function. */
3079         skip++;
3080 #endif
3081         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3082                              tracing_gen_ctx(), skip, NULL);
3083 }
3084 EXPORT_SYMBOL_GPL(trace_dump_stack);
3085
3086 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3087 static DEFINE_PER_CPU(int, user_stack_count);
3088
3089 static void
3090 ftrace_trace_userstack(struct trace_array *tr,
3091                        struct trace_buffer *buffer, unsigned int trace_ctx)
3092 {
3093         struct trace_event_call *call = &event_user_stack;
3094         struct ring_buffer_event *event;
3095         struct userstack_entry *entry;
3096
3097         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3098                 return;
3099
3100         /*
3101          * NMIs can not handle page faults, even with fixups.
3102          * Saving the user stack can (and often does) fault.
3103          */
3104         if (unlikely(in_nmi()))
3105                 return;
3106
3107         /*
3108          * prevent recursion, since the user stack tracing may
3109          * trigger other kernel events.
3110          */
3111         preempt_disable();
3112         if (__this_cpu_read(user_stack_count))
3113                 goto out;
3114
3115         __this_cpu_inc(user_stack_count);
3116
3117         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3118                                             sizeof(*entry), trace_ctx);
3119         if (!event)
3120                 goto out_drop_count;
3121         entry   = ring_buffer_event_data(event);
3122
3123         entry->tgid             = current->tgid;
3124         memset(&entry->caller, 0, sizeof(entry->caller));
3125
3126         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3127         if (!call_filter_check_discard(call, entry, buffer, event))
3128                 __buffer_unlock_commit(buffer, event);
3129
3130  out_drop_count:
3131         __this_cpu_dec(user_stack_count);
3132  out:
3133         preempt_enable();
3134 }
3135 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3136 static void ftrace_trace_userstack(struct trace_array *tr,
3137                                    struct trace_buffer *buffer,
3138                                    unsigned int trace_ctx)
3139 {
3140 }
3141 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3142
3143 #endif /* CONFIG_STACKTRACE */
3144
3145 static inline void
3146 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3147                           unsigned long long delta)
3148 {
3149         entry->bottom_delta_ts = delta & U32_MAX;
3150         entry->top_delta_ts = (delta >> 32);
3151 }
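/*
 * The inverse of the helper above, shown for illustration: a reader of a
 * func_repeats_entry would rebuild the 64-bit delta this way. The function
 * name is made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static inline u64 example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}
#endif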
3152
3153 void trace_last_func_repeats(struct trace_array *tr,
3154                              struct trace_func_repeats *last_info,
3155                              unsigned int trace_ctx)
3156 {
3157         struct trace_buffer *buffer = tr->array_buffer.buffer;
3158         struct func_repeats_entry *entry;
3159         struct ring_buffer_event *event;
3160         u64 delta;
3161
3162         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3163                                             sizeof(*entry), trace_ctx);
3164         if (!event)
3165                 return;
3166
3167         delta = ring_buffer_event_time_stamp(buffer, event) -
3168                 last_info->ts_last_call;
3169
3170         entry = ring_buffer_event_data(event);
3171         entry->ip = last_info->ip;
3172         entry->parent_ip = last_info->parent_ip;
3173         entry->count = last_info->count;
3174         func_repeats_set_delta_ts(entry, delta);
3175
3176         __buffer_unlock_commit(buffer, event);
3177 }
3178
3179 /* created for use with alloc_percpu */
3180 struct trace_buffer_struct {
3181         int nesting;
3182         char buffer[4][TRACE_BUF_SIZE];
3183 };
3184
3185 static struct trace_buffer_struct *trace_percpu_buffer;
3186
3187 /*
3188  * This allows for lockless recording.  If we're nested too deeply, then
3189  * this returns NULL.
3190  */
3191 static char *get_trace_buf(void)
3192 {
3193         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3194
3195         if (!buffer || buffer->nesting >= 4)
3196                 return NULL;
3197
3198         buffer->nesting++;
3199
3200         /* Interrupts must see nesting incremented before we use the buffer */
3201         barrier();
3202         return &buffer->buffer[buffer->nesting - 1][0];
3203 }
3204
3205 static void put_trace_buf(void)
3206 {
3207         /* Don't let the decrement of nesting leak before this */
3208         barrier();
3209         this_cpu_dec(trace_percpu_buffer->nesting);
3210 }
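/*
 * A hypothetical caller of the two helpers above, following the pattern
 * used by trace_vbprintk() later in this file: disable preemption, grab a
 * per-CPU nesting slot, format into it, and release it. example_emit() is
 * made up for this sketch.
 */
#if 0	/* illustrative sketch only, not compiled */
static void example_emit(const char *fmt, ...)
{
	char *tbuffer;
	va_list ap;

	preempt_disable_notrace();
	tbuffer = get_trace_buf();
	if (tbuffer) {
		va_start(ap, fmt);
		vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, ap);
		va_end(ap);
		/* ... hand tbuffer off to the ring buffer here ... */
		put_trace_buf();
	}
	preempt_enable_notrace();
}
#endif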
3211
3212 static int alloc_percpu_trace_buffer(void)
3213 {
3214         struct trace_buffer_struct *buffers;
3215
3216         if (trace_percpu_buffer)
3217                 return 0;
3218
3219         buffers = alloc_percpu(struct trace_buffer_struct);
3220         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3221                 return -ENOMEM;
3222
3223         trace_percpu_buffer = buffers;
3224         return 0;
3225 }
3226
3227 static int buffers_allocated;
3228
3229 void trace_printk_init_buffers(void)
3230 {
3231         if (buffers_allocated)
3232                 return;
3233
3234         if (alloc_percpu_trace_buffer())
3235                 return;
3236
3237         /* trace_printk() is for debug use only. Don't use it in production. */
3238
3239         pr_warn("\n");
3240         pr_warn("**********************************************************\n");
3241         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3242         pr_warn("**                                                      **\n");
3243         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3244         pr_warn("**                                                      **\n");
3245         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3246         pr_warn("** unsafe for production use.                           **\n");
3247         pr_warn("**                                                      **\n");
3248         pr_warn("** If you see this message and you are not debugging    **\n");
3249         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3250         pr_warn("**                                                      **\n");
3251         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3252         pr_warn("**********************************************************\n");
3253
3254         /* Expand the buffers to set size */
3255         tracing_update_buffers();
3256
3257         buffers_allocated = 1;
3258
3259         /*
3260          * trace_printk_init_buffers() can be called by modules.
3261          * If that happens, then we need to start cmdline recording
3262          * directly here. If the global_trace.buffer is already
3263          * allocated here, then this was called by module code.
3264          */
3265         if (global_trace.array_buffer.buffer)
3266                 tracing_start_cmdline_record();
3267 }
3268 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3269
3270 void trace_printk_start_comm(void)
3271 {
3272         /* Start tracing comms if trace printk is set */
3273         if (!buffers_allocated)
3274                 return;
3275         tracing_start_cmdline_record();
3276 }
3277
3278 static void trace_printk_start_stop_comm(int enabled)
3279 {
3280         if (!buffers_allocated)
3281                 return;
3282
3283         if (enabled)
3284                 tracing_start_cmdline_record();
3285         else
3286                 tracing_stop_cmdline_record();
3287 }
3288
3289 /**
3290  * trace_vbprintk - write binary msg to tracing buffer
3291  * @ip:    The address of the caller
3292  * @fmt:   The string format to write to the buffer
3293  * @args:  Arguments for @fmt
3294  */
3295 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3296 {
3297         struct trace_event_call *call = &event_bprint;
3298         struct ring_buffer_event *event;
3299         struct trace_buffer *buffer;
3300         struct trace_array *tr = &global_trace;
3301         struct bprint_entry *entry;
3302         unsigned int trace_ctx;
3303         char *tbuffer;
3304         int len = 0, size;
3305
3306         if (unlikely(tracing_selftest_running || tracing_disabled))
3307                 return 0;
3308
3309         /* Don't pollute graph traces with trace_vprintk internals */
3310         pause_graph_tracing();
3311
3312         trace_ctx = tracing_gen_ctx();
3313         preempt_disable_notrace();
3314
3315         tbuffer = get_trace_buf();
3316         if (!tbuffer) {
3317                 len = 0;
3318                 goto out_nobuffer;
3319         }
3320
3321         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3322
3323         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3324                 goto out_put;
3325
3326         size = sizeof(*entry) + sizeof(u32) * len;
3327         buffer = tr->array_buffer.buffer;
3328         ring_buffer_nest_start(buffer);
3329         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3330                                             trace_ctx);
3331         if (!event)
3332                 goto out;
3333         entry = ring_buffer_event_data(event);
3334         entry->ip                       = ip;
3335         entry->fmt                      = fmt;
3336
3337         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3338         if (!call_filter_check_discard(call, entry, buffer, event)) {
3339                 __buffer_unlock_commit(buffer, event);
3340                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3341         }
3342
3343 out:
3344         ring_buffer_nest_end(buffer);
3345 out_put:
3346         put_trace_buf();
3347
3348 out_nobuffer:
3349         preempt_enable_notrace();
3350         unpause_graph_tracing();
3351
3352         return len;
3353 }
3354 EXPORT_SYMBOL_GPL(trace_vbprintk);
3355
3356 __printf(3, 0)
3357 static int
3358 __trace_array_vprintk(struct trace_buffer *buffer,
3359                       unsigned long ip, const char *fmt, va_list args)
3360 {
3361         struct trace_event_call *call = &event_print;
3362         struct ring_buffer_event *event;
3363         int len = 0, size;
3364         struct print_entry *entry;
3365         unsigned int trace_ctx;
3366         char *tbuffer;
3367
3368         if (tracing_disabled || tracing_selftest_running)
3369                 return 0;
3370
3371         /* Don't pollute graph traces with trace_vprintk internals */
3372         pause_graph_tracing();
3373
3374         trace_ctx = tracing_gen_ctx();
3375         preempt_disable_notrace();
3376
3377
3378         tbuffer = get_trace_buf();
3379         if (!tbuffer) {
3380                 len = 0;
3381                 goto out_nobuffer;
3382         }
3383
3384         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3385
3386         size = sizeof(*entry) + len + 1;
3387         ring_buffer_nest_start(buffer);
3388         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3389                                             trace_ctx);
3390         if (!event)
3391                 goto out;
3392         entry = ring_buffer_event_data(event);
3393         entry->ip = ip;
3394
3395         memcpy(&entry->buf, tbuffer, len + 1);
3396         if (!call_filter_check_discard(call, entry, buffer, event)) {
3397                 __buffer_unlock_commit(buffer, event);
3398                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3399         }
3400
3401 out:
3402         ring_buffer_nest_end(buffer);
3403         put_trace_buf();
3404
3405 out_nobuffer:
3406         preempt_enable_notrace();
3407         unpause_graph_tracing();
3408
3409         return len;
3410 }
3411
3412 __printf(3, 0)
3413 int trace_array_vprintk(struct trace_array *tr,
3414                         unsigned long ip, const char *fmt, va_list args)
3415 {
3416         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3417 }
3418
3419 /**
3420  * trace_array_printk - Print a message to a specific instance
3421  * @tr: The instance trace_array descriptor
3422  * @ip: The instruction pointer that this is called from.
3423  * @fmt: The format to print (printf format)
3424  *
3425  * If a subsystem sets up its own instance, it may use this function to
3426  * printk strings into its tracing instance buffer. Note, this function
3427  * will not write into the top level buffer (use trace_printk() for
3428  * that), as the top level buffer should only contain events that can
3429  * be individually disabled. trace_printk() is only used for debugging
3430  * a kernel, and should never be incorporated into normal (production)
3431  * use.
3432  *
3433  * trace_array_printk() can be used, as it will not add noise to the
3434  * top level tracing buffer.
3435  *
3436  * Note, trace_array_init_printk() must be called on @tr before this
3437  * can be used.
3438  */
3439 __printf(3, 0)
3440 int trace_array_printk(struct trace_array *tr,
3441                        unsigned long ip, const char *fmt, ...)
3442 {
3443         int ret;
3444         va_list ap;
3445
3446         if (!tr)
3447                 return -ENOENT;
3448
3449         /* This is only allowed for created instances */
3450         if (tr == &global_trace)
3451                 return 0;
3452
3453         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3454                 return 0;
3455
3456         va_start(ap, fmt);
3457         ret = trace_array_vprintk(tr, ip, fmt, ap);
3458         va_end(ap);
3459         return ret;
3460 }
3461 EXPORT_SYMBOL_GPL(trace_array_printk);
3462
3463 /**
3464  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3465  * @tr: The trace array to initialize the buffers for
3466  *
3467  * As trace_array_printk() only writes into instances, such calls are
3468  * OK to leave in the kernel (unlike trace_printk()). This needs to be
3469  * called before trace_array_printk() can be used on a trace_array.
3470  */
3471 int trace_array_init_printk(struct trace_array *tr)
3472 {
3473         if (!tr)
3474                 return -ENOENT;
3475
3476         /* This is only allowed for created instances */
3477         if (tr == &global_trace)
3478                 return -EINVAL;
3479
3480         return alloc_percpu_trace_buffer();
3481 }
3482 EXPORT_SYMBOL_GPL(trace_array_init_printk);
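
/*
 * A minimal usage sketch (not from an in-tree user; the instance name
 * "my_subsys" and the value being printed are illustrative only):
 *
 *      struct trace_array *tr;
 *
 *      tr = trace_array_get_by_name("my_subsys");
 *      if (tr && !trace_array_init_printk(tr))
 *              trace_array_printk(tr, _THIS_IP_, "value=%d\n", value);
 *
 * The messages can then be read from the instance's trace file under
 * tracefs (e.g. instances/my_subsys/trace).
 */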
3483
3484 __printf(3, 4)
3485 int trace_array_printk_buf(struct trace_buffer *buffer,
3486                            unsigned long ip, const char *fmt, ...)
3487 {
3488         int ret;
3489         va_list ap;
3490
3491         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3492                 return 0;
3493
3494         va_start(ap, fmt);
3495         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3496         va_end(ap);
3497         return ret;
3498 }
3499
3500 __printf(2, 0)
3501 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3502 {
3503         return trace_array_vprintk(&global_trace, ip, fmt, args);
3504 }
3505 EXPORT_SYMBOL_GPL(trace_vprintk);
3506
3507 static void trace_iterator_increment(struct trace_iterator *iter)
3508 {
3509         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3510
3511         iter->idx++;
3512         if (buf_iter)
3513                 ring_buffer_iter_advance(buf_iter);
3514 }
3515
3516 static struct trace_entry *
3517 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3518                 unsigned long *lost_events)
3519 {
3520         struct ring_buffer_event *event;
3521         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3522
3523         if (buf_iter) {
3524                 event = ring_buffer_iter_peek(buf_iter, ts);
3525                 if (lost_events)
3526                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3527                                 (unsigned long)-1 : 0;
3528         } else {
3529                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3530                                          lost_events);
3531         }
3532
3533         if (event) {
3534                 iter->ent_size = ring_buffer_event_length(event);
3535                 return ring_buffer_event_data(event);
3536         }
3537         iter->ent_size = 0;
3538         return NULL;
3539 }
3540
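/*
 * Look at the next entry on every CPU buffer (or only on iter->cpu_file
 * if one is set) and return the entry with the earliest timestamp, along
 * with its CPU, its lost-event count and its timestamp.
 */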
3541 static struct trace_entry *
3542 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3543                   unsigned long *missing_events, u64 *ent_ts)
3544 {
3545         struct trace_buffer *buffer = iter->array_buffer->buffer;
3546         struct trace_entry *ent, *next = NULL;
3547         unsigned long lost_events = 0, next_lost = 0;
3548         int cpu_file = iter->cpu_file;
3549         u64 next_ts = 0, ts;
3550         int next_cpu = -1;
3551         int next_size = 0;
3552         int cpu;
3553
3554         /*
3555          * If we are in a per_cpu trace file, don't bother iterating over
3556          * all the CPUs; peek at that CPU's buffer directly.
3557          */
3558         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3559                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3560                         return NULL;
3561                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3562                 if (ent_cpu)
3563                         *ent_cpu = cpu_file;
3564
3565                 return ent;
3566         }
3567
3568         for_each_tracing_cpu(cpu) {
3569
3570                 if (ring_buffer_empty_cpu(buffer, cpu))
3571                         continue;
3572
3573                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3574
3575                 /*
3576                  * Pick the entry with the smallest timestamp:
3577                  */
3578                 if (ent && (!next || ts < next_ts)) {
3579                         next = ent;
3580                         next_cpu = cpu;
3581                         next_ts = ts;
3582                         next_lost = lost_events;
3583                         next_size = iter->ent_size;
3584                 }
3585         }
3586
3587         iter->ent_size = next_size;
3588
3589         if (ent_cpu)
3590                 *ent_cpu = next_cpu;
3591
3592         if (ent_ts)
3593                 *ent_ts = next_ts;
3594
3595         if (missing_events)
3596                 *missing_events = next_lost;
3597
3598         return next;
3599 }
3600
3601 #define STATIC_FMT_BUF_SIZE     128
3602 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3603
3604 static char *trace_iter_expand_format(struct trace_iterator *iter)
3605 {
3606         char *tmp;
3607
3608         /*
3609          * iter->tr is NULL when used with tp_printk, which means this
3610          * can get called where it is not safe to call krealloc().
3611          */
3612         if (!iter->tr || iter->fmt == static_fmt_buf)
3613                 return NULL;
3614
3615         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3616                        GFP_KERNEL);
3617         if (tmp) {
3618                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3619                 iter->fmt = tmp;
3620         }
3621
3622         return tmp;
3623 }
3624
3625 /* Returns true if the string is safe to dereference from an event */
3626 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3627 {
3628         unsigned long addr = (unsigned long)str;
3629         struct trace_event *trace_event;
3630         struct trace_event_call *event;
3631
3632         /* OK if part of the event data */
3633         if ((addr >= (unsigned long)iter->ent) &&
3634             (addr < (unsigned long)iter->ent + iter->ent_size))
3635                 return true;
3636
3637         /* OK if part of the temp seq buffer */
3638         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3639             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3640                 return true;
3641
3642         /* Core rodata cannot be freed */
3643         if (is_kernel_rodata(addr))
3644                 return true;
3645
3646         if (trace_is_tracepoint_string(str))
3647                 return true;
3648
3649         /*
3650          * Now this could be a module event, referencing core module
3651          * data, which is OK.
3652          */
3653         if (!iter->ent)
3654                 return false;
3655
3656         trace_event = ftrace_find_event(iter->ent->type);
3657         if (!trace_event)
3658                 return false;
3659
3660         event = container_of(trace_event, struct trace_event_call, event);
3661         if (!event->mod)
3662                 return false;
3663
3664         /* Would rather have rodata, but this will suffice */
3665         if (within_module_core(addr, event->mod))
3666                 return true;
3667
3668         return false;
3669 }
3670
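/*
 * NUL-terminate and return the contents of the seq buffer; used to dump
 * the partially written line in the unsafe-string warning below.
 */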
3671 static const char *show_buffer(struct trace_seq *s)
3672 {
3673         struct seq_buf *seq = &s->seq;
3674
3675         seq_buf_terminate(seq);
3676
3677         return seq->buffer;
3678 }
3679
3680 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3681
3682 static int test_can_verify_check(const char *fmt, ...)
3683 {
3684         char buf[16];
3685         va_list ap;
3686         int ret;
3687
3688         /*
3689          * The verifier depends on vsnprintf() modifying the va_list passed
3690          * to it, i.e. on the va_list being passed by reference. Some
3691          * architectures (like x86_32) pass it by value, so vsnprintf() does
3692          * not modify the caller's va_list, and the verifier would then need
3693          * to understand every value that vsnprintf() can consume. In that
3694          * case the caller's va_list is not advanced, the va_arg() below sees
3695          * the first test argument (zero) again, and the verifier is disabled.
3696          */
3697         va_start(ap, fmt);
3698         vsnprintf(buf, 16, "%d", ap);
3699         ret = va_arg(ap, int);
3700         va_end(ap);
3701
3702         return ret;
3703 }
3704
3705 static void test_can_verify(void)
3706 {
3707         if (!test_can_verify_check("%d %d", 0, 1)) {
3708                 pr_info("trace event string verifier disabled\n");
3709                 static_branch_inc(&trace_no_verify);
3710         }
3711 }
3712
3713 /**
3714  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3715  * @iter: The iterator that holds the seq buffer and the event being printed
3716  * @fmt: The format used to print the event
3717  * @ap: The va_list holding the data to print from @fmt.
3718  *
3719  * This writes the data into the @iter->seq buffer using the data from
3720  * @fmt and @ap. If the format has a %s, then the source of the string
3721  * is examined to make sure it is safe to print, otherwise it will
3722  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3723  * pointer.
3724  */
3725 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3726                          va_list ap)
3727 {
3728         const char *p = fmt;
3729         const char *str;
3730         int i, j;
3731
3732         if (WARN_ON_ONCE(!fmt))
3733                 return;
3734
3735         if (static_branch_unlikely(&trace_no_verify))
3736                 goto print;
3737
3738         /* Don't bother checking when doing a ftrace_dump() */
3739         if (iter->fmt == static_fmt_buf)
3740                 goto print;
3741
3742         while (*p) {
3743                 bool star = false;
3744                 int len = 0;
3745
3746                 j = 0;
3747
3748                 /* We only care about %s and variants */
3749                 for (i = 0; p[i]; i++) {
3750                         if (i + 1 >= iter->fmt_size) {
3751                                 /*
3752                                  * If we can't expand the copy buffer,
3753                                  * just print it.
3754                                  */
3755                                 if (!trace_iter_expand_format(iter))
3756                                         goto print;
3757                         }
3758
3759                         if (p[i] == '\\' && p[i+1]) {
3760                                 i++;
3761                                 continue;
3762                         }
3763                         if (p[i] == '%') {
3764                                 /* Need to test cases like %08.*s */
3765                                 for (j = 1; p[i+j]; j++) {
3766                                         if (isdigit(p[i+j]) ||
3767                                             p[i+j] == '.')
3768                                                 continue;
3769                                         if (p[i+j] == '*') {
3770                                                 star = true;
3771                                                 continue;
3772                                         }
3773                                         break;
3774                                 }
3775                                 if (p[i+j] == 's')
3776                                         break;
3777                                 star = false;
3778                         }
3779                         j = 0;
3780                 }
3781                 /* If no %s found then just print normally */
3782                 if (!p[i])
3783                         break;
3784
3785                 /* Copy up to the %s, and print that */
3786                 strncpy(iter->fmt, p, i);
3787                 iter->fmt[i] = '\0';
3788                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3789
3790                 if (star)
3791                         len = va_arg(ap, int);
3792
3793                 /* The ap now points to the string data of the %s */
3794                 str = va_arg(ap, const char *);
3795
3796                 /*
3797                  * If you hit this warning, it is likely that the
3798                  * trace event in question used %s on a string that
3799                  * was saved at the time of the event, but may not be
3800                  * around when the trace is read. Use __string(),
3801                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3802                  * instead. See samples/trace_events/trace-events-sample.h
3803                  * for reference.
3804                  */
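                /*
                 * A rough sketch of the safe pattern inside a TRACE_EVENT()
                 * (the field name "name" and source "src" are placeholders,
                 * see the sample file referenced above):
                 *
                 *      TP_STRUCT__entry(
                 *              __string(name, src)
                 *      ),
                 *      TP_fast_assign(
                 *              __assign_str(name, src);
                 *      ),
                 *      TP_printk("%s", __get_str(name))
                 */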
3805                 if (WARN_ONCE(!trace_safe_str(iter, str),
3806                               "fmt: '%s' current_buffer: '%s'",
3807                               fmt, show_buffer(&iter->seq))) {
3808                         int ret;
3809
3810                         /* Try to safely read the string */
3811                         if (star) {
3812                                 if (len + 1 > iter->fmt_size)
3813                                         len = iter->fmt_size - 1;
3814                                 if (len < 0)
3815                                         len = 0;
3816                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3817                                 iter->fmt[len] = 0;
3818                                 star = false;
3819                         } else {
3820                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3821                                                                   iter->fmt_size);
3822                         }
3823                         if (ret < 0)
3824                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3825                         else
3826                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3827                                                  str, iter->fmt);
3828                         str = "[UNSAFE-MEMORY]";
3829                         strcpy(iter->fmt, "%s");
3830                 } else {
3831                         strncpy(iter->fmt, p + i, j + 1);
3832                         iter->fmt[j+1] = '\0';
3833                 }
3834                 if (star)
3835                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3836                 else
3837                         trace_seq_printf(&iter->seq, iter->fmt, str);
3838
3839                 p += i + j + 1;
3840         }
3841  print:
3842         if (*p)
3843                 trace_seq_vprintf(&iter->seq, p, ap);
3844 }
3845
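/*
 * Unless pointer hashing is in effect (TRACE_ITER_HASH_PTR set, or no
 * trace_array at all), rewrite the event's format string so that a bare
 * %p becomes %px and real addresses are printed instead of hashed
 * values. iter->fmt is used as scratch space for the rewritten format.
 */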
3846 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3847 {
3848         const char *p, *new_fmt;
3849         char *q;
3850
3851         if (WARN_ON_ONCE(!fmt))
3852                 return fmt;
3853
3854         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3855                 return fmt;
3856
3857         p = fmt;
3858         new_fmt = q = iter->fmt;
3859         while (*p) {
3860                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3861                         if (!trace_iter_expand_format(iter))
3862                                 return fmt;
3863
3864                         q += iter->fmt - new_fmt;
3865                         new_fmt = iter->fmt;
3866                 }
3867
3868                 *q++ = *p++;
3869
3870                 /* Replace %p with %px */
3871                 if (p[-1] == '%') {
3872                         if (p[0] == '%') {
3873                                 *q++ = *p++;
3874                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3875                                 *q++ = *p++;
3876                                 *q++ = 'x';
3877                         }
3878                 }
3879         }
3880         *q = '\0';
3881
3882         return new_fmt;
3883 }
3884
3885 #define STATIC_TEMP_BUF_SIZE    128
3886 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3887
3888 /* Find the next real entry, without updating the iterator itself */
3889 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3890                                           int *ent_cpu, u64 *ent_ts)
3891 {
3892         /* __find_next_entry will reset ent_size */
3893         int ent_size = iter->ent_size;
3894         struct trace_entry *entry;
3895
3896         /*
3897          * If called from ftrace_dump(), then the iter->temp buffer
3898          * will be the static_temp_buf and not created from kmalloc.
3899          * If the entry size is greater than the buffer, we cannot
3900          * save it. Just return NULL in that case. This is only
3901          * used to add markers when two consecutive events' time
3902          * stamps have a large delta. See trace_print_lat_context().
3903          */
3904         if (iter->temp == static_temp_buf &&
3905             STATIC_TEMP_BUF_SIZE < ent_size)
3906                 return NULL;
3907
3908         /*
3909          * __find_next_entry() may call peek_next_entry(), which may
3910          * call ring_buffer_peek(), which can make the contents of
3911          * iter->ent undefined. Need to copy iter->ent now.
3912          */
3913         if (iter->ent && iter->ent != iter->temp) {
3914                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3915                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3916                         void *temp;
3917                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3918                         if (!temp)
3919                                 return NULL;
3920                         kfree(iter->temp);
3921                         iter->temp = temp;
3922                         iter->temp_size = iter->ent_size;
3923                 }
3924                 memcpy(iter->temp, iter->ent, iter->ent_size);
3925                 iter->ent = iter->temp;
3926         }
3927         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3928         /* Put back the original ent_size */
3929         iter->ent_size = ent_size;
3930
3931         return entry;
3932 }
3933
3934 /* Find the next real entry, and increment the iterator to the next entry */
3935 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3936 {
3937         iter->ent = __find_next_entry(iter, &iter->cpu,
3938                                       &iter->lost_events, &iter->ts);
3939
3940         if (iter->ent)
3941                 trace_iterator_increment(iter);
3942
3943         return iter->ent ? iter : NULL;
3944 }
3945
3946 static void trace_consume(struct trace_iterator *iter)
3947 {
3948         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3949                             &iter->lost_events);
3950 }
3951
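/*
 * seq_file ->next() callback: advance the iterator to the entry at
 * position *pos. Returns the iterator itself as the "entry", or NULL if
 * the buffers run out of entries or *pos moves backwards.
 */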
3952 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3953 {
3954         struct trace_iterator *iter = m->private;
3955         int i = (int)*pos;
3956         void *ent;
3957
3958         WARN_ON_ONCE(iter->leftover);
3959
3960         (*pos)++;
3961
3962         /* can't go backwards */
3963         if (iter->idx > i)
3964                 return NULL;
3965
3966         if (iter->idx < 0)
3967                 ent = trace_find_next_entry_inc(iter);
3968         else
3969                 ent = iter;
3970
3971         while (ent && iter->idx < i)
3972                 ent = trace_find_next_entry_inc(iter);
3973
3974         iter->pos = *pos;
3975
3976         return ent;
3977 }
3978
3979 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3980 {
3981         struct ring_buffer_iter *buf_iter;
3982         unsigned long entries = 0;
3983         u64 ts;
3984
3985         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3986
3987         buf_iter = trace_buffer_iter(iter, cpu);
3988         if (!buf_iter)
3989                 return;
3990
3991         ring_buffer_iter_reset(buf_iter);
3992
3993         /*
3994          * With the max latency tracers, it is possible that a reset
3995          * never took place on a CPU. This is evident when a timestamp
3996          * is before the start of the buffer.
3997          */
3998         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3999                 if (ts >= iter->array_buffer->time_start)
4000                         break;
4001                 entries++;
4002                 ring_buffer_iter_advance(buf_iter);
4003         }
4004
4005         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4006 }
4007
4008 /*
4009  * The current tracer is copied to avoid taking a global lock
4010  * all around.
4011  */
4012 static void *s_start(struct seq_file *m, loff_t *pos)
4013 {
4014         struct trace_iterator *iter = m->private;
4015         struct trace_array *tr = iter->tr;
4016         int cpu_file = iter->cpu_file;
4017         void *p = NULL;
4018         loff_t l = 0;
4019         int cpu;
4020
4021         /*
4022          * Copy the tracer to avoid using a global lock all around.
4023          * iter->trace is a copy of current_trace, so the name pointer
4024          * can be compared instead of using strcmp(), as iter->trace->name
4025          * will point to the same string as current_trace->name.
4026          */
4027         mutex_lock(&trace_types_lock);
4028         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4029                 *iter->trace = *tr->current_trace;
4030         mutex_unlock(&trace_types_lock);
4031
4032 #ifdef CONFIG_TRACER_MAX_TRACE
4033         if (iter->snapshot && iter->trace->use_max_tr)
4034                 return ERR_PTR(-EBUSY);
4035 #endif
4036
4037         if (!iter->snapshot)
4038                 atomic_inc(&trace_record_taskinfo_disabled);
4039
4040         if (*pos != iter->pos) {
4041                 iter->ent = NULL;
4042                 iter->cpu = 0;
4043                 iter->idx = -1;
4044
4045                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4046                         for_each_tracing_cpu(cpu)
4047                                 tracing_iter_reset(iter, cpu);
4048                 } else
4049                         tracing_iter_reset(iter, cpu_file);
4050
4051                 iter->leftover = 0;
4052                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4053                         ;
4054
4055         } else {
4056                 /*
4057                  * If we overflowed the seq_file before, then we want
4058                  * to just reuse the trace_seq buffer again.
4059                  */
4060                 if (iter->leftover)
4061                         p = iter;
4062                 else {
4063                         l = *pos - 1;
4064                         p = s_next(m, p, &l);
4065                 }
4066         }
4067
4068         trace_event_read_lock();
4069         trace_access_lock(cpu_file);
4070         return p;
4071 }
4072
4073 static void s_stop(struct seq_file *m, void *p)
4074 {
4075         struct trace_iterator *iter = m->private;
4076
4077 #ifdef CONFIG_TRACER_MAX_TRACE
4078         if (iter->snapshot && iter->trace->use_max_tr)
4079                 return;
4080 #endif
4081
4082         if (!iter->snapshot)
4083                 atomic_dec(&trace_record_taskinfo_disabled);
4084
4085         trace_access_unlock(iter->cpu_file);
4086         trace_event_read_unlock();
4087 }
4088
4089 static void
4090 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4091                       unsigned long *entries, int cpu)
4092 {
4093         unsigned long count;
4094
4095         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4096         /*
4097          * If this buffer has skipped entries, then we hold all
4098          * entries for the trace and we need to ignore the
4099          * ones before the time stamp.
4100          */
4101         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4102                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4103                 /* total is the same as the entries */
4104                 *total = count;
4105         } else
4106                 *total = count +
4107                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4108         *entries = count;
4109 }
4110
4111 static void
4112 get_total_entries(struct array_buffer *buf,
4113                   unsigned long *total, unsigned long *entries)
4114 {
4115         unsigned long t, e;
4116         int cpu;
4117
4118         *total = 0;
4119         *entries = 0;
4120
4121         for_each_tracing_cpu(cpu) {
4122                 get_total_entries_cpu(buf, &t, &e, cpu);
4123                 *total += t;
4124                 *entries += e;
4125         }
4126 }
4127
4128 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4129 {
4130         unsigned long total, entries;
4131
4132         if (!tr)
4133                 tr = &global_trace;
4134
4135         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4136
4137         return entries;
4138 }
4139
4140 unsigned long trace_total_entries(struct trace_array *tr)
4141 {
4142         unsigned long total, entries;
4143
4144         if (!tr)
4145                 tr = &global_trace;
4146
4147         get_total_entries(&tr->array_buffer, &total, &entries);
4148
4149         return entries;
4150 }
4151
4152 static void print_lat_help_header(struct seq_file *m)
4153 {
4154         seq_puts(m, "#                    _------=> CPU#            \n"
4155                     "#                   / _-----=> irqs-off        \n"
4156                     "#                  | / _----=> need-resched    \n"
4157                     "#                  || / _---=> hardirq/softirq \n"
4158                     "#                  ||| / _--=> preempt-depth   \n"
4159                     "#                  |||| /     delay            \n"
4160                     "#  cmd     pid     ||||| time  |   caller      \n"
4161                     "#     \\   /        |||||  \\    |   /         \n");
4162 }
4163
4164 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4165 {
4166         unsigned long total;
4167         unsigned long entries;
4168
4169         get_total_entries(buf, &total, &entries);
4170         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4171                    entries, total, num_online_cpus());
4172         seq_puts(m, "#\n");
4173 }
4174
4175 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4176                                    unsigned int flags)
4177 {
4178         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4179
4180         print_event_info(buf, m);
4181
4182         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4183         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4184 }
4185
4186 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4187                                        unsigned int flags)
4188 {
4189         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4190         const char *space = "            ";
4191         int prec = tgid ? 12 : 2;
4192
4193         print_event_info(buf, m);
4194
4195         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4196         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4197         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4198         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4199         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4200         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4201         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4202 }
4203
4204 void
4205 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4206 {
4207         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4208         struct array_buffer *buf = iter->array_buffer;
4209         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4210         struct tracer *type = iter->trace;
4211         unsigned long entries;
4212         unsigned long total;
4213         const char *name = "preemption";
4214
4215         name = type->name;
4216
4217         get_total_entries(buf, &total, &entries);
4218
4219         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4220                    name, UTS_RELEASE);
4221         seq_puts(m, "# -----------------------------------"
4222                  "---------------------------------\n");
4223         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4224                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4225                    nsecs_to_usecs(data->saved_latency),
4226                    entries,
4227                    total,
4228                    buf->cpu,
4229 #if defined(CONFIG_PREEMPT_NONE)
4230                    "server",
4231 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4232                    "desktop",
4233 #elif defined(CONFIG_PREEMPT)
4234                    "preempt",
4235 #elif defined(CONFIG_PREEMPT_RT)
4236                    "preempt_rt",
4237 #else
4238                    "unknown",
4239 #endif
4240                    /* These are reserved for later use */
4241                    0, 0, 0, 0);
4242 #ifdef CONFIG_SMP
4243         seq_printf(m, " #P:%d)\n", num_online_cpus());
4244 #else
4245         seq_puts(m, ")\n");
4246 #endif
4247         seq_puts(m, "#    -----------------\n");
4248         seq_printf(m, "#    | task: %.16s-%d "
4249                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4250                    data->comm, data->pid,
4251                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4252                    data->policy, data->rt_priority);
4253         seq_puts(m, "#    -----------------\n");
4254
4255         if (data->critical_start) {
4256                 seq_puts(m, "#  => started at: ");
4257                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4258                 trace_print_seq(m, &iter->seq);
4259                 seq_puts(m, "\n#  => ended at:   ");
4260                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4261                 trace_print_seq(m, &iter->seq);
4262                 seq_puts(m, "\n#\n");
4263         }
4264
4265         seq_puts(m, "#\n");
4266 }
4267
4268 static void test_cpu_buff_start(struct trace_iterator *iter)
4269 {
4270         struct trace_seq *s = &iter->seq;
4271         struct trace_array *tr = iter->tr;
4272
4273         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4274                 return;
4275
4276         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4277                 return;
4278
4279         if (cpumask_available(iter->started) &&
4280             cpumask_test_cpu(iter->cpu, iter->started))
4281                 return;
4282
4283         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4284                 return;
4285
4286         if (cpumask_available(iter->started))
4287                 cpumask_set_cpu(iter->cpu, iter->started);
4288
4289         /* Don't print started cpu buffer for the first entry of the trace */
4290         if (iter->idx > 1)
4291                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4292                                 iter->cpu);
4293 }
4294
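/*
 * Default human-readable output for a single entry: print the context
 * columns if requested, then hand the entry to its registered
 * trace_event ->trace() callback.
 */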
4295 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4296 {
4297         struct trace_array *tr = iter->tr;
4298         struct trace_seq *s = &iter->seq;
4299         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4300         struct trace_entry *entry;
4301         struct trace_event *event;
4302
4303         entry = iter->ent;
4304
4305         test_cpu_buff_start(iter);
4306
4307         event = ftrace_find_event(entry->type);
4308
4309         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4310                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4311                         trace_print_lat_context(iter);
4312                 else
4313                         trace_print_context(iter);
4314         }
4315
4316         if (trace_seq_has_overflowed(s))
4317                 return TRACE_TYPE_PARTIAL_LINE;
4318
4319         if (event)
4320                 return event->funcs->trace(iter, sym_flags, event);
4321
4322         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4323
4324         return trace_handle_return(s);
4325 }
4326
4327 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4328 {
4329         struct trace_array *tr = iter->tr;
4330         struct trace_seq *s = &iter->seq;
4331         struct trace_entry *entry;
4332         struct trace_event *event;
4333
4334         entry = iter->ent;
4335
4336         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4337                 trace_seq_printf(s, "%d %d %llu ",
4338                                  entry->pid, iter->cpu, iter->ts);
4339
4340         if (trace_seq_has_overflowed(s))
4341                 return TRACE_TYPE_PARTIAL_LINE;
4342
4343         event = ftrace_find_event(entry->type);
4344         if (event)
4345                 return event->funcs->raw(iter, 0, event);
4346
4347         trace_seq_printf(s, "%d ?\n", entry->type);
4348
4349         return trace_handle_return(s);
4350 }
4351
4352 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4353 {
4354         struct trace_array *tr = iter->tr;
4355         struct trace_seq *s = &iter->seq;
4356         unsigned char newline = '\n';
4357         struct trace_entry *entry;
4358         struct trace_event *event;
4359
4360         entry = iter->ent;
4361
4362         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4363                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4364                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4365                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4366                 if (trace_seq_has_overflowed(s))
4367                         return TRACE_TYPE_PARTIAL_LINE;
4368         }
4369
4370         event = ftrace_find_event(entry->type);
4371         if (event) {
4372                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4373                 if (ret != TRACE_TYPE_HANDLED)
4374                         return ret;
4375         }
4376
4377         SEQ_PUT_FIELD(s, newline);
4378
4379         return trace_handle_return(s);
4380 }
4381
4382 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4383 {
4384         struct trace_array *tr = iter->tr;
4385         struct trace_seq *s = &iter->seq;
4386         struct trace_entry *entry;
4387         struct trace_event *event;
4388
4389         entry = iter->ent;
4390
4391         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4392                 SEQ_PUT_FIELD(s, entry->pid);
4393                 SEQ_PUT_FIELD(s, iter->cpu);
4394                 SEQ_PUT_FIELD(s, iter->ts);
4395                 if (trace_seq_has_overflowed(s))
4396                         return TRACE_TYPE_PARTIAL_LINE;
4397         }
4398
4399         event = ftrace_find_event(entry->type);
4400         return event ? event->funcs->binary(iter, 0, event) :
4401                 TRACE_TYPE_HANDLED;
4402 }
4403
4404 int trace_empty(struct trace_iterator *iter)
4405 {
4406         struct ring_buffer_iter *buf_iter;
4407         int cpu;
4408
4409         /* If we are looking at one CPU buffer, only check that one */
4410         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4411                 cpu = iter->cpu_file;
4412                 buf_iter = trace_buffer_iter(iter, cpu);
4413                 if (buf_iter) {
4414                         if (!ring_buffer_iter_empty(buf_iter))
4415                                 return 0;
4416                 } else {
4417                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4418                                 return 0;
4419                 }
4420                 return 1;
4421         }
4422
4423         for_each_tracing_cpu(cpu) {
4424                 buf_iter = trace_buffer_iter(iter, cpu);
4425                 if (buf_iter) {
4426                         if (!ring_buffer_iter_empty(buf_iter))
4427                                 return 0;
4428                 } else {
4429                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4430                                 return 0;
4431                 }
4432         }
4433
4434         return 1;
4435 }
4436
4437 /*  Called with trace_event_read_lock() held. */
4438 enum print_line_t print_trace_line(struct trace_iterator *iter)
4439 {
4440         struct trace_array *tr = iter->tr;
4441         unsigned long trace_flags = tr->trace_flags;
4442         enum print_line_t ret;
4443
4444         if (iter->lost_events) {
4445                 if (iter->lost_events == (unsigned long)-1)
4446                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4447                                          iter->cpu);
4448                 else
4449                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4450                                          iter->cpu, iter->lost_events);
4451                 if (trace_seq_has_overflowed(&iter->seq))
4452                         return TRACE_TYPE_PARTIAL_LINE;
4453         }
4454
4455         if (iter->trace && iter->trace->print_line) {
4456                 ret = iter->trace->print_line(iter);
4457                 if (ret != TRACE_TYPE_UNHANDLED)
4458                         return ret;
4459         }
4460
4461         if (iter->ent->type == TRACE_BPUTS &&
4462                         trace_flags & TRACE_ITER_PRINTK &&
4463                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4464                 return trace_print_bputs_msg_only(iter);
4465
4466         if (iter->ent->type == TRACE_BPRINT &&
4467                         trace_flags & TRACE_ITER_PRINTK &&
4468                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4469                 return trace_print_bprintk_msg_only(iter);
4470
4471         if (iter->ent->type == TRACE_PRINT &&
4472                         trace_flags & TRACE_ITER_PRINTK &&
4473                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4474                 return trace_print_printk_msg_only(iter);
4475
4476         if (trace_flags & TRACE_ITER_BIN)
4477                 return print_bin_fmt(iter);
4478
4479         if (trace_flags & TRACE_ITER_HEX)
4480                 return print_hex_fmt(iter);
4481
4482         if (trace_flags & TRACE_ITER_RAW)
4483                 return print_raw_fmt(iter);
4484
4485         return print_trace_fmt(iter);
4486 }
4487
4488 void trace_latency_header(struct seq_file *m)
4489 {
4490         struct trace_iterator *iter = m->private;
4491         struct trace_array *tr = iter->tr;
4492
4493         /* print nothing if the buffers are empty */
4494         if (trace_empty(iter))
4495                 return;
4496
4497         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4498                 print_trace_header(m, iter);
4499
4500         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4501                 print_lat_help_header(m);
4502 }
4503
4504 void trace_default_header(struct seq_file *m)
4505 {
4506         struct trace_iterator *iter = m->private;
4507         struct trace_array *tr = iter->tr;
4508         unsigned long trace_flags = tr->trace_flags;
4509
4510         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4511                 return;
4512
4513         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4514                 /* print nothing if the buffers are empty */
4515                 if (trace_empty(iter))
4516                         return;
4517                 print_trace_header(m, iter);
4518                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4519                         print_lat_help_header(m);
4520         } else {
4521                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4522                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4523                                 print_func_help_header_irq(iter->array_buffer,
4524                                                            m, trace_flags);
4525                         else
4526                                 print_func_help_header(iter->array_buffer, m,
4527                                                        trace_flags);
4528                 }
4529         }
4530 }
4531
4532 static void test_ftrace_alive(struct seq_file *m)
4533 {
4534         if (!ftrace_is_dead())
4535                 return;
4536         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4537                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4538 }
4539
4540 #ifdef CONFIG_TRACER_MAX_TRACE
4541 static void show_snapshot_main_help(struct seq_file *m)
4542 {
4543         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4544                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4545                     "#                      Takes a snapshot of the main buffer.\n"
4546                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4547                     "#                      (Doesn't have to be '2' works with any number that\n"
4548                     "#                      (Doesn't have to be '2', works with any number that\n"
4549 }
4550
4551 static void show_snapshot_percpu_help(struct seq_file *m)
4552 {
4553         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4554 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4555         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4556                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4557 #else
4558         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4559                     "#                     Must use main snapshot file to allocate.\n");
4560 #endif
4561         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4562                     "#                      (Doesn't have to be '2', works with any number that\n"
4563                     "#                       is not a '0' or '1')\n");
4564 }
4565
4566 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4567 {
4568         if (iter->tr->allocated_snapshot)
4569                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4570         else
4571                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4572
4573         seq_puts(m, "# Snapshot commands:\n");
4574         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4575                 show_snapshot_main_help(m);
4576         else
4577                 show_snapshot_percpu_help(m);
4578 }
4579 #else
4580 /* Should never be called */
4581 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4582 #endif
4583
4584 static int s_show(struct seq_file *m, void *v)
4585 {
4586         struct trace_iterator *iter = v;
4587         int ret;
4588
4589         if (iter->ent == NULL) {
4590                 if (iter->tr) {
4591                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4592                         seq_puts(m, "#\n");
4593                         test_ftrace_alive(m);
4594                 }
4595                 if (iter->snapshot && trace_empty(iter))
4596                         print_snapshot_help(m, iter);
4597                 else if (iter->trace && iter->trace->print_header)
4598                         iter->trace->print_header(m);
4599                 else
4600                         trace_default_header(m);
4601
4602         } else if (iter->leftover) {
4603                 /*
4604                  * If we filled the seq_file buffer earlier, we
4605                  * want to just show it now.
4606                  */
4607                 ret = trace_print_seq(m, &iter->seq);
4608
4609                 /* ret should this time be zero, but you never know */
4610                 iter->leftover = ret;
4611
4612         } else {
4613                 print_trace_line(iter);
4614                 ret = trace_print_seq(m, &iter->seq);
4615                 /*
4616                  * If we overflow the seq_file buffer, then it will
4617                  * ask us for this data again when s_start() is called.
4618                  * Save ret in iter->leftover to flag that case.
4619                  *  ret is 0 if the seq_file write succeeded,
4620                  *        -1 otherwise.
4621                  */
4622                 iter->leftover = ret;
4623         }
4624
4625         return 0;
4626 }
4627
4628 /*
4629  * Should be used after trace_array_get(); trace_types_lock
4630  * ensures that i_cdev was already initialized.
4631  */
4632 static inline int tracing_get_cpu(struct inode *inode)
4633 {
4634         if (inode->i_cdev) /* See trace_create_cpu_file() */
4635                 return (long)inode->i_cdev - 1;
4636         return RING_BUFFER_ALL_CPUS;
4637 }
4638
4639 static const struct seq_operations tracer_seq_ops = {
4640         .start          = s_start,
4641         .next           = s_next,
4642         .stop           = s_stop,
4643         .show           = s_show,
4644 };
4645
4646 static struct trace_iterator *
4647 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4648 {
4649         struct trace_array *tr = inode->i_private;
4650         struct trace_iterator *iter;
4651         int cpu;
4652
4653         if (tracing_disabled)
4654                 return ERR_PTR(-ENODEV);
4655
4656         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4657         if (!iter)
4658                 return ERR_PTR(-ENOMEM);
4659
4660         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4661                                     GFP_KERNEL);
4662         if (!iter->buffer_iter)
4663                 goto release;
4664
4665         /*
4666          * trace_find_next_entry() may need to save off iter->ent.
4667          * It will place it into the iter->temp buffer. As most
4668          * events are less than 128 bytes, allocate a buffer of that size.
4669          * If one is greater, then trace_find_next_entry() will
4670          * allocate a new buffer to adjust for the bigger iter->ent.
4671          * It's not critical if it fails to get allocated here.
4672          */
4673         iter->temp = kmalloc(128, GFP_KERNEL);
4674         if (iter->temp)
4675                 iter->temp_size = 128;
4676
4677         /*
4678          * trace_event_printf() may need to modify the given format
4679          * string to replace %p with %px so that it shows the real
4680          * address instead of a hashed value. However, that is only
4681          * needed for event tracing; other tracers may not need it.
4682          * Defer the allocation until it is needed.
4683          */
4684         iter->fmt = NULL;
4685         iter->fmt_size = 0;
4686
4687         /*
4688          * We make a copy of the current tracer to avoid concurrent
4689          * changes on it while we are reading.
4690          */
4691         mutex_lock(&trace_types_lock);
4692         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4693         if (!iter->trace)
4694                 goto fail;
4695
4696         *iter->trace = *tr->current_trace;
4697
4698         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4699                 goto fail;
4700
4701         iter->tr = tr;
4702
4703 #ifdef CONFIG_TRACER_MAX_TRACE
4704         /* Currently only the top directory has a snapshot */
4705         if (tr->current_trace->print_max || snapshot)
4706                 iter->array_buffer = &tr->max_buffer;
4707         else
4708 #endif
4709                 iter->array_buffer = &tr->array_buffer;
4710         iter->snapshot = snapshot;
4711         iter->pos = -1;
4712         iter->cpu_file = tracing_get_cpu(inode);
4713         mutex_init(&iter->mutex);
4714
4715         /* Notify the tracer early; before we stop tracing. */
4716         if (iter->trace->open)
4717                 iter->trace->open(iter);
4718
4719         /* Annotate start of buffers if we had overruns */
4720         if (ring_buffer_overruns(iter->array_buffer->buffer))
4721                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4722
4723         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4724         if (trace_clocks[tr->clock_id].in_ns)
4725                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4726
4727         /*
4728          * If pause-on-trace is enabled, then stop the trace while
4729          * dumping, unless this is the "snapshot" file
4730          */
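        /*
         * Illustrative only: this behaviour corresponds to the
         * "pause-on-trace" option, e.g.:
         *
         *      echo 1 > options/pause-on-trace
         */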
4731         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4732                 tracing_stop_tr(tr);
4733
4734         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4735                 for_each_tracing_cpu(cpu) {
4736                         iter->buffer_iter[cpu] =
4737                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4738                                                          cpu, GFP_KERNEL);
4739                 }
4740                 ring_buffer_read_prepare_sync();
4741                 for_each_tracing_cpu(cpu) {
4742                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4743                         tracing_iter_reset(iter, cpu);
4744                 }
4745         } else {
4746                 cpu = iter->cpu_file;
4747                 iter->buffer_iter[cpu] =
4748                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4749                                                  cpu, GFP_KERNEL);
4750                 ring_buffer_read_prepare_sync();
4751                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4752                 tracing_iter_reset(iter, cpu);
4753         }
4754
4755         mutex_unlock(&trace_types_lock);
4756
4757         return iter;
4758
4759  fail:
4760         mutex_unlock(&trace_types_lock);
4761         kfree(iter->trace);
4762         kfree(iter->temp);
4763         kfree(iter->buffer_iter);
4764 release:
4765         seq_release_private(inode, file);
4766         return ERR_PTR(-ENOMEM);
4767 }
4768
4769 int tracing_open_generic(struct inode *inode, struct file *filp)
4770 {
4771         int ret;
4772
4773         ret = tracing_check_open_get_tr(NULL);
4774         if (ret)
4775                 return ret;
4776
4777         filp->private_data = inode->i_private;
4778         return 0;
4779 }
4780
4781 bool tracing_is_disabled(void)
4782 {
4783         return tracing_disabled;
4784 }
4785
4786 /*
4787  * Open and update trace_array ref count.
4788  * Must have the current trace_array passed to it.
4789  */
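/*
 * Note: callers are expected to pair this with trace_array_put(), typically
 * by using tracing_release_generic_tr() as the ->release callback in the
 * same file_operations (see tracing_cpumask_fops below for an example).
 */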
4790 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4791 {
4792         struct trace_array *tr = inode->i_private;
4793         int ret;
4794
4795         ret = tracing_check_open_get_tr(tr);
4796         if (ret)
4797                 return ret;
4798
4799         filp->private_data = inode->i_private;
4800
4801         return 0;
4802 }
4803
4804 static int tracing_release(struct inode *inode, struct file *file)
4805 {
4806         struct trace_array *tr = inode->i_private;
4807         struct seq_file *m = file->private_data;
4808         struct trace_iterator *iter;
4809         int cpu;
4810
4811         if (!(file->f_mode & FMODE_READ)) {
4812                 trace_array_put(tr);
4813                 return 0;
4814         }
4815
4816         /* Writes do not use seq_file */
4817         iter = m->private;
4818         mutex_lock(&trace_types_lock);
4819
4820         for_each_tracing_cpu(cpu) {
4821                 if (iter->buffer_iter[cpu])
4822                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4823         }
4824
4825         if (iter->trace && iter->trace->close)
4826                 iter->trace->close(iter);
4827
4828         if (!iter->snapshot && tr->stop_count)
4829                 /* reenable tracing if it was previously enabled */
4830                 tracing_start_tr(tr);
4831
4832         __trace_array_put(tr);
4833
4834         mutex_unlock(&trace_types_lock);
4835
4836         mutex_destroy(&iter->mutex);
4837         free_cpumask_var(iter->started);
4838         kfree(iter->fmt);
4839         kfree(iter->temp);
4840         kfree(iter->trace);
4841         kfree(iter->buffer_iter);
4842         seq_release_private(inode, file);
4843
4844         return 0;
4845 }
4846
4847 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4848 {
4849         struct trace_array *tr = inode->i_private;
4850
4851         trace_array_put(tr);
4852         return 0;
4853 }
4854
4855 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4856 {
4857         struct trace_array *tr = inode->i_private;
4858
4859         trace_array_put(tr);
4860
4861         return single_release(inode, file);
4862 }
4863
4864 static int tracing_open(struct inode *inode, struct file *file)
4865 {
4866         struct trace_array *tr = inode->i_private;
4867         struct trace_iterator *iter;
4868         int ret;
4869
4870         ret = tracing_check_open_get_tr(tr);
4871         if (ret)
4872                 return ret;
4873
4874         /* If this file was open for write, then erase contents */
4875         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4876                 int cpu = tracing_get_cpu(inode);
4877                 struct array_buffer *trace_buf = &tr->array_buffer;
4878
4879 #ifdef CONFIG_TRACER_MAX_TRACE
4880                 if (tr->current_trace->print_max)
4881                         trace_buf = &tr->max_buffer;
4882 #endif
4883
4884                 if (cpu == RING_BUFFER_ALL_CPUS)
4885                         tracing_reset_online_cpus(trace_buf);
4886                 else
4887                         tracing_reset_cpu(trace_buf, cpu);
4888         }
4889
4890         if (file->f_mode & FMODE_READ) {
4891                 iter = __tracing_open(inode, file, false);
4892                 if (IS_ERR(iter))
4893                         ret = PTR_ERR(iter);
4894                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4895                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4896         }
4897
4898         if (ret < 0)
4899                 trace_array_put(tr);
4900
4901         return ret;
4902 }
4903
4904 /*
4905  * Some tracers are not suitable for instance buffers.
4906  * A tracer is always available for the global array (toplevel)
4907  * or if it explicitly states that it is.
4908  */
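/*
 * For example, an instance created with "mkdir instances/foo" only lists
 * tracers that set ->allow_instances in its available_tracers file, while
 * the top level trace_array sees every registered tracer.
 */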
4909 static bool
4910 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4911 {
4912         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4913 }
4914
4915 /* Find the next tracer that this trace array may use */
4916 static struct tracer *
4917 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4918 {
4919         while (t && !trace_ok_for_array(t, tr))
4920                 t = t->next;
4921
4922         return t;
4923 }
4924
4925 static void *
4926 t_next(struct seq_file *m, void *v, loff_t *pos)
4927 {
4928         struct trace_array *tr = m->private;
4929         struct tracer *t = v;
4930
4931         (*pos)++;
4932
4933         if (t)
4934                 t = get_tracer_for_array(tr, t->next);
4935
4936         return t;
4937 }
4938
4939 static void *t_start(struct seq_file *m, loff_t *pos)
4940 {
4941         struct trace_array *tr = m->private;
4942         struct tracer *t;
4943         loff_t l = 0;
4944
4945         mutex_lock(&trace_types_lock);
4946
4947         t = get_tracer_for_array(tr, trace_types);
4948         for (; t && l < *pos; t = t_next(m, t, &l))
4949                 ;
4950
4951         return t;
4952 }
4953
4954 static void t_stop(struct seq_file *m, void *p)
4955 {
4956         mutex_unlock(&trace_types_lock);
4957 }
4958
4959 static int t_show(struct seq_file *m, void *v)
4960 {
4961         struct tracer *t = v;
4962
4963         if (!t)
4964                 return 0;
4965
4966         seq_puts(m, t->name);
4967         if (t->next)
4968                 seq_putc(m, ' ');
4969         else
4970                 seq_putc(m, '\n');
4971
4972         return 0;
4973 }
4974
4975 static const struct seq_operations show_traces_seq_ops = {
4976         .start          = t_start,
4977         .next           = t_next,
4978         .stop           = t_stop,
4979         .show           = t_show,
4980 };
4981
4982 static int show_traces_open(struct inode *inode, struct file *file)
4983 {
4984         struct trace_array *tr = inode->i_private;
4985         struct seq_file *m;
4986         int ret;
4987
4988         ret = tracing_check_open_get_tr(tr);
4989         if (ret)
4990                 return ret;
4991
4992         ret = seq_open(file, &show_traces_seq_ops);
4993         if (ret) {
4994                 trace_array_put(tr);
4995                 return ret;
4996         }
4997
4998         m = file->private_data;
4999         m->private = tr;
5000
5001         return 0;
5002 }
5003
5004 static int show_traces_release(struct inode *inode, struct file *file)
5005 {
5006         struct trace_array *tr = inode->i_private;
5007
5008         trace_array_put(tr);
5009         return seq_release(inode, file);
5010 }
5011
5012 static ssize_t
5013 tracing_write_stub(struct file *filp, const char __user *ubuf,
5014                    size_t count, loff_t *ppos)
5015 {
5016         return count;
5017 }
5018
5019 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5020 {
5021         int ret;
5022
5023         if (file->f_mode & FMODE_READ)
5024                 ret = seq_lseek(file, offset, whence);
5025         else
5026                 file->f_pos = ret = 0;
5027
5028         return ret;
5029 }
5030
5031 static const struct file_operations tracing_fops = {
5032         .open           = tracing_open,
5033         .read           = seq_read,
5034         .write          = tracing_write_stub,
5035         .llseek         = tracing_lseek,
5036         .release        = tracing_release,
5037 };
5038
5039 static const struct file_operations show_traces_fops = {
5040         .open           = show_traces_open,
5041         .read           = seq_read,
5042         .llseek         = seq_lseek,
5043         .release        = show_traces_release,
5044 };
5045
5046 static ssize_t
5047 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5048                      size_t count, loff_t *ppos)
5049 {
5050         struct trace_array *tr = file_inode(filp)->i_private;
5051         char *mask_str;
5052         int len;
5053
5054         len = snprintf(NULL, 0, "%*pb\n",
5055                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5056         mask_str = kmalloc(len, GFP_KERNEL);
5057         if (!mask_str)
5058                 return -ENOMEM;
5059
5060         len = snprintf(mask_str, len, "%*pb\n",
5061                        cpumask_pr_args(tr->tracing_cpumask));
5062         if (len >= count) {
5063                 count = -EINVAL;
5064                 goto out_err;
5065         }
5066         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5067
5068 out_err:
5069         kfree(mask_str);
5070
5071         return count;
5072 }
5073
5074 int tracing_set_cpumask(struct trace_array *tr,
5075                         cpumask_var_t tracing_cpumask_new)
5076 {
5077         int cpu;
5078
5079         if (!tr)
5080                 return -EINVAL;
5081
5082         local_irq_disable();
5083         arch_spin_lock(&tr->max_lock);
5084         for_each_tracing_cpu(cpu) {
5085                 /*
5086                  * Increase/decrease the disabled counter if we are
5087                  * about to flip a bit in the cpumask:
5088                  */
5089                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5090                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5091                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5092                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5093                 }
5094                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5095                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5096                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5097                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5098                 }
5099         }
5100         arch_spin_unlock(&tr->max_lock);
5101         local_irq_enable();
5102
5103         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5104
5105         return 0;
5106 }
5107
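/*
 * Illustrative usage of the write path below: the user buffer is parsed
 * with cpumask_parse_user(), i.e. the usual hex CPU mask format, e.g.:
 *
 *      echo f > tracing_cpumask        # limit tracing to CPUs 0-3
 */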
5108 static ssize_t
5109 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5110                       size_t count, loff_t *ppos)
5111 {
5112         struct trace_array *tr = file_inode(filp)->i_private;
5113         cpumask_var_t tracing_cpumask_new;
5114         int err;
5115
5116         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5117                 return -ENOMEM;
5118
5119         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5120         if (err)
5121                 goto err_free;
5122
5123         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5124         if (err)
5125                 goto err_free;
5126
5127         free_cpumask_var(tracing_cpumask_new);
5128
5129         return count;
5130
5131 err_free:
5132         free_cpumask_var(tracing_cpumask_new);
5133
5134         return err;
5135 }
5136
5137 static const struct file_operations tracing_cpumask_fops = {
5138         .open           = tracing_open_generic_tr,
5139         .read           = tracing_cpumask_read,
5140         .write          = tracing_cpumask_write,
5141         .release        = tracing_release_generic_tr,
5142         .llseek         = generic_file_llseek,
5143 };
5144
5145 static int tracing_trace_options_show(struct seq_file *m, void *v)
5146 {
5147         struct tracer_opt *trace_opts;
5148         struct trace_array *tr = m->private;
5149         u32 tracer_flags;
5150         int i;
5151
5152         mutex_lock(&trace_types_lock);
5153         tracer_flags = tr->current_trace->flags->val;
5154         trace_opts = tr->current_trace->flags->opts;
5155
5156         for (i = 0; trace_options[i]; i++) {
5157                 if (tr->trace_flags & (1 << i))
5158                         seq_printf(m, "%s\n", trace_options[i]);
5159                 else
5160                         seq_printf(m, "no%s\n", trace_options[i]);
5161         }
5162
5163         for (i = 0; trace_opts[i].name; i++) {
5164                 if (tracer_flags & trace_opts[i].bit)
5165                         seq_printf(m, "%s\n", trace_opts[i].name);
5166                 else
5167                         seq_printf(m, "no%s\n", trace_opts[i].name);
5168         }
5169         mutex_unlock(&trace_types_lock);
5170
5171         return 0;
5172 }
5173
5174 static int __set_tracer_option(struct trace_array *tr,
5175                                struct tracer_flags *tracer_flags,
5176                                struct tracer_opt *opts, int neg)
5177 {
5178         struct tracer *trace = tracer_flags->trace;
5179         int ret;
5180
5181         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5182         if (ret)
5183                 return ret;
5184
5185         if (neg)
5186                 tracer_flags->val &= ~opts->bit;
5187         else
5188                 tracer_flags->val |= opts->bit;
5189         return 0;
5190 }
5191
5192 /* Try to assign a tracer specific option */
5193 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5194 {
5195         struct tracer *trace = tr->current_trace;
5196         struct tracer_flags *tracer_flags = trace->flags;
5197         struct tracer_opt *opts = NULL;
5198         int i;
5199
5200         for (i = 0; tracer_flags->opts[i].name; i++) {
5201                 opts = &tracer_flags->opts[i];
5202
5203                 if (strcmp(cmp, opts->name) == 0)
5204                         return __set_tracer_option(tr, trace->flags, opts, neg);
5205         }
5206
5207         return -EINVAL;
5208 }
5209
5210 /* Some tracers require overwrite to stay enabled */
5211 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5212 {
5213         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5214                 return -1;
5215
5216         return 0;
5217 }
5218
5219 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5220 {
5221         if ((mask == TRACE_ITER_RECORD_TGID) ||
5222             (mask == TRACE_ITER_RECORD_CMD))
5223                 lockdep_assert_held(&event_mutex);
5224
5225         /* do nothing if flag is already set */
5226         if (!!(tr->trace_flags & mask) == !!enabled)
5227                 return 0;
5228
5229         /* Give the tracer a chance to approve the change */
5230         if (tr->current_trace->flag_changed)
5231                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5232                         return -EINVAL;
5233
5234         if (enabled)
5235                 tr->trace_flags |= mask;
5236         else
5237                 tr->trace_flags &= ~mask;
5238
5239         if (mask == TRACE_ITER_RECORD_CMD)
5240                 trace_event_enable_cmd_record(enabled);
5241
5242         if (mask == TRACE_ITER_RECORD_TGID) {
5243                 if (!tgid_map)
5244                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5245                                            sizeof(*tgid_map),
5246                                            GFP_KERNEL);
5247                 if (!tgid_map) {
5248                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5249                         return -ENOMEM;
5250                 }
5251
5252                 trace_event_enable_tgid_record(enabled);
5253         }
5254
5255         if (mask == TRACE_ITER_EVENT_FORK)
5256                 trace_event_follow_fork(tr, enabled);
5257
5258         if (mask == TRACE_ITER_FUNC_FORK)
5259                 ftrace_pid_follow_fork(tr, enabled);
5260
5261         if (mask == TRACE_ITER_OVERWRITE) {
5262                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5263 #ifdef CONFIG_TRACER_MAX_TRACE
5264                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5265 #endif
5266         }
5267
5268         if (mask == TRACE_ITER_PRINTK) {
5269                 trace_printk_start_stop_comm(enabled);
5270                 trace_printk_control(enabled);
5271         }
5272
5273         return 0;
5274 }
5275
5276 int trace_set_options(struct trace_array *tr, char *option)
5277 {
5278         char *cmp;
5279         int neg = 0;
5280         int ret;
5281         size_t orig_len = strlen(option);
5282         int len;
5283
5284         cmp = strstrip(option);
5285
5286         len = str_has_prefix(cmp, "no");
5287         if (len)
5288                 neg = 1;
5289
5290         cmp += len;
5291
5292         mutex_lock(&event_mutex);
5293         mutex_lock(&trace_types_lock);
5294
5295         ret = match_string(trace_options, -1, cmp);
5296         /* If not a global option, test the tracer specific options */
5297         if (ret < 0)
5298                 ret = set_tracer_option(tr, cmp, neg);
5299         else
5300                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5301
5302         mutex_unlock(&trace_types_lock);
5303         mutex_unlock(&event_mutex);
5304
5305         /*
5306          * If the first trailing whitespace is replaced with '\0' by strstrip,
5307          * turn it back into a space.
5308          */
5309         if (orig_len > strlen(option))
5310                 option[strlen(option)] = ' ';
5311
5312         return ret;
5313 }
5314
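/*
 * Note: trace_boot_options_buf is assumed to be filled from the
 * "trace_options=" kernel command line parameter, which takes a comma
 * separated list of options, e.g.:
 *
 *      trace_options=sym-addr,nooverwrite
 */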
5315 static void __init apply_trace_boot_options(void)
5316 {
5317         char *buf = trace_boot_options_buf;
5318         char *option;
5319
5320         while (true) {
5321                 option = strsep(&buf, ",");
5322
5323                 if (!option)
5324                         break;
5325
5326                 if (*option)
5327                         trace_set_options(&global_trace, option);
5328
5329                 /* Put back the comma to allow this to be called again */
5330                 if (buf)
5331                         *(buf - 1) = ',';
5332         }
5333 }
5334
5335 static ssize_t
5336 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5337                         size_t cnt, loff_t *ppos)
5338 {
5339         struct seq_file *m = filp->private_data;
5340         struct trace_array *tr = m->private;
5341         char buf[64];
5342         int ret;
5343
5344         if (cnt >= sizeof(buf))
5345                 return -EINVAL;
5346
5347         if (copy_from_user(buf, ubuf, cnt))
5348                 return -EFAULT;
5349
5350         buf[cnt] = 0;
5351
5352         ret = trace_set_options(tr, buf);
5353         if (ret < 0)
5354                 return ret;
5355
5356         *ppos += cnt;
5357
5358         return cnt;
5359 }
5360
5361 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5362 {
5363         struct trace_array *tr = inode->i_private;
5364         int ret;
5365
5366         ret = tracing_check_open_get_tr(tr);
5367         if (ret)
5368                 return ret;
5369
5370         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5371         if (ret < 0)
5372                 trace_array_put(tr);
5373
5374         return ret;
5375 }
5376
5377 static const struct file_operations tracing_iter_fops = {
5378         .open           = tracing_trace_options_open,
5379         .read           = seq_read,
5380         .llseek         = seq_lseek,
5381         .release        = tracing_single_release_tr,
5382         .write          = tracing_trace_options_write,
5383 };
5384
5385 static const char readme_msg[] =
5386         "tracing mini-HOWTO:\n\n"
5387         "# echo 0 > tracing_on : quick way to disable tracing\n"
5388         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5389         " Important files:\n"
5390         "  trace\t\t\t- The static contents of the buffer\n"
5391         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5392         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5393         "  current_tracer\t- function and latency tracers\n"
5394         "  available_tracers\t- list of configured tracers for current_tracer\n"
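        /* Illustrative only; "nop" is always a registered tracer. */
        "\t\t\t  example: echo nop > current_tracer\n"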
5395         "  error_log\t- error log for failed commands (that support it)\n"
5396         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5397         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5398         "  trace_clock\t\t- change the clock used to order events\n"
5399         "       local:   Per cpu clock but may not be synced across CPUs\n"
5400         "      global:   Synced across CPUs but slows tracing down.\n"
5401         "     counter:   Not a clock, but just an increment\n"
5402         "      uptime:   Jiffy counter from time of boot\n"
5403         "        perf:   Same clock that perf events use\n"
5404 #ifdef CONFIG_X86_64
5405         "     x86-tsc:   TSC cycle counter\n"
5406 #endif
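        /* Illustrative usage of the clock list above: */
        "\t     example: echo global > trace_clock\n"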
5407         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5408         "       delta:   Delta difference against a buffer-wide timestamp\n"
5409         "    absolute:   Absolute (standalone) timestamp\n"
5410         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5411         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5412         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5413         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5414         "\t\t\t  Remove sub-buffer with rmdir\n"
5415         "  trace_options\t\t- Set format or modify how tracing happens\n"
5416         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5417         "\t\t\t  option name\n"
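        /* Illustrative only; option names are those listed in trace_options. */
        "\t\t\t  example: echo noprint-parent > trace_options\n"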
5418         "  saved_cmdlines_size\t- echo the number of comm-pid mappings to save in here\n"
5419 #ifdef CONFIG_DYNAMIC_FTRACE
5420         "\n  available_filter_functions - list of functions that can be filtered on\n"
5421         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5422         "\t\t\t  functions\n"
5423         "\t     accepts: func_full_name or glob-matching-pattern\n"
5424         "\t     modules: Can select a group via module\n"
5425         "\t      Format: :mod:<module-name>\n"
5426         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5427         "\t    triggers: a command to perform when function is hit\n"
5428         "\t      Format: <function>:<trigger>[:count]\n"
5429         "\t     trigger: traceon, traceoff\n"
5430         "\t\t      enable_event:<system>:<event>\n"
5431         "\t\t      disable_event:<system>:<event>\n"
5432 #ifdef CONFIG_STACKTRACE
5433         "\t\t      stacktrace\n"
5434 #endif
5435 #ifdef CONFIG_TRACER_SNAPSHOT
5436         "\t\t      snapshot\n"
5437 #endif
5438         "\t\t      dump\n"
5439         "\t\t      cpudump\n"
5440         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5441         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5442         "\t     The first one will disable tracing every time do_fault is hit\n"
5443         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5444         "\t       The first time do_trap is hit and it disables tracing, the\n"
5445         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5446         "\t       the counter will not decrement. It only decrements when the\n"
5447         "\t       trigger did work\n"
5448         "\t     To remove trigger without count:\n"
5449         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5450         "\t     To remove trigger with a count:\n"
5451         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5452         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5453         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5454         "\t    modules: Can select a group via module command :mod:\n"
5455         "\t    Does not accept triggers\n"
5456 #endif /* CONFIG_DYNAMIC_FTRACE */
5457 #ifdef CONFIG_FUNCTION_TRACER
5458         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5459         "\t\t    (function)\n"
5460         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5461         "\t\t    (function)\n"
5462 #endif
5463 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5464         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5465         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5466         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5467 #endif
5468 #ifdef CONFIG_TRACER_SNAPSHOT
5469         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5470         "\t\t\t  snapshot buffer. Read the contents for more\n"
5471         "\t\t\t  information\n"
5472 #endif
5473 #ifdef CONFIG_STACK_TRACER
5474         "  stack_trace\t\t- Shows the max stack trace when active\n"
5475         "  stack_max_size\t- Shows current max stack size that was traced\n"
5476         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5477         "\t\t\t  new trace)\n"
5478 #ifdef CONFIG_DYNAMIC_FTRACE
5479         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5480         "\t\t\t  traces\n"
5481 #endif
5482 #endif /* CONFIG_STACK_TRACER */
5483 #ifdef CONFIG_DYNAMIC_EVENTS
5484         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5485         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5486 #endif
5487 #ifdef CONFIG_KPROBE_EVENTS
5488         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5489         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5490 #endif
5491 #ifdef CONFIG_UPROBE_EVENTS
5492         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5493         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5494 #endif
5495 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5496         "\t  accepts: event-definitions (one definition per line)\n"
5497         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5498         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5499 #ifdef CONFIG_HIST_TRIGGERS
5500         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5501 #endif
5502         "\t           -:[<group>/]<event>\n"
5503 #ifdef CONFIG_KPROBE_EVENTS
5504         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5505   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5506 #endif
5507 #ifdef CONFIG_UPROBE_EVENTS
5508   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5509 #endif
5510         "\t     args: <name>=fetcharg[:type]\n"
5511         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5512 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5513         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5514 #else
5515         "\t           $stack<index>, $stack, $retval, $comm,\n"
5516 #endif
5517         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5518         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5519         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5520         "\t           <type>\\[<array-size>\\]\n"
5521 #ifdef CONFIG_HIST_TRIGGERS
5522         "\t    field: <stype> <name>;\n"
5523         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5524         "\t           [unsigned] char/int/long\n"
5525 #endif
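        /*
         * Illustrative definitions only; "myprobe" and the probed symbol
         * below are examples, not names used elsewhere in this file.
         */
#ifdef CONFIG_KPROBE_EVENTS
        "\t  example: echo 'p:myprobe do_sys_open' >> kprobe_events\n"
        "\t           echo '-:myprobe' >> kprobe_events\n"
#endif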
5526 #endif
5527         "  events/\t\t- Directory containing all trace event subsystems:\n"
5528         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5529         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5530         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5531         "\t\t\t  events\n"
5532         "      filter\t\t- If set, only events passing filter are traced\n"
5533         "  events/<system>/<event>/\t- Directory containing control files for\n"
5534         "\t\t\t  <event>:\n"
5535         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5536         "      filter\t\t- If set, only events passing filter are traced\n"
5537         "      trigger\t\t- If set, a command to perform when event is hit\n"
5538         "\t    Format: <trigger>[:count][if <filter>]\n"
5539         "\t   trigger: traceon, traceoff\n"
5540         "\t            enable_event:<system>:<event>\n"
5541         "\t            disable_event:<system>:<event>\n"
5542 #ifdef CONFIG_HIST_TRIGGERS
5543         "\t            enable_hist:<system>:<event>\n"
5544         "\t            disable_hist:<system>:<event>\n"
5545 #endif
5546 #ifdef CONFIG_STACKTRACE
5547         "\t\t    stacktrace\n"
5548 #endif
5549 #ifdef CONFIG_TRACER_SNAPSHOT
5550         "\t\t    snapshot\n"
5551 #endif
5552 #ifdef CONFIG_HIST_TRIGGERS
5553         "\t\t    hist (see below)\n"
5554 #endif
5555         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5556         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5557         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5558         "\t                  events/block/block_unplug/trigger\n"
5559         "\t   The first disables tracing every time block_unplug is hit.\n"
5560         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5561         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5562         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5563         "\t   Like function triggers, the counter is only decremented if it\n"
5564         "\t    enabled or disabled tracing.\n"
5565         "\t   To remove a trigger without a count:\n"
5566         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5567         "\t   To remove a trigger with a count:\n"
5568         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5569         "\t   Filters can be ignored when removing a trigger.\n"
5570 #ifdef CONFIG_HIST_TRIGGERS
5571         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5572         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5573         "\t            [:values=<field1[,field2,...]>]\n"
5574         "\t            [:sort=<field1[,field2,...]>]\n"
5575         "\t            [:size=#entries]\n"
5576         "\t            [:pause][:continue][:clear]\n"
5577         "\t            [:name=histname1]\n"
5578         "\t            [:<handler>.<action>]\n"
5579         "\t            [if <filter>]\n\n"
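        /*
         * Illustrative only: the field names below come from the
         * kmem:kmalloc event format and are just an example.
         */
        "\t    example: echo 'hist:keys=call_site:values=bytes_req' > events/kmem/kmalloc/trigger\n"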
5580         "\t    When a matching event is hit, an entry is added to a hash\n"
5581         "\t    table using the key(s) and value(s) named, and the value of a\n"
5582         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5583         "\t    correspond to fields in the event's format description.  Keys\n"
5584         "\t    can be any field, or the special string 'stacktrace'.\n"
5585         "\t    Compound keys consisting of up to two fields can be specified\n"
5586         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5587         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5588         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5589         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5590         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5591         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5592         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5593         "\t    its histogram data will be shared with other triggers of the\n"
5594         "\t    same name, and trigger hits will update this common data.\n\n"
5595         "\t    Reading the 'hist' file for the event will dump the hash\n"
5596         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5597         "\t    triggers attached to an event, there will be a table for each\n"
5598         "\t    trigger in the output.  The table displayed for a named\n"
5599         "\t    trigger will be the same as any other instance having the\n"
5600         "\t    same name.  The default format used to display a given field\n"
5601         "\t    can be modified by appending any of the following modifiers\n"
5602         "\t    to the field name, as applicable:\n\n"
5603         "\t            .hex        display a number as a hex value\n"
5604         "\t            .sym        display an address as a symbol\n"
5605         "\t            .sym-offset display an address as a symbol and offset\n"
5606         "\t            .execname   display a common_pid as a program name\n"
5607         "\t            .syscall    display a syscall id as a syscall name\n"
5608         "\t            .log2       display log2 value rather than raw number\n"
5609         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5610         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5611         "\t    trigger or to start a hist trigger but not log any events\n"
5612         "\t    until told to do so.  'continue' can be used to start or\n"
5613         "\t    restart a paused hist trigger.\n\n"
5614         "\t    The 'clear' parameter will clear the contents of a running\n"
5615         "\t    hist trigger and leave its current paused/active state\n"
5616         "\t    unchanged.\n\n"
5617         "\t    The enable_hist and disable_hist triggers can be used to\n"
5618         "\t    have one event conditionally start and stop another event's\n"
5619         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5620         "\t    the enable_event and disable_event triggers.\n\n"
5621         "\t    Hist trigger handlers and actions are executed whenever a\n"
5622         "\t    histogram entry is added or updated.  They take the form:\n\n"
5623         "\t        <handler>.<action>\n\n"
5624         "\t    The available handlers are:\n\n"
5625         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5626         "\t        onmax(var)               - invoke if var exceeds current max\n"
5627         "\t        onchange(var)            - invoke action if var changes\n\n"
5628         "\t    The available actions are:\n\n"
5629         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5630         "\t        save(field,...)                      - save current event fields\n"
5631 #ifdef CONFIG_TRACER_SNAPSHOT
5632         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5633 #endif
5634 #ifdef CONFIG_SYNTH_EVENTS
5635         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5636         "\t  Write into this file to define/undefine new synthetic events.\n"
5637         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5638 #endif
5639 #endif
5640 ;
5641
5642 static ssize_t
5643 tracing_readme_read(struct file *filp, char __user *ubuf,
5644                        size_t cnt, loff_t *ppos)
5645 {
5646         return simple_read_from_buffer(ubuf, cnt, ppos,
5647                                         readme_msg, strlen(readme_msg));
5648 }
5649
5650 static const struct file_operations tracing_readme_fops = {
5651         .open           = tracing_open_generic,
5652         .read           = tracing_readme_read,
5653         .llseek         = generic_file_llseek,
5654 };
5655
5656 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5657 {
5658         int *ptr = v;
5659
5660         if (*pos || m->count)
5661                 ptr++;
5662
5663         (*pos)++;
5664
5665         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5666                 if (trace_find_tgid(*ptr))
5667                         return ptr;
5668         }
5669
5670         return NULL;
5671 }
5672
5673 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5674 {
5675         void *v;
5676         loff_t l = 0;
5677
5678         if (!tgid_map)
5679                 return NULL;
5680
5681         v = &tgid_map[0];
5682         while (l <= *pos) {
5683                 v = saved_tgids_next(m, v, &l);
5684                 if (!v)
5685                         return NULL;
5686         }
5687
5688         return v;
5689 }
5690
5691 static void saved_tgids_stop(struct seq_file *m, void *v)
5692 {
5693 }
5694
5695 static int saved_tgids_show(struct seq_file *m, void *v)
5696 {
5697         int pid = (int *)v - tgid_map;
5698
5699         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5700         return 0;
5701 }
5702
5703 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5704         .start          = saved_tgids_start,
5705         .stop           = saved_tgids_stop,
5706         .next           = saved_tgids_next,
5707         .show           = saved_tgids_show,
5708 };
5709
5710 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5711 {
5712         int ret;
5713
5714         ret = tracing_check_open_get_tr(NULL);
5715         if (ret)
5716                 return ret;
5717
5718         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5719 }
5720
5721
5722 static const struct file_operations tracing_saved_tgids_fops = {
5723         .open           = tracing_saved_tgids_open,
5724         .read           = seq_read,
5725         .llseek         = seq_lseek,
5726         .release        = seq_release,
5727 };
5728
5729 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5730 {
5731         unsigned int *ptr = v;
5732
5733         if (*pos || m->count)
5734                 ptr++;
5735
5736         (*pos)++;
5737
5738         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5739              ptr++) {
5740                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5741                         continue;
5742
5743                 return ptr;
5744         }
5745
5746         return NULL;
5747 }
5748
5749 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5750 {
5751         void *v;
5752         loff_t l = 0;
5753
5754         preempt_disable();
5755         arch_spin_lock(&trace_cmdline_lock);
5756
5757         v = &savedcmd->map_cmdline_to_pid[0];
5758         while (l <= *pos) {
5759                 v = saved_cmdlines_next(m, v, &l);
5760                 if (!v)
5761                         return NULL;
5762         }
5763
5764         return v;
5765 }
5766
5767 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5768 {
5769         arch_spin_unlock(&trace_cmdline_lock);
5770         preempt_enable();
5771 }
5772
5773 static int saved_cmdlines_show(struct seq_file *m, void *v)
5774 {
5775         char buf[TASK_COMM_LEN];
5776         unsigned int *pid = v;
5777
5778         __trace_find_cmdline(*pid, buf);
5779         seq_printf(m, "%d %s\n", *pid, buf);
5780         return 0;
5781 }
5782
5783 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5784         .start          = saved_cmdlines_start,
5785         .next           = saved_cmdlines_next,
5786         .stop           = saved_cmdlines_stop,
5787         .show           = saved_cmdlines_show,
5788 };
5789
5790 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5791 {
5792         int ret;
5793
5794         ret = tracing_check_open_get_tr(NULL);
5795         if (ret)
5796                 return ret;
5797
5798         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5799 }
5800
5801 static const struct file_operations tracing_saved_cmdlines_fops = {
5802         .open           = tracing_saved_cmdlines_open,
5803         .read           = seq_read,
5804         .llseek         = seq_lseek,
5805         .release        = seq_release,
5806 };
5807
5808 static ssize_t
5809 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5810                                  size_t cnt, loff_t *ppos)
5811 {
5812         char buf[64];
5813         int r;
5814
5815         arch_spin_lock(&trace_cmdline_lock);
5816         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5817         arch_spin_unlock(&trace_cmdline_lock);
5818
5819         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5820 }
5821
5822 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5823 {
5824         kfree(s->saved_cmdlines);
5825         kfree(s->map_cmdline_to_pid);
5826         kfree(s);
5827 }
5828
5829 static int tracing_resize_saved_cmdlines(unsigned int val)
5830 {
5831         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5832
5833         s = kmalloc(sizeof(*s), GFP_KERNEL);
5834         if (!s)
5835                 return -ENOMEM;
5836
5837         if (allocate_cmdlines_buffer(val, s) < 0) {
5838                 kfree(s);
5839                 return -ENOMEM;
5840         }
5841
5842         arch_spin_lock(&trace_cmdline_lock);
5843         savedcmd_temp = savedcmd;
5844         savedcmd = s;
5845         arch_spin_unlock(&trace_cmdline_lock);
5846         free_saved_cmdlines_buffer(savedcmd_temp);
5847
5848         return 0;
5849 }
5850
5851 static ssize_t
5852 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5853                                   size_t cnt, loff_t *ppos)
5854 {
5855         unsigned long val;
5856         int ret;
5857
5858         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5859         if (ret)
5860                 return ret;
5861
5862         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5863         if (!val || val > PID_MAX_DEFAULT)
5864                 return -EINVAL;
5865
5866         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5867         if (ret < 0)
5868                 return ret;
5869
5870         *ppos += cnt;
5871
5872         return cnt;
5873 }
5874
5875 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5876         .open           = tracing_open_generic,
5877         .read           = tracing_saved_cmdlines_size_read,
5878         .write          = tracing_saved_cmdlines_size_write,
5879 };
5880
5881 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5882 static union trace_eval_map_item *
5883 update_eval_map(union trace_eval_map_item *ptr)
5884 {
5885         if (!ptr->map.eval_string) {
5886                 if (ptr->tail.next) {
5887                         ptr = ptr->tail.next;
5888                         /* Set ptr to the next real item (skip head) */
5889                         ptr++;
5890                 } else
5891                         return NULL;
5892         }
5893         return ptr;
5894 }
5895
5896 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5897 {
5898         union trace_eval_map_item *ptr = v;
5899
5900         /*
5901          * Paranoid! If ptr points to end, we don't want to increment past it.
5902          * This really should never happen.
5903          */
5904         (*pos)++;
5905         ptr = update_eval_map(ptr);
5906         if (WARN_ON_ONCE(!ptr))
5907                 return NULL;
5908
5909         ptr++;
5910         ptr = update_eval_map(ptr);
5911
5912         return ptr;
5913 }
5914
5915 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5916 {
5917         union trace_eval_map_item *v;
5918         loff_t l = 0;
5919
5920         mutex_lock(&trace_eval_mutex);
5921
5922         v = trace_eval_maps;
5923         if (v)
5924                 v++;
5925
5926         while (v && l < *pos) {
5927                 v = eval_map_next(m, v, &l);
5928         }
5929
5930         return v;
5931 }
5932
5933 static void eval_map_stop(struct seq_file *m, void *v)
5934 {
5935         mutex_unlock(&trace_eval_mutex);
5936 }
5937
5938 static int eval_map_show(struct seq_file *m, void *v)
5939 {
5940         union trace_eval_map_item *ptr = v;
5941
5942         seq_printf(m, "%s %ld (%s)\n",
5943                    ptr->map.eval_string, ptr->map.eval_value,
5944                    ptr->map.system);
5945
5946         return 0;
5947 }
5948
5949 static const struct seq_operations tracing_eval_map_seq_ops = {
5950         .start          = eval_map_start,
5951         .next           = eval_map_next,
5952         .stop           = eval_map_stop,
5953         .show           = eval_map_show,
5954 };
5955
5956 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5957 {
5958         int ret;
5959
5960         ret = tracing_check_open_get_tr(NULL);
5961         if (ret)
5962                 return ret;
5963
5964         return seq_open(filp, &tracing_eval_map_seq_ops);
5965 }
5966
5967 static const struct file_operations tracing_eval_map_fops = {
5968         .open           = tracing_eval_map_open,
5969         .read           = seq_read,
5970         .llseek         = seq_lseek,
5971         .release        = seq_release,
5972 };
5973
5974 static inline union trace_eval_map_item *
5975 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5976 {
5977         /* Return tail of array given the head */
5978         return ptr + ptr->head.length + 1;
5979 }
5980
5981 static void
5982 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5983                            int len)
5984 {
5985         struct trace_eval_map **stop;
5986         struct trace_eval_map **map;
5987         union trace_eval_map_item *map_array;
5988         union trace_eval_map_item *ptr;
5989
5990         stop = start + len;
5991
5992         /*
5993          * The trace_eval_maps contains the map plus a head and tail item,
5994          * where the head holds the module and length of array, and the
5995          * tail holds a pointer to the next list.
5996          */
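        /*
         * Illustrative layout of map_array for len == 2:
         *
         *      map_array[0]    head (mod, length = 2)
         *      map_array[1]    copy of *start[0]
         *      map_array[2]    copy of *start[1]
         *      map_array[3]    tail (zeroed below; links to the next array)
         */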
5997         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5998         if (!map_array) {
5999                 pr_warn("Unable to allocate trace eval mapping\n");
6000                 return;
6001         }
6002
6003         mutex_lock(&trace_eval_mutex);
6004
6005         if (!trace_eval_maps)
6006                 trace_eval_maps = map_array;
6007         else {
6008                 ptr = trace_eval_maps;
6009                 for (;;) {
6010                         ptr = trace_eval_jmp_to_tail(ptr);
6011                         if (!ptr->tail.next)
6012                                 break;
6013                         ptr = ptr->tail.next;
6014
6015                 }
6016                 ptr->tail.next = map_array;
6017         }
6018         map_array->head.mod = mod;
6019         map_array->head.length = len;
6020         map_array++;
6021
6022         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6023                 map_array->map = **map;
6024                 map_array++;
6025         }
6026         memset(map_array, 0, sizeof(*map_array));
6027
6028         mutex_unlock(&trace_eval_mutex);
6029 }
6030
6031 static void trace_create_eval_file(struct dentry *d_tracer)
6032 {
6033         trace_create_file("eval_map", 0444, d_tracer,
6034                           NULL, &tracing_eval_map_fops);
6035 }
6036
6037 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6038 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6039 static inline void trace_insert_eval_map_file(struct module *mod,
6040                               struct trace_eval_map **start, int len) { }
6041 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6042
6043 static void trace_insert_eval_map(struct module *mod,
6044                                   struct trace_eval_map **start, int len)
6045 {
6046         struct trace_eval_map **map;
6047
6048         if (len <= 0)
6049                 return;
6050
6051         map = start;
6052
6053         trace_event_eval_update(map, len);
6054
6055         trace_insert_eval_map_file(mod, start, len);
6056 }
6057
6058 static ssize_t
6059 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6060                        size_t cnt, loff_t *ppos)
6061 {
6062         struct trace_array *tr = filp->private_data;
6063         char buf[MAX_TRACER_SIZE+2];
6064         int r;
6065
6066         mutex_lock(&trace_types_lock);
6067         r = sprintf(buf, "%s\n", tr->current_trace->name);
6068         mutex_unlock(&trace_types_lock);
6069
6070         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6071 }
6072
6073 int tracer_init(struct tracer *t, struct trace_array *tr)
6074 {
6075         tracing_reset_online_cpus(&tr->array_buffer);
6076         return t->init(tr);
6077 }
6078
6079 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6080 {
6081         int cpu;
6082
6083         for_each_tracing_cpu(cpu)
6084                 per_cpu_ptr(buf->data, cpu)->entries = val;
6085 }
6086
6087 #ifdef CONFIG_TRACER_MAX_TRACE
6088 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6089 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6090                                         struct array_buffer *size_buf, int cpu_id)
6091 {
6092         int cpu, ret = 0;
6093
6094         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6095                 for_each_tracing_cpu(cpu) {
6096                         ret = ring_buffer_resize(trace_buf->buffer,
6097                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6098                         if (ret < 0)
6099                                 break;
6100                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6101                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6102                 }
6103         } else {
6104                 ret = ring_buffer_resize(trace_buf->buffer,
6105                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6106                 if (ret == 0)
6107                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6108                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6109         }
6110
6111         return ret;
6112 }
6113 #endif /* CONFIG_TRACER_MAX_TRACE */
6114
6115 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6116                                         unsigned long size, int cpu)
6117 {
6118         int ret;
6119
6120         /*
6121          * If kernel or user changes the size of the ring buffer
6122          * we use the size that was given, and we can forget about
6123          * expanding it later.
6124          */
6125         ring_buffer_expanded = true;
6126
6127         /* May be called before buffers are initialized */
6128         if (!tr->array_buffer.buffer)
6129                 return 0;
6130
6131         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6132         if (ret < 0)
6133                 return ret;
6134
6135 #ifdef CONFIG_TRACER_MAX_TRACE
6136         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6137             !tr->current_trace->use_max_tr)
6138                 goto out;
6139
6140         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6141         if (ret < 0) {
6142                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6143                                                      &tr->array_buffer, cpu);
6144                 if (r < 0) {
6145                         /*
6146                          * AARGH! We are left with different
6147                          * size max buffer!!!!
6148                          * The max buffer is our "snapshot" buffer.
6149                          * When a tracer needs a snapshot (one of the
6150                          * latency tracers), it swaps the max buffer
6151                          * with the saved snap shot. We succeeded to
6152                          * with the saved snapshot. We succeeded in
6153                          * updating the size of the main buffer, but failed to
6154                          * to reset the main buffer to the original size, we
6155                          * failed there too. This is very unlikely to
6156                          * happen, but if it does, warn and kill all
6157                          * tracing.
6158                          */
6159                         WARN_ON(1);
6160                         tracing_disabled = 1;
6161                 }
6162                 return ret;
6163         }
6164
6165         if (cpu == RING_BUFFER_ALL_CPUS)
6166                 set_buffer_entries(&tr->max_buffer, size);
6167         else
6168                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6169
6170  out:
6171 #endif /* CONFIG_TRACER_MAX_TRACE */
6172
6173         if (cpu == RING_BUFFER_ALL_CPUS)
6174                 set_buffer_entries(&tr->array_buffer, size);
6175         else
6176                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6177
6178         return ret;
6179 }
6180
6181 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6182                                   unsigned long size, int cpu_id)
6183 {
6184         int ret;
6185
6186         mutex_lock(&trace_types_lock);
6187
6188         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6189                 /* make sure this cpu is enabled in the mask */
6190                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6191                         ret = -EINVAL;
6192                         goto out;
6193                 }
6194         }
6195
6196         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6197         if (ret < 0)
6198                 ret = -ENOMEM;
6199
6200 out:
6201         mutex_unlock(&trace_types_lock);
6202
6203         return ret;
6204 }
6205
6206
6207 /**
6208  * tracing_update_buffers - used by tracing facility to expand ring buffers
6209  *
6210  * To save memory when tracing is configured in but never used, the ring
6211  * buffers are set to a minimum size at boot. Once a user starts to use
6212  * the tracing facility, however, they need to grow to their default
6213  * size.
6214  *
6215  * This function is to be called when a tracer is about to be used.
6216  */
6217 int tracing_update_buffers(void)
6218 {
6219         int ret = 0;
6220
6221         mutex_lock(&trace_types_lock);
6222         if (!ring_buffer_expanded)
6223                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6224                                                 RING_BUFFER_ALL_CPUS);
6225         mutex_unlock(&trace_types_lock);
6226
6227         return ret;
6228 }
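/*
 * In-kernel users call tracing_update_buffers() before they start using
 * the ring buffer, so that the first real use expands the buffers from
 * the boot-time minimum to trace_buf_size (see for example
 * tracing_snapshot_write() below, which calls it before allocating a
 * snapshot buffer).
 */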
6229
6230 struct trace_option_dentry;
6231
6232 static void
6233 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6234
6235 /*
6236  * Used to clear out the tracer before deletion of an instance.
6237  * Must have trace_types_lock held.
6238  */
6239 static void tracing_set_nop(struct trace_array *tr)
6240 {
6241         if (tr->current_trace == &nop_trace)
6242                 return;
6243
6244         tr->current_trace->enabled--;
6245
6246         if (tr->current_trace->reset)
6247                 tr->current_trace->reset(tr);
6248
6249         tr->current_trace = &nop_trace;
6250 }
6251
6252 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6253 {
6254         /* Only enable if the directory has been created already. */
6255         if (!tr->dir)
6256                 return;
6257
6258         create_trace_option_files(tr, t);
6259 }
6260
6261 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6262 {
6263         struct tracer *t;
6264 #ifdef CONFIG_TRACER_MAX_TRACE
6265         bool had_max_tr;
6266 #endif
6267         int ret = 0;
6268
6269         mutex_lock(&trace_types_lock);
6270
6271         if (!ring_buffer_expanded) {
6272                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6273                                                 RING_BUFFER_ALL_CPUS);
6274                 if (ret < 0)
6275                         goto out;
6276                 ret = 0;
6277         }
6278
6279         for (t = trace_types; t; t = t->next) {
6280                 if (strcmp(t->name, buf) == 0)
6281                         break;
6282         }
6283         if (!t) {
6284                 ret = -EINVAL;
6285                 goto out;
6286         }
6287         if (t == tr->current_trace)
6288                 goto out;
6289
6290 #ifdef CONFIG_TRACER_SNAPSHOT
6291         if (t->use_max_tr) {
6292                 arch_spin_lock(&tr->max_lock);
6293                 if (tr->cond_snapshot)
6294                         ret = -EBUSY;
6295                 arch_spin_unlock(&tr->max_lock);
6296                 if (ret)
6297                         goto out;
6298         }
6299 #endif
6300         /* Some tracers won't work on kernel command line */
6301         if (system_state < SYSTEM_RUNNING && t->noboot) {
6302                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6303                         t->name);
6304                 goto out;
6305         }
6306
6307         /* Some tracers are only allowed for the top level buffer */
6308         if (!trace_ok_for_array(t, tr)) {
6309                 ret = -EINVAL;
6310                 goto out;
6311         }
6312
6313         /* If trace pipe files are being read, we can't change the tracer */
6314         if (tr->trace_ref) {
6315                 ret = -EBUSY;
6316                 goto out;
6317         }
6318
6319         trace_branch_disable();
6320
6321         tr->current_trace->enabled--;
6322
6323         if (tr->current_trace->reset)
6324                 tr->current_trace->reset(tr);
6325
6326         /* Current trace needs to be nop_trace before synchronize_rcu */
6327         tr->current_trace = &nop_trace;
6328
6329 #ifdef CONFIG_TRACER_MAX_TRACE
6330         had_max_tr = tr->allocated_snapshot;
6331
6332         if (had_max_tr && !t->use_max_tr) {
6333                 /*
6334                  * We need to make sure that update_max_tr() sees that
6335                  * current_trace changed to nop_trace to keep it from
6336                  * swapping the buffers after we resize it.
6337                  * update_max_tr() is called with interrupts disabled,
6338                  * so a synchronize_rcu() is sufficient.
6339                  */
6340                 synchronize_rcu();
6341                 free_snapshot(tr);
6342         }
6343 #endif
6344
6345 #ifdef CONFIG_TRACER_MAX_TRACE
6346         if (t->use_max_tr && !had_max_tr) {
6347                 ret = tracing_alloc_snapshot_instance(tr);
6348                 if (ret < 0)
6349                         goto out;
6350         }
6351 #endif
6352
6353         if (t->init) {
6354                 ret = tracer_init(t, tr);
6355                 if (ret)
6356                         goto out;
6357         }
6358
6359         tr->current_trace = t;
6360         tr->current_trace->enabled++;
6361         trace_branch_enable(tr);
6362  out:
6363         mutex_unlock(&trace_types_lock);
6364
6365         return ret;
6366 }
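/*
 * tracing_set_trace_write() below is the ->write handler behind the
 * "current_tracer" tracefs file, so switching tracers from userspace,
 * e.g.:
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *
 * ends up in tracing_set_tracer() above.
 */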
6367
6368 static ssize_t
6369 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6370                         size_t cnt, loff_t *ppos)
6371 {
6372         struct trace_array *tr = filp->private_data;
6373         char buf[MAX_TRACER_SIZE+1];
6374         int i;
6375         size_t ret;
6376         int err;
6377
6378         ret = cnt;
6379
6380         if (cnt > MAX_TRACER_SIZE)
6381                 cnt = MAX_TRACER_SIZE;
6382
6383         if (copy_from_user(buf, ubuf, cnt))
6384                 return -EFAULT;
6385
6386         buf[cnt] = 0;
6387
6388         /* strip trailing whitespace. */
6389         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6390                 buf[i] = 0;
6391
6392         err = tracing_set_tracer(tr, buf);
6393         if (err)
6394                 return err;
6395
6396         *ppos += ret;
6397
6398         return ret;
6399 }
6400
6401 static ssize_t
6402 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6403                    size_t cnt, loff_t *ppos)
6404 {
6405         char buf[64];
6406         int r;
6407
6408         r = snprintf(buf, sizeof(buf), "%ld\n",
6409                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6410         if (r > sizeof(buf))
6411                 r = sizeof(buf);
6412         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6413 }
6414
6415 static ssize_t
6416 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6417                     size_t cnt, loff_t *ppos)
6418 {
6419         unsigned long val;
6420         int ret;
6421
6422         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6423         if (ret)
6424                 return ret;
6425
6426         *ptr = val * 1000;
6427
6428         return cnt;
6429 }
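/*
 * The tracing_nsecs_*() helpers expose values in microseconds to
 * userspace while storing them in nanoseconds internally: the read side
 * converts with nsecs_to_usecs() (printing -1 when the value is
 * (unsigned long)-1), and the write side multiplies by 1000. For
 * example, writing "100" to a file backed by these helpers (such as
 * tracing_thresh below) stores 100000 ns.
 */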
6430
6431 static ssize_t
6432 tracing_thresh_read(struct file *filp, char __user *ubuf,
6433                     size_t cnt, loff_t *ppos)
6434 {
6435         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6436 }
6437
6438 static ssize_t
6439 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6440                      size_t cnt, loff_t *ppos)
6441 {
6442         struct trace_array *tr = filp->private_data;
6443         int ret;
6444
6445         mutex_lock(&trace_types_lock);
6446         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6447         if (ret < 0)
6448                 goto out;
6449
6450         if (tr->current_trace->update_thresh) {
6451                 ret = tr->current_trace->update_thresh(tr);
6452                 if (ret < 0)
6453                         goto out;
6454         }
6455
6456         ret = cnt;
6457 out:
6458         mutex_unlock(&trace_types_lock);
6459
6460         return ret;
6461 }
6462
6463 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6464
6465 static ssize_t
6466 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6467                      size_t cnt, loff_t *ppos)
6468 {
6469         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6470 }
6471
6472 static ssize_t
6473 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6474                       size_t cnt, loff_t *ppos)
6475 {
6476         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6477 }
6478
6479 #endif
6480
6481 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6482 {
6483         struct trace_array *tr = inode->i_private;
6484         struct trace_iterator *iter;
6485         int ret;
6486
6487         ret = tracing_check_open_get_tr(tr);
6488         if (ret)
6489                 return ret;
6490
6491         mutex_lock(&trace_types_lock);
6492
6493         /* create a buffer to store the information to pass to userspace */
6494         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6495         if (!iter) {
6496                 ret = -ENOMEM;
6497                 __trace_array_put(tr);
6498                 goto out;
6499         }
6500
6501         trace_seq_init(&iter->seq);
6502         iter->trace = tr->current_trace;
6503
6504         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6505                 ret = -ENOMEM;
6506                 goto fail;
6507         }
6508
6509         /* trace pipe does not show start of buffer */
6510         cpumask_setall(iter->started);
6511
6512         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6513                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6514
6515         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6516         if (trace_clocks[tr->clock_id].in_ns)
6517                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6518
6519         iter->tr = tr;
6520         iter->array_buffer = &tr->array_buffer;
6521         iter->cpu_file = tracing_get_cpu(inode);
6522         mutex_init(&iter->mutex);
6523         filp->private_data = iter;
6524
6525         if (iter->trace->pipe_open)
6526                 iter->trace->pipe_open(iter);
6527
6528         nonseekable_open(inode, filp);
6529
6530         tr->trace_ref++;
6531 out:
6532         mutex_unlock(&trace_types_lock);
6533         return ret;
6534
6535 fail:
6536         kfree(iter);
6537         __trace_array_put(tr);
6538         mutex_unlock(&trace_types_lock);
6539         return ret;
6540 }
6541
6542 static int tracing_release_pipe(struct inode *inode, struct file *file)
6543 {
6544         struct trace_iterator *iter = file->private_data;
6545         struct trace_array *tr = inode->i_private;
6546
6547         mutex_lock(&trace_types_lock);
6548
6549         tr->trace_ref--;
6550
6551         if (iter->trace->pipe_close)
6552                 iter->trace->pipe_close(iter);
6553
6554         mutex_unlock(&trace_types_lock);
6555
6556         free_cpumask_var(iter->started);
6557         mutex_destroy(&iter->mutex);
6558         kfree(iter);
6559
6560         trace_array_put(tr);
6561
6562         return 0;
6563 }
6564
6565 static __poll_t
6566 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6567 {
6568         struct trace_array *tr = iter->tr;
6569
6570         /* Iterators are static, they should be filled or empty */
6571         if (trace_buffer_iter(iter, iter->cpu_file))
6572                 return EPOLLIN | EPOLLRDNORM;
6573
6574         if (tr->trace_flags & TRACE_ITER_BLOCK)
6575                 /*
6576                  * Always select as readable when in blocking mode
6577                  */
6578                 return EPOLLIN | EPOLLRDNORM;
6579         else
6580                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6581                                              filp, poll_table);
6582 }
6583
6584 static __poll_t
6585 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6586 {
6587         struct trace_iterator *iter = filp->private_data;
6588
6589         return trace_poll(iter, filp, poll_table);
6590 }
6591
6592 /* Must be called with iter->mutex held. */
6593 static int tracing_wait_pipe(struct file *filp)
6594 {
6595         struct trace_iterator *iter = filp->private_data;
6596         int ret;
6597
6598         while (trace_empty(iter)) {
6599
6600                 if ((filp->f_flags & O_NONBLOCK)) {
6601                         return -EAGAIN;
6602                 }
6603
6604                 /*
6605                  * Block until we have read something and tracing has been
6606                  * disabled. If tracing is disabled but we have never read
6607                  * anything, keep blocking: this allows a user to cat this
6608                  * file and then enable tracing. But after we have read
6609                  * something, we give an EOF when tracing is disabled again.
6610                  *
6611                  * iter->pos will be 0 if we haven't read anything.
6612                  */
6613                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6614                         break;
6615
6616                 mutex_unlock(&iter->mutex);
6617
6618                 ret = wait_on_pipe(iter, 0);
6619
6620                 mutex_lock(&iter->mutex);
6621
6622                 if (ret)
6623                         return ret;
6624         }
6625
6626         return 1;
6627 }
6628
6629 /*
6630  * Consumer reader.
6631  */
6632 static ssize_t
6633 tracing_read_pipe(struct file *filp, char __user *ubuf,
6634                   size_t cnt, loff_t *ppos)
6635 {
6636         struct trace_iterator *iter = filp->private_data;
6637         ssize_t sret;
6638
6639         /*
6640          * Avoid more than one consumer on a single file descriptor
6641          * This is just a matter of traces coherency, the ring buffer itself
6642          * This is just a matter of trace coherency; the ring buffer itself
6643          */
6644         mutex_lock(&iter->mutex);
6645
6646         /* return any leftover data */
6647         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6648         if (sret != -EBUSY)
6649                 goto out;
6650
6651         trace_seq_init(&iter->seq);
6652
6653         if (iter->trace->read) {
6654                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6655                 if (sret)
6656                         goto out;
6657         }
6658
6659 waitagain:
6660         sret = tracing_wait_pipe(filp);
6661         if (sret <= 0)
6662                 goto out;
6663
6664         /* stop when tracing is finished */
6665         if (trace_empty(iter)) {
6666                 sret = 0;
6667                 goto out;
6668         }
6669
6670         if (cnt >= PAGE_SIZE)
6671                 cnt = PAGE_SIZE - 1;
6672
6673         /* reset all but tr, trace, and overruns */
6674         memset(&iter->seq, 0,
6675                sizeof(struct trace_iterator) -
6676                offsetof(struct trace_iterator, seq));
6677         cpumask_clear(iter->started);
6678         trace_seq_init(&iter->seq);
6679         iter->pos = -1;
6680
6681         trace_event_read_lock();
6682         trace_access_lock(iter->cpu_file);
6683         while (trace_find_next_entry_inc(iter) != NULL) {
6684                 enum print_line_t ret;
6685                 int save_len = iter->seq.seq.len;
6686
6687                 ret = print_trace_line(iter);
6688                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6689                         /* don't print partial lines */
6690                         iter->seq.seq.len = save_len;
6691                         break;
6692                 }
6693                 if (ret != TRACE_TYPE_NO_CONSUME)
6694                         trace_consume(iter);
6695
6696                 if (trace_seq_used(&iter->seq) >= cnt)
6697                         break;
6698
6699                 /*
6700                  * Setting the full flag means we reached the trace_seq buffer
6701                  * size, and we should have left via the partial-line condition above.
6702                  * One of the trace_seq_* functions is not used properly.
6703                  */
6704                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6705                           iter->ent->type);
6706         }
6707         trace_access_unlock(iter->cpu_file);
6708         trace_event_read_unlock();
6709
6710         /* Now copy what we have to the user */
6711         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6712         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6713                 trace_seq_init(&iter->seq);
6714
6715         /*
6716          * If there was nothing to send to user, in spite of consuming trace
6717          * entries, go back to wait for more entries.
6718          */
6719         if (sret == -EBUSY)
6720                 goto waitagain;
6721
6722 out:
6723         mutex_unlock(&iter->mutex);
6724
6725         return sret;
6726 }
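/*
 * tracing_read_pipe() implements the "trace_pipe" file: reads are
 * consuming (each entry handed to userspace is removed from the ring
 * buffer via trace_consume()) and, unless the file was opened with
 * O_NONBLOCK, a read blocks in tracing_wait_pipe() while the buffer is
 * empty.
 */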
6727
6728 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6729                                      unsigned int idx)
6730 {
6731         __free_page(spd->pages[idx]);
6732 }
6733
6734 static size_t
6735 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6736 {
6737         size_t count;
6738         int save_len;
6739         int ret;
6740
6741         /* Seq buffer is page-sized, exactly what we need. */
6742         for (;;) {
6743                 save_len = iter->seq.seq.len;
6744                 ret = print_trace_line(iter);
6745
6746                 if (trace_seq_has_overflowed(&iter->seq)) {
6747                         iter->seq.seq.len = save_len;
6748                         break;
6749                 }
6750
6751                 /*
6752                  * This should not be hit, because a partial line should only
6753                  * be returned if iter->seq overflowed, which is checked
6754                  * above. But check it anyway to be safe.
6755                  */
6756                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6757                         iter->seq.seq.len = save_len;
6758                         break;
6759                 }
6760
6761                 count = trace_seq_used(&iter->seq) - save_len;
6762                 if (rem < count) {
6763                         rem = 0;
6764                         iter->seq.seq.len = save_len;
6765                         break;
6766                 }
6767
6768                 if (ret != TRACE_TYPE_NO_CONSUME)
6769                         trace_consume(iter);
6770                 rem -= count;
6771                 if (!trace_find_next_entry_inc(iter))   {
6772                         rem = 0;
6773                         iter->ent = NULL;
6774                         break;
6775                 }
6776         }
6777
6778         return rem;
6779 }
6780
6781 static ssize_t tracing_splice_read_pipe(struct file *filp,
6782                                         loff_t *ppos,
6783                                         struct pipe_inode_info *pipe,
6784                                         size_t len,
6785                                         unsigned int flags)
6786 {
6787         struct page *pages_def[PIPE_DEF_BUFFERS];
6788         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6789         struct trace_iterator *iter = filp->private_data;
6790         struct splice_pipe_desc spd = {
6791                 .pages          = pages_def,
6792                 .partial        = partial_def,
6793                 .nr_pages       = 0, /* This gets updated below. */
6794                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6795                 .ops            = &default_pipe_buf_ops,
6796                 .spd_release    = tracing_spd_release_pipe,
6797         };
6798         ssize_t ret;
6799         size_t rem;
6800         unsigned int i;
6801
6802         if (splice_grow_spd(pipe, &spd))
6803                 return -ENOMEM;
6804
6805         mutex_lock(&iter->mutex);
6806
6807         if (iter->trace->splice_read) {
6808                 ret = iter->trace->splice_read(iter, filp,
6809                                                ppos, pipe, len, flags);
6810                 if (ret)
6811                         goto out_err;
6812         }
6813
6814         ret = tracing_wait_pipe(filp);
6815         if (ret <= 0)
6816                 goto out_err;
6817
6818         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6819                 ret = -EFAULT;
6820                 goto out_err;
6821         }
6822
6823         trace_event_read_lock();
6824         trace_access_lock(iter->cpu_file);
6825
6826         /* Fill as many pages as possible. */
6827         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6828                 spd.pages[i] = alloc_page(GFP_KERNEL);
6829                 if (!spd.pages[i])
6830                         break;
6831
6832                 rem = tracing_fill_pipe_page(rem, iter);
6833
6834                 /* Copy the data into the page, so we can start over. */
6835                 ret = trace_seq_to_buffer(&iter->seq,
6836                                           page_address(spd.pages[i]),
6837                                           trace_seq_used(&iter->seq));
6838                 if (ret < 0) {
6839                         __free_page(spd.pages[i]);
6840                         break;
6841                 }
6842                 spd.partial[i].offset = 0;
6843                 spd.partial[i].len = trace_seq_used(&iter->seq);
6844
6845                 trace_seq_init(&iter->seq);
6846         }
6847
6848         trace_access_unlock(iter->cpu_file);
6849         trace_event_read_unlock();
6850         mutex_unlock(&iter->mutex);
6851
6852         spd.nr_pages = i;
6853
6854         if (i)
6855                 ret = splice_to_pipe(pipe, &spd);
6856         else
6857                 ret = 0;
6858 out:
6859         splice_shrink_spd(&spd);
6860         return ret;
6861
6862 out_err:
6863         mutex_unlock(&iter->mutex);
6864         goto out;
6865 }
6866
6867 static ssize_t
6868 tracing_entries_read(struct file *filp, char __user *ubuf,
6869                      size_t cnt, loff_t *ppos)
6870 {
6871         struct inode *inode = file_inode(filp);
6872         struct trace_array *tr = inode->i_private;
6873         int cpu = tracing_get_cpu(inode);
6874         char buf[64];
6875         int r = 0;
6876         ssize_t ret;
6877
6878         mutex_lock(&trace_types_lock);
6879
6880         if (cpu == RING_BUFFER_ALL_CPUS) {
6881                 int cpu, buf_size_same;
6882                 unsigned long size;
6883
6884                 size = 0;
6885                 buf_size_same = 1;
6886                 /* check if all cpu sizes are same */
6887                 for_each_tracing_cpu(cpu) {
6888                         /* fill in the size from first enabled cpu */
6889                         if (size == 0)
6890                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6891                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6892                                 buf_size_same = 0;
6893                                 break;
6894                         }
6895                 }
6896
6897                 if (buf_size_same) {
6898                         if (!ring_buffer_expanded)
6899                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6900                                             size >> 10,
6901                                             trace_buf_size >> 10);
6902                         else
6903                                 r = sprintf(buf, "%lu\n", size >> 10);
6904                 } else
6905                         r = sprintf(buf, "X\n");
6906         } else
6907                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6908
6909         mutex_unlock(&trace_types_lock);
6910
6911         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6912         return ret;
6913 }
6914
6915 static ssize_t
6916 tracing_entries_write(struct file *filp, const char __user *ubuf,
6917                       size_t cnt, loff_t *ppos)
6918 {
6919         struct inode *inode = file_inode(filp);
6920         struct trace_array *tr = inode->i_private;
6921         unsigned long val;
6922         int ret;
6923
6924         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6925         if (ret)
6926                 return ret;
6927
6928         /* must have at least 1 entry */
6929         if (!val)
6930                 return -EINVAL;
6931
6932         /* value is in KB */
6933         val <<= 10;
6934         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6935         if (ret < 0)
6936                 return ret;
6937
6938         *ppos += cnt;
6939
6940         return cnt;
6941 }
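/*
 * tracing_entries_write() backs the "buffer_size_kb" file. The value is
 * taken in kilobytes per CPU: writing "4096" requests a 4096 KB buffer
 * for each CPU (or for a single CPU when written through the per_cpu
 * directory), since the value is shifted left by 10 to convert to bytes
 * before calling tracing_resize_ring_buffer().
 */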
6942
6943 static ssize_t
6944 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6945                                 size_t cnt, loff_t *ppos)
6946 {
6947         struct trace_array *tr = filp->private_data;
6948         char buf[64];
6949         int r, cpu;
6950         unsigned long size = 0, expanded_size = 0;
6951
6952         mutex_lock(&trace_types_lock);
6953         for_each_tracing_cpu(cpu) {
6954                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6955                 if (!ring_buffer_expanded)
6956                         expanded_size += trace_buf_size >> 10;
6957         }
6958         if (ring_buffer_expanded)
6959                 r = sprintf(buf, "%lu\n", size);
6960         else
6961                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6962         mutex_unlock(&trace_types_lock);
6963
6964         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6965 }
6966
6967 static ssize_t
6968 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6969                           size_t cnt, loff_t *ppos)
6970 {
6971         /*
6972          * There is no need to read what the user has written; this function
6973          * only exists so that "echo" into this file does not return an error.
6974          */
6975
6976         *ppos += cnt;
6977
6978         return cnt;
6979 }
6980
6981 static int
6982 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6983 {
6984         struct trace_array *tr = inode->i_private;
6985
6986         /* disable tracing ? */
6987         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6988                 tracer_tracing_off(tr);
6989         /* resize the ring buffer to 0 */
6990         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6991
6992         trace_array_put(tr);
6993
6994         return 0;
6995 }
6996
6997 static ssize_t
6998 tracing_mark_write(struct file *filp, const char __user *ubuf,
6999                                         size_t cnt, loff_t *fpos)
7000 {
7001         struct trace_array *tr = filp->private_data;
7002         struct ring_buffer_event *event;
7003         enum event_trigger_type tt = ETT_NONE;
7004         struct trace_buffer *buffer;
7005         struct print_entry *entry;
7006         ssize_t written;
7007         int size;
7008         int len;
7009
7010 /* Used in tracing_mark_raw_write() as well */
7011 #define FAULTED_STR "<faulted>"
7012 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7013
7014         if (tracing_disabled)
7015                 return -EINVAL;
7016
7017         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7018                 return -EINVAL;
7019
7020         if (cnt > TRACE_BUF_SIZE)
7021                 cnt = TRACE_BUF_SIZE;
7022
7023         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7024
7025         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7026
7027         /* If less than "<faulted>", then make sure we can still add that */
7028         if (cnt < FAULTED_SIZE)
7029                 size += FAULTED_SIZE - cnt;
7030
7031         buffer = tr->array_buffer.buffer;
7032         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7033                                             tracing_gen_ctx());
7034         if (unlikely(!event))
7035                 /* Ring buffer disabled, return as if not open for write */
7036                 return -EBADF;
7037
7038         entry = ring_buffer_event_data(event);
7039         entry->ip = _THIS_IP_;
7040
7041         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7042         if (len) {
7043                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7044                 cnt = FAULTED_SIZE;
7045                 written = -EFAULT;
7046         } else
7047                 written = cnt;
7048
7049         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7050                 /* do not add \n before testing triggers, but add \0 */
7051                 entry->buf[cnt] = '\0';
7052                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7053         }
7054
7055         if (entry->buf[cnt - 1] != '\n') {
7056                 entry->buf[cnt] = '\n';
7057                 entry->buf[cnt + 1] = '\0';
7058         } else
7059                 entry->buf[cnt] = '\0';
7060
7061         if (static_branch_unlikely(&trace_marker_exports_enabled))
7062                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7063         __buffer_unlock_commit(buffer, event);
7064
7065         if (tt)
7066                 event_triggers_post_call(tr->trace_marker_file, tt);
7067
7068         if (written > 0)
7069                 *fpos += written;
7070
7071         return written;
7072 }
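/*
 * tracing_mark_write() backs the "trace_marker" file: for example,
 *
 *	echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * injects a TRACE_PRINT event into the ring buffer. The copy from
 * userspace uses __copy_from_user_inatomic() because the ring buffer
 * event is already reserved; if the copy fails, the literal string
 * "<faulted>" is recorded instead and -EFAULT is returned.
 */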
7073
7074 /* Limit it for now to 3K (including tag) */
7075 #define RAW_DATA_MAX_SIZE (1024*3)
7076
7077 static ssize_t
7078 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7079                                         size_t cnt, loff_t *fpos)
7080 {
7081         struct trace_array *tr = filp->private_data;
7082         struct ring_buffer_event *event;
7083         struct trace_buffer *buffer;
7084         struct raw_data_entry *entry;
7085         ssize_t written;
7086         int size;
7087         int len;
7088
7089 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7090
7091         if (tracing_disabled)
7092                 return -EINVAL;
7093
7094         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7095                 return -EINVAL;
7096
7097         /* The marker must at least have a tag id */
7098         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7099                 return -EINVAL;
7100
7101         if (cnt > TRACE_BUF_SIZE)
7102                 cnt = TRACE_BUF_SIZE;
7103
7104         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7105
7106         size = sizeof(*entry) + cnt;
7107         if (cnt < FAULT_SIZE_ID)
7108                 size += FAULT_SIZE_ID - cnt;
7109
7110         buffer = tr->array_buffer.buffer;
7111         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7112                                             tracing_gen_ctx());
7113         if (!event)
7114                 /* Ring buffer disabled, return as if not open for write */
7115                 return -EBADF;
7116
7117         entry = ring_buffer_event_data(event);
7118
7119         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7120         if (len) {
7121                 entry->id = -1;
7122                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7123                 written = -EFAULT;
7124         } else
7125                 written = cnt;
7126
7127         __buffer_unlock_commit(buffer, event);
7128
7129         if (written > 0)
7130                 *fpos += written;
7131
7132         return written;
7133 }
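/*
 * tracing_mark_raw_write() backs the "trace_marker_raw" file. The
 * payload must start with at least a sizeof(unsigned int) tag id
 * followed by raw binary data, e.g. (illustrative userspace sketch,
 * not part of this file):
 *
 *	struct { unsigned int id; char data[8]; } raw = { .id = 123 };
 *	write(fd, &raw, sizeof(raw));
 *
 * A faulted copy records the id as -1 followed by "<faulted>".
 */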
7134
7135 static int tracing_clock_show(struct seq_file *m, void *v)
7136 {
7137         struct trace_array *tr = m->private;
7138         int i;
7139
7140         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7141                 seq_printf(m,
7142                         "%s%s%s%s", i ? " " : "",
7143                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7144                         i == tr->clock_id ? "]" : "");
7145         seq_putc(m, '\n');
7146
7147         return 0;
7148 }
7149
7150 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7151 {
7152         int i;
7153
7154         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7155                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7156                         break;
7157         }
7158         if (i == ARRAY_SIZE(trace_clocks))
7159                 return -EINVAL;
7160
7161         mutex_lock(&trace_types_lock);
7162
7163         tr->clock_id = i;
7164
7165         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7166
7167         /*
7168          * New clock may not be consistent with the previous clock.
7169          * Reset the buffer so that it doesn't have incomparable timestamps.
7170          */
7171         tracing_reset_online_cpus(&tr->array_buffer);
7172
7173 #ifdef CONFIG_TRACER_MAX_TRACE
7174         if (tr->max_buffer.buffer)
7175                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7176         tracing_reset_online_cpus(&tr->max_buffer);
7177 #endif
7178
7179         mutex_unlock(&trace_types_lock);
7180
7181         return 0;
7182 }
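/*
 * The clock used for time stamps is selected through the "trace_clock"
 * file: reading it lists the available clocks with the current one in
 * brackets (tracing_clock_show() above), and writing a name, e.g.
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * switches to it. Note that tracing_set_clock() resets the buffers,
 * since time stamps taken with different clocks cannot be compared.
 */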
7183
7184 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7185                                    size_t cnt, loff_t *fpos)
7186 {
7187         struct seq_file *m = filp->private_data;
7188         struct trace_array *tr = m->private;
7189         char buf[64];
7190         const char *clockstr;
7191         int ret;
7192
7193         if (cnt >= sizeof(buf))
7194                 return -EINVAL;
7195
7196         if (copy_from_user(buf, ubuf, cnt))
7197                 return -EFAULT;
7198
7199         buf[cnt] = 0;
7200
7201         clockstr = strstrip(buf);
7202
7203         ret = tracing_set_clock(tr, clockstr);
7204         if (ret)
7205                 return ret;
7206
7207         *fpos += cnt;
7208
7209         return cnt;
7210 }
7211
7212 static int tracing_clock_open(struct inode *inode, struct file *file)
7213 {
7214         struct trace_array *tr = inode->i_private;
7215         int ret;
7216
7217         ret = tracing_check_open_get_tr(tr);
7218         if (ret)
7219                 return ret;
7220
7221         ret = single_open(file, tracing_clock_show, inode->i_private);
7222         if (ret < 0)
7223                 trace_array_put(tr);
7224
7225         return ret;
7226 }
7227
7228 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7229 {
7230         struct trace_array *tr = m->private;
7231
7232         mutex_lock(&trace_types_lock);
7233
7234         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7235                 seq_puts(m, "delta [absolute]\n");
7236         else
7237                 seq_puts(m, "[delta] absolute\n");
7238
7239         mutex_unlock(&trace_types_lock);
7240
7241         return 0;
7242 }
7243
7244 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7245 {
7246         struct trace_array *tr = inode->i_private;
7247         int ret;
7248
7249         ret = tracing_check_open_get_tr(tr);
7250         if (ret)
7251                 return ret;
7252
7253         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7254         if (ret < 0)
7255                 trace_array_put(tr);
7256
7257         return ret;
7258 }
7259
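/*
 * tracing_event_time_stamp - return the time stamp of a trace event
 * @buffer: the trace_buffer the event was (or will be) written to
 * @rbe: the ring buffer event
 *
 * If @rbe is this CPU's trace_buffered_event (the temporary buffer used
 * while filtering), the event has not been committed to the ring buffer
 * yet and carries no usable time stamp, so the current time stamp of
 * @buffer is returned instead of the one recorded in the event.
 */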
7260 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7261 {
7262         if (rbe == this_cpu_read(trace_buffered_event))
7263                 return ring_buffer_time_stamp(buffer);
7264
7265         return ring_buffer_event_time_stamp(buffer, rbe);
7266 }
7267
7268 /*
7269  * Set or disable use of the per CPU trace_buffered_event when possible.
7270  */
7271 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7272 {
7273         int ret = 0;
7274
7275         mutex_lock(&trace_types_lock);
7276
7277         if (set && tr->no_filter_buffering_ref++)
7278                 goto out;
7279
7280         if (!set) {
7281                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7282                         ret = -EINVAL;
7283                         goto out;
7284                 }
7285
7286                 --tr->no_filter_buffering_ref;
7287         }
7288  out:
7289         mutex_unlock(&trace_types_lock);
7290
7291         return ret;
7292 }
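/*
 * The filtering temp buffer use case: when an event has a filter
 * attached, the event is normally written first into the per CPU
 * trace_buffered_event and is only copied into the ring buffer if it
 * passes the filter. A user that needs events to be written directly
 * into the ring buffer (for example, one that reads the committed time
 * stamp via tracing_event_time_stamp() above) bumps
 * no_filter_buffering_ref to disable that behavior while it is active.
 * The reference is counted, so users simply pair the calls, roughly
 * (illustrative sketch only):
 *
 *	ret = tracing_set_filter_buffering(tr, true);
 *	if (ret)
 *		return ret;
 *	...
 *	tracing_set_filter_buffering(tr, false);
 */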
7293
7294 struct ftrace_buffer_info {
7295         struct trace_iterator   iter;
7296         void                    *spare;
7297         unsigned int            spare_cpu;
7298         unsigned int            read;
7299 };
7300
7301 #ifdef CONFIG_TRACER_SNAPSHOT
7302 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7303 {
7304         struct trace_array *tr = inode->i_private;
7305         struct trace_iterator *iter;
7306         struct seq_file *m;
7307         int ret;
7308
7309         ret = tracing_check_open_get_tr(tr);
7310         if (ret)
7311                 return ret;
7312
7313         if (file->f_mode & FMODE_READ) {
7314                 iter = __tracing_open(inode, file, true);
7315                 if (IS_ERR(iter))
7316                         ret = PTR_ERR(iter);
7317         } else {
7318                 /* Writes still need the seq_file to hold the private data */
7319                 ret = -ENOMEM;
7320                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7321                 if (!m)
7322                         goto out;
7323                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7324                 if (!iter) {
7325                         kfree(m);
7326                         goto out;
7327                 }
7328                 ret = 0;
7329
7330                 iter->tr = tr;
7331                 iter->array_buffer = &tr->max_buffer;
7332                 iter->cpu_file = tracing_get_cpu(inode);
7333                 m->private = iter;
7334                 file->private_data = m;
7335         }
7336 out:
7337         if (ret < 0)
7338                 trace_array_put(tr);
7339
7340         return ret;
7341 }
7342
7343 static ssize_t
7344 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7345                        loff_t *ppos)
7346 {
7347         struct seq_file *m = filp->private_data;
7348         struct trace_iterator *iter = m->private;
7349         struct trace_array *tr = iter->tr;
7350         unsigned long val;
7351         int ret;
7352
7353         ret = tracing_update_buffers();
7354         if (ret < 0)
7355                 return ret;
7356
7357         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7358         if (ret)
7359                 return ret;
7360
7361         mutex_lock(&trace_types_lock);
7362
7363         if (tr->current_trace->use_max_tr) {
7364                 ret = -EBUSY;
7365                 goto out;
7366         }
7367
7368         arch_spin_lock(&tr->max_lock);
7369         if (tr->cond_snapshot)
7370                 ret = -EBUSY;
7371         arch_spin_unlock(&tr->max_lock);
7372         if (ret)
7373                 goto out;
7374
7375         switch (val) {
7376         case 0:
7377                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7378                         ret = -EINVAL;
7379                         break;
7380                 }
7381                 if (tr->allocated_snapshot)
7382                         free_snapshot(tr);
7383                 break;
7384         case 1:
7385 /* Only allow per-cpu swap if the ring buffer supports it */
7386 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7387                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7388                         ret = -EINVAL;
7389                         break;
7390                 }
7391 #endif
7392                 if (tr->allocated_snapshot)
7393                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7394                                         &tr->array_buffer, iter->cpu_file);
7395                 else
7396                         ret = tracing_alloc_snapshot_instance(tr);
7397                 if (ret < 0)
7398                         break;
7399                 local_irq_disable();
7400                 /* Now, we're going to swap */
7401                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7402                         update_max_tr(tr, current, smp_processor_id(), NULL);
7403                 else
7404                         update_max_tr_single(tr, current, iter->cpu_file);
7405                 local_irq_enable();
7406                 break;
7407         default:
7408                 if (tr->allocated_snapshot) {
7409                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7410                                 tracing_reset_online_cpus(&tr->max_buffer);
7411                         else
7412                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7413                 }
7414                 break;
7415         }
7416
7417         if (ret >= 0) {
7418                 *ppos += cnt;
7419                 ret = cnt;
7420         }
7421 out:
7422         mutex_unlock(&trace_types_lock);
7423         return ret;
7424 }
7425
7426 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7427 {
7428         struct seq_file *m = file->private_data;
7429         int ret;
7430
7431         ret = tracing_release(inode, file);
7432
7433         if (file->f_mode & FMODE_READ)
7434                 return ret;
7435
7436         /* If write only, the seq_file is just a stub */
7437         if (m)
7438                 kfree(m->private);
7439         kfree(m);
7440
7441         return 0;
7442 }
7443
7444 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7445 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7446                                     size_t count, loff_t *ppos);
7447 static int tracing_buffers_release(struct inode *inode, struct file *file);
7448 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7449                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7450
7451 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7452 {
7453         struct ftrace_buffer_info *info;
7454         int ret;
7455
7456         /* The following checks for tracefs lockdown */
7457         ret = tracing_buffers_open(inode, filp);
7458         if (ret < 0)
7459                 return ret;
7460
7461         info = filp->private_data;
7462
7463         if (info->iter.trace->use_max_tr) {
7464                 tracing_buffers_release(inode, filp);
7465                 return -EBUSY;
7466         }
7467
7468         info->iter.snapshot = true;
7469         info->iter.array_buffer = &info->iter.tr->max_buffer;
7470
7471         return ret;
7472 }
7473
7474 #endif /* CONFIG_TRACER_SNAPSHOT */
7475
7476
7477 static const struct file_operations tracing_thresh_fops = {
7478         .open           = tracing_open_generic,
7479         .read           = tracing_thresh_read,
7480         .write          = tracing_thresh_write,
7481         .llseek         = generic_file_llseek,
7482 };
7483
7484 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7485 static const struct file_operations tracing_max_lat_fops = {
7486         .open           = tracing_open_generic,
7487         .read           = tracing_max_lat_read,
7488         .write          = tracing_max_lat_write,
7489         .llseek         = generic_file_llseek,
7490 };
7491 #endif
7492
7493 static const struct file_operations set_tracer_fops = {
7494         .open           = tracing_open_generic,
7495         .read           = tracing_set_trace_read,
7496         .write          = tracing_set_trace_write,
7497         .llseek         = generic_file_llseek,
7498 };
7499
7500 static const struct file_operations tracing_pipe_fops = {
7501         .open           = tracing_open_pipe,
7502         .poll           = tracing_poll_pipe,
7503         .read           = tracing_read_pipe,
7504         .splice_read    = tracing_splice_read_pipe,
7505         .release        = tracing_release_pipe,
7506         .llseek         = no_llseek,
7507 };
7508
7509 static const struct file_operations tracing_entries_fops = {
7510         .open           = tracing_open_generic_tr,
7511         .read           = tracing_entries_read,
7512         .write          = tracing_entries_write,
7513         .llseek         = generic_file_llseek,
7514         .release        = tracing_release_generic_tr,
7515 };
7516
7517 static const struct file_operations tracing_total_entries_fops = {
7518         .open           = tracing_open_generic_tr,
7519         .read           = tracing_total_entries_read,
7520         .llseek         = generic_file_llseek,
7521         .release        = tracing_release_generic_tr,
7522 };
7523
7524 static const struct file_operations tracing_free_buffer_fops = {
7525         .open           = tracing_open_generic_tr,
7526         .write          = tracing_free_buffer_write,
7527         .release        = tracing_free_buffer_release,
7528 };
7529
7530 static const struct file_operations tracing_mark_fops = {
7531         .open           = tracing_open_generic_tr,
7532         .write          = tracing_mark_write,
7533         .llseek         = generic_file_llseek,
7534         .release        = tracing_release_generic_tr,
7535 };
7536
7537 static const struct file_operations tracing_mark_raw_fops = {
7538         .open           = tracing_open_generic_tr,
7539         .write          = tracing_mark_raw_write,
7540         .llseek         = generic_file_llseek,
7541         .release        = tracing_release_generic_tr,
7542 };
7543
7544 static const struct file_operations trace_clock_fops = {
7545         .open           = tracing_clock_open,
7546         .read           = seq_read,
7547         .llseek         = seq_lseek,
7548         .release        = tracing_single_release_tr,
7549         .write          = tracing_clock_write,
7550 };
7551
7552 static const struct file_operations trace_time_stamp_mode_fops = {
7553         .open           = tracing_time_stamp_mode_open,
7554         .read           = seq_read,
7555         .llseek         = seq_lseek,
7556         .release        = tracing_single_release_tr,
7557 };
7558
7559 #ifdef CONFIG_TRACER_SNAPSHOT
7560 static const struct file_operations snapshot_fops = {
7561         .open           = tracing_snapshot_open,
7562         .read           = seq_read,
7563         .write          = tracing_snapshot_write,
7564         .llseek         = tracing_lseek,
7565         .release        = tracing_snapshot_release,
7566 };
7567
7568 static const struct file_operations snapshot_raw_fops = {
7569         .open           = snapshot_raw_open,
7570         .read           = tracing_buffers_read,
7571         .release        = tracing_buffers_release,
7572         .splice_read    = tracing_buffers_splice_read,
7573         .llseek         = no_llseek,
7574 };
7575
7576 #endif /* CONFIG_TRACER_SNAPSHOT */
7577
7578 #define TRACING_LOG_ERRS_MAX    8
7579 #define TRACING_LOG_LOC_MAX     128
7580
7581 #define CMD_PREFIX "  Command: "
7582
7583 struct err_info {
7584         const char      **errs; /* ptr to loc-specific array of err strings */
7585         u8              type;   /* index into errs -> specific err string */
7586         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7587         u64             ts;
7588 };
7589
7590 struct tracing_log_err {
7591         struct list_head        list;
7592         struct err_info         info;
7593         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7594         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7595 };
7596
7597 static DEFINE_MUTEX(tracing_err_log_lock);
7598
7599 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7600 {
7601         struct tracing_log_err *err;
7602
7603         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7604                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7605                 if (!err)
7606                         err = ERR_PTR(-ENOMEM);
7607                 tr->n_err_log_entries++;
7608
7609                 return err;
7610         }
7611
7612         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7613         list_del(&err->list);
7614
7615         return err;
7616 }
7617
7618 /**
7619  * err_pos - find the position of a string within a command for error careting
7620  * @cmd: The tracing command that caused the error
7621  * @str: The string to position the caret at within @cmd
7622  *
7623  * Finds the position of the first occurrence of @str within @cmd.  The
7624  * return value can be passed to tracing_log_err() for caret placement
7625  * within @cmd.
7626  *
7627  * Returns the index within @cmd of the first occurrence of @str or 0
7628  * if @str was not found.
7629  */
7630 unsigned int err_pos(char *cmd, const char *str)
7631 {
7632         char *found;
7633
7634         if (WARN_ON(!strlen(cmd)))
7635                 return 0;
7636
7637         found = strstr(cmd, str);
7638         if (found)
7639                 return found - cmd;
7640
7641         return 0;
7642 }
7643
7644 /**
7645  * tracing_log_err - write an error to the tracing error log
7646  * @tr: The associated trace array for the error (NULL for top level array)
7647  * @loc: A string describing where the error occurred
7648  * @cmd: The tracing command that caused the error
7649  * @errs: The array of loc-specific static error strings
7650  * @type: The index into errs[], which produces the specific static err string
7651  * @pos: The position the caret should be placed in the cmd
7652  *
7653  * Writes an error into tracing/error_log of the form:
7654  *
7655  * <loc>: error: <text>
7656  *   Command: <cmd>
7657  *              ^
7658  *
7659  * tracing/error_log is a small log file containing the last
7660  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7661  * unless there has been a tracing error, and the error log can be
7662  * cleared and have its memory freed by writing the empty string in
7663  * truncation mode to it, i.e. echo > tracing/error_log.
7664  *
7665  * NOTE: the @errs array along with the @type param are used to
7666  * produce a static error string - this string is not copied and saved
7667  * when the error is logged - only a pointer to it is saved.  See
7668  * existing callers for examples of how static strings are typically
7669  * defined for use with tracing_log_err().
7670  */
7671 void tracing_log_err(struct trace_array *tr,
7672                      const char *loc, const char *cmd,
7673                      const char **errs, u8 type, u8 pos)
7674 {
7675         struct tracing_log_err *err;
7676
7677         if (!tr)
7678                 tr = &global_trace;
7679
7680         mutex_lock(&tracing_err_log_lock);
7681         err = get_tracing_log_err(tr);
7682         if (PTR_ERR(err) == -ENOMEM) {
7683                 mutex_unlock(&tracing_err_log_lock);
7684                 return;
7685         }
7686
7687         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7688         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7689
7690         err->info.errs = errs;
7691         err->info.type = type;
7692         err->info.pos = pos;
7693         err->info.ts = local_clock();
7694
7695         list_add_tail(&err->list, &tr->err_log);
7696         mutex_unlock(&tracing_err_log_lock);
7697 }
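/*
 * Illustrative sketch of a tracing_log_err() caller (hypothetical names,
 * not taken from this file): a command parser keeps a static array of
 * error strings and uses err_pos() to place the caret at the offending
 * token:
 *
 *	static const char *foo_errs[] = { "Unknown field", "Missing value" };
 *
 *	tracing_log_err(tr, "foo_cmd", cmd, foo_errs, 0,
 *			err_pos(cmd, field_name));
 */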
7698
7699 static void clear_tracing_err_log(struct trace_array *tr)
7700 {
7701         struct tracing_log_err *err, *next;
7702
7703         mutex_lock(&tracing_err_log_lock);
7704         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7705                 list_del(&err->list);
7706                 kfree(err);
7707         }
7708
7709         tr->n_err_log_entries = 0;
7710         mutex_unlock(&tracing_err_log_lock);
7711 }
7712
7713 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7714 {
7715         struct trace_array *tr = m->private;
7716
7717         mutex_lock(&tracing_err_log_lock);
7718
7719         return seq_list_start(&tr->err_log, *pos);
7720 }
7721
7722 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7723 {
7724         struct trace_array *tr = m->private;
7725
7726         return seq_list_next(v, &tr->err_log, pos);
7727 }
7728
7729 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7730 {
7731         mutex_unlock(&tracing_err_log_lock);
7732 }
7733
7734 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7735 {
7736         u8 i;
7737
7738         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7739                 seq_putc(m, ' ');
7740         for (i = 0; i < pos; i++)
7741                 seq_putc(m, ' ');
7742         seq_puts(m, "^\n");
7743 }
7744
7745 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7746 {
7747         struct tracing_log_err *err = v;
7748
7749         if (err) {
7750                 const char *err_text = err->info.errs[err->info.type];
7751                 u64 sec = err->info.ts;
7752                 u32 nsec;
7753
7754                 nsec = do_div(sec, NSEC_PER_SEC);
7755                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7756                            err->loc, err_text);
7757                 seq_printf(m, "%s", err->cmd);
7758                 tracing_err_log_show_pos(m, err->info.pos);
7759         }
7760
7761         return 0;
7762 }
7763
7764 static const struct seq_operations tracing_err_log_seq_ops = {
7765         .start  = tracing_err_log_seq_start,
7766         .next   = tracing_err_log_seq_next,
7767         .stop   = tracing_err_log_seq_stop,
7768         .show   = tracing_err_log_seq_show
7769 };
7770
7771 static int tracing_err_log_open(struct inode *inode, struct file *file)
7772 {
7773         struct trace_array *tr = inode->i_private;
7774         int ret = 0;
7775
7776         ret = tracing_check_open_get_tr(tr);
7777         if (ret)
7778                 return ret;
7779
7780         /* If this file was opened for write, then erase contents */
7781         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7782                 clear_tracing_err_log(tr);
7783
7784         if (file->f_mode & FMODE_READ) {
7785                 ret = seq_open(file, &tracing_err_log_seq_ops);
7786                 if (!ret) {
7787                         struct seq_file *m = file->private_data;
7788                         m->private = tr;
7789                 } else {
7790                         trace_array_put(tr);
7791                 }
7792         }
7793         return ret;
7794 }
7795
7796 static ssize_t tracing_err_log_write(struct file *file,
7797                                      const char __user *buffer,
7798                                      size_t count, loff_t *ppos)
7799 {
7800         return count;
7801 }
7802
7803 static int tracing_err_log_release(struct inode *inode, struct file *file)
7804 {
7805         struct trace_array *tr = inode->i_private;
7806
7807         trace_array_put(tr);
7808
7809         if (file->f_mode & FMODE_READ)
7810                 seq_release(inode, file);
7811
7812         return 0;
7813 }
7814
7815 static const struct file_operations tracing_err_log_fops = {
7816         .open           = tracing_err_log_open,
7817         .write          = tracing_err_log_write,
7818         .read           = seq_read,
7819         .llseek         = seq_lseek,
7820         .release        = tracing_err_log_release,
7821 };
7822
7823 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7824 {
7825         struct trace_array *tr = inode->i_private;
7826         struct ftrace_buffer_info *info;
7827         int ret;
7828
7829         ret = tracing_check_open_get_tr(tr);
7830         if (ret)
7831                 return ret;
7832
7833         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7834         if (!info) {
7835                 trace_array_put(tr);
7836                 return -ENOMEM;
7837         }
7838
7839         mutex_lock(&trace_types_lock);
7840
7841         info->iter.tr           = tr;
7842         info->iter.cpu_file     = tracing_get_cpu(inode);
7843         info->iter.trace        = tr->current_trace;
7844         info->iter.array_buffer = &tr->array_buffer;
7845         info->spare             = NULL;
7846         /* Force reading ring buffer for first read */
7847         info->read              = (unsigned int)-1;
7848
7849         filp->private_data = info;
7850
7851         tr->trace_ref++;
7852
7853         mutex_unlock(&trace_types_lock);
7854
7855         ret = nonseekable_open(inode, filp);
7856         if (ret < 0)
7857                 trace_array_put(tr);
7858
7859         return ret;
7860 }
7861
7862 static __poll_t
7863 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7864 {
7865         struct ftrace_buffer_info *info = filp->private_data;
7866         struct trace_iterator *iter = &info->iter;
7867
7868         return trace_poll(iter, filp, poll_table);
7869 }
7870
7871 static ssize_t
7872 tracing_buffers_read(struct file *filp, char __user *ubuf,
7873                      size_t count, loff_t *ppos)
7874 {
7875         struct ftrace_buffer_info *info = filp->private_data;
7876         struct trace_iterator *iter = &info->iter;
7877         ssize_t ret = 0;
7878         ssize_t size;
7879
7880         if (!count)
7881                 return 0;
7882
7883 #ifdef CONFIG_TRACER_MAX_TRACE
7884         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7885                 return -EBUSY;
7886 #endif
7887
7888         if (!info->spare) {
7889                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7890                                                           iter->cpu_file);
7891                 if (IS_ERR(info->spare)) {
7892                         ret = PTR_ERR(info->spare);
7893                         info->spare = NULL;
7894                 } else {
7895                         info->spare_cpu = iter->cpu_file;
7896                 }
7897         }
7898         if (!info->spare)
7899                 return ret;
7900
7901         /* Do we have previous read data to read? */
7902         if (info->read < PAGE_SIZE)
7903                 goto read;
7904
7905  again:
7906         trace_access_lock(iter->cpu_file);
7907         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7908                                     &info->spare,
7909                                     count,
7910                                     iter->cpu_file, 0);
7911         trace_access_unlock(iter->cpu_file);
7912
7913         if (ret < 0) {
7914                 if (trace_empty(iter)) {
7915                         if ((filp->f_flags & O_NONBLOCK))
7916                                 return -EAGAIN;
7917
7918                         ret = wait_on_pipe(iter, 0);
7919                         if (ret)
7920                                 return ret;
7921
7922                         goto again;
7923                 }
7924                 return 0;
7925         }
7926
7927         info->read = 0;
7928  read:
7929         size = PAGE_SIZE - info->read;
7930         if (size > count)
7931                 size = count;
7932
7933         ret = copy_to_user(ubuf, info->spare + info->read, size);
7934         if (ret == size)
7935                 return -EFAULT;
7936
7937         size -= ret;
7938
7939         *ppos += size;
7940         info->read += size;
7941
7942         return size;
7943 }
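
/*
 * Illustrative user-space sketch (not part of the kernel): reading raw
 * ring-buffer pages through per_cpu/cpuN/trace_pipe_raw, which is backed
 * by tracing_buffers_read() above.  The path and the 4096-byte page size
 * are assumptions for the example; real consumers parse the binary
 * sub-buffer format that each read() returns.
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *path =
 *                        "/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw";
 *                char page[4096];
 *                ssize_t n;
 *                int fd;
 *
 *                fd = open(path, O_RDONLY | O_NONBLOCK);
 *                if (fd < 0)
 *                        return 1;
 *                // Each successful read() copies out one ring-buffer page
 *                // (the "spare" page allocated in the code above).
 *                n = read(fd, page, sizeof(page));
 *                if (n > 0)
 *                        printf("read %zd bytes of raw buffer data\n", n);
 *                close(fd);
 *                return 0;
 *        }
 */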
7944
7945 static int tracing_buffers_release(struct inode *inode, struct file *file)
7946 {
7947         struct ftrace_buffer_info *info = file->private_data;
7948         struct trace_iterator *iter = &info->iter;
7949
7950         mutex_lock(&trace_types_lock);
7951
7952         iter->tr->trace_ref--;
7953
7954         __trace_array_put(iter->tr);
7955
7956         if (info->spare)
7957                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7958                                            info->spare_cpu, info->spare);
7959         kvfree(info);
7960
7961         mutex_unlock(&trace_types_lock);
7962
7963         return 0;
7964 }
7965
7966 struct buffer_ref {
7967         struct trace_buffer     *buffer;
7968         void                    *page;
7969         int                     cpu;
7970         refcount_t              refcount;
7971 };
7972
7973 static void buffer_ref_release(struct buffer_ref *ref)
7974 {
7975         if (!refcount_dec_and_test(&ref->refcount))
7976                 return;
7977         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7978         kfree(ref);
7979 }
7980
7981 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7982                                     struct pipe_buffer *buf)
7983 {
7984         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7985
7986         buffer_ref_release(ref);
7987         buf->private = 0;
7988 }
7989
7990 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7991                                 struct pipe_buffer *buf)
7992 {
7993         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7994
7995         if (refcount_read(&ref->refcount) > INT_MAX/2)
7996                 return false;
7997
7998         refcount_inc(&ref->refcount);
7999         return true;
8000 }
8001
8002 /* Pipe buffer operations for a buffer. */
8003 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8004         .release                = buffer_pipe_buf_release,
8005         .get                    = buffer_pipe_buf_get,
8006 };
8007
8008 /*
8009  * Callback from splice_to_pipe(); used to release any remaining pages
8010  * at the end of the spd if we errored out while filling the pipe.
8011  */
8012 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8013 {
8014         struct buffer_ref *ref =
8015                 (struct buffer_ref *)spd->partial[i].private;
8016
8017         buffer_ref_release(ref);
8018         spd->partial[i].private = 0;
8019 }
8020
8021 static ssize_t
8022 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8023                             struct pipe_inode_info *pipe, size_t len,
8024                             unsigned int flags)
8025 {
8026         struct ftrace_buffer_info *info = file->private_data;
8027         struct trace_iterator *iter = &info->iter;
8028         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8029         struct page *pages_def[PIPE_DEF_BUFFERS];
8030         struct splice_pipe_desc spd = {
8031                 .pages          = pages_def,
8032                 .partial        = partial_def,
8033                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8034                 .ops            = &buffer_pipe_buf_ops,
8035                 .spd_release    = buffer_spd_release,
8036         };
8037         struct buffer_ref *ref;
8038         int entries, i;
8039         ssize_t ret = 0;
8040
8041 #ifdef CONFIG_TRACER_MAX_TRACE
8042         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8043                 return -EBUSY;
8044 #endif
8045
8046         if (*ppos & (PAGE_SIZE - 1))
8047                 return -EINVAL;
8048
8049         if (len & (PAGE_SIZE - 1)) {
8050                 if (len < PAGE_SIZE)
8051                         return -EINVAL;
8052                 len &= PAGE_MASK;
8053         }
8054
8055         if (splice_grow_spd(pipe, &spd))
8056                 return -ENOMEM;
8057
8058  again:
8059         trace_access_lock(iter->cpu_file);
8060         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8061
8062         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8063                 struct page *page;
8064                 int r;
8065
8066                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8067                 if (!ref) {
8068                         ret = -ENOMEM;
8069                         break;
8070                 }
8071
8072                 refcount_set(&ref->refcount, 1);
8073                 ref->buffer = iter->array_buffer->buffer;
8074                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8075                 if (IS_ERR(ref->page)) {
8076                         ret = PTR_ERR(ref->page);
8077                         ref->page = NULL;
8078                         kfree(ref);
8079                         break;
8080                 }
8081                 ref->cpu = iter->cpu_file;
8082
8083                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8084                                           len, iter->cpu_file, 1);
8085                 if (r < 0) {
8086                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8087                                                    ref->page);
8088                         kfree(ref);
8089                         break;
8090                 }
8091
8092                 page = virt_to_page(ref->page);
8093
8094                 spd.pages[i] = page;
8095                 spd.partial[i].len = PAGE_SIZE;
8096                 spd.partial[i].offset = 0;
8097                 spd.partial[i].private = (unsigned long)ref;
8098                 spd.nr_pages++;
8099                 *ppos += PAGE_SIZE;
8100
8101                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8102         }
8103
8104         trace_access_unlock(iter->cpu_file);
8105         spd.nr_pages = i;
8106
8107         /* did we read anything? */
8108         if (!spd.nr_pages) {
8109                 if (ret)
8110                         goto out;
8111
8112                 ret = -EAGAIN;
8113                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8114                         goto out;
8115
8116                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8117                 if (ret)
8118                         goto out;
8119
8120                 goto again;
8121         }
8122
8123         ret = splice_to_pipe(pipe, &spd);
8124 out:
8125         splice_shrink_spd(&spd);
8126
8127         return ret;
8128 }
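
/*
 * Illustrative user-space sketch (not part of the kernel): zero-copy
 * consumption of trace_pipe_raw via splice(), which ends up in
 * tracing_buffers_splice_read() above.  Length and offset must be page
 * aligned, matching the checks at the top of that function.  The paths
 * assume tracefs is mounted at /sys/kernel/tracing.
 *
 *        #define _GNU_SOURCE
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *src =
 *                        "/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw";
 *                int pfd[2], in, out;
 *                ssize_t n;
 *
 *                in = open(src, O_RDONLY);
 *                out = open("trace-cpu0.raw",
 *                           O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *                if (in < 0 || out < 0 || pipe(pfd) < 0)
 *                        return 1;
 *
 *                // Move whole pages: tracefs -> pipe -> output file.
 *                n = splice(in, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *                if (n > 0)
 *                        splice(pfd[0], NULL, out, NULL, n, 0);
 *                return 0;
 *        }
 */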
8129
8130 static const struct file_operations tracing_buffers_fops = {
8131         .open           = tracing_buffers_open,
8132         .read           = tracing_buffers_read,
8133         .poll           = tracing_buffers_poll,
8134         .release        = tracing_buffers_release,
8135         .splice_read    = tracing_buffers_splice_read,
8136         .llseek         = no_llseek,
8137 };
8138
8139 static ssize_t
8140 tracing_stats_read(struct file *filp, char __user *ubuf,
8141                    size_t count, loff_t *ppos)
8142 {
8143         struct inode *inode = file_inode(filp);
8144         struct trace_array *tr = inode->i_private;
8145         struct array_buffer *trace_buf = &tr->array_buffer;
8146         int cpu = tracing_get_cpu(inode);
8147         struct trace_seq *s;
8148         unsigned long cnt;
8149         unsigned long long t;
8150         unsigned long usec_rem;
8151
8152         s = kmalloc(sizeof(*s), GFP_KERNEL);
8153         if (!s)
8154                 return -ENOMEM;
8155
8156         trace_seq_init(s);
8157
8158         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8159         trace_seq_printf(s, "entries: %ld\n", cnt);
8160
8161         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8162         trace_seq_printf(s, "overrun: %ld\n", cnt);
8163
8164         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8165         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8166
8167         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8168         trace_seq_printf(s, "bytes: %ld\n", cnt);
8169
8170         if (trace_clocks[tr->clock_id].in_ns) {
8171                 /* local or global for trace_clock */
8172                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8173                 usec_rem = do_div(t, USEC_PER_SEC);
8174                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8175                                                                 t, usec_rem);
8176
8177                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8178                 usec_rem = do_div(t, USEC_PER_SEC);
8179                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8180         } else {
8181                 /* counter or tsc mode for trace_clock */
8182                 trace_seq_printf(s, "oldest event ts: %llu\n",
8183                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8184
8185                 trace_seq_printf(s, "now ts: %llu\n",
8186                                 ring_buffer_time_stamp(trace_buf->buffer));
8187         }
8188
8189         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8190         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8191
8192         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8193         trace_seq_printf(s, "read events: %ld\n", cnt);
8194
8195         count = simple_read_from_buffer(ubuf, count, ppos,
8196                                         s->buffer, trace_seq_used(s));
8197
8198         kfree(s);
8199
8200         return count;
8201 }
8202
8203 static const struct file_operations tracing_stats_fops = {
8204         .open           = tracing_open_generic_tr,
8205         .read           = tracing_stats_read,
8206         .llseek         = generic_file_llseek,
8207         .release        = tracing_release_generic_tr,
8208 };
8209
8210 #ifdef CONFIG_DYNAMIC_FTRACE
8211
8212 static ssize_t
8213 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8214                   size_t cnt, loff_t *ppos)
8215 {
8216         ssize_t ret;
8217         char *buf;
8218         int r;
8219
8220         /* 256 should be plenty to hold the amount needed */
8221         buf = kmalloc(256, GFP_KERNEL);
8222         if (!buf)
8223                 return -ENOMEM;
8224
8225         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8226                       ftrace_update_tot_cnt,
8227                       ftrace_number_of_pages,
8228                       ftrace_number_of_groups);
8229
8230         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8231         kfree(buf);
8232         return ret;
8233 }
8234
8235 static const struct file_operations tracing_dyn_info_fops = {
8236         .open           = tracing_open_generic,
8237         .read           = tracing_read_dyn_info,
8238         .llseek         = generic_file_llseek,
8239 };
8240 #endif /* CONFIG_DYNAMIC_FTRACE */
8241
8242 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8243 static void
8244 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8245                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8246                 void *data)
8247 {
8248         tracing_snapshot_instance(tr);
8249 }
8250
8251 static void
8252 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8253                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8254                       void *data)
8255 {
8256         struct ftrace_func_mapper *mapper = data;
8257         long *count = NULL;
8258
8259         if (mapper)
8260                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8261
8262         if (count) {
8263
8264                 if (*count <= 0)
8265                         return;
8266
8267                 (*count)--;
8268         }
8269
8270         tracing_snapshot_instance(tr);
8271 }
8272
8273 static int
8274 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8275                       struct ftrace_probe_ops *ops, void *data)
8276 {
8277         struct ftrace_func_mapper *mapper = data;
8278         long *count = NULL;
8279
8280         seq_printf(m, "%ps:", (void *)ip);
8281
8282         seq_puts(m, "snapshot");
8283
8284         if (mapper)
8285                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8286
8287         if (count)
8288                 seq_printf(m, ":count=%ld\n", *count);
8289         else
8290                 seq_puts(m, ":unlimited\n");
8291
8292         return 0;
8293 }
8294
8295 static int
8296 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8297                      unsigned long ip, void *init_data, void **data)
8298 {
8299         struct ftrace_func_mapper *mapper = *data;
8300
8301         if (!mapper) {
8302                 mapper = allocate_ftrace_func_mapper();
8303                 if (!mapper)
8304                         return -ENOMEM;
8305                 *data = mapper;
8306         }
8307
8308         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8309 }
8310
8311 static void
8312 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8313                      unsigned long ip, void *data)
8314 {
8315         struct ftrace_func_mapper *mapper = data;
8316
8317         if (!ip) {
8318                 if (!mapper)
8319                         return;
8320                 free_ftrace_func_mapper(mapper, NULL);
8321                 return;
8322         }
8323
8324         ftrace_func_mapper_remove_ip(mapper, ip);
8325 }
8326
8327 static struct ftrace_probe_ops snapshot_probe_ops = {
8328         .func                   = ftrace_snapshot,
8329         .print                  = ftrace_snapshot_print,
8330 };
8331
8332 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8333         .func                   = ftrace_count_snapshot,
8334         .print                  = ftrace_snapshot_print,
8335         .init                   = ftrace_snapshot_init,
8336         .free                   = ftrace_snapshot_free,
8337 };
8338
8339 static int
8340 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8341                                char *glob, char *cmd, char *param, int enable)
8342 {
8343         struct ftrace_probe_ops *ops;
8344         void *count = (void *)-1;
8345         char *number;
8346         int ret;
8347
8348         if (!tr)
8349                 return -ENODEV;
8350
8351         /* hash funcs only work with set_ftrace_filter */
8352         if (!enable)
8353                 return -EINVAL;
8354
8355         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8356
8357         if (glob[0] == '!')
8358                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8359
8360         if (!param)
8361                 goto out_reg;
8362
8363         number = strsep(&param, ":");
8364
8365         if (!strlen(number))
8366                 goto out_reg;
8367
8368         /*
8369          * We use the callback data field (which is a pointer)
8370          * as our counter.
8371          */
8372         ret = kstrtoul(number, 0, (unsigned long *)&count);
8373         if (ret)
8374                 return ret;
8375
8376  out_reg:
8377         ret = tracing_alloc_snapshot_instance(tr);
8378         if (ret < 0)
8379                 goto out;
8380
8381         ret = register_ftrace_function_probe(glob, tr, ops, count);
8382
8383  out:
8384         return ret < 0 ? ret : 0;
8385 }
8386
8387 static struct ftrace_func_command ftrace_snapshot_cmd = {
8388         .name                   = "snapshot",
8389         .func                   = ftrace_trace_snapshot_callback,
8390 };
8391
8392 static __init int register_snapshot_cmd(void)
8393 {
8394         return register_ftrace_command(&ftrace_snapshot_cmd);
8395 }
8396 #else
8397 static inline __init int register_snapshot_cmd(void) { return 0; }
8398 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
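
/*
 * Illustrative user-space sketch (not part of the kernel): arming the
 * "snapshot" probe command handled by ftrace_trace_snapshot_callback()
 * above by writing to set_ftrace_filter.  The function name and count
 * are examples; note that a counted probe must also be removed with a
 * count present (any value), so that the same probe ops are matched.
 * Assumes tracefs is mounted at /sys/kernel/tracing.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        static int write_cmd(const char *cmd)
 *        {
 *                int fd = open("/sys/kernel/tracing/set_ftrace_filter",
 *                              O_WRONLY);
 *                ssize_t n;
 *
 *                if (fd < 0)
 *                        return -1;
 *                n = write(fd, cmd, strlen(cmd));
 *                close(fd);
 *                return n < 0 ? -1 : 0;
 *        }
 *
 *        int main(void)
 *        {
 *                // Snapshot the buffer the first 3 times do_sys_open() hits.
 *                if (write_cmd("do_sys_open:snapshot:3\n"))
 *                        return 1;
 *                // Later: the '!' prefix unregisters the probe again.
 *                return write_cmd("!do_sys_open:snapshot:0\n");
 *        }
 */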
8399
8400 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8401 {
8402         if (WARN_ON(!tr->dir))
8403                 return ERR_PTR(-ENODEV);
8404
8405         /* Top directory uses NULL as the parent */
8406         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8407                 return NULL;
8408
8409         /* All sub buffers have a descriptor */
8410         return tr->dir;
8411 }
8412
8413 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8414 {
8415         struct dentry *d_tracer;
8416
8417         if (tr->percpu_dir)
8418                 return tr->percpu_dir;
8419
8420         d_tracer = tracing_get_dentry(tr);
8421         if (IS_ERR(d_tracer))
8422                 return NULL;
8423
8424         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8425
8426         MEM_FAIL(!tr->percpu_dir,
8427                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8428
8429         return tr->percpu_dir;
8430 }
8431
8432 static struct dentry *
8433 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8434                       void *data, long cpu, const struct file_operations *fops)
8435 {
8436         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8437
8438         if (ret) /* See tracing_get_cpu() */
8439                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8440         return ret;
8441 }
8442
8443 static void
8444 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8445 {
8446         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8447         struct dentry *d_cpu;
8448         char cpu_dir[30]; /* 30 characters should be more than enough */
8449
8450         if (!d_percpu)
8451                 return;
8452
8453         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8454         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8455         if (!d_cpu) {
8456                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8457                 return;
8458         }
8459
8460         /* per cpu trace_pipe */
8461         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8462                                 tr, cpu, &tracing_pipe_fops);
8463
8464         /* per cpu trace */
8465         trace_create_cpu_file("trace", 0644, d_cpu,
8466                                 tr, cpu, &tracing_fops);
8467
8468         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8469                                 tr, cpu, &tracing_buffers_fops);
8470
8471         trace_create_cpu_file("stats", 0444, d_cpu,
8472                                 tr, cpu, &tracing_stats_fops);
8473
8474         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8475                                 tr, cpu, &tracing_entries_fops);
8476
8477 #ifdef CONFIG_TRACER_SNAPSHOT
8478         trace_create_cpu_file("snapshot", 0644, d_cpu,
8479                                 tr, cpu, &snapshot_fops);
8480
8481         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8482                                 tr, cpu, &snapshot_raw_fops);
8483 #endif
8484 }
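
/*
 * Illustrative user-space sketch (not part of the kernel): the files
 * created above live under per_cpu/cpuN/ in tracefs.  This dumps the
 * "stats" file for CPU 0; the mount point is an assumption.
 *
 *        #include <stdio.h>
 *
 *        int main(void)
 *        {
 *                FILE *f = fopen("/sys/kernel/tracing/per_cpu/cpu0/stats",
 *                                "r");
 *                char line[256];
 *
 *                if (!f)
 *                        return 1;
 *                while (fgets(line, sizeof(line), f))
 *                        fputs(line, stdout);
 *                fclose(f);
 *                return 0;
 *        }
 */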
8485
8486 #ifdef CONFIG_FTRACE_SELFTEST
8487 /* Let selftest have access to static functions in this file */
8488 #include "trace_selftest.c"
8489 #endif
8490
8491 static ssize_t
8492 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8493                         loff_t *ppos)
8494 {
8495         struct trace_option_dentry *topt = filp->private_data;
8496         char *buf;
8497
8498         if (topt->flags->val & topt->opt->bit)
8499                 buf = "1\n";
8500         else
8501                 buf = "0\n";
8502
8503         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8504 }
8505
8506 static ssize_t
8507 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8508                          loff_t *ppos)
8509 {
8510         struct trace_option_dentry *topt = filp->private_data;
8511         unsigned long val;
8512         int ret;
8513
8514         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8515         if (ret)
8516                 return ret;
8517
8518         if (val != 0 && val != 1)
8519                 return -EINVAL;
8520
8521         if (!!(topt->flags->val & topt->opt->bit) != val) {
8522                 mutex_lock(&trace_types_lock);
8523                 ret = __set_tracer_option(topt->tr, topt->flags,
8524                                           topt->opt, !val);
8525                 mutex_unlock(&trace_types_lock);
8526                 if (ret)
8527                         return ret;
8528         }
8529
8530         *ppos += cnt;
8531
8532         return cnt;
8533 }
8534
8535
8536 static const struct file_operations trace_options_fops = {
8537         .open = tracing_open_generic,
8538         .read = trace_options_read,
8539         .write = trace_options_write,
8540         .llseek = generic_file_llseek,
8541 };
8542
8543 /*
8544  * In order to pass in both the trace_array descriptor as well as the index
8545  * to the flag that the trace option file represents, the trace_array
8546  * has a character array of trace_flags_index[], which holds the index
8547  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8548  * The address of the array element for that flag is passed to the flag
8549  * option file read/write callbacks.
8550  *
8551  * In order to extract both the index and the trace_array descriptor,
8552  * get_tr_index() uses the following algorithm.
8553  *
8554  *   idx = *ptr;
8555  *
8556  * Since ptr points at index[idx] and index[idx] == idx, dereferencing
8557  * the pointer yields the flag's index directly.
8558  *
8559  * To recover the trace_array descriptor, subtract that index from ptr,
8560  * which lands back at the start of the array:
8561  *
8562  *   ptr - idx == &index[0]
8563  *
8564  * Then a simple container_of() from that pointer gets us to the
8565  * trace_array descriptor.
8566  */
8567 static void get_tr_index(void *data, struct trace_array **ptr,
8568                          unsigned int *pindex)
8569 {
8570         *pindex = *(unsigned char *)data;
8571
8572         *ptr = container_of(data - *pindex, struct trace_array,
8573                             trace_flags_index);
8574 }
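
/*
 * Worked example of the scheme above, with made-up addresses: if
 * trace_flags_index[] starts at 0x1000 inside a given trace_array, the
 * flag with bit index 3 gets &trace_flags_index[3] == 0x1003 as its file
 * private data.  The callbacks then recover both values like this:
 *
 *        unsigned char *data = filp->private_data;    // 0x1003
 *        unsigned int idx = *data;                    // index[3] == 3
 *        void *base = data - idx;                     // 0x1000 == &index[0]
 *        struct trace_array *tr =
 *                container_of(base, struct trace_array, trace_flags_index);
 *
 * which is exactly what get_tr_index() does.
 */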
8575
8576 static ssize_t
8577 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8578                         loff_t *ppos)
8579 {
8580         void *tr_index = filp->private_data;
8581         struct trace_array *tr;
8582         unsigned int index;
8583         char *buf;
8584
8585         get_tr_index(tr_index, &tr, &index);
8586
8587         if (tr->trace_flags & (1 << index))
8588                 buf = "1\n";
8589         else
8590                 buf = "0\n";
8591
8592         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8593 }
8594
8595 static ssize_t
8596 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8597                          loff_t *ppos)
8598 {
8599         void *tr_index = filp->private_data;
8600         struct trace_array *tr;
8601         unsigned int index;
8602         unsigned long val;
8603         int ret;
8604
8605         get_tr_index(tr_index, &tr, &index);
8606
8607         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8608         if (ret)
8609                 return ret;
8610
8611         if (val != 0 && val != 1)
8612                 return -EINVAL;
8613
8614         mutex_lock(&event_mutex);
8615         mutex_lock(&trace_types_lock);
8616         ret = set_tracer_flag(tr, 1 << index, val);
8617         mutex_unlock(&trace_types_lock);
8618         mutex_unlock(&event_mutex);
8619
8620         if (ret < 0)
8621                 return ret;
8622
8623         *ppos += cnt;
8624
8625         return cnt;
8626 }
8627
8628 static const struct file_operations trace_options_core_fops = {
8629         .open = tracing_open_generic,
8630         .read = trace_options_core_read,
8631         .write = trace_options_core_write,
8632         .llseek = generic_file_llseek,
8633 };
8634
8635 struct dentry *trace_create_file(const char *name,
8636                                  umode_t mode,
8637                                  struct dentry *parent,
8638                                  void *data,
8639                                  const struct file_operations *fops)
8640 {
8641         struct dentry *ret;
8642
8643         ret = tracefs_create_file(name, mode, parent, data, fops);
8644         if (!ret)
8645                 pr_warn("Could not create tracefs '%s' entry\n", name);
8646
8647         return ret;
8648 }
8649
8650
8651 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8652 {
8653         struct dentry *d_tracer;
8654
8655         if (tr->options)
8656                 return tr->options;
8657
8658         d_tracer = tracing_get_dentry(tr);
8659         if (IS_ERR(d_tracer))
8660                 return NULL;
8661
8662         tr->options = tracefs_create_dir("options", d_tracer);
8663         if (!tr->options) {
8664                 pr_warn("Could not create tracefs directory 'options'\n");
8665                 return NULL;
8666         }
8667
8668         return tr->options;
8669 }
8670
8671 static void
8672 create_trace_option_file(struct trace_array *tr,
8673                          struct trace_option_dentry *topt,
8674                          struct tracer_flags *flags,
8675                          struct tracer_opt *opt)
8676 {
8677         struct dentry *t_options;
8678
8679         t_options = trace_options_init_dentry(tr);
8680         if (!t_options)
8681                 return;
8682
8683         topt->flags = flags;
8684         topt->opt = opt;
8685         topt->tr = tr;
8686
8687         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8688                                     &trace_options_fops);
8689
8690 }
8691
8692 static void
8693 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8694 {
8695         struct trace_option_dentry *topts;
8696         struct trace_options *tr_topts;
8697         struct tracer_flags *flags;
8698         struct tracer_opt *opts;
8699         int cnt;
8700         int i;
8701
8702         if (!tracer)
8703                 return;
8704
8705         flags = tracer->flags;
8706
8707         if (!flags || !flags->opts)
8708                 return;
8709
8710         /*
8711          * If this is an instance, only create flags for tracers
8712          * the instance may have.
8713          */
8714         if (!trace_ok_for_array(tracer, tr))
8715                 return;
8716
8717         for (i = 0; i < tr->nr_topts; i++) {
8718                 /* Make sure there are no duplicate flags. */
8719                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8720                         return;
8721         }
8722
8723         opts = flags->opts;
8724
8725         for (cnt = 0; opts[cnt].name; cnt++)
8726                 ;
8727
8728         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8729         if (!topts)
8730                 return;
8731
8732         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8733                             GFP_KERNEL);
8734         if (!tr_topts) {
8735                 kfree(topts);
8736                 return;
8737         }
8738
8739         tr->topts = tr_topts;
8740         tr->topts[tr->nr_topts].tracer = tracer;
8741         tr->topts[tr->nr_topts].topts = topts;
8742         tr->nr_topts++;
8743
8744         for (cnt = 0; opts[cnt].name; cnt++) {
8745                 create_trace_option_file(tr, &topts[cnt], flags,
8746                                          &opts[cnt]);
8747                 MEM_FAIL(topts[cnt].entry == NULL,
8748                           "Failed to create trace option: %s",
8749                           opts[cnt].name);
8750         }
8751 }
8752
8753 static struct dentry *
8754 create_trace_option_core_file(struct trace_array *tr,
8755                               const char *option, long index)
8756 {
8757         struct dentry *t_options;
8758
8759         t_options = trace_options_init_dentry(tr);
8760         if (!t_options)
8761                 return NULL;
8762
8763         return trace_create_file(option, 0644, t_options,
8764                                  (void *)&tr->trace_flags_index[index],
8765                                  &trace_options_core_fops);
8766 }
8767
8768 static void create_trace_options_dir(struct trace_array *tr)
8769 {
8770         struct dentry *t_options;
8771         bool top_level = tr == &global_trace;
8772         int i;
8773
8774         t_options = trace_options_init_dentry(tr);
8775         if (!t_options)
8776                 return;
8777
8778         for (i = 0; trace_options[i]; i++) {
8779                 if (top_level ||
8780                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8781                         create_trace_option_core_file(tr, trace_options[i], i);
8782         }
8783 }
8784
8785 static ssize_t
8786 rb_simple_read(struct file *filp, char __user *ubuf,
8787                size_t cnt, loff_t *ppos)
8788 {
8789         struct trace_array *tr = filp->private_data;
8790         char buf[64];
8791         int r;
8792
8793         r = tracer_tracing_is_on(tr);
8794         r = sprintf(buf, "%d\n", r);
8795
8796         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8797 }
8798
8799 static ssize_t
8800 rb_simple_write(struct file *filp, const char __user *ubuf,
8801                 size_t cnt, loff_t *ppos)
8802 {
8803         struct trace_array *tr = filp->private_data;
8804         struct trace_buffer *buffer = tr->array_buffer.buffer;
8805         unsigned long val;
8806         int ret;
8807
8808         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8809         if (ret)
8810                 return ret;
8811
8812         if (buffer) {
8813                 mutex_lock(&trace_types_lock);
8814                 if (!!val == tracer_tracing_is_on(tr)) {
8815                         val = 0; /* do nothing */
8816                 } else if (val) {
8817                         tracer_tracing_on(tr);
8818                         if (tr->current_trace->start)
8819                                 tr->current_trace->start(tr);
8820                 } else {
8821                         tracer_tracing_off(tr);
8822                         if (tr->current_trace->stop)
8823                                 tr->current_trace->stop(tr);
8824                 }
8825                 mutex_unlock(&trace_types_lock);
8826         }
8827
8828         (*ppos)++;
8829
8830         return cnt;
8831 }
8832
8833 static const struct file_operations rb_simple_fops = {
8834         .open           = tracing_open_generic_tr,
8835         .read           = rb_simple_read,
8836         .write          = rb_simple_write,
8837         .release        = tracing_release_generic_tr,
8838         .llseek         = default_llseek,
8839 };
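
/*
 * Illustrative user-space sketch (not part of the kernel): pausing and
 * resuming ring-buffer writes through the tracing_on file served by
 * rb_simple_fops above.  The path assumes tracefs is mounted at
 * /sys/kernel/tracing.
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        static int set_tracing_on(int on)
 *        {
 *                int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *                int ret;
 *
 *                if (fd < 0)
 *                        return -1;
 *                ret = write(fd, on ? "1" : "0", 1) == 1 ? 0 : -1;
 *                close(fd);
 *                return ret;
 *        }
 *
 *        int main(void)
 *        {
 *                set_tracing_on(0);      // stop recording around a region
 *                // ... the interesting work happens here ...
 *                set_tracing_on(1);      // resume recording
 *                return 0;
 *        }
 */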
8840
8841 static ssize_t
8842 buffer_percent_read(struct file *filp, char __user *ubuf,
8843                     size_t cnt, loff_t *ppos)
8844 {
8845         struct trace_array *tr = filp->private_data;
8846         char buf[64];
8847         int r;
8848
8849         r = tr->buffer_percent;
8850         r = sprintf(buf, "%d\n", r);
8851
8852         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8853 }
8854
8855 static ssize_t
8856 buffer_percent_write(struct file *filp, const char __user *ubuf,
8857                      size_t cnt, loff_t *ppos)
8858 {
8859         struct trace_array *tr = filp->private_data;
8860         unsigned long val;
8861         int ret;
8862
8863         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8864         if (ret)
8865                 return ret;
8866
8867         if (val > 100)
8868                 return -EINVAL;
8869
8870         if (!val)
8871                 val = 1;
8872
8873         tr->buffer_percent = val;
8874
8875         (*ppos)++;
8876
8877         return cnt;
8878 }
8879
8880 static const struct file_operations buffer_percent_fops = {
8881         .open           = tracing_open_generic_tr,
8882         .read           = buffer_percent_read,
8883         .write          = buffer_percent_write,
8884         .release        = tracing_release_generic_tr,
8885         .llseek         = default_llseek,
8886 };
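
/*
 * Illustrative user-space sketch (not part of the kernel): tuning how
 * full the ring buffer must be before blocked readers are woken (see the
 * wait_on_pipe() call in tracing_buffers_splice_read()).  The path
 * assumes tracefs is mounted at /sys/kernel/tracing; note that
 * buffer_percent_write() above clamps 0 to 1.
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *val = "10\n";  // wake readers at 10% full
 *                int fd = open("/sys/kernel/tracing/buffer_percent",
 *                              O_WRONLY);
 *                int ret;
 *
 *                if (fd < 0)
 *                        return 1;
 *                ret = write(fd, val, strlen(val)) < 0;
 *                close(fd);
 *                return ret;
 *        }
 */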
8887
8888 static struct dentry *trace_instance_dir;
8889
8890 static void
8891 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8892
8893 static int
8894 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8895 {
8896         enum ring_buffer_flags rb_flags;
8897
8898         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8899
8900         buf->tr = tr;
8901
8902         buf->buffer = ring_buffer_alloc(size, rb_flags);
8903         if (!buf->buffer)
8904                 return -ENOMEM;
8905
8906         buf->data = alloc_percpu(struct trace_array_cpu);
8907         if (!buf->data) {
8908                 ring_buffer_free(buf->buffer);
8909                 buf->buffer = NULL;
8910                 return -ENOMEM;
8911         }
8912
8913         /* Allocate the first page for all buffers */
8914         set_buffer_entries(&tr->array_buffer,
8915                            ring_buffer_size(tr->array_buffer.buffer, 0));
8916
8917         return 0;
8918 }
8919
8920 static int allocate_trace_buffers(struct trace_array *tr, int size)
8921 {
8922         int ret;
8923
8924         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8925         if (ret)
8926                 return ret;
8927
8928 #ifdef CONFIG_TRACER_MAX_TRACE
8929         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8930                                     allocate_snapshot ? size : 1);
8931         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8932                 ring_buffer_free(tr->array_buffer.buffer);
8933                 tr->array_buffer.buffer = NULL;
8934                 free_percpu(tr->array_buffer.data);
8935                 tr->array_buffer.data = NULL;
8936                 return -ENOMEM;
8937         }
8938         tr->allocated_snapshot = allocate_snapshot;
8939
8940         /*
8941          * Only the top level trace array gets its snapshot allocated
8942          * from the kernel command line.
8943          */
8944         allocate_snapshot = false;
8945 #endif
8946
8947         return 0;
8948 }
8949
8950 static void free_trace_buffer(struct array_buffer *buf)
8951 {
8952         if (buf->buffer) {
8953                 ring_buffer_free(buf->buffer);
8954                 buf->buffer = NULL;
8955                 free_percpu(buf->data);
8956                 buf->data = NULL;
8957         }
8958 }
8959
8960 static void free_trace_buffers(struct trace_array *tr)
8961 {
8962         if (!tr)
8963                 return;
8964
8965         free_trace_buffer(&tr->array_buffer);
8966
8967 #ifdef CONFIG_TRACER_MAX_TRACE
8968         free_trace_buffer(&tr->max_buffer);
8969 #endif
8970 }
8971
8972 static void init_trace_flags_index(struct trace_array *tr)
8973 {
8974         int i;
8975
8976         /* Used by the trace options files */
8977         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8978                 tr->trace_flags_index[i] = i;
8979 }
8980
8981 static void __update_tracer_options(struct trace_array *tr)
8982 {
8983         struct tracer *t;
8984
8985         for (t = trace_types; t; t = t->next)
8986                 add_tracer_options(tr, t);
8987 }
8988
8989 static void update_tracer_options(struct trace_array *tr)
8990 {
8991         mutex_lock(&trace_types_lock);
8992         __update_tracer_options(tr);
8993         mutex_unlock(&trace_types_lock);
8994 }
8995
8996 /* Must have trace_types_lock held */
8997 struct trace_array *trace_array_find(const char *instance)
8998 {
8999         struct trace_array *tr, *found = NULL;
9000
9001         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9002                 if (tr->name && strcmp(tr->name, instance) == 0) {
9003                         found = tr;
9004                         break;
9005                 }
9006         }
9007
9008         return found;
9009 }
9010
9011 struct trace_array *trace_array_find_get(const char *instance)
9012 {
9013         struct trace_array *tr;
9014
9015         mutex_lock(&trace_types_lock);
9016         tr = trace_array_find(instance);
9017         if (tr)
9018                 tr->ref++;
9019         mutex_unlock(&trace_types_lock);
9020
9021         return tr;
9022 }
9023
9024 static int trace_array_create_dir(struct trace_array *tr)
9025 {
9026         int ret;
9027
9028         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9029         if (!tr->dir)
9030                 return -EINVAL;
9031
9032         ret = event_trace_add_tracer(tr->dir, tr);
9033         if (ret)
9034                 tracefs_remove(tr->dir);
9035
9036         init_tracer_tracefs(tr, tr->dir);
9037         __update_tracer_options(tr);
9038
9039         return ret;
9040 }
9041
9042 static struct trace_array *trace_array_create(const char *name)
9043 {
9044         struct trace_array *tr;
9045         int ret;
9046
9047         ret = -ENOMEM;
9048         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9049         if (!tr)
9050                 return ERR_PTR(ret);
9051
9052         tr->name = kstrdup(name, GFP_KERNEL);
9053         if (!tr->name)
9054                 goto out_free_tr;
9055
9056         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9057                 goto out_free_tr;
9058
9059         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9060
9061         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9062
9063         raw_spin_lock_init(&tr->start_lock);
9064
9065         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9066
9067         tr->current_trace = &nop_trace;
9068
9069         INIT_LIST_HEAD(&tr->systems);
9070         INIT_LIST_HEAD(&tr->events);
9071         INIT_LIST_HEAD(&tr->hist_vars);
9072         INIT_LIST_HEAD(&tr->err_log);
9073
9074         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9075                 goto out_free_tr;
9076
9077         if (ftrace_allocate_ftrace_ops(tr) < 0)
9078                 goto out_free_tr;
9079
9080         ftrace_init_trace_array(tr);
9081
9082         init_trace_flags_index(tr);
9083
9084         if (trace_instance_dir) {
9085                 ret = trace_array_create_dir(tr);
9086                 if (ret)
9087                         goto out_free_tr;
9088         } else
9089                 __trace_early_add_events(tr);
9090
9091         list_add(&tr->list, &ftrace_trace_arrays);
9092
9093         tr->ref++;
9094
9095         return tr;
9096
9097  out_free_tr:
9098         ftrace_free_ftrace_ops(tr);
9099         free_trace_buffers(tr);
9100         free_cpumask_var(tr->tracing_cpumask);
9101         kfree(tr->name);
9102         kfree(tr);
9103
9104         return ERR_PTR(ret);
9105 }
9106
9107 static int instance_mkdir(const char *name)
9108 {
9109         struct trace_array *tr;
9110         int ret;
9111
9112         mutex_lock(&event_mutex);
9113         mutex_lock(&trace_types_lock);
9114
9115         ret = -EEXIST;
9116         if (trace_array_find(name))
9117                 goto out_unlock;
9118
9119         tr = trace_array_create(name);
9120
9121         ret = PTR_ERR_OR_ZERO(tr);
9122
9123 out_unlock:
9124         mutex_unlock(&trace_types_lock);
9125         mutex_unlock(&event_mutex);
9126         return ret;
9127 }
9128
9129 /**
9130  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9131  * @name: The name of the trace array to be looked up/created.
9132  *
9133  * Returns pointer to trace array with given name.
9134  * NULL, if it cannot be created.
9135  *
9136  * NOTE: This function increments the reference counter associated with the
9137  * trace array returned. This makes sure it cannot be freed while in use.
9138  * Use trace_array_put() once the trace array is no longer needed.
9139  * If the trace_array is to be freed, trace_array_destroy() needs to
9140  * be called after the trace_array_put(), or simply let user space delete
9141  * it from the tracefs instances directory. But until the
9142  * trace_array_put() is called, user space can not delete it.
9143  *
9144  */
9145 struct trace_array *trace_array_get_by_name(const char *name)
9146 {
9147         struct trace_array *tr;
9148
9149         mutex_lock(&event_mutex);
9150         mutex_lock(&trace_types_lock);
9151
9152         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9153                 if (tr->name && strcmp(tr->name, name) == 0)
9154                         goto out_unlock;
9155         }
9156
9157         tr = trace_array_create(name);
9158
9159         if (IS_ERR(tr))
9160                 tr = NULL;
9161 out_unlock:
9162         if (tr)
9163                 tr->ref++;
9164
9165         mutex_unlock(&trace_types_lock);
9166         mutex_unlock(&event_mutex);
9167         return tr;
9168 }
9169 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
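
/*
 * Illustrative in-kernel sketch (not part of this file): how a module
 * might use the instance API exported here.  The instance name is an
 * example and error handling is kept minimal; see also
 * samples/ftrace/sample-trace-array.c.
 *
 *        #include <linux/module.h>
 *        #include <linux/trace.h>
 *
 *        static struct trace_array *my_tr;
 *
 *        static int __init my_mod_init(void)
 *        {
 *                // Creates "sample-instance" if it does not exist yet and
 *                // takes a reference on it (see the kernel-doc above).
 *                my_tr = trace_array_get_by_name("sample-instance");
 *                if (!my_tr)
 *                        return -ENOMEM;
 *                return 0;
 *        }
 *
 *        static void __exit my_mod_exit(void)
 *        {
 *                // Drop our reference, then tear the instance down.
 *                trace_array_put(my_tr);
 *                trace_array_destroy(my_tr);
 *        }
 *
 *        module_init(my_mod_init);
 *        module_exit(my_mod_exit);
 *        MODULE_LICENSE("GPL");
 */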
9170
9171 static int __remove_instance(struct trace_array *tr)
9172 {
9173         int i;
9174
9175         /* Reference counter for a newly created trace array = 1. */
9176         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9177                 return -EBUSY;
9178
9179         list_del(&tr->list);
9180
9181         /* Disable all the flags that were enabled coming in */
9182         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9183                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9184                         set_tracer_flag(tr, 1 << i, 0);
9185         }
9186
9187         tracing_set_nop(tr);
9188         clear_ftrace_function_probes(tr);
9189         event_trace_del_tracer(tr);
9190         ftrace_clear_pids(tr);
9191         ftrace_destroy_function_files(tr);
9192         tracefs_remove(tr->dir);
9193         free_percpu(tr->last_func_repeats);
9194         free_trace_buffers(tr);
9195
9196         for (i = 0; i < tr->nr_topts; i++) {
9197                 kfree(tr->topts[i].topts);
9198         }
9199         kfree(tr->topts);
9200
9201         free_cpumask_var(tr->tracing_cpumask);
9202         kfree(tr->name);
9203         kfree(tr);
9204
9205         return 0;
9206 }
9207
9208 int trace_array_destroy(struct trace_array *this_tr)
9209 {
9210         struct trace_array *tr;
9211         int ret;
9212
9213         if (!this_tr)
9214                 return -EINVAL;
9215
9216         mutex_lock(&event_mutex);
9217         mutex_lock(&trace_types_lock);
9218
9219         ret = -ENODEV;
9220
9221         /* Make sure the trace array exists before destroying it. */
9222         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9223                 if (tr == this_tr) {
9224                         ret = __remove_instance(tr);
9225                         break;
9226                 }
9227         }
9228
9229         mutex_unlock(&trace_types_lock);
9230         mutex_unlock(&event_mutex);
9231
9232         return ret;
9233 }
9234 EXPORT_SYMBOL_GPL(trace_array_destroy);
9235
9236 static int instance_rmdir(const char *name)
9237 {
9238         struct trace_array *tr;
9239         int ret;
9240
9241         mutex_lock(&event_mutex);
9242         mutex_lock(&trace_types_lock);
9243
9244         ret = -ENODEV;
9245         tr = trace_array_find(name);
9246         if (tr)
9247                 ret = __remove_instance(tr);
9248
9249         mutex_unlock(&trace_types_lock);
9250         mutex_unlock(&event_mutex);
9251
9252         return ret;
9253 }
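
/*
 * Illustrative user-space sketch (not part of the kernel): instances are
 * created and removed with plain mkdir()/rmdir() in the tracefs
 * "instances" directory, which lands in instance_mkdir() and
 * instance_rmdir() above.  The mount point and instance name are
 * examples.
 *
 *        #include <stdio.h>
 *        #include <sys/stat.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *dir = "/sys/kernel/tracing/instances/foo";
 *
 *                if (mkdir(dir, 0755) != 0) {
 *                        perror("mkdir");
 *                        return 1;
 *                }
 *                // ... configure and use the new instance here ...
 *                if (rmdir(dir) != 0) {
 *                        perror("rmdir");
 *                        return 1;
 *                }
 *                return 0;
 *        }
 */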
9254
9255 static __init void create_trace_instances(struct dentry *d_tracer)
9256 {
9257         struct trace_array *tr;
9258
9259         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9260                                                          instance_mkdir,
9261                                                          instance_rmdir);
9262         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9263                 return;
9264
9265         mutex_lock(&event_mutex);
9266         mutex_lock(&trace_types_lock);
9267
9268         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9269                 if (!tr->name)
9270                         continue;
9271                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9272                              "Failed to create instance directory\n"))
9273                         break;
9274         }
9275
9276         mutex_unlock(&trace_types_lock);
9277         mutex_unlock(&event_mutex);
9278 }
9279
9280 static void
9281 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9282 {
9283         struct trace_event_file *file;
9284         int cpu;
9285
9286         trace_create_file("available_tracers", 0444, d_tracer,
9287                         tr, &show_traces_fops);
9288
9289         trace_create_file("current_tracer", 0644, d_tracer,
9290                         tr, &set_tracer_fops);
9291
9292         trace_create_file("tracing_cpumask", 0644, d_tracer,
9293                           tr, &tracing_cpumask_fops);
9294
9295         trace_create_file("trace_options", 0644, d_tracer,
9296                           tr, &tracing_iter_fops);
9297
9298         trace_create_file("trace", 0644, d_tracer,
9299                           tr, &tracing_fops);
9300
9301         trace_create_file("trace_pipe", 0444, d_tracer,
9302                           tr, &tracing_pipe_fops);
9303
9304         trace_create_file("buffer_size_kb", 0644, d_tracer,
9305                           tr, &tracing_entries_fops);
9306
9307         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9308                           tr, &tracing_total_entries_fops);
9309
9310         trace_create_file("free_buffer", 0200, d_tracer,
9311                           tr, &tracing_free_buffer_fops);
9312
9313         trace_create_file("trace_marker", 0220, d_tracer,
9314                           tr, &tracing_mark_fops);
9315
9316         file = __find_event_file(tr, "ftrace", "print");
9317         if (file && file->dir)
9318                 trace_create_file("trigger", 0644, file->dir, file,
9319                                   &event_trigger_fops);
9320         tr->trace_marker_file = file;
9321
9322         trace_create_file("trace_marker_raw", 0220, d_tracer,
9323                           tr, &tracing_mark_raw_fops);
9324
9325         trace_create_file("trace_clock", 0644, d_tracer, tr,
9326                           &trace_clock_fops);
9327
9328         trace_create_file("tracing_on", 0644, d_tracer,
9329                           tr, &rb_simple_fops);
9330
9331         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9332                           &trace_time_stamp_mode_fops);
9333
9334         tr->buffer_percent = 50;
9335
9336         trace_create_file("buffer_percent", 0444, d_tracer,
9337                         tr, &buffer_percent_fops);
9338
9339         create_trace_options_dir(tr);
9340
9341 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9342         trace_create_maxlat_file(tr, d_tracer);
9343 #endif
9344
9345         if (ftrace_create_function_files(tr, d_tracer))
9346                 MEM_FAIL(1, "Could not allocate function filter files");
9347
9348 #ifdef CONFIG_TRACER_SNAPSHOT
9349         trace_create_file("snapshot", 0644, d_tracer,
9350                           tr, &snapshot_fops);
9351 #endif
9352
9353         trace_create_file("error_log", 0644, d_tracer,
9354                           tr, &tracing_err_log_fops);
9355
9356         for_each_tracing_cpu(cpu)
9357                 tracing_init_tracefs_percpu(tr, cpu);
9358
9359         ftrace_init_tracefs(tr, d_tracer);
9360 }
9361
9362 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9363 {
9364         struct vfsmount *mnt;
9365         struct file_system_type *type;
9366
9367         /*
9368          * To maintain backward compatibility for tools that mount
9369          * debugfs to get to the tracing facility, tracefs is automatically
9370          * mounted to the debugfs/tracing directory.
9371          */
9372         type = get_fs_type("tracefs");
9373         if (!type)
9374                 return NULL;
9375         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9376         put_filesystem(type);
9377         if (IS_ERR(mnt))
9378                 return NULL;
9379         mntget(mnt);
9380
9381         return mnt;
9382 }
9383
9384 /**
9385  * tracing_init_dentry - initialize top level trace array
9386  *
9387  * This is called when creating files or directories in the tracing
9388  * directory. It is called via fs_initcall() by any of the boot up code
9389  * and returns 0 on success or a negative error if tracing is unavailable.
9390  */
9391 int tracing_init_dentry(void)
9392 {
9393         struct trace_array *tr = &global_trace;
9394
9395         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9396                 pr_warn("Tracing disabled due to lockdown\n");
9397                 return -EPERM;
9398         }
9399
9400         /* The top level trace array uses NULL as parent */
9401         if (tr->dir)
9402                 return 0;
9403
9404         if (WARN_ON(!tracefs_initialized()))
9405                 return -ENODEV;
9406
9407         /*
9408          * As there may still be users that expect the tracing
9409          * files to exist in debugfs/tracing, we must automount
9410          * the tracefs file system there, so older tools still
9411          * work with the newer kernel.
9412          */
9413         tr->dir = debugfs_create_automount("tracing", NULL,
9414                                            trace_automount, NULL);
9415
9416         return 0;
9417 }
9418
9419 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9420 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9421
9422 static struct workqueue_struct *eval_map_wq __initdata;
9423 static struct work_struct eval_map_work __initdata;
9424
9425 static void __init eval_map_work_func(struct work_struct *work)
9426 {
9427         int len;
9428
9429         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9430         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9431 }
9432
9433 static int __init trace_eval_init(void)
9434 {
9435         INIT_WORK(&eval_map_work, eval_map_work_func);
9436
9437         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9438         if (!eval_map_wq) {
9439                 pr_err("Unable to allocate eval_map_wq\n");
9440                 /* Fall back to doing the work synchronously */
9441                 eval_map_work_func(&eval_map_work);
9442                 return -ENOMEM;
9443         }
9444
9445         queue_work(eval_map_wq, &eval_map_work);
9446         return 0;
9447 }
9448
9449 static int __init trace_eval_sync(void)
9450 {
9451         /* Make sure the eval map updates are finished */
9452         if (eval_map_wq)
9453                 destroy_workqueue(eval_map_wq);
9454         return 0;
9455 }
9456
9457 late_initcall_sync(trace_eval_sync);
9458
9459
9460 #ifdef CONFIG_MODULES
9461 static void trace_module_add_evals(struct module *mod)
9462 {
9463         if (!mod->num_trace_evals)
9464                 return;
9465
9466         /*
9467          * Modules with bad taint do not have events created; do
9468          * not bother with enums either.
9469          */
9470         if (trace_module_has_bad_taint(mod))
9471                 return;
9472
9473         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9474 }
9475
9476 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9477 static void trace_module_remove_evals(struct module *mod)
9478 {
9479         union trace_eval_map_item *map;
9480         union trace_eval_map_item **last = &trace_eval_maps;
9481
9482         if (!mod->num_trace_evals)
9483                 return;
9484
9485         mutex_lock(&trace_eval_mutex);
9486
9487         map = trace_eval_maps;
9488
9489         while (map) {
9490                 if (map->head.mod == mod)
9491                         break;
9492                 map = trace_eval_jmp_to_tail(map);
9493                 last = &map->tail.next;
9494                 map = map->tail.next;
9495         }
9496         if (!map)
9497                 goto out;
9498
9499         *last = trace_eval_jmp_to_tail(map)->tail.next;
9500         kfree(map);
9501  out:
9502         mutex_unlock(&trace_eval_mutex);
9503 }
9504 #else
9505 static inline void trace_module_remove_evals(struct module *mod) { }
9506 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9507
9508 static int trace_module_notify(struct notifier_block *self,
9509                                unsigned long val, void *data)
9510 {
9511         struct module *mod = data;
9512
9513         switch (val) {
9514         case MODULE_STATE_COMING:
9515                 trace_module_add_evals(mod);
9516                 break;
9517         case MODULE_STATE_GOING:
9518                 trace_module_remove_evals(mod);
9519                 break;
9520         }
9521
9522         return NOTIFY_OK;
9523 }
9524
9525 static struct notifier_block trace_module_nb = {
9526         .notifier_call = trace_module_notify,
9527         .priority = 0,
9528 };
9529 #endif /* CONFIG_MODULES */
9530
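/*
 * Create the top-level tracefs files (tracing_thresh, README,
 * saved_cmdlines, etc.), kick off the eval map updates, register the
 * module notifier and create the "instances" directory.
 */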
9531 static __init int tracer_init_tracefs(void)
9532 {
9533         int ret;
9534
9535         trace_access_lock_init();
9536
9537         ret = tracing_init_dentry();
9538         if (ret)
9539                 return 0;
9540
9541         event_trace_init();
9542
9543         init_tracer_tracefs(&global_trace, NULL);
9544         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9545
9546         trace_create_file("tracing_thresh", 0644, NULL,
9547                         &global_trace, &tracing_thresh_fops);
9548
9549         trace_create_file("README", 0444, NULL,
9550                         NULL, &tracing_readme_fops);
9551
9552         trace_create_file("saved_cmdlines", 0444, NULL,
9553                         NULL, &tracing_saved_cmdlines_fops);
9554
9555         trace_create_file("saved_cmdlines_size", 0644, NULL,
9556                           NULL, &tracing_saved_cmdlines_size_fops);
9557
9558         trace_create_file("saved_tgids", 0444, NULL,
9559                         NULL, &tracing_saved_tgids_fops);
9560
9561         trace_eval_init();
9562
9563         trace_create_eval_file(NULL);
9564
9565 #ifdef CONFIG_MODULES
9566         register_module_notifier(&trace_module_nb);
9567 #endif
9568
9569 #ifdef CONFIG_DYNAMIC_FTRACE
9570         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9571                         NULL, &tracing_dyn_info_fops);
9572 #endif
9573
9574         create_trace_instances(NULL);
9575
9576         update_tracer_options(&global_trace);
9577
9578         return 0;
9579 }
9580
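/*
 * Panic and die notifiers: when the user asks for it (e.g. via the
 * ftrace_dump_on_oops kernel command line option), dump the ftrace
 * ring buffer to the console on a panic or an oops.
 */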
9581 static int trace_panic_handler(struct notifier_block *this,
9582                                unsigned long event, void *unused)
9583 {
9584         if (ftrace_dump_on_oops)
9585                 ftrace_dump(ftrace_dump_on_oops);
9586         return NOTIFY_OK;
9587 }
9588
9589 static struct notifier_block trace_panic_notifier = {
9590         .notifier_call  = trace_panic_handler,
9591         .next           = NULL,
9592         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9593 };
9594
9595 static int trace_die_handler(struct notifier_block *self,
9596                              unsigned long val,
9597                              void *data)
9598 {
9599         switch (val) {
9600         case DIE_OOPS:
9601                 if (ftrace_dump_on_oops)
9602                         ftrace_dump(ftrace_dump_on_oops);
9603                 break;
9604         default:
9605                 break;
9606         }
9607         return NOTIFY_OK;
9608 }
9609
9610 static struct notifier_block trace_die_notifier = {
9611         .notifier_call = trace_die_handler,
9612         .priority = 200
9613 };
9614
9615 /*
9616  * printk() is limited to 1024 bytes per call; we really don't need it
9617  * that big here.  Nothing should be printing 1000 characters anyway.
9618  */
9619 #define TRACE_MAX_PRINT         1000
9620
9621 /*
9622  * Define here KERN_TRACE so that we have one place to modify
9623  * it if we decide to change what log level the ftrace dump
9624  * should be at.
9625  */
9626 #define KERN_TRACE              KERN_EMERG
9627
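/*
 * Print the contents of a trace_seq to the console, clamped to
 * TRACE_MAX_PRINT bytes, then reset the sequence for reuse.
 */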
9628 void
9629 trace_printk_seq(struct trace_seq *s)
9630 {
9631         /* Probably should print a warning here. */
9632         if (s->seq.len >= TRACE_MAX_PRINT)
9633                 s->seq.len = TRACE_MAX_PRINT;
9634
9635         /*
9636          * More paranoid code. Although the buffer size is set to
9637          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9638          * an extra layer of protection.
9639          */
9640         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9641                 s->seq.len = s->seq.size - 1;
9642
9643         /* The buffer should already be NUL-terminated, but we are paranoid. */
9644         s->buffer[s->seq.len] = 0;
9645
9646         printk(KERN_TRACE "%s", s->buffer);
9647
9648         trace_seq_init(s);
9649 }
9650
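/*
 * Set up an iterator over the global trace buffer for emergency dumps
 * such as ftrace_dump() below.
 */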
9651 void trace_init_global_iter(struct trace_iterator *iter)
9652 {
9653         iter->tr = &global_trace;
9654         iter->trace = iter->tr->current_trace;
9655         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9656         iter->array_buffer = &global_trace.array_buffer;
9657
9658         if (iter->trace && iter->trace->open)
9659                 iter->trace->open(iter);
9660
9661         /* Annotate start of buffers if we had overruns */
9662         if (ring_buffer_overruns(iter->array_buffer->buffer))
9663                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9664
9665         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9666         if (trace_clocks[iter->tr->clock_id].in_ns)
9667                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9668 }
9669
9670 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9671 {
9672         /* use static because iter can be a bit big for the stack */
9673         static struct trace_iterator iter;
9674         static atomic_t dump_running;
9675         struct trace_array *tr = &global_trace;
9676         unsigned int old_userobj;
9677         unsigned long flags;
9678         int cnt = 0, cpu;
9679
9680         /* Only allow one dump user at a time. */
9681         if (atomic_inc_return(&dump_running) != 1) {
9682                 atomic_dec(&dump_running);
9683                 return;
9684         }
9685
9686         /*
9687          * Always turn off tracing when we dump.
9688          * We don't need to show trace output of what happens
9689          * between multiple crashes.
9690          *
9691          * If the user does a sysrq-z, then they can re-enable
9692          * tracing with echo 1 > tracing_on.
9693          */
9694         tracing_off();
9695
9696         local_irq_save(flags);
9697         printk_nmi_direct_enter();
9698
9699         /* Simulate the iterator */
9700         trace_init_global_iter(&iter);
9701         /* Cannot use kmalloc for iter.temp and iter.fmt: this may run in NMI/panic context, so use the static buffers */
9702         iter.temp = static_temp_buf;
9703         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9704         iter.fmt = static_fmt_buf;
9705         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9706
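        /*
         * Bump the per-CPU "disabled" counters so that no new events are
         * recorded into the buffers while we dump them.
         */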
9707         for_each_tracing_cpu(cpu) {
9708                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9709         }
9710
9711         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9712
9713         /* don't look at user memory in panic mode */
9714         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9715
9716         switch (oops_dump_mode) {
9717         case DUMP_ALL:
9718                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9719                 break;
9720         case DUMP_ORIG:
9721                 iter.cpu_file = raw_smp_processor_id();
9722                 break;
9723         case DUMP_NONE:
9724                 goto out_enable;
9725         default:
9726                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9727                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9728         }
9729
9730         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9731
9732         /* Did function tracer already get disabled? */
9733         if (ftrace_is_dead()) {
9734                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9735                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9736         }
9737
9738         /*
9739          * We need to stop all tracing on all CPUs to read
9740          * the next buffer. This is a bit expensive, but it is
9741          * not done often. We read everything we can, and
9742          * then release the locks again.
9743          */
9744
9745         while (!trace_empty(&iter)) {
9746
9747                 if (!cnt)
9748                         printk(KERN_TRACE "---------------------------------\n");
9749
9750                 cnt++;
9751
9752                 trace_iterator_reset(&iter);
9753                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9754
9755                 if (trace_find_next_entry_inc(&iter) != NULL) {
9756                         int ret;
9757
9758                         ret = print_trace_line(&iter);
9759                         if (ret != TRACE_TYPE_NO_CONSUME)
9760                                 trace_consume(&iter);
9761                 }
9762                 touch_nmi_watchdog();
9763
9764                 trace_printk_seq(&iter.seq);
9765         }
9766
9767         if (!cnt)
9768                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9769         else
9770                 printk(KERN_TRACE "---------------------------------\n");
9771
9772  out_enable:
9773         tr->trace_flags |= old_userobj;
9774
9775         for_each_tracing_cpu(cpu) {
9776                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9777         }
9778         atomic_dec(&dump_running);
9779         printk_nmi_direct_exit();
9780         local_irq_restore(flags);
9781 }
9782 EXPORT_SYMBOL_GPL(ftrace_dump);
9783
9784 #define WRITE_BUFSIZE  4096
9785
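/*
 * trace_parse_run_command - run createfn() on each line of a user write
 *
 * Copies the user buffer in at most WRITE_BUFSIZE chunks, splits it on
 * newlines, strips '#' comments, and calls createfn() for every resulting
 * command line.  Used by interfaces that take command strings, such as
 * the dynamic event files.
 */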
9786 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9787                                 size_t count, loff_t *ppos,
9788                                 int (*createfn)(const char *))
9789 {
9790         char *kbuf, *buf, *tmp;
9791         int ret = 0;
9792         size_t done = 0;
9793         size_t size;
9794
9795         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9796         if (!kbuf)
9797                 return -ENOMEM;
9798
9799         while (done < count) {
9800                 size = count - done;
9801
9802                 if (size >= WRITE_BUFSIZE)
9803                         size = WRITE_BUFSIZE - 1;
9804
9805                 if (copy_from_user(kbuf, buffer + done, size)) {
9806                         ret = -EFAULT;
9807                         goto out;
9808                 }
9809                 kbuf[size] = '\0';
9810                 buf = kbuf;
9811                 do {
9812                         tmp = strchr(buf, '\n');
9813                         if (tmp) {
9814                                 *tmp = '\0';
9815                                 size = tmp - buf + 1;
9816                         } else {
9817                                 size = strlen(buf);
9818                                 if (done + size < count) {
9819                                         if (buf != kbuf)
9820                                                 break;
9821                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9822                                         pr_warn("Line length is too long: Should be less than %d\n",
9823                                                 WRITE_BUFSIZE - 2);
9824                                         ret = -EINVAL;
9825                                         goto out;
9826                                 }
9827                         }
9828                         done += size;
9829
9830                         /* Remove comments */
9831                         tmp = strchr(buf, '#');
9832
9833                         if (tmp)
9834                                 *tmp = '\0';
9835
9836                         ret = createfn(buf);
9837                         if (ret)
9838                                 goto out;
9839                         buf += size;
9840
9841                 } while (done < count);
9842         }
9843         ret = done;
9844
9845 out:
9846         kfree(kbuf);
9847
9848         return ret;
9849 }
9850
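/*
 * Allocate and initialize the global trace array: cpumasks, the per-CPU
 * ring buffers, the temp buffer used for filtering and event triggers,
 * and the panic/die notifiers.  Called from early_trace_init() below.
 */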
9851 __init static int tracer_alloc_buffers(void)
9852 {
9853         int ring_buf_size;
9854         int ret = -ENOMEM;
9855
9856
9857         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9858                 pr_warn("Tracing disabled due to lockdown\n");
9859                 return -EPERM;
9860         }
9861
9862         /*
9863          * Make sure we don't accidentally add more trace options
9864          * than we have bits for.
9865          */
9866         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9867
9868         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9869                 goto out;
9870
9871         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9872                 goto out_free_buffer_mask;
9873
9874         /* Only allocate the trace_printk buffers if a trace_printk() user exists */
9875         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9876                 /* Must be called before global_trace.buffer is allocated */
9877                 trace_printk_init_buffers();
9878
9879         /* To save memory, keep the ring buffer size at its minimum unless it has already been expanded */
9880         if (ring_buffer_expanded)
9881                 ring_buf_size = trace_buf_size;
9882         else
9883                 ring_buf_size = 1;
9884
9885         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9886         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9887
9888         raw_spin_lock_init(&global_trace.start_lock);
9889
9890         /*
9891          * The prepare callback allocates some memory for the ring buffer. We
9892          * don't free that buffer if the CPU goes down; if we did, the user
9893          * would lose any trace that was in it. The memory is only freed
9894          * once the "instance" is removed.
9895          */
9896         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9897                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9898                                       NULL);
9899         if (ret < 0)
9900                 goto out_free_cpumask;
9901         /* Used for event triggers and for filtering: events can be discarded from here before reaching the main ring buffer */
9902         ret = -ENOMEM;
9903         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9904         if (!temp_buffer)
9905                 goto out_rm_hp_state;
9906
9907         if (trace_create_savedcmd() < 0)
9908                 goto out_free_temp_buffer;
9909
9910         /* TODO: make the number of buffers hot pluggable with CPUs */
9911         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9912                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9913                 goto out_free_savedcmd;
9914         }
9915
9916         if (global_trace.buffer_disabled)
9917                 tracing_off();
9918
9919         if (trace_boot_clock) {
9920                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9921                 if (ret < 0)
9922                         pr_warn("Trace clock %s not defined, going back to default\n",
9923                                 trace_boot_clock);
9924         }
9925
9926         /*
9927          * register_tracer() might reference current_trace, so it
9928          * needs to be set before we register anything. This is
9929          * just a bootstrap of current_trace anyway.
9930          */
9931         global_trace.current_trace = &nop_trace;
9932
9933         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9934
9935         ftrace_init_global_array_ops(&global_trace);
9936
9937         init_trace_flags_index(&global_trace);
9938
9939         register_tracer(&nop_trace);
9940
9941         /* Function tracing may start here (via kernel command line) */
9942         init_function_trace();
9943
9944         /* All seems OK, enable tracing */
9945         tracing_disabled = 0;
9946
9947         atomic_notifier_chain_register(&panic_notifier_list,
9948                                        &trace_panic_notifier);
9949
9950         register_die_notifier(&trace_die_notifier);
9951
9952         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9953
9954         INIT_LIST_HEAD(&global_trace.systems);
9955         INIT_LIST_HEAD(&global_trace.events);
9956         INIT_LIST_HEAD(&global_trace.hist_vars);
9957         INIT_LIST_HEAD(&global_trace.err_log);
9958         list_add(&global_trace.list, &ftrace_trace_arrays);
9959
9960         apply_trace_boot_options();
9961
9962         register_snapshot_cmd();
9963
9964         test_can_verify();
9965
9966         return 0;
9967
9968 out_free_savedcmd:
9969         free_saved_cmdlines_buffer(savedcmd);
9970 out_free_temp_buffer:
9971         ring_buffer_free(temp_buffer);
9972 out_rm_hp_state:
9973         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9974 out_free_cpumask:
9975         free_cpumask_var(global_trace.tracing_cpumask);
9976 out_free_buffer_mask:
9977         free_cpumask_var(tracing_buffer_mask);
9978 out:
9979         return ret;
9980 }
9981
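/*
 * Called early in boot, before the rest of the tracing infrastructure is
 * initialized, so that the trace buffers and the tracepoint_printk
 * iterator are available as soon as possible.
 */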
9982 void __init early_trace_init(void)
9983 {
9984         if (tracepoint_printk) {
9985                 tracepoint_print_iter =
9986                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9987                 if (MEM_FAIL(!tracepoint_print_iter,
9988                              "Failed to allocate trace iterator\n"))
9989                         tracepoint_printk = 0;
9990                 else
9991                         static_key_enable(&tracepoint_printk_key.key);
9992         }
9993         tracer_alloc_buffers();
9994 }
9995
9996 void __init trace_init(void)
9997 {
9998         trace_event_init();
9999 }
10000
10001 __init static int clear_boot_tracer(void)
10002 {
10003         /*
10004          * The default bootup tracer name points into an init section.
10005          * This function runs as a late initcall; if the boot tracer was
10006          * never registered by now, clear the pointer so that a later
10007          * registration does not access the memory that is about to be
10008          * freed.
10009          */
10010         if (!default_bootup_tracer)
10011                 return 0;
10012
10013         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10014                default_bootup_tracer);
10015         default_bootup_tracer = NULL;
10016
10017         return 0;
10018 }
10019
10020 fs_initcall(tracer_init_tracefs);
10021 late_initcall_sync(clear_boot_tracer);
10022
10023 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10024 __init static int tracing_set_default_clock(void)
10025 {
10026         /* sched_clock_stable() is determined in late_initcall */
10027         if (!trace_boot_clock && !sched_clock_stable()) {
10028                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10029                         pr_warn("Can not set tracing clock due to lockdown\n");
10030                         return -EPERM;
10031                 }
10032
10033                 printk(KERN_WARNING
10034                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10035                        "If you want to keep using the local clock, then add:\n"
10036                        "  \"trace_clock=local\"\n"
10037                        "on the kernel command line\n");
10038                 tracing_set_clock(&global_trace, "global");
10039         }
10040
10041         return 0;
10042 }
10043 late_initcall_sync(tracing_set_default_clock);
10044 #endif