kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
121  * the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
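
#if 0
/*
 * Illustrative sketch only (not part of this file): walking the saved
 * eval maps according to the layout described above, i.e. a head element,
 * then head.length map elements, then a tail element linking to the next
 * array (or NULL). Field names follow struct trace_eval_map from
 * <linux/trace_events.h>.
 */
static void walk_saved_eval_maps(void)
{
	union trace_eval_map_item *ptr;

	mutex_lock(&trace_eval_mutex);
	ptr = trace_eval_maps;
	while (ptr) {
		unsigned long i, len = ptr->head.length;

		for (i = 1; i <= len; i++)
			pr_info("%s = %lu\n", ptr[i].map.eval_string,
				ptr[i].map.eval_value);

		/* the tail element follows the last map entry */
		ptr = ptr[len + 1].tail.next;
	}
	mutex_unlock(&trace_eval_mutex);
}
#endif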
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_array *tr,
167                                    struct trace_buffer *buffer,
168                                    unsigned long flags, int pc);
169
170 #define MAX_TRACER_SIZE         100
171 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
172 static char *default_bootup_tracer;
173
174 static bool allocate_snapshot;
175
176 static int __init set_cmdline_ftrace(char *str)
177 {
178         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
179         default_bootup_tracer = bootup_tracer_buf;
180         /* We are using ftrace early, expand it */
181         ring_buffer_expanded = true;
182         return 1;
183 }
184 __setup("ftrace=", set_cmdline_ftrace);
185
186 static int __init set_ftrace_dump_on_oops(char *str)
187 {
188         if (*str++ != '=' || !*str) {
189                 ftrace_dump_on_oops = DUMP_ALL;
190                 return 1;
191         }
192
193         if (!strcmp("orig_cpu", str)) {
194                 ftrace_dump_on_oops = DUMP_ORIG;
195                 return 1;
196         }
197
198         return 0;
199 }
200 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
201
202 static int __init stop_trace_on_warning(char *str)
203 {
204         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
205                 __disable_trace_on_warning = 1;
206         return 1;
207 }
208 __setup("traceoff_on_warning", stop_trace_on_warning);
209
210 static int __init boot_alloc_snapshot(char *str)
211 {
212         allocate_snapshot = true;
213         /* We also need the main ring buffer expanded */
214         ring_buffer_expanded = true;
215         return 1;
216 }
217 __setup("alloc_snapshot", boot_alloc_snapshot);
218
219
220 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
221
222 static int __init set_trace_boot_options(char *str)
223 {
224         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
225         return 0;
226 }
227 __setup("trace_options=", set_trace_boot_options);
228
229 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
230 static char *trace_boot_clock __initdata;
231
232 static int __init set_trace_boot_clock(char *str)
233 {
234         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
235         trace_boot_clock = trace_boot_clock_buf;
236         return 0;
237 }
238 __setup("trace_clock=", set_trace_boot_clock);
239
240 static int __init set_tracepoint_printk(char *str)
241 {
242         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
243                 tracepoint_printk = 1;
244         return 1;
245 }
246 __setup("tp_printk", set_tracepoint_printk);
247
248 unsigned long long ns2usecs(u64 nsec)
249 {
250         nsec += 500;
251         do_div(nsec, 1000);
252         return nsec;
253 }
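/*
 * For example (illustrative): the +500 before the divide rounds to the
 * nearest microsecond, so ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */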
254
255 static void
256 trace_process_export(struct trace_export *export,
257                struct ring_buffer_event *event, int flag)
258 {
259         struct trace_entry *entry;
260         unsigned int size = 0;
261
262         if (export->flags & flag) {
263                 entry = ring_buffer_event_data(event);
264                 size = ring_buffer_event_length(event);
265                 export->write(export, entry, size);
266         }
267 }
268
269 static DEFINE_MUTEX(ftrace_export_lock);
270
271 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
272
273 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
275 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
276
277 static inline void ftrace_exports_enable(struct trace_export *export)
278 {
279         if (export->flags & TRACE_EXPORT_FUNCTION)
280                 static_branch_inc(&trace_function_exports_enabled);
281
282         if (export->flags & TRACE_EXPORT_EVENT)
283                 static_branch_inc(&trace_event_exports_enabled);
284
285         if (export->flags & TRACE_EXPORT_MARKER)
286                 static_branch_inc(&trace_marker_exports_enabled);
287 }
288
289 static inline void ftrace_exports_disable(struct trace_export *export)
290 {
291         if (export->flags & TRACE_EXPORT_FUNCTION)
292                 static_branch_dec(&trace_function_exports_enabled);
293
294         if (export->flags & TRACE_EXPORT_EVENT)
295                 static_branch_dec(&trace_event_exports_enabled);
296
297         if (export->flags & TRACE_EXPORT_MARKER)
298                 static_branch_dec(&trace_marker_exports_enabled);
299 }
300
301 static void ftrace_exports(struct ring_buffer_event *event, int flag)
302 {
303         struct trace_export *export;
304
305         preempt_disable_notrace();
306
307         export = rcu_dereference_raw_check(ftrace_exports_list);
308         while (export) {
309                 trace_process_export(export, event, flag);
310                 export = rcu_dereference_raw_check(export->next);
311         }
312
313         preempt_enable_notrace();
314 }
315
316 static inline void
317 add_trace_export(struct trace_export **list, struct trace_export *export)
318 {
319         rcu_assign_pointer(export->next, *list);
320         /*
321          * We are inserting export into the list but another
322          * CPU might be walking that list. We need to make sure
323          * the export->next pointer is valid before another CPU sees
324          * the export pointer included in the list.
325          */
326         rcu_assign_pointer(*list, export);
327 }
328
329 static inline int
330 rm_trace_export(struct trace_export **list, struct trace_export *export)
331 {
332         struct trace_export **p;
333
334         for (p = list; *p != NULL; p = &(*p)->next)
335                 if (*p == export)
336                         break;
337
338         if (*p != export)
339                 return -1;
340
341         rcu_assign_pointer(*p, (*p)->next);
342
343         return 0;
344 }
345
346 static inline void
347 add_ftrace_export(struct trace_export **list, struct trace_export *export)
348 {
349         ftrace_exports_enable(export);
350
351         add_trace_export(list, export);
352 }
353
354 static inline int
355 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
356 {
357         int ret;
358
359         ret = rm_trace_export(list, export);
360         ftrace_exports_disable(export);
361
362         return ret;
363 }
364
365 int register_ftrace_export(struct trace_export *export)
366 {
367         if (WARN_ON_ONCE(!export->write))
368                 return -1;
369
370         mutex_lock(&ftrace_export_lock);
371
372         add_ftrace_export(&ftrace_exports_list, export);
373
374         mutex_unlock(&ftrace_export_lock);
375
376         return 0;
377 }
378 EXPORT_SYMBOL_GPL(register_ftrace_export);
379
380 int unregister_ftrace_export(struct trace_export *export)
381 {
382         int ret;
383
384         mutex_lock(&ftrace_export_lock);
385
386         ret = rm_ftrace_export(&ftrace_exports_list, export);
387
388         mutex_unlock(&ftrace_export_lock);
389
390         return ret;
391 }
392 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
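
#if 0
/*
 * Illustrative sketch only: how a consumer might mirror function trace
 * entries to an external sink. The my_sink_*() names are hypothetical;
 * register_ftrace_export(), unregister_ftrace_export() and the
 * TRACE_EXPORT_* flags are the API above (see <linux/trace.h> for the
 * exact callback prototype).
 */
static void my_sink_write(struct trace_export *export, const void *entry,
			  unsigned int size)
{
	/* copy @size bytes of the raw trace entry to the external sink */
}

static struct trace_export my_sink_export = {
	.write	= my_sink_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

static int __init my_sink_init(void)
{
	return register_ftrace_export(&my_sink_export);
}

static void __exit my_sink_exit(void)
{
	unregister_ftrace_export(&my_sink_export);
}
#endif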
393
394 /* trace_flags holds trace_options default values */
395 #define TRACE_DEFAULT_FLAGS                                             \
396         (FUNCTION_DEFAULT_FLAGS |                                       \
397          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
398          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
399          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
400          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
401
402 /* trace_options that are only supported by global_trace */
403 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
404                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
405
406 /* trace_flags that are default zero for instances */
407 #define ZEROED_TRACE_FLAGS \
408         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
409
410 /*
411  * The global_trace is the descriptor that holds the top-level tracing
412  * buffers for the live tracing.
413  */
414 static struct trace_array global_trace = {
415         .trace_flags = TRACE_DEFAULT_FLAGS,
416 };
417
418 LIST_HEAD(ftrace_trace_arrays);
419
420 int trace_array_get(struct trace_array *this_tr)
421 {
422         struct trace_array *tr;
423         int ret = -ENODEV;
424
425         mutex_lock(&trace_types_lock);
426         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
427                 if (tr == this_tr) {
428                         tr->ref++;
429                         ret = 0;
430                         break;
431                 }
432         }
433         mutex_unlock(&trace_types_lock);
434
435         return ret;
436 }
437
438 static void __trace_array_put(struct trace_array *this_tr)
439 {
440         WARN_ON(!this_tr->ref);
441         this_tr->ref--;
442 }
443
444 /**
445  * trace_array_put - Decrement the reference counter for this trace array.
446  * @this_tr: trace array to decrement the reference counter of
447  *
448  * NOTE: Use this when we no longer need the trace array returned by
449  * trace_array_get_by_name(). This ensures the trace array can later
450  * be destroyed.
451  */
452 void trace_array_put(struct trace_array *this_tr)
453 {
454         if (!this_tr)
455                 return;
456
457         mutex_lock(&trace_types_lock);
458         __trace_array_put(this_tr);
459         mutex_unlock(&trace_types_lock);
460 }
461 EXPORT_SYMBOL_GPL(trace_array_put);
462
463 int tracing_check_open_get_tr(struct trace_array *tr)
464 {
465         int ret;
466
467         ret = security_locked_down(LOCKDOWN_TRACEFS);
468         if (ret)
469                 return ret;
470
471         if (tracing_disabled)
472                 return -ENODEV;
473
474         if (tr && trace_array_get(tr) < 0)
475                 return -ENODEV;
476
477         return 0;
478 }
479
480 int call_filter_check_discard(struct trace_event_call *call, void *rec,
481                               struct trace_buffer *buffer,
482                               struct ring_buffer_event *event)
483 {
484         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
485             !filter_match_preds(call->filter, rec)) {
486                 __trace_event_discard_commit(buffer, event);
487                 return 1;
488         }
489
490         return 0;
491 }
492
493 void trace_free_pid_list(struct trace_pid_list *pid_list)
494 {
495         vfree(pid_list->pids);
496         kfree(pid_list);
497 }
498
499 /**
500  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
501  * @filtered_pids: The list of pids to check
502  * @search_pid: The PID to find in @filtered_pids
503  *
504  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
505  */
506 bool
507 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
508 {
509         /*
510          * If pid_max changed after filtered_pids was created, we
511          * by default ignore all pids greater than the previous pid_max.
512          */
513         if (search_pid >= filtered_pids->pid_max)
514                 return false;
515
516         return test_bit(search_pid, filtered_pids->pids);
517 }
518
519 /**
520  * trace_ignore_this_task - should a task be ignored for tracing
521  * @filtered_pids: The list of pids to check
522  * @task: The task that should be ignored if not filtered
523  *
524  * Checks if @task should be traced or not from @filtered_pids.
525  * Returns true if @task should *NOT* be traced.
526  * Returns false if @task should be traced.
527  */
528 bool
529 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
530                        struct trace_pid_list *filtered_no_pids,
531                        struct task_struct *task)
532 {
533         /*
534          * If filtered_no_pids is not empty, and the task's pid is listed
535          * in filtered_no_pids, then return true.
536          * Otherwise, if filtered_pids is empty, that means we can
537          * trace all tasks. If it has content, then only trace pids
538          * within filtered_pids.
539          */
540
541         return (filtered_pids &&
542                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
543                 (filtered_no_pids &&
544                  trace_find_filtered_pid(filtered_no_pids, task->pid));
545 }
546
547 /**
548  * trace_filter_add_remove_task - Add or remove a task from a pid_list
549  * @pid_list: The list to modify
550  * @self: The current task for fork or NULL for exit
551  * @task: The task to add or remove
552  *
553  * If adding a task, if @self is defined, the task is only added if @self
554  * is also included in @pid_list. This happens on fork and tasks should
555  * only be added when the parent is listed. If @self is NULL, then the
556  * @task pid will be removed from the list, which would happen on exit
557  * of a task.
558  */
559 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
560                                   struct task_struct *self,
561                                   struct task_struct *task)
562 {
563         if (!pid_list)
564                 return;
565
566         /* For forks, we only add if the forking task is listed */
567         if (self) {
568                 if (!trace_find_filtered_pid(pid_list, self->pid))
569                         return;
570         }
571
572         /* Sorry, but we don't support pid_max changing after setting */
573         if (task->pid >= pid_list->pid_max)
574                 return;
575
576         /* "self" is set for forks, and NULL for exits */
577         if (self)
578                 set_bit(task->pid, pid_list->pids);
579         else
580                 clear_bit(task->pid, pid_list->pids);
581 }
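
#if 0
/*
 * Illustrative sketch only: the typical callers of the function above are
 * sched_process_fork/sched_process_exit tracepoint probes (see the real
 * ones in trace_events.c). "my_pid_list" stands in for the RCU-protected
 * list a caller actually maintains; the probe names are hypothetical.
 */
static struct trace_pid_list *my_pid_list;

static void my_sched_fork_probe(void *data, struct task_struct *self,
				struct task_struct *task)
{
	/* the child is added only if the forking parent is filtered */
	trace_filter_add_remove_task(my_pid_list, self, task);
}

static void my_sched_exit_probe(void *data, struct task_struct *task)
{
	/* @self == NULL means: remove @task from the list on exit */
	trace_filter_add_remove_task(my_pid_list, NULL, task);
}
#endif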
582
583 /**
584  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
585  * @pid_list: The pid list to show
586  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
587  * @pos: The position of the file
588  *
589  * This is used by the seq_file "next" operation to iterate the pids
590  * listed in a trace_pid_list structure.
591  *
592  * Returns the pid+1 as we want to display pid of zero, but NULL would
593  * stop the iteration.
594  */
595 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
596 {
597         unsigned long pid = (unsigned long)v;
598
599         (*pos)++;
600
601         /* pid already is +1 of the actual previous bit */
602         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
603
604         /* Return pid + 1 to allow zero to be represented */
605         if (pid < pid_list->pid_max)
606                 return (void *)(pid + 1);
607
608         return NULL;
609 }
610
611 /**
612  * trace_pid_start - Used for seq_file to start reading pid lists
613  * @pid_list: The pid list to show
614  * @pos: The position of the file
615  *
616  * This is used by seq_file "start" operation to start the iteration
617  * of listing pids.
618  *
619  * Returns the pid+1 as we want to display pid of zero, but NULL would
620  * stop the iteration.
621  */
622 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
623 {
624         unsigned long pid;
625         loff_t l = 0;
626
627         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
628         if (pid >= pid_list->pid_max)
629                 return NULL;
630
631         /* Return pid + 1 so that zero can be the exit value */
632         for (pid++; pid && l < *pos;
633              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
634                 ;
635         return (void *)pid;
636 }
637
638 /**
639  * trace_pid_show - show the current pid in seq_file processing
640  * @m: The seq_file structure to write into
641  * @v: A void pointer of the pid (+1) value to display
642  *
643  * Can be directly used by seq_file operations to display the current
644  * pid value.
645  */
646 int trace_pid_show(struct seq_file *m, void *v)
647 {
648         unsigned long pid = (unsigned long)v - 1;
649
650         seq_printf(m, "%lu\n", pid);
651         return 0;
652 }
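
#if 0
/*
 * Illustrative sketch only: wiring the three helpers above into seq_file
 * operations for a "set_*_pid" style file. "my_pid_list" is a stand-in
 * for wherever the caller keeps its trace_pid_list (the real users fetch
 * it from the trace_array under RCU in their start/stop callbacks).
 */
static struct trace_pid_list *my_pid_list;

static void *my_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(my_pid_list, pos);
}

static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(my_pid_list, v, pos);
}

static void my_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations my_pids_seq_ops = {
	.start	= my_pids_start,
	.next	= my_pids_next,
	.stop	= my_pids_stop,
	.show	= trace_pid_show,
};
#endif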
653
654 /* 128 should be much more than enough */
655 #define PID_BUF_SIZE            127
656
657 int trace_pid_write(struct trace_pid_list *filtered_pids,
658                     struct trace_pid_list **new_pid_list,
659                     const char __user *ubuf, size_t cnt)
660 {
661         struct trace_pid_list *pid_list;
662         struct trace_parser parser;
663         unsigned long val;
664         int nr_pids = 0;
665         ssize_t read = 0;
666         ssize_t ret = 0;
667         loff_t pos;
668         pid_t pid;
669
670         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
671                 return -ENOMEM;
672
673         /*
674          * Always create a new array. The write is an all-or-nothing
675          * operation: a new array is built whenever the user adds pids,
676          * and if the operation fails, the current list is left
677          * unmodified.
678          */
679         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
680         if (!pid_list) {
681                 trace_parser_put(&parser);
682                 return -ENOMEM;
683         }
684
685         pid_list->pid_max = READ_ONCE(pid_max);
686
687         /* Only truncating will shrink pid_max */
688         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
689                 pid_list->pid_max = filtered_pids->pid_max;
690
691         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
692         if (!pid_list->pids) {
693                 trace_parser_put(&parser);
694                 kfree(pid_list);
695                 return -ENOMEM;
696         }
697
698         if (filtered_pids) {
699                 /* copy the current bits to the new max */
700                 for_each_set_bit(pid, filtered_pids->pids,
701                                  filtered_pids->pid_max) {
702                         set_bit(pid, pid_list->pids);
703                         nr_pids++;
704                 }
705         }
706
707         while (cnt > 0) {
708
709                 pos = 0;
710
711                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
712                 if (ret < 0 || !trace_parser_loaded(&parser))
713                         break;
714
715                 read += ret;
716                 ubuf += ret;
717                 cnt -= ret;
718
719                 ret = -EINVAL;
720                 if (kstrtoul(parser.buffer, 0, &val))
721                         break;
722                 if (val >= pid_list->pid_max)
723                         break;
724
725                 pid = (pid_t)val;
726
727                 set_bit(pid, pid_list->pids);
728                 nr_pids++;
729
730                 trace_parser_clear(&parser);
731                 ret = 0;
732         }
733         trace_parser_put(&parser);
734
735         if (ret < 0) {
736                 trace_free_pid_list(pid_list);
737                 return ret;
738         }
739
740         if (!nr_pids) {
741                 /* Cleared the list of pids */
742                 trace_free_pid_list(pid_list);
743                 read = ret;
744                 pid_list = NULL;
745         }
746
747         *new_pid_list = pid_list;
748
749         return read;
750 }
751
752 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
753 {
754         u64 ts;
755
756         /* Early boot up does not have a buffer yet */
757         if (!buf->buffer)
758                 return trace_clock_local();
759
760         ts = ring_buffer_time_stamp(buf->buffer, cpu);
761         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
762
763         return ts;
764 }
765
766 u64 ftrace_now(int cpu)
767 {
768         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
769 }
770
771 /**
772  * tracing_is_enabled - Show if global_trace has been enabled or not
773  *
774  * Shows if the global trace has been enabled or not. It uses the
775  * mirror flag "buffer_disabled" to be used in fast paths such as for
776  * the irqsoff tracer. But it may be inaccurate due to races. If you
777  * need to know the accurate state, use tracing_is_on() which is a little
778  * slower, but accurate.
779  */
780 int tracing_is_enabled(void)
781 {
782         /*
783          * For quick access (irqsoff uses this in fast path), just
784          * return the mirror variable of the state of the ring buffer.
785          * It's a little racy, but we don't really care.
786          */
787         smp_rmb();
788         return !global_trace.buffer_disabled;
789 }
790
791 /*
792  * trace_buf_size is the size in bytes that is allocated
793  * for a buffer. Note, the number of bytes is always rounded
794  * to page size.
795  *
796  * This number is purposely set to a low value (16384 entries):
797  * if a dump on oops happens, it is much appreciated not to have
798  * to wait for all that output. In any case, this is configurable
799  * at both boot time and run time.
800  */
801 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
802
803 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
804
805 /* trace_types holds a link list of available tracers. */
806 static struct tracer            *trace_types __read_mostly;
807
808 /*
809  * trace_types_lock is used to protect the trace_types list.
810  */
811 DEFINE_MUTEX(trace_types_lock);
812
813 /*
814  * serialize the access of the ring buffer
815  *
816  * The ring buffer serializes readers, but that is only low level protection.
817  * The validity of the events (returned by ring_buffer_peek() etc.)
818  * is not protected by the ring buffer.
819  *
820  * The content of events may become garbage if we allow another process to
821  * consume these events concurrently:
822  *   A) the page of the consumed events may become a normal page
823  *      (not a reader page) in the ring buffer, and this page will be
824  *      rewritten by the event producer.
825  *   B) the page of the consumed events may become a page for splice_read,
826  *      and this page will be returned to the system.
827  *
828  * These primitives allow multiple processes to access different CPU
829  * ring buffers concurrently.
830  *
831  * These primitives don't distinguish read-only and read-consume access.
832  * Multiple read-only accesses are also serialized.
833  */
834
835 #ifdef CONFIG_SMP
836 static DECLARE_RWSEM(all_cpu_access_lock);
837 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
838
839 static inline void trace_access_lock(int cpu)
840 {
841         if (cpu == RING_BUFFER_ALL_CPUS) {
842                 /* gain it for accessing the whole ring buffer. */
843                 down_write(&all_cpu_access_lock);
844         } else {
845                 /* gain it for accessing a cpu ring buffer. */
846
847                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
848                 down_read(&all_cpu_access_lock);
849
850                 /* Secondly block other access to this @cpu ring buffer. */
851                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
852         }
853 }
854
855 static inline void trace_access_unlock(int cpu)
856 {
857         if (cpu == RING_BUFFER_ALL_CPUS) {
858                 up_write(&all_cpu_access_lock);
859         } else {
860                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
861                 up_read(&all_cpu_access_lock);
862         }
863 }
864
865 static inline void trace_access_lock_init(void)
866 {
867         int cpu;
868
869         for_each_possible_cpu(cpu)
870                 mutex_init(&per_cpu(cpu_access_lock, cpu));
871 }
872
873 #else
874
875 static DEFINE_MUTEX(access_lock);
876
877 static inline void trace_access_lock(int cpu)
878 {
879         (void)cpu;
880         mutex_lock(&access_lock);
881 }
882
883 static inline void trace_access_unlock(int cpu)
884 {
885         (void)cpu;
886         mutex_unlock(&access_lock);
887 }
888
889 static inline void trace_access_lock_init(void)
890 {
891 }
892
893 #endif
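
#if 0
/*
 * Illustrative sketch only: a consuming reader of a single CPU buffer
 * brackets its ring buffer accesses with the primitives above, roughly:
 */
static void my_consume_cpu(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
		; /* process @event while the per-cpu access lock is held */
	trace_access_unlock(cpu);
}
#endif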
894
895 #ifdef CONFIG_STACKTRACE
896 static void __ftrace_trace_stack(struct trace_buffer *buffer,
897                                  unsigned long flags,
898                                  int skip, int pc, struct pt_regs *regs);
899 static inline void ftrace_trace_stack(struct trace_array *tr,
900                                       struct trace_buffer *buffer,
901                                       unsigned long flags,
902                                       int skip, int pc, struct pt_regs *regs);
903
904 #else
905 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
906                                         unsigned long flags,
907                                         int skip, int pc, struct pt_regs *regs)
908 {
909 }
910 static inline void ftrace_trace_stack(struct trace_array *tr,
911                                       struct trace_buffer *buffer,
912                                       unsigned long flags,
913                                       int skip, int pc, struct pt_regs *regs)
914 {
915 }
916
917 #endif
918
919 static __always_inline void
920 trace_event_setup(struct ring_buffer_event *event,
921                   int type, unsigned long flags, int pc)
922 {
923         struct trace_entry *ent = ring_buffer_event_data(event);
924
925         tracing_generic_entry_update(ent, type, flags, pc);
926 }
927
928 static __always_inline struct ring_buffer_event *
929 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
930                           int type,
931                           unsigned long len,
932                           unsigned long flags, int pc)
933 {
934         struct ring_buffer_event *event;
935
936         event = ring_buffer_lock_reserve(buffer, len);
937         if (event != NULL)
938                 trace_event_setup(event, type, flags, pc);
939
940         return event;
941 }
942
943 void tracer_tracing_on(struct trace_array *tr)
944 {
945         if (tr->array_buffer.buffer)
946                 ring_buffer_record_on(tr->array_buffer.buffer);
947         /*
948          * This flag is looked at when buffers haven't been allocated
949          * yet, or by some tracers (like irqsoff), that just want to
950          * know if the ring buffer has been disabled, but it can handle
951          * races of where it gets disabled but we still do a record.
952          * As the check is in the fast path of the tracers, it is more
953          * important to be fast than accurate.
954          */
955         tr->buffer_disabled = 0;
956         /* Make the flag seen by readers */
957         smp_wmb();
958 }
959
960 /**
961  * tracing_on - enable tracing buffers
962  *
963  * This function enables tracing buffers that may have been
964  * disabled with tracing_off.
965  */
966 void tracing_on(void)
967 {
968         tracer_tracing_on(&global_trace);
969 }
970 EXPORT_SYMBOL_GPL(tracing_on);
971
972
973 static __always_inline void
974 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
975 {
976         __this_cpu_write(trace_taskinfo_save, true);
977
978         /* If this is the temp buffer, we need to commit fully */
979         if (this_cpu_read(trace_buffered_event) == event) {
980                 /* Length is in event->array[0] */
981                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
982                 /* Release the temp buffer */
983                 this_cpu_dec(trace_buffered_event_cnt);
984         } else
985                 ring_buffer_unlock_commit(buffer, event);
986 }
987
988 /**
989  * __trace_puts - write a constant string into the trace buffer.
990  * @ip:    The address of the caller
991  * @str:   The constant string to write
992  * @size:  The size of the string.
993  */
994 int __trace_puts(unsigned long ip, const char *str, int size)
995 {
996         struct ring_buffer_event *event;
997         struct trace_buffer *buffer;
998         struct print_entry *entry;
999         unsigned long irq_flags;
1000         int alloc;
1001         int pc;
1002
1003         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1004                 return 0;
1005
1006         pc = preempt_count();
1007
1008         if (unlikely(tracing_selftest_running || tracing_disabled))
1009                 return 0;
1010
1011         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1012
1013         local_save_flags(irq_flags);
1014         buffer = global_trace.array_buffer.buffer;
1015         ring_buffer_nest_start(buffer);
1016         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
1017                                             irq_flags, pc);
1018         if (!event) {
1019                 size = 0;
1020                 goto out;
1021         }
1022
1023         entry = ring_buffer_event_data(event);
1024         entry->ip = ip;
1025
1026         memcpy(&entry->buf, str, size);
1027
1028         /* Add a newline if necessary */
1029         if (entry->buf[size - 1] != '\n') {
1030                 entry->buf[size] = '\n';
1031                 entry->buf[size + 1] = '\0';
1032         } else
1033                 entry->buf[size] = '\0';
1034
1035         __buffer_unlock_commit(buffer, event);
1036         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1037  out:
1038         ring_buffer_nest_end(buffer);
1039         return size;
1040 }
1041 EXPORT_SYMBOL_GPL(__trace_puts);
1042
1043 /**
1044  * __trace_bputs - write the pointer to a constant string into trace buffer
1045  * @ip:    The address of the caller
1046  * @str:   The constant string to write to the buffer to
1047  */
1048 int __trace_bputs(unsigned long ip, const char *str)
1049 {
1050         struct ring_buffer_event *event;
1051         struct trace_buffer *buffer;
1052         struct bputs_entry *entry;
1053         unsigned long irq_flags;
1054         int size = sizeof(struct bputs_entry);
1055         int ret = 0;
1056         int pc;
1057
1058         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1059                 return 0;
1060
1061         pc = preempt_count();
1062
1063         if (unlikely(tracing_selftest_running || tracing_disabled))
1064                 return 0;
1065
1066         local_save_flags(irq_flags);
1067         buffer = global_trace.array_buffer.buffer;
1068
1069         ring_buffer_nest_start(buffer);
1070         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1071                                             irq_flags, pc);
1072         if (!event)
1073                 goto out;
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip                       = ip;
1077         entry->str                      = str;
1078
1079         __buffer_unlock_commit(buffer, event);
1080         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1081
1082         ret = 1;
1083  out:
1084         ring_buffer_nest_end(buffer);
1085         return ret;
1086 }
1087 EXPORT_SYMBOL_GPL(__trace_bputs);
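
/*
 * Typical use of the two helpers above is through the trace_puts() macro
 * (see <linux/kernel.h>), which picks __trace_bputs() for string literals
 * and falls back to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */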
1088
1089 #ifdef CONFIG_TRACER_SNAPSHOT
1090 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1091                                            void *cond_data)
1092 {
1093         struct tracer *tracer = tr->current_trace;
1094         unsigned long flags;
1095
1096         if (in_nmi()) {
1097                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1098                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1099                 return;
1100         }
1101
1102         if (!tr->allocated_snapshot) {
1103                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1104                 internal_trace_puts("*** stopping trace here!   ***\n");
1105                 tracing_off();
1106                 return;
1107         }
1108
1109         /* Note, the snapshot can not be used while the tracer itself uses it */
1110         if (tracer->use_max_tr) {
1111                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1112                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1113                 return;
1114         }
1115
1116         local_irq_save(flags);
1117         update_max_tr(tr, current, smp_processor_id(), cond_data);
1118         local_irq_restore(flags);
1119 }
1120
1121 void tracing_snapshot_instance(struct trace_array *tr)
1122 {
1123         tracing_snapshot_instance_cond(tr, NULL);
1124 }
1125
1126 /**
1127  * tracing_snapshot - take a snapshot of the current buffer.
1128  *
1129  * This causes a swap between the snapshot buffer and the current live
1130  * tracing buffer. You can use this to take snapshots of the live
1131  * trace when some condition is triggered, but continue to trace.
1132  *
1133  * Note, make sure to allocate the snapshot beforehand, either with
1134  * tracing_snapshot_alloc() or manually with:
1135  * echo 1 > /sys/kernel/debug/tracing/snapshot
1136  *
1137  * If the snapshot buffer is not allocated, this will stop tracing,
1138  * basically making a permanent snapshot.
1139  */
1140 void tracing_snapshot(void)
1141 {
1142         struct trace_array *tr = &global_trace;
1143
1144         tracing_snapshot_instance(tr);
1145 }
1146 EXPORT_SYMBOL_GPL(tracing_snapshot);
1147
1148 /**
1149  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1150  * @tr:         The tracing instance to snapshot
1151  * @cond_data:  The data to be tested conditionally, and possibly saved
1152  *
1153  * This is the same as tracing_snapshot() except that the snapshot is
1154  * conditional - the snapshot will only happen if the
1155  * cond_snapshot.update() implementation receiving the cond_data
1156  * returns true, which means that the trace array's cond_snapshot
1157  * update() operation used the cond_data to determine whether the
1158  * snapshot should be taken, and if it was, presumably saved it along
1159  * with the snapshot.
1160  */
1161 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1162 {
1163         tracing_snapshot_instance_cond(tr, cond_data);
1164 }
1165 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1166
1167 /**
1168  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1169  * @tr:         The tracing instance
1170  *
1171  * When the user enables a conditional snapshot using
1172  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1173  * with the snapshot.  This accessor is used to retrieve it.
1174  *
1175  * Should not be called from cond_snapshot.update(), since it takes
1176  * the tr->max_lock lock, which the code calling
1177  * cond_snapshot.update() has already done.
1178  *
1179  * Returns the cond_data associated with the trace array's snapshot.
1180  */
1181 void *tracing_cond_snapshot_data(struct trace_array *tr)
1182 {
1183         void *cond_data = NULL;
1184
1185         arch_spin_lock(&tr->max_lock);
1186
1187         if (tr->cond_snapshot)
1188                 cond_data = tr->cond_snapshot->cond_data;
1189
1190         arch_spin_unlock(&tr->max_lock);
1191
1192         return cond_data;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195
1196 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1197                                         struct array_buffer *size_buf, int cpu_id);
1198 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1199
1200 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1201 {
1202         int ret;
1203
1204         if (!tr->allocated_snapshot) {
1205
1206                 /* allocate spare buffer */
1207                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1208                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1209                 if (ret < 0)
1210                         return ret;
1211
1212                 tr->allocated_snapshot = true;
1213         }
1214
1215         return 0;
1216 }
1217
1218 static void free_snapshot(struct trace_array *tr)
1219 {
1220         /*
1221          * We don't free the ring buffer; instead, we resize it because
1222          * the max_tr ring buffer has some state (e.g. ring->clock) and
1223          * we want to preserve it.
1224          */
1225         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1226         set_buffer_entries(&tr->max_buffer, 1);
1227         tracing_reset_online_cpus(&tr->max_buffer);
1228         tr->allocated_snapshot = false;
1229 }
1230
1231 /**
1232  * tracing_alloc_snapshot - allocate snapshot buffer.
1233  *
1234  * This only allocates the snapshot buffer if it isn't already
1235  * allocated - it doesn't also take a snapshot.
1236  *
1237  * This is meant to be used in cases where the snapshot buffer needs
1238  * to be set up for events that can't sleep but need to be able to
1239  * trigger a snapshot.
1240  */
1241 int tracing_alloc_snapshot(void)
1242 {
1243         struct trace_array *tr = &global_trace;
1244         int ret;
1245
1246         ret = tracing_alloc_snapshot_instance(tr);
1247         WARN_ON(ret < 0);
1248
1249         return ret;
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1252
1253 /**
1254  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1255  *
1256  * This is similar to tracing_snapshot(), but it will allocate the
1257  * snapshot buffer if it isn't already allocated. Use this only
1258  * where it is safe to sleep, as the allocation may sleep.
1259  *
1260  * This causes a swap between the snapshot buffer and the current live
1261  * tracing buffer. You can use this to take snapshots of the live
1262  * trace when some condition is triggered, but continue to trace.
1263  */
1264 void tracing_snapshot_alloc(void)
1265 {
1266         int ret;
1267
1268         ret = tracing_alloc_snapshot();
1269         if (ret < 0)
1270                 return;
1271
1272         tracing_snapshot();
1273 }
1274 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
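
#if 0
/*
 * Illustrative sketch only: taking a snapshot from kernel code. The
 * my_*() functions are hypothetical; the two calls are the API above.
 */
static int __init my_probe_init(void)
{
	/* allocate the spare buffer up front, from a context that can sleep */
	return tracing_alloc_snapshot();
}

static void my_condition_hit(void)
{
	/* swap the live buffer into the snapshot (not from NMI, see above) */
	tracing_snapshot();
}
#endif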
1275
1276 /**
1277  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1278  * @tr:         The tracing instance
1279  * @cond_data:  User data to associate with the snapshot
1280  * @update:     Implementation of the cond_snapshot update function
1281  *
1282  * Check whether the conditional snapshot for the given instance has
1283  * already been enabled, or if the current tracer is already using a
1284  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1285  * save the cond_data and update function inside.
1286  *
1287  * Returns 0 if successful, error otherwise.
1288  */
1289 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1290                                  cond_update_fn_t update)
1291 {
1292         struct cond_snapshot *cond_snapshot;
1293         int ret = 0;
1294
1295         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1296         if (!cond_snapshot)
1297                 return -ENOMEM;
1298
1299         cond_snapshot->cond_data = cond_data;
1300         cond_snapshot->update = update;
1301
1302         mutex_lock(&trace_types_lock);
1303
1304         ret = tracing_alloc_snapshot_instance(tr);
1305         if (ret)
1306                 goto fail_unlock;
1307
1308         if (tr->current_trace->use_max_tr) {
1309                 ret = -EBUSY;
1310                 goto fail_unlock;
1311         }
1312
1313         /*
1314          * The cond_snapshot can only change to NULL without the
1315          * trace_types_lock. We don't care if we race with it going
1316          * to NULL, but we want to make sure that it's not set to
1317          * something other than NULL when we get here, which we can
1318          * do safely with only holding the trace_types_lock and not
1319          * having to take the max_lock.
1320          */
1321         if (tr->cond_snapshot) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         arch_spin_lock(&tr->max_lock);
1327         tr->cond_snapshot = cond_snapshot;
1328         arch_spin_unlock(&tr->max_lock);
1329
1330         mutex_unlock(&trace_types_lock);
1331
1332         return ret;
1333
1334  fail_unlock:
1335         mutex_unlock(&trace_types_lock);
1336         kfree(cond_snapshot);
1337         return ret;
1338 }
1339 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1340
1341 /**
1342  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1343  * @tr:         The tracing instance
1344  *
1345  * Check whether the conditional snapshot for the given instance is
1346  * enabled; if so, free the cond_snapshot associated with it,
1347  * otherwise return -EINVAL.
1348  *
1349  * Returns 0 if successful, error otherwise.
1350  */
1351 int tracing_snapshot_cond_disable(struct trace_array *tr)
1352 {
1353         int ret = 0;
1354
1355         arch_spin_lock(&tr->max_lock);
1356
1357         if (!tr->cond_snapshot)
1358                 ret = -EINVAL;
1359         else {
1360                 kfree(tr->cond_snapshot);
1361                 tr->cond_snapshot = NULL;
1362         }
1363
1364         arch_spin_unlock(&tr->max_lock);
1365
1366         return ret;
1367 }
1368 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
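
#if 0
/*
 * Illustrative sketch only: wiring up a conditional snapshot. The my_*()
 * names and the threshold logic are hypothetical; cond_update_fn_t and
 * the three tracing_snapshot_cond*() calls are the API documented above.
 */
static bool my_update(struct trace_array *tr, void *cond_data)
{
	/* @cond_data is what was passed to tracing_snapshot_cond() */
	unsigned long *val = cond_data;

	return val && *val > 100;	/* true => take the snapshot */
}

static int my_enable(struct trace_array *tr)
{
	/* associate the update callback (and optional user data) with @tr */
	return tracing_snapshot_cond_enable(tr, NULL, my_update);
}

static void my_hot_path(struct trace_array *tr, unsigned long observed)
{
	/* snapshots only if my_update() returns true for @observed */
	tracing_snapshot_cond(tr, &observed);
}

static void my_teardown(struct trace_array *tr)
{
	tracing_snapshot_cond_disable(tr);
}
#endif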
1369 #else
1370 void tracing_snapshot(void)
1371 {
1372         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1373 }
1374 EXPORT_SYMBOL_GPL(tracing_snapshot);
1375 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1376 {
1377         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1380 int tracing_alloc_snapshot(void)
1381 {
1382         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1383         return -ENODEV;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1386 void tracing_snapshot_alloc(void)
1387 {
1388         /* Give warning */
1389         tracing_snapshot();
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1392 void *tracing_cond_snapshot_data(struct trace_array *tr)
1393 {
1394         return NULL;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1397 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1398 {
1399         return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1402 int tracing_snapshot_cond_disable(struct trace_array *tr)
1403 {
1404         return false;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1407 #endif /* CONFIG_TRACER_SNAPSHOT */
1408
1409 void tracer_tracing_off(struct trace_array *tr)
1410 {
1411         if (tr->array_buffer.buffer)
1412                 ring_buffer_record_off(tr->array_buffer.buffer);
1413         /*
1414          * This flag is looked at when buffers haven't been allocated
1415          * yet, or by some tracers (like irqsoff), that just want to
1416          * know if the ring buffer has been disabled, but it can handle
1417          * races of where it gets disabled but we still do a record.
1418          * As the check is in the fast path of the tracers, it is more
1419          * important to be fast than accurate.
1420          */
1421         tr->buffer_disabled = 1;
1422         /* Make the flag seen by readers */
1423         smp_wmb();
1424 }
1425
1426 /**
1427  * tracing_off - turn off tracing buffers
1428  *
1429  * This function stops the tracing buffers from recording data.
1430  * It does not disable any overhead the tracers themselves may
1431  * be causing. This function simply causes all recording to
1432  * the ring buffers to fail.
1433  */
1434 void tracing_off(void)
1435 {
1436         tracer_tracing_off(&global_trace);
1437 }
1438 EXPORT_SYMBOL_GPL(tracing_off);
1439
1440 void disable_trace_on_warning(void)
1441 {
1442         if (__disable_trace_on_warning) {
1443                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1444                         "Disabling tracing due to warning\n");
1445                 tracing_off();
1446         }
1447 }
1448
1449 /**
1450  * tracer_tracing_is_on - show real state of ring buffer enabled
1451  * @tr: the trace array to check whether its ring buffer is enabled
1452  *
1453  * Shows the real state of the ring buffer: whether it is enabled or not.
1454  */
1455 bool tracer_tracing_is_on(struct trace_array *tr)
1456 {
1457         if (tr->array_buffer.buffer)
1458                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1459         return !tr->buffer_disabled;
1460 }
1461
1462 /**
1463  * tracing_is_on - show whether the global ring buffers are enabled
1464  */
1465 int tracing_is_on(void)
1466 {
1467         return tracer_tracing_is_on(&global_trace);
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_is_on);
1470
1471 static int __init set_buf_size(char *str)
1472 {
1473         unsigned long buf_size;
1474
1475         if (!str)
1476                 return 0;
1477         buf_size = memparse(str, &str);
1478         /* nr_entries can not be zero */
1479         if (buf_size == 0)
1480                 return 0;
1481         trace_buf_size = buf_size;
1482         return 1;
1483 }
1484 __setup("trace_buf_size=", set_buf_size);
1485
1486 static int __init set_tracing_thresh(char *str)
1487 {
1488         unsigned long threshold;
1489         int ret;
1490
1491         if (!str)
1492                 return 0;
1493         ret = kstrtoul(str, 0, &threshold);
1494         if (ret < 0)
1495                 return 0;
1496         tracing_thresh = threshold * 1000;
1497         return 1;
1498 }
1499 __setup("tracing_thresh=", set_tracing_thresh);
1500
1501 unsigned long nsecs_to_usecs(unsigned long nsecs)
1502 {
1503         return nsecs / 1000;
1504 }
1505
1506 /*
1507  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1508  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1509  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1510  * of strings in the order that the evals (enum) were defined.
1511  */
1512 #undef C
1513 #define C(a, b) b
1514
1515 /* These must match the bit positions in trace_iterator_flags */
1516 static const char *trace_options[] = {
1517         TRACE_FLAGS
1518         NULL
1519 };
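
/*
 * For example (illustrative), assuming TRACE_FLAGS contains an entry such
 * as C(PRINT_PARENT, "print-parent"), the "C(a, b) b" definition above
 * turns it into the string "print-parent" here, while the companion
 * C(a, b) definitions in trace.h generate the matching TRACE_ITER_PRINT_PARENT
 * bit, keeping the strings and bit positions in sync.
 */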
1520
1521 static struct {
1522         u64 (*func)(void);
1523         const char *name;
1524         int in_ns;              /* is this clock in nanoseconds? */
1525 } trace_clocks[] = {
1526         { trace_clock_local,            "local",        1 },
1527         { trace_clock_global,           "global",       1 },
1528         { trace_clock_counter,          "counter",      0 },
1529         { trace_clock_jiffies,          "uptime",       0 },
1530         { trace_clock,                  "perf",         1 },
1531         { ktime_get_mono_fast_ns,       "mono",         1 },
1532         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1533         { ktime_get_boot_fast_ns,       "boot",         1 },
1534         ARCH_TRACE_CLOCKS
1535 };
1536
1537 bool trace_clock_in_ns(struct trace_array *tr)
1538 {
1539         if (trace_clocks[tr->clock_id].in_ns)
1540                 return true;
1541
1542         return false;
1543 }
1544
1545 /*
1546  * trace_parser_get_init - gets the buffer for trace parser
1547  */
1548 int trace_parser_get_init(struct trace_parser *parser, int size)
1549 {
1550         memset(parser, 0, sizeof(*parser));
1551
1552         parser->buffer = kmalloc(size, GFP_KERNEL);
1553         if (!parser->buffer)
1554                 return 1;
1555
1556         parser->size = size;
1557         return 0;
1558 }
1559
1560 /*
1561  * trace_parser_put - frees the buffer for trace parser
1562  */
1563 void trace_parser_put(struct trace_parser *parser)
1564 {
1565         kfree(parser->buffer);
1566         parser->buffer = NULL;
1567 }
1568
1569 /*
1570  * trace_get_user - reads the user input string separated by  space
1571  * (matched by isspace(ch))
1572  *
1573  * For each string found the 'struct trace_parser' is updated,
1574  * and the function returns.
1575  *
1576  * Returns number of bytes read.
1577  *
1578  * See kernel/trace/trace.h for 'struct trace_parser' details.
1579  */
1580 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1581         size_t cnt, loff_t *ppos)
1582 {
1583         char ch;
1584         size_t read = 0;
1585         ssize_t ret;
1586
1587         if (!*ppos)
1588                 trace_parser_clear(parser);
1589
1590         ret = get_user(ch, ubuf++);
1591         if (ret)
1592                 goto out;
1593
1594         read++;
1595         cnt--;
1596
1597         /*
1598          * The parser is not finished with the last write,
1599          * continue reading the user input without skipping spaces.
1600          */
1601         if (!parser->cont) {
1602                 /* skip white space */
1603                 while (cnt && isspace(ch)) {
1604                         ret = get_user(ch, ubuf++);
1605                         if (ret)
1606                                 goto out;
1607                         read++;
1608                         cnt--;
1609                 }
1610
1611                 parser->idx = 0;
1612
1613                 /* only spaces were written */
1614                 if (isspace(ch) || !ch) {
1615                         *ppos += read;
1616                         ret = read;
1617                         goto out;
1618                 }
1619         }
1620
1621         /* read the non-space input */
1622         while (cnt && !isspace(ch) && ch) {
1623                 if (parser->idx < parser->size - 1)
1624                         parser->buffer[parser->idx++] = ch;
1625                 else {
1626                         ret = -EINVAL;
1627                         goto out;
1628                 }
1629                 ret = get_user(ch, ubuf++);
1630                 if (ret)
1631                         goto out;
1632                 read++;
1633                 cnt--;
1634         }
1635
1636         /* We either got finished input or we have to wait for another call. */
1637         if (isspace(ch) || !ch) {
1638                 parser->buffer[parser->idx] = 0;
1639                 parser->cont = false;
1640         } else if (parser->idx < parser->size - 1) {
1641                 parser->cont = true;
1642                 parser->buffer[parser->idx++] = ch;
1643                 /* Make sure the parsed string always terminates with '\0'. */
1644                 parser->buffer[parser->idx] = 0;
1645         } else {
1646                 ret = -EINVAL;
1647                 goto out;
1648         }
1649
1650         *ppos += read;
1651         ret = read;
1652
1653 out:
1654         return ret;
1655 }
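
/*
 * For a typical caller of trace_get_user(), see the read loop in
 * trace_pid_write() above: it repeatedly calls trace_get_user() to pull
 * one whitespace-separated token at a time until @cnt is exhausted.
 */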
1656
1657 /* TODO add a seq_buf_to_buffer() */
1658 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1659 {
1660         int len;
1661
1662         if (trace_seq_used(s) <= s->seq.readpos)
1663                 return -EBUSY;
1664
1665         len = trace_seq_used(s) - s->seq.readpos;
1666         if (cnt > len)
1667                 cnt = len;
1668         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1669
1670         s->seq.readpos += cnt;
1671         return cnt;
1672 }
1673
1674 unsigned long __read_mostly     tracing_thresh;
1675 static const struct file_operations tracing_max_lat_fops;
1676
1677 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1678         defined(CONFIG_FSNOTIFY)
1679
1680 static struct workqueue_struct *fsnotify_wq;
1681
1682 static void latency_fsnotify_workfn(struct work_struct *work)
1683 {
1684         struct trace_array *tr = container_of(work, struct trace_array,
1685                                               fsnotify_work);
1686         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1687 }
1688
1689 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1690 {
1691         struct trace_array *tr = container_of(iwork, struct trace_array,
1692                                               fsnotify_irqwork);
1693         queue_work(fsnotify_wq, &tr->fsnotify_work);
1694 }
1695
1696 static void trace_create_maxlat_file(struct trace_array *tr,
1697                                      struct dentry *d_tracer)
1698 {
1699         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1700         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1701         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1702                                               d_tracer, &tr->max_latency,
1703                                               &tracing_max_lat_fops);
1704 }
1705
1706 __init static int latency_fsnotify_init(void)
1707 {
1708         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1709                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1710         if (!fsnotify_wq) {
1711                 pr_err("Unable to allocate tr_max_lat_wq\n");
1712                 return -ENOMEM;
1713         }
1714         return 0;
1715 }
1716
1717 late_initcall_sync(latency_fsnotify_init);
1718
1719 void latency_fsnotify(struct trace_array *tr)
1720 {
1721         if (!fsnotify_wq)
1722                 return;
1723         /*
1724          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1725          * possible that we are called from __schedule() or do_idle(), which
1726          * could cause a deadlock.
1727          */
1728         irq_work_queue(&tr->fsnotify_irqwork);
1729 }
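/*
 * Example: the FS_MODIFY notification queued above is what lets user
 * space block on tracing_max_latency instead of polling it.  A minimal
 * watcher sketch, assuming the default tracefs mount point (illustrative
 * only):
 *
 *	#include <stdio.h>
 *	#include <sys/inotify.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		int fd = inotify_init1(0);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (inotify_add_watch(fd,
 *				"/sys/kernel/tracing/tracing_max_latency",
 *				IN_MODIFY) < 0)
 *			return 1;
 *		while (read(fd, buf, sizeof(buf)) > 0)
 *			puts("tracing_max_latency changed");
 *		return 0;
 *	}
 */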
1730
1731 /*
1732  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1733  *  defined(CONFIG_FSNOTIFY)
1734  */
1735 #else
1736
1737 #define trace_create_maxlat_file(tr, d_tracer)                          \
1738         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1739                           &tr->max_latency, &tracing_max_lat_fops)
1740
1741 #endif
1742
1743 #ifdef CONFIG_TRACER_MAX_TRACE
1744 /*
1745  * Copy the new maximum trace into the separate maximum-trace
1746  * structure. (this way the maximum trace is permanently saved,
1747  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1748  */
1749 static void
1750 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1751 {
1752         struct array_buffer *trace_buf = &tr->array_buffer;
1753         struct array_buffer *max_buf = &tr->max_buffer;
1754         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1755         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1756
1757         max_buf->cpu = cpu;
1758         max_buf->time_start = data->preempt_timestamp;
1759
1760         max_data->saved_latency = tr->max_latency;
1761         max_data->critical_start = data->critical_start;
1762         max_data->critical_end = data->critical_end;
1763
1764         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1765         max_data->pid = tsk->pid;
1766         /*
1767          * If tsk == current, then use current_uid(), as that does not use
1768          * RCU. The irq tracer can be called out of RCU scope.
1769          */
1770         if (tsk == current)
1771                 max_data->uid = current_uid();
1772         else
1773                 max_data->uid = task_uid(tsk);
1774
1775         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1776         max_data->policy = tsk->policy;
1777         max_data->rt_priority = tsk->rt_priority;
1778
1779         /* record this task's comm */
1780         tracing_record_cmdline(tsk);
1781         latency_fsnotify(tr);
1782 }
1783
1784 /**
1785  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1786  * @tr: tracer
1787  * @tsk: the task with the latency
1788  * @cpu: The cpu that initiated the trace.
1789  * @cond_data: User data associated with a conditional snapshot
1790  *
1791  * Flip the buffers between the @tr and the max_tr and record information
1792  * about which task was the cause of this latency.
1793  */
1794 void
1795 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1796               void *cond_data)
1797 {
1798         if (tr->stop_count)
1799                 return;
1800
1801         WARN_ON_ONCE(!irqs_disabled());
1802
1803         if (!tr->allocated_snapshot) {
1804                 /* Only the nop tracer should hit this when disabling */
1805                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1806                 return;
1807         }
1808
1809         arch_spin_lock(&tr->max_lock);
1810
1811         /* Inherit the recordable setting from array_buffer */
1812         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1813                 ring_buffer_record_on(tr->max_buffer.buffer);
1814         else
1815                 ring_buffer_record_off(tr->max_buffer.buffer);
1816
1817 #ifdef CONFIG_TRACER_SNAPSHOT
1818         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1819                 goto out_unlock;
1820 #endif
1821         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1822
1823         __update_max_tr(tr, tsk, cpu);
1824
1825  out_unlock:
1826         arch_spin_unlock(&tr->max_lock);
1827 }
1828
1829 /**
1830  * update_max_tr_single - only copy one trace over, and reset the rest
1831  * @tr: tracer
1832  * @tsk: task with the latency
1833  * @cpu: the cpu of the buffer to copy.
1834  *
1835  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1836  */
1837 void
1838 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1839 {
1840         int ret;
1841
1842         if (tr->stop_count)
1843                 return;
1844
1845         WARN_ON_ONCE(!irqs_disabled());
1846         if (!tr->allocated_snapshot) {
1847                 /* Only the nop tracer should hit this when disabling */
1848                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1849                 return;
1850         }
1851
1852         arch_spin_lock(&tr->max_lock);
1853
1854         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1855
1856         if (ret == -EBUSY) {
1857                 /*
1858                  * We failed to swap the buffer due to a commit taking
1859                  * place on this CPU. We fail to record, but we reset
1860                  * the max trace buffer (no one writes directly to it)
1861                  * and flag that it failed.
1862                  */
1863                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1864                         "Failed to swap buffers due to commit in progress\n");
1865         }
1866
1867         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1868
1869         __update_max_tr(tr, tsk, cpu);
1870         arch_spin_unlock(&tr->max_lock);
1871 }
1872 #endif /* CONFIG_TRACER_MAX_TRACE */
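/*
 * Example: the max-latency snapshot maintained above is consumed through
 * the tracing_max_latency file.  A sketch that arms the wakeup tracer and
 * reads back the recorded maximum, assuming the default tracefs mount
 * point and CONFIG_SCHED_TRACER (illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int write_str(const char *path, const char *val)
 *	{
 *		int fd = open(path, O_WRONLY);
 *
 *		if (fd < 0 || write(fd, val, strlen(val)) < 0)
 *			return -1;
 *		return close(fd);
 *	}
 *
 *	int main(void)
 *	{
 *		char buf[64] = "";
 *		int fd;
 *
 *		write_str("/sys/kernel/tracing/tracing_max_latency", "0");
 *		write_str("/sys/kernel/tracing/current_tracer", "wakeup");
 *		sleep(10);
 *		fd = open("/sys/kernel/tracing/tracing_max_latency", O_RDONLY);
 *		if (fd >= 0 && read(fd, buf, sizeof(buf) - 1) > 0)
 *			printf("max wakeup latency (us): %s", buf);
 *		return 0;
 *	}
 */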
1873
1874 static int wait_on_pipe(struct trace_iterator *iter, int full)
1875 {
1876         /* Iterators are static; they should be either filled or empty */
1877         if (trace_buffer_iter(iter, iter->cpu_file))
1878                 return 0;
1879
1880         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1881                                 full);
1882 }
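/*
 * Example: wait_on_pipe() is what makes reads of trace_pipe block until
 * trace data is available, so consumers do not have to busy-poll.  A
 * minimal consumer sketch, assuming the default tracefs mount point
 * (illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);
 *		return 0;
 *	}
 */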
1883
1884 #ifdef CONFIG_FTRACE_STARTUP_TEST
1885 static bool selftests_can_run;
1886
1887 struct trace_selftests {
1888         struct list_head                list;
1889         struct tracer                   *type;
1890 };
1891
1892 static LIST_HEAD(postponed_selftests);
1893
1894 static int save_selftest(struct tracer *type)
1895 {
1896         struct trace_selftests *selftest;
1897
1898         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1899         if (!selftest)
1900                 return -ENOMEM;
1901
1902         selftest->type = type;
1903         list_add(&selftest->list, &postponed_selftests);
1904         return 0;
1905 }
1906
1907 static int run_tracer_selftest(struct tracer *type)
1908 {
1909         struct trace_array *tr = &global_trace;
1910         struct tracer *saved_tracer = tr->current_trace;
1911         int ret;
1912
1913         if (!type->selftest || tracing_selftest_disabled)
1914                 return 0;
1915
1916         /*
1917          * If a tracer registers early in boot up (before scheduling is
1918          * initialized and such), then do not run its selftests yet.
1919          * Instead, run it a little later in the boot process.
1920          */
1921         if (!selftests_can_run)
1922                 return save_selftest(type);
1923
1924         /*
1925          * Run a selftest on this tracer.
1926          * Here we reset the trace buffer, and set the current
1927          * tracer to be this tracer. The tracer can then run some
1928          * internal tracing to verify that everything is in order.
1929          * If we fail, we do not register this tracer.
1930          */
1931         tracing_reset_online_cpus(&tr->array_buffer);
1932
1933         tr->current_trace = type;
1934
1935 #ifdef CONFIG_TRACER_MAX_TRACE
1936         if (type->use_max_tr) {
1937                 /* If we expanded the buffers, make sure the max is expanded too */
1938                 if (ring_buffer_expanded)
1939                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1940                                            RING_BUFFER_ALL_CPUS);
1941                 tr->allocated_snapshot = true;
1942         }
1943 #endif
1944
1945         /* the test is responsible for initializing and enabling */
1946         pr_info("Testing tracer %s: ", type->name);
1947         ret = type->selftest(type, tr);
1948         /* the test is responsible for resetting too */
1949         tr->current_trace = saved_tracer;
1950         if (ret) {
1951                 printk(KERN_CONT "FAILED!\n");
1952                 /* Add the warning after printing 'FAILED' */
1953                 WARN_ON(1);
1954                 return -1;
1955         }
1956         /* Only reset on passing, to avoid touching corrupted buffers */
1957         tracing_reset_online_cpus(&tr->array_buffer);
1958
1959 #ifdef CONFIG_TRACER_MAX_TRACE
1960         if (type->use_max_tr) {
1961                 tr->allocated_snapshot = false;
1962
1963                 /* Shrink the max buffer again */
1964                 if (ring_buffer_expanded)
1965                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1966                                            RING_BUFFER_ALL_CPUS);
1967         }
1968 #endif
1969
1970         printk(KERN_CONT "PASSED\n");
1971         return 0;
1972 }
1973
1974 static __init int init_trace_selftests(void)
1975 {
1976         struct trace_selftests *p, *n;
1977         struct tracer *t, **last;
1978         int ret;
1979
1980         selftests_can_run = true;
1981
1982         mutex_lock(&trace_types_lock);
1983
1984         if (list_empty(&postponed_selftests))
1985                 goto out;
1986
1987         pr_info("Running postponed tracer tests:\n");
1988
1989         tracing_selftest_running = true;
1990         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1991                 /* This loop can take minutes when sanitizers are enabled, so
1992                  * let's make sure we allow RCU processing.
1993                  */
1994                 cond_resched();
1995                 ret = run_tracer_selftest(p->type);
1996                 /* If the test fails, then warn and remove from available_tracers */
1997                 if (ret < 0) {
1998                         WARN(1, "tracer: %s failed selftest, disabling\n",
1999                              p->type->name);
2000                         last = &trace_types;
2001                         for (t = trace_types; t; t = t->next) {
2002                                 if (t == p->type) {
2003                                         *last = t->next;
2004                                         break;
2005                                 }
2006                                 last = &t->next;
2007                         }
2008                 }
2009                 list_del(&p->list);
2010                 kfree(p);
2011         }
2012         tracing_selftest_running = false;
2013
2014  out:
2015         mutex_unlock(&trace_types_lock);
2016
2017         return 0;
2018 }
2019 core_initcall(init_trace_selftests);
2020 #else
2021 static inline int run_tracer_selftest(struct tracer *type)
2022 {
2023         return 0;
2024 }
2025 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2026
2027 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2028
2029 static void __init apply_trace_boot_options(void);
2030
2031 /**
2032  * register_tracer - register a tracer with the ftrace system.
2033  * @type: the plugin for the tracer
2034  *
2035  * Register a new plugin tracer.
2036  */
2037 int __init register_tracer(struct tracer *type)
2038 {
2039         struct tracer *t;
2040         int ret = 0;
2041
2042         if (!type->name) {
2043                 pr_info("Tracer must have a name\n");
2044                 return -1;
2045         }
2046
2047         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2048                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2049                 return -1;
2050         }
2051
2052         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2053                 pr_warn("Can not register tracer %s due to lockdown\n",
2054                            type->name);
2055                 return -EPERM;
2056         }
2057
2058         mutex_lock(&trace_types_lock);
2059
2060         tracing_selftest_running = true;
2061
2062         for (t = trace_types; t; t = t->next) {
2063                 if (strcmp(type->name, t->name) == 0) {
2064                         /* already found */
2065                         pr_info("Tracer %s already registered\n",
2066                                 type->name);
2067                         ret = -1;
2068                         goto out;
2069                 }
2070         }
2071
2072         if (!type->set_flag)
2073                 type->set_flag = &dummy_set_flag;
2074         if (!type->flags) {
2075                 /* allocate a dummy tracer_flags */
2076                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2077                 if (!type->flags) {
2078                         ret = -ENOMEM;
2079                         goto out;
2080                 }
2081                 type->flags->val = 0;
2082                 type->flags->opts = dummy_tracer_opt;
2083         } else
2084                 if (!type->flags->opts)
2085                         type->flags->opts = dummy_tracer_opt;
2086
2087         /* store the tracer for __set_tracer_option */
2088         type->flags->trace = type;
2089
2090         ret = run_tracer_selftest(type);
2091         if (ret < 0)
2092                 goto out;
2093
2094         type->next = trace_types;
2095         trace_types = type;
2096         add_tracer_options(&global_trace, type);
2097
2098  out:
2099         tracing_selftest_running = false;
2100         mutex_unlock(&trace_types_lock);
2101
2102         if (ret || !default_bootup_tracer)
2103                 goto out_unlock;
2104
2105         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2106                 goto out_unlock;
2107
2108         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2109         /* Do we want this tracer to start on bootup? */
2110         tracing_set_tracer(&global_trace, type->name);
2111         default_bootup_tracer = NULL;
2112
2113         apply_trace_boot_options();
2114
2115         /* Disable other selftests, since running this tracer will break them. */
2116         tracing_selftest_disabled = true;
2117 #ifdef CONFIG_FTRACE_STARTUP_TEST
2118         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2119                type->name);
2120 #endif
2121
2122  out_unlock:
2123         return ret;
2124 }
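/*
 * Example: a hypothetical minimal built-in tracer showing the shape of
 * what register_tracer() expects.  Only .name is required by the checks
 * above; the .init/.reset callbacks follow struct tracer as declared in
 * kernel/trace/trace.h (sketch, not part of this file):
 *
 *	static int noop_example_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void noop_example_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer noop_example_tracer __read_mostly = {
 *		.name	= "noop_example",
 *		.init	= noop_example_init,
 *		.reset	= noop_example_reset,
 *	};
 *
 *	static __init int noop_example_register(void)
 *	{
 *		return register_tracer(&noop_example_tracer);
 *	}
 *	core_initcall(noop_example_register);
 */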
2125
2126 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2127 {
2128         struct trace_buffer *buffer = buf->buffer;
2129
2130         if (!buffer)
2131                 return;
2132
2133         ring_buffer_record_disable(buffer);
2134
2135         /* Make sure all commits have finished */
2136         synchronize_rcu();
2137         ring_buffer_reset_cpu(buffer, cpu);
2138
2139         ring_buffer_record_enable(buffer);
2140 }
2141
2142 void tracing_reset_online_cpus(struct array_buffer *buf)
2143 {
2144         struct trace_buffer *buffer = buf->buffer;
2145
2146         if (!buffer)
2147                 return;
2148
2149         ring_buffer_record_disable(buffer);
2150
2151         /* Make sure all commits have finished */
2152         synchronize_rcu();
2153
2154         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2155
2156         ring_buffer_reset_online_cpus(buffer);
2157
2158         ring_buffer_record_enable(buffer);
2159 }
2160
2161 /* Must have trace_types_lock held */
2162 void tracing_reset_all_online_cpus(void)
2163 {
2164         struct trace_array *tr;
2165
2166         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2167                 if (!tr->clear_trace)
2168                         continue;
2169                 tr->clear_trace = false;
2170                 tracing_reset_online_cpus(&tr->array_buffer);
2171 #ifdef CONFIG_TRACER_MAX_TRACE
2172                 tracing_reset_online_cpus(&tr->max_buffer);
2173 #endif
2174         }
2175 }
2176
2177 static int *tgid_map;
2178
2179 #define SAVED_CMDLINES_DEFAULT 128
2180 #define NO_CMDLINE_MAP UINT_MAX
2181 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2182 struct saved_cmdlines_buffer {
2183         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2184         unsigned *map_cmdline_to_pid;
2185         unsigned cmdline_num;
2186         int cmdline_idx;
2187         char *saved_cmdlines;
2188 };
2189 static struct saved_cmdlines_buffer *savedcmd;
2190
2191 /* temporarily disable recording */
2192 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2193
2194 static inline char *get_saved_cmdlines(int idx)
2195 {
2196         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2197 }
2198
2199 static inline void set_cmdline(int idx, const char *cmdline)
2200 {
2201         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2202 }
2203
2204 static int allocate_cmdlines_buffer(unsigned int val,
2205                                     struct saved_cmdlines_buffer *s)
2206 {
2207         s->map_cmdline_to_pid = kmalloc_array(val,
2208                                               sizeof(*s->map_cmdline_to_pid),
2209                                               GFP_KERNEL);
2210         if (!s->map_cmdline_to_pid)
2211                 return -ENOMEM;
2212
2213         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2214         if (!s->saved_cmdlines) {
2215                 kfree(s->map_cmdline_to_pid);
2216                 return -ENOMEM;
2217         }
2218
2219         s->cmdline_idx = 0;
2220         s->cmdline_num = val;
2221         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2222                sizeof(s->map_pid_to_cmdline));
2223         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2224                val * sizeof(*s->map_cmdline_to_pid));
2225
2226         return 0;
2227 }
2228
2229 static int trace_create_savedcmd(void)
2230 {
2231         int ret;
2232
2233         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2234         if (!savedcmd)
2235                 return -ENOMEM;
2236
2237         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2238         if (ret < 0) {
2239                 kfree(savedcmd);
2240                 savedcmd = NULL;
2241                 return -ENOMEM;
2242         }
2243
2244         return 0;
2245 }
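/*
 * Example: the 128-entry default allocated above is what the
 * saved_cmdlines_size tracefs file resizes at run time.  A sketch that
 * enlarges the cache and dumps the current pid->comm mappings, assuming
 * the default tracefs mount point (illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "1024", 4);
 *			close(fd);
 *		}
 *		fd = open("/sys/kernel/tracing/saved_cmdlines", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		return 0;
 *	}
 */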
2246
2247 int is_tracing_stopped(void)
2248 {
2249         return global_trace.stop_count;
2250 }
2251
2252 /**
2253  * tracing_start - quick start of the tracer
2254  *
2255  * If tracing is enabled but was stopped by tracing_stop,
2256  * this will start the tracer back up.
2257  */
2258 void tracing_start(void)
2259 {
2260         struct trace_buffer *buffer;
2261         unsigned long flags;
2262
2263         if (tracing_disabled)
2264                 return;
2265
2266         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2267         if (--global_trace.stop_count) {
2268                 if (global_trace.stop_count < 0) {
2269                         /* Someone screwed up their debugging */
2270                         WARN_ON_ONCE(1);
2271                         global_trace.stop_count = 0;
2272                 }
2273                 goto out;
2274         }
2275
2276         /* Prevent the buffers from switching */
2277         arch_spin_lock(&global_trace.max_lock);
2278
2279         buffer = global_trace.array_buffer.buffer;
2280         if (buffer)
2281                 ring_buffer_record_enable(buffer);
2282
2283 #ifdef CONFIG_TRACER_MAX_TRACE
2284         buffer = global_trace.max_buffer.buffer;
2285         if (buffer)
2286                 ring_buffer_record_enable(buffer);
2287 #endif
2288
2289         arch_spin_unlock(&global_trace.max_lock);
2290
2291  out:
2292         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2293 }
2294
2295 static void tracing_start_tr(struct trace_array *tr)
2296 {
2297         struct trace_buffer *buffer;
2298         unsigned long flags;
2299
2300         if (tracing_disabled)
2301                 return;
2302
2303         /* If global, we need to also start the max tracer */
2304         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2305                 return tracing_start();
2306
2307         raw_spin_lock_irqsave(&tr->start_lock, flags);
2308
2309         if (--tr->stop_count) {
2310                 if (tr->stop_count < 0) {
2311                         /* Someone screwed up their debugging */
2312                         WARN_ON_ONCE(1);
2313                         tr->stop_count = 0;
2314                 }
2315                 goto out;
2316         }
2317
2318         buffer = tr->array_buffer.buffer;
2319         if (buffer)
2320                 ring_buffer_record_enable(buffer);
2321
2322  out:
2323         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2324 }
2325
2326 /**
2327  * tracing_stop - quick stop of the tracer
2328  *
2329  * Light weight way to stop tracing. Use in conjunction with
2330  * tracing_start.
2331  */
2332 void tracing_stop(void)
2333 {
2334         struct trace_buffer *buffer;
2335         unsigned long flags;
2336
2337         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2338         if (global_trace.stop_count++)
2339                 goto out;
2340
2341         /* Prevent the buffers from switching */
2342         arch_spin_lock(&global_trace.max_lock);
2343
2344         buffer = global_trace.array_buffer.buffer;
2345         if (buffer)
2346                 ring_buffer_record_disable(buffer);
2347
2348 #ifdef CONFIG_TRACER_MAX_TRACE
2349         buffer = global_trace.max_buffer.buffer;
2350         if (buffer)
2351                 ring_buffer_record_disable(buffer);
2352 #endif
2353
2354         arch_spin_unlock(&global_trace.max_lock);
2355
2356  out:
2357         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2358 }
2359
2360 static void tracing_stop_tr(struct trace_array *tr)
2361 {
2362         struct trace_buffer *buffer;
2363         unsigned long flags;
2364
2365         /* If global, we need to also stop the max tracer */
2366         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2367                 return tracing_stop();
2368
2369         raw_spin_lock_irqsave(&tr->start_lock, flags);
2370         if (tr->stop_count++)
2371                 goto out;
2372
2373         buffer = tr->array_buffer.buffer;
2374         if (buffer)
2375                 ring_buffer_record_disable(buffer);
2376
2377  out:
2378         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2379 }
2380
2381 static int trace_save_cmdline(struct task_struct *tsk)
2382 {
2383         unsigned pid, idx;
2384
2385         /* treat recording of idle task as a success */
2386         if (!tsk->pid)
2387                 return 1;
2388
2389         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2390                 return 0;
2391
2392         /*
2393          * It's not the end of the world if we don't get
2394          * the lock, but we also don't want to spin
2395          * nor do we want to disable interrupts,
2396          * so if we miss here, then better luck next time.
2397          */
2398         if (!arch_spin_trylock(&trace_cmdline_lock))
2399                 return 0;
2400
2401         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2402         if (idx == NO_CMDLINE_MAP) {
2403                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2404
2405                 /*
2406                  * Check whether the cmdline buffer at idx has a pid
2407                  * mapped. We are going to overwrite that entry so we
2408                  * need to clear the map_pid_to_cmdline. Otherwise we
2409                  * would read the new comm for the old pid.
2410                  */
2411                 pid = savedcmd->map_cmdline_to_pid[idx];
2412                 if (pid != NO_CMDLINE_MAP)
2413                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2414
2415                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2417
2418                 savedcmd->cmdline_idx = idx;
2419         }
2420
2421         set_cmdline(idx, tsk->comm);
2422
2423         arch_spin_unlock(&trace_cmdline_lock);
2424
2425         return 1;
2426 }
2427
2428 static void __trace_find_cmdline(int pid, char comm[])
2429 {
2430         unsigned map;
2431
2432         if (!pid) {
2433                 strcpy(comm, "<idle>");
2434                 return;
2435         }
2436
2437         if (WARN_ON_ONCE(pid < 0)) {
2438                 strcpy(comm, "<XXX>");
2439                 return;
2440         }
2441
2442         if (pid > PID_MAX_DEFAULT) {
2443                 strcpy(comm, "<...>");
2444                 return;
2445         }
2446
2447         map = savedcmd->map_pid_to_cmdline[pid];
2448         if (map != NO_CMDLINE_MAP)
2449                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2450         else
2451                 strcpy(comm, "<...>");
2452 }
2453
2454 void trace_find_cmdline(int pid, char comm[])
2455 {
2456         preempt_disable();
2457         arch_spin_lock(&trace_cmdline_lock);
2458
2459         __trace_find_cmdline(pid, comm);
2460
2461         arch_spin_unlock(&trace_cmdline_lock);
2462         preempt_enable();
2463 }
2464
2465 int trace_find_tgid(int pid)
2466 {
2467         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2468                 return 0;
2469
2470         return tgid_map[pid];
2471 }
2472
2473 static int trace_save_tgid(struct task_struct *tsk)
2474 {
2475         /* treat recording of idle task as a success */
2476         if (!tsk->pid)
2477                 return 1;
2478
2479         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2480                 return 0;
2481
2482         tgid_map[tsk->pid] = tsk->tgid;
2483         return 1;
2484 }
2485
2486 static bool tracing_record_taskinfo_skip(int flags)
2487 {
2488         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2489                 return true;
2490         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2491                 return true;
2492         if (!__this_cpu_read(trace_taskinfo_save))
2493                 return true;
2494         return false;
2495 }
2496
2497 /**
2498  * tracing_record_taskinfo - record the task info of a task
2499  *
2500  * @task:  task to record
2501  * @flags: TRACE_RECORD_CMDLINE for recording comm
2502  *         TRACE_RECORD_TGID for recording tgid
2503  */
2504 void tracing_record_taskinfo(struct task_struct *task, int flags)
2505 {
2506         bool done;
2507
2508         if (tracing_record_taskinfo_skip(flags))
2509                 return;
2510
2511         /*
2512          * Record as much task information as possible. If some fail, continue
2513          * to try to record the others.
2514          */
2515         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2516         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2517
2518         /* If recording any information failed, retry again soon. */
2519         if (!done)
2520                 return;
2521
2522         __this_cpu_write(trace_taskinfo_save, false);
2523 }
2524
2525 /**
2526  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2527  *
2528  * @prev: previous task during sched_switch
2529  * @next: next task during sched_switch
2530  * @flags: TRACE_RECORD_CMDLINE for recording comm
2531  *         TRACE_RECORD_TGID for recording tgid
2532  */
2533 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2534                                           struct task_struct *next, int flags)
2535 {
2536         bool done;
2537
2538         if (tracing_record_taskinfo_skip(flags))
2539                 return;
2540
2541         /*
2542          * Record as much task information as possible. If some fail, continue
2543          * to try to record the others.
2544          */
2545         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2546         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2548         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2549
2550         /* If recording any information failed, retry again soon. */
2551         if (!done)
2552                 return;
2553
2554         __this_cpu_write(trace_taskinfo_save, false);
2555 }
2556
2557 /* Helpers to record a specific task information */
2558 void tracing_record_cmdline(struct task_struct *task)
2559 {
2560         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2561 }
2562
2563 void tracing_record_tgid(struct task_struct *task)
2564 {
2565         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2566 }
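/*
 * Example: trace_save_tgid() only records once tgid_map has been
 * allocated, which happens when the record-tgid option is switched on.
 * A sketch of the userspace side, assuming the default tracefs mount
 * point and that this kernel exposes the option (illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/options/record-tgid",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);
 *		return close(fd);
 *	}
 *
 * The recorded pid->tgid mappings can then be read back through the
 * saved_tgids file.
 */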
2567
2568 /*
2569  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2570  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2571  * simplifies those functions and keeps them in sync.
2572  */
2573 enum print_line_t trace_handle_return(struct trace_seq *s)
2574 {
2575         return trace_seq_has_overflowed(s) ?
2576                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2577 }
2578 EXPORT_SYMBOL_GPL(trace_handle_return);
2579
2580 void
2581 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2582                              unsigned long flags, int pc)
2583 {
2584         struct task_struct *tsk = current;
2585
2586         entry->preempt_count            = pc & 0xff;
2587         entry->pid                      = (tsk) ? tsk->pid : 0;
2588         entry->type                     = type;
2589         entry->flags =
2590 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2591                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2592 #else
2593                 TRACE_FLAG_IRQS_NOSUPPORT |
2594 #endif
2595                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2596                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2597                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2598                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2599                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2600 }
2601 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2602
2603 struct ring_buffer_event *
2604 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2605                           int type,
2606                           unsigned long len,
2607                           unsigned long flags, int pc)
2608 {
2609         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2610 }
2611
2612 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2613 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2614 static int trace_buffered_event_ref;
2615
2616 /**
2617  * trace_buffered_event_enable - enable buffering events
2618  *
2619  * When events are being filtered, it is quicker to use a temporary
2620  * buffer to write the event data into if there's a likely chance
2621  * that it will not be committed. The discard of the ring buffer
2622  * is not as fast as committing, and is much slower than copying
2623  * a commit.
2624  *
2625  * When an event is to be filtered, allocate per cpu buffers to
2626  * write the event data into, and if the event is filtered and discarded
2627  * it is simply dropped, otherwise, the entire data is to be committed
2628  * in one shot.
2629  */
2630 void trace_buffered_event_enable(void)
2631 {
2632         struct ring_buffer_event *event;
2633         struct page *page;
2634         int cpu;
2635
2636         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2637
2638         if (trace_buffered_event_ref++)
2639                 return;
2640
2641         for_each_tracing_cpu(cpu) {
2642                 page = alloc_pages_node(cpu_to_node(cpu),
2643                                         GFP_KERNEL | __GFP_NORETRY, 0);
2644                 if (!page)
2645                         goto failed;
2646
2647                 event = page_address(page);
2648                 memset(event, 0, sizeof(*event));
2649
2650                 per_cpu(trace_buffered_event, cpu) = event;
2651
2652                 preempt_disable();
2653                 if (cpu == smp_processor_id() &&
2654                     __this_cpu_read(trace_buffered_event) !=
2655                     per_cpu(trace_buffered_event, cpu))
2656                         WARN_ON_ONCE(1);
2657                 preempt_enable();
2658         }
2659
2660         return;
2661  failed:
2662         trace_buffered_event_disable();
2663 }
2664
2665 static void enable_trace_buffered_event(void *data)
2666 {
2667         /* Probably not needed, but do it anyway */
2668         smp_rmb();
2669         this_cpu_dec(trace_buffered_event_cnt);
2670 }
2671
2672 static void disable_trace_buffered_event(void *data)
2673 {
2674         this_cpu_inc(trace_buffered_event_cnt);
2675 }
2676
2677 /**
2678  * trace_buffered_event_disable - disable buffering events
2679  *
2680  * When a filter is removed, it is faster to not use the buffered
2681  * events, and to commit directly into the ring buffer. Free up
2682  * the temp buffers when there are no more users. This requires
2683  * special synchronization with current events.
2684  */
2685 void trace_buffered_event_disable(void)
2686 {
2687         int cpu;
2688
2689         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2690
2691         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2692                 return;
2693
2694         if (--trace_buffered_event_ref)
2695                 return;
2696
2697         preempt_disable();
2698         /* For each CPU, set the buffer as used. */
2699         smp_call_function_many(tracing_buffer_mask,
2700                                disable_trace_buffered_event, NULL, 1);
2701         preempt_enable();
2702
2703         /* Wait for all current users to finish */
2704         synchronize_rcu();
2705
2706         for_each_tracing_cpu(cpu) {
2707                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2708                 per_cpu(trace_buffered_event, cpu) = NULL;
2709         }
2710         /*
2711          * Make sure trace_buffered_event is NULL before clearing
2712          * trace_buffered_event_cnt.
2713          */
2714         smp_wmb();
2715
2716         preempt_disable();
2717         /* Do the work on each cpu */
2718         smp_call_function_many(tracing_buffer_mask,
2719                                enable_trace_buffered_event, NULL, 1);
2720         preempt_enable();
2721 }
2722
2723 static struct trace_buffer *temp_buffer;
2724
2725 struct ring_buffer_event *
2726 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2727                           struct trace_event_file *trace_file,
2728                           int type, unsigned long len,
2729                           unsigned long flags, int pc)
2730 {
2731         struct ring_buffer_event *entry;
2732         int val;
2733
2734         *current_rb = trace_file->tr->array_buffer.buffer;
2735
2736         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2737              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2738             (entry = this_cpu_read(trace_buffered_event))) {
2739                 /* Try to use the per cpu buffer first */
2740                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2741                 if (val == 1) {
2742                         trace_event_setup(entry, type, flags, pc);
2743                         entry->array[0] = len;
2744                         return entry;
2745                 }
2746                 this_cpu_dec(trace_buffered_event_cnt);
2747         }
2748
2749         entry = __trace_buffer_lock_reserve(*current_rb,
2750                                             type, len, flags, pc);
2751         /*
2752          * If tracing is off, but we have triggers enabled
2753          * we still need to look at the event data. Use the temp_buffer
2754          * to store the trace event for the trigger to use. It is
2755          * recursion-safe and will not be recorded anywhere.
2756          */
2757         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2758                 *current_rb = temp_buffer;
2759                 entry = __trace_buffer_lock_reserve(*current_rb,
2760                                                     type, len, flags, pc);
2761         }
2762         return entry;
2763 }
2764 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2765
2766 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2767 static DEFINE_MUTEX(tracepoint_printk_mutex);
2768
2769 static void output_printk(struct trace_event_buffer *fbuffer)
2770 {
2771         struct trace_event_call *event_call;
2772         struct trace_event_file *file;
2773         struct trace_event *event;
2774         unsigned long flags;
2775         struct trace_iterator *iter = tracepoint_print_iter;
2776
2777         /* We should never get here if iter is NULL */
2778         if (WARN_ON_ONCE(!iter))
2779                 return;
2780
2781         event_call = fbuffer->trace_file->event_call;
2782         if (!event_call || !event_call->event.funcs ||
2783             !event_call->event.funcs->trace)
2784                 return;
2785
2786         file = fbuffer->trace_file;
2787         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2788             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2789              !filter_match_preds(file->filter, fbuffer->entry)))
2790                 return;
2791
2792         event = &fbuffer->trace_file->event_call->event;
2793
2794         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2795         trace_seq_init(&iter->seq);
2796         iter->ent = fbuffer->entry;
2797         event_call->event.funcs->trace(iter, 0, event);
2798         trace_seq_putc(&iter->seq, 0);
2799         printk("%s", iter->seq.buffer);
2800
2801         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2802 }
2803
2804 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2805                              void *buffer, size_t *lenp,
2806                              loff_t *ppos)
2807 {
2808         int save_tracepoint_printk;
2809         int ret;
2810
2811         mutex_lock(&tracepoint_printk_mutex);
2812         save_tracepoint_printk = tracepoint_printk;
2813
2814         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2815
2816         /*
2817          * This will force exiting early, as tracepoint_printk
2818          * is always zero when tracepoint_print_iter is not allocated.
2819          */
2820         if (!tracepoint_print_iter)
2821                 tracepoint_printk = 0;
2822
2823         if (save_tracepoint_printk == tracepoint_printk)
2824                 goto out;
2825
2826         if (tracepoint_printk)
2827                 static_key_enable(&tracepoint_printk_key.key);
2828         else
2829                 static_key_disable(&tracepoint_printk_key.key);
2830
2831  out:
2832         mutex_unlock(&tracepoint_printk_mutex);
2833
2834         return ret;
2835 }
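/*
 * Example: the handler above backs the kernel.tracepoint_printk sysctl.
 * The knob only has an effect when the tp_printk boot option set up
 * tracepoint_print_iter; a minimal toggle from user space (sketch,
 * illustrative only):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/proc/sys/kernel/tracepoint_printk", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);
 *		return close(fd);
 *	}
 */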
2836
2837 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2838 {
2839         if (static_key_false(&tracepoint_printk_key.key))
2840                 output_printk(fbuffer);
2841
2842         if (static_branch_unlikely(&trace_event_exports_enabled))
2843                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2844         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2845                                     fbuffer->event, fbuffer->entry,
2846                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2847 }
2848 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2849
2850 /*
2851  * Skip 3:
2852  *
2853  *   trace_buffer_unlock_commit_regs()
2854  *   trace_event_buffer_commit()
2855  *   trace_event_raw_event_xxx()
2856  */
2857 # define STACK_SKIP 3
2858
2859 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2860                                      struct trace_buffer *buffer,
2861                                      struct ring_buffer_event *event,
2862                                      unsigned long flags, int pc,
2863                                      struct pt_regs *regs)
2864 {
2865         __buffer_unlock_commit(buffer, event);
2866
2867         /*
2868          * If regs is not set, then skip the necessary functions.
2869          * Note, we can still get here via blktrace, wakeup tracer
2870          * and mmiotrace, but that's ok if they lose a function or
2871          * two. They are not that meaningful.
2872          */
2873         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2874         ftrace_trace_userstack(tr, buffer, flags, pc);
2875 }
2876
2877 /*
2878  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2879  */
2880 void
2881 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2882                                    struct ring_buffer_event *event)
2883 {
2884         __buffer_unlock_commit(buffer, event);
2885 }
2886
2887 void
2888 trace_function(struct trace_array *tr,
2889                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2890                int pc)
2891 {
2892         struct trace_event_call *call = &event_function;
2893         struct trace_buffer *buffer = tr->array_buffer.buffer;
2894         struct ring_buffer_event *event;
2895         struct ftrace_entry *entry;
2896
2897         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2898                                             flags, pc);
2899         if (!event)
2900                 return;
2901         entry   = ring_buffer_event_data(event);
2902         entry->ip                       = ip;
2903         entry->parent_ip                = parent_ip;
2904
2905         if (!call_filter_check_discard(call, entry, buffer, event)) {
2906                 if (static_branch_unlikely(&trace_function_exports_enabled))
2907                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2908                 __buffer_unlock_commit(buffer, event);
2909         }
2910 }
2911
2912 #ifdef CONFIG_STACKTRACE
2913
2914 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2915 #define FTRACE_KSTACK_NESTING   4
2916
2917 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2918
2919 struct ftrace_stack {
2920         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2921 };
2922
2923
2924 struct ftrace_stacks {
2925         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2926 };
2927
2928 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2929 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2930
2931 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2932                                  unsigned long flags,
2933                                  int skip, int pc, struct pt_regs *regs)
2934 {
2935         struct trace_event_call *call = &event_kernel_stack;
2936         struct ring_buffer_event *event;
2937         unsigned int size, nr_entries;
2938         struct ftrace_stack *fstack;
2939         struct stack_entry *entry;
2940         int stackidx;
2941
2942         /*
2943          * Add one, for this function and the call to stack_trace_save().
2944          * If regs is set, then these functions will not be in the way.
2945          */
2946 #ifndef CONFIG_UNWINDER_ORC
2947         if (!regs)
2948                 skip++;
2949 #endif
2950
2951         preempt_disable_notrace();
2952
2953         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2954
2955         /* This should never happen. If it does, yell once and skip */
2956         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2957                 goto out;
2958
2959         /*
2960          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2961          * interrupt will either see the value pre increment or post
2962          * increment. If the interrupt happens pre increment it will have
2963          * restored the counter when it returns.  We just need a barrier to
2964          * keep gcc from moving things around.
2965          */
2966         barrier();
2967
2968         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2969         size = ARRAY_SIZE(fstack->calls);
2970
2971         if (regs) {
2972                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2973                                                    size, skip);
2974         } else {
2975                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2976         }
2977
2978         size = nr_entries * sizeof(unsigned long);
2979         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2980                                             sizeof(*entry) + size, flags, pc);
2981         if (!event)
2982                 goto out;
2983         entry = ring_buffer_event_data(event);
2984
2985         memcpy(&entry->caller, fstack->calls, size);
2986         entry->size = nr_entries;
2987
2988         if (!call_filter_check_discard(call, entry, buffer, event))
2989                 __buffer_unlock_commit(buffer, event);
2990
2991  out:
2992         /* Again, don't let gcc optimize things here */
2993         barrier();
2994         __this_cpu_dec(ftrace_stack_reserve);
2995         preempt_enable_notrace();
2996
2997 }
2998
2999 static inline void ftrace_trace_stack(struct trace_array *tr,
3000                                       struct trace_buffer *buffer,
3001                                       unsigned long flags,
3002                                       int skip, int pc, struct pt_regs *regs)
3003 {
3004         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3005                 return;
3006
3007         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3008 }
3009
3010 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3011                    int pc)
3012 {
3013         struct trace_buffer *buffer = tr->array_buffer.buffer;
3014
3015         if (rcu_is_watching()) {
3016                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3017                 return;
3018         }
3019
3020         /*
3021          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3022          * but if the above rcu_is_watching() failed, then the NMI
3023          * triggered someplace critical, and rcu_irq_enter() should
3024          * not be called from NMI.
3025          */
3026         if (unlikely(in_nmi()))
3027                 return;
3028
3029         rcu_irq_enter_irqson();
3030         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3031         rcu_irq_exit_irqson();
3032 }
3033
3034 /**
3035  * trace_dump_stack - record a stack back trace in the trace buffer
3036  * @skip: Number of functions to skip (helper handlers)
3037  */
3038 void trace_dump_stack(int skip)
3039 {
3040         unsigned long flags;
3041
3042         if (tracing_disabled || tracing_selftest_running)
3043                 return;
3044
3045         local_save_flags(flags);
3046
3047 #ifndef CONFIG_UNWINDER_ORC
3048         /* Skip 1 to skip this function. */
3049         skip++;
3050 #endif
3051         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3052                              flags, skip, preempt_count(), NULL);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_dump_stack);
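/*
 * Example: trace_dump_stack() is meant to be dropped into kernel code
 * that is being debugged; the backtrace is recorded in the trace buffer
 * rather than the printk log.  A hypothetical caller (sketch; the
 * prototype is assumed to come from linux/ftrace.h):
 *
 *	#include <linux/ftrace.h>
 *
 *	static void some_suspect_path(void)
 *	{
 *		trace_dump_stack(0);
 *	}
 */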
3055
3056 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3057 static DEFINE_PER_CPU(int, user_stack_count);
3058
3059 static void
3060 ftrace_trace_userstack(struct trace_array *tr,
3061                        struct trace_buffer *buffer, unsigned long flags, int pc)
3062 {
3063         struct trace_event_call *call = &event_user_stack;
3064         struct ring_buffer_event *event;
3065         struct userstack_entry *entry;
3066
3067         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3068                 return;
3069
3070         /*
3071          * NMIs cannot handle page faults, even with fixups.
3072          * Saving the user stack can (and often does) fault.
3073          */
3074         if (unlikely(in_nmi()))
3075                 return;
3076
3077         /*
3078          * Prevent recursion, since the user stack tracing may
3079          * trigger other kernel events.
3080          */
3081         preempt_disable();
3082         if (__this_cpu_read(user_stack_count))
3083                 goto out;
3084
3085         __this_cpu_inc(user_stack_count);
3086
3087         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3088                                             sizeof(*entry), flags, pc);
3089         if (!event)
3090                 goto out_drop_count;
3091         entry   = ring_buffer_event_data(event);
3092
3093         entry->tgid             = current->tgid;
3094         memset(&entry->caller, 0, sizeof(entry->caller));
3095
3096         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3097         if (!call_filter_check_discard(call, entry, buffer, event))
3098                 __buffer_unlock_commit(buffer, event);
3099
3100  out_drop_count:
3101         __this_cpu_dec(user_stack_count);
3102  out:
3103         preempt_enable();
3104 }
3105 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3106 static void ftrace_trace_userstack(struct trace_array *tr,
3107                                    struct trace_buffer *buffer,
3108                                    unsigned long flags, int pc)
3109 {
3110 }
3111 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3112
3113 #endif /* CONFIG_STACKTRACE */
3114
3115 /* created for use with alloc_percpu */
3116 struct trace_buffer_struct {
3117         int nesting;
3118         char buffer[4][TRACE_BUF_SIZE];
3119 };
3120
3121 static struct trace_buffer_struct *trace_percpu_buffer;
3122
3123 /*
3124  * This allows for lockless recording.  If we're nested too deeply, then
3125  * this returns NULL.
3126  */
3127 static char *get_trace_buf(void)
3128 {
3129         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3130
3131         if (!buffer || buffer->nesting >= 4)
3132                 return NULL;
3133
3134         buffer->nesting++;
3135
3136         /* Interrupts must see nesting incremented before we use the buffer */
3137         barrier();
3138         return &buffer->buffer[buffer->nesting - 1][0];
3139 }
3140
3141 static void put_trace_buf(void)
3142 {
3143         /* Don't let the decrement of nesting leak before this */
3144         barrier();
3145         this_cpu_dec(trace_percpu_buffer->nesting);
3146 }
3147
3148 static int alloc_percpu_trace_buffer(void)
3149 {
3150         struct trace_buffer_struct *buffers;
3151
3152         if (trace_percpu_buffer)
3153                 return 0;
3154
3155         buffers = alloc_percpu(struct trace_buffer_struct);
3156         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3157                 return -ENOMEM;
3158
3159         trace_percpu_buffer = buffers;
3160         return 0;
3161 }
3162
3163 static int buffers_allocated;
3164
3165 void trace_printk_init_buffers(void)
3166 {
3167         if (buffers_allocated)
3168                 return;
3169
3170         if (alloc_percpu_trace_buffer())
3171                 return;
3172
3173         /* trace_printk() is for debug use only. Don't use it in production. */
3174
3175         pr_warn("\n");
3176         pr_warn("**********************************************************\n");
3177         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3178         pr_warn("**                                                      **\n");
3179         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3180         pr_warn("**                                                      **\n");
3181         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3182         pr_warn("** unsafe for production use.                           **\n");
3183         pr_warn("**                                                      **\n");
3184         pr_warn("** If you see this message and you are not debugging    **\n");
3185         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3186         pr_warn("**                                                      **\n");
3187         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3188         pr_warn("**********************************************************\n");
3189
3190         /* Expand the buffers to set size */
3191         tracing_update_buffers();
3192
3193         buffers_allocated = 1;
3194
3195         /*
3196          * trace_printk_init_buffers() can be called by modules.
3197          * If that happens, then we need to start cmdline recording
3198          * directly here. If the global_trace.buffer is already
3199          * allocated here, then this was called by module code.
3200          */
3201         if (global_trace.array_buffer.buffer)
3202                 tracing_start_cmdline_record();
3203 }
3204 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3205
3206 void trace_printk_start_comm(void)
3207 {
3208         /* Start tracing comms if trace printk is set */
3209         if (!buffers_allocated)
3210                 return;
3211         tracing_start_cmdline_record();
3212 }
3213
3214 static void trace_printk_start_stop_comm(int enabled)
3215 {
3216         if (!buffers_allocated)
3217                 return;
3218
3219         if (enabled)
3220                 tracing_start_cmdline_record();
3221         else
3222                 tracing_stop_cmdline_record();
3223 }
3224
3225 /**
3226  * trace_vbprintk - write binary msg to tracing buffer
3227  * @ip:    The address of the caller
3228  * @fmt:   The string format to write to the buffer
3229  * @args:  Arguments for @fmt
3230  */
3231 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3232 {
3233         struct trace_event_call *call = &event_bprint;
3234         struct ring_buffer_event *event;
3235         struct trace_buffer *buffer;
3236         struct trace_array *tr = &global_trace;
3237         struct bprint_entry *entry;
3238         unsigned long flags;
3239         char *tbuffer;
3240         int len = 0, size, pc;
3241
3242         if (unlikely(tracing_selftest_running || tracing_disabled))
3243                 return 0;
3244
3245         /* Don't pollute graph traces with trace_vprintk internals */
3246         pause_graph_tracing();
3247
3248         pc = preempt_count();
3249         preempt_disable_notrace();
3250
3251         tbuffer = get_trace_buf();
3252         if (!tbuffer) {
3253                 len = 0;
3254                 goto out_nobuffer;
3255         }
3256
3257         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3258
3259         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3260                 goto out_put;
3261
3262         local_save_flags(flags);
3263         size = sizeof(*entry) + sizeof(u32) * len;
3264         buffer = tr->array_buffer.buffer;
3265         ring_buffer_nest_start(buffer);
3266         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3267                                             flags, pc);
3268         if (!event)
3269                 goto out;
3270         entry = ring_buffer_event_data(event);
3271         entry->ip                       = ip;
3272         entry->fmt                      = fmt;
3273
3274         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3275         if (!call_filter_check_discard(call, entry, buffer, event)) {
3276                 __buffer_unlock_commit(buffer, event);
3277                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3278         }
3279
3280 out:
3281         ring_buffer_nest_end(buffer);
3282 out_put:
3283         put_trace_buf();
3284
3285 out_nobuffer:
3286         preempt_enable_notrace();
3287         unpause_graph_tracing();
3288
3289         return len;
3290 }
3291 EXPORT_SYMBOL_GPL(trace_vbprintk);
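
/*
 * Example (illustrative sketch): a printf-style wrapper handing its
 * arguments to trace_vbprintk().  Callers typically wrap it in a
 * va_start()/va_end() pair like this; the wrapper name is hypothetical.
 */
static __maybe_unused __printf(1, 2) int example_bprintk(const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	/* _THIS_IP_ records the wrapper itself as the caller address */
	ret = trace_vbprintk(_THIS_IP_, fmt, ap);
	va_end(ap);

	return ret;
}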
3292
3293 __printf(3, 0)
3294 static int
3295 __trace_array_vprintk(struct trace_buffer *buffer,
3296                       unsigned long ip, const char *fmt, va_list args)
3297 {
3298         struct trace_event_call *call = &event_print;
3299         struct ring_buffer_event *event;
3300         int len = 0, size, pc;
3301         struct print_entry *entry;
3302         unsigned long flags;
3303         char *tbuffer;
3304
3305         if (tracing_disabled || tracing_selftest_running)
3306                 return 0;
3307
3308         /* Don't pollute graph traces with trace_vprintk internals */
3309         pause_graph_tracing();
3310
3311         pc = preempt_count();
3312         preempt_disable_notrace();
3313
3314
3315         tbuffer = get_trace_buf();
3316         if (!tbuffer) {
3317                 len = 0;
3318                 goto out_nobuffer;
3319         }
3320
3321         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3322
3323         local_save_flags(flags);
3324         size = sizeof(*entry) + len + 1;
3325         ring_buffer_nest_start(buffer);
3326         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3327                                             flags, pc);
3328         if (!event)
3329                 goto out;
3330         entry = ring_buffer_event_data(event);
3331         entry->ip = ip;
3332
3333         memcpy(&entry->buf, tbuffer, len + 1);
3334         if (!call_filter_check_discard(call, entry, buffer, event)) {
3335                 __buffer_unlock_commit(buffer, event);
3336                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3337         }
3338
3339 out:
3340         ring_buffer_nest_end(buffer);
3341         put_trace_buf();
3342
3343 out_nobuffer:
3344         preempt_enable_notrace();
3345         unpause_graph_tracing();
3346
3347         return len;
3348 }
3349
3350 __printf(3, 0)
3351 int trace_array_vprintk(struct trace_array *tr,
3352                         unsigned long ip, const char *fmt, va_list args)
3353 {
3354         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3355 }
3356
3357 /**
3358  * trace_array_printk - Print a message to a specific instance
3359  * @tr: The instance trace_array descriptor
3360  * @ip: The instruction pointer that this is called from.
3361  * @fmt: The format to print (printf format)
3362  *
3363  * If a subsystem sets up its own instance, it has the right to
3364  * printk strings into its tracing instance buffer using this
3365  * function. Note, this function will not write into the top level
3366  * buffer (use trace_printk() for that), as writing into the top level
3367  * buffer should only have events that can be individually disabled.
3368  * trace_printk() is only used for debugging a kernel, and should never
3369  * be incorporated into normal use.
3370  *
3371  * trace_array_printk() can be used, as it will not add noise to the
3372  * top level tracing buffer.
3373  *
3374  * Note, trace_array_init_printk() must be called on @tr before this
3375  * can be used.
3376  */
3377 __printf(3, 0)
3378 int trace_array_printk(struct trace_array *tr,
3379                        unsigned long ip, const char *fmt, ...)
3380 {
3381         int ret;
3382         va_list ap;
3383
3384         if (!tr)
3385                 return -ENOENT;
3386
3387         /* This is only allowed for created instances */
3388         if (tr == &global_trace)
3389                 return 0;
3390
3391         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3392                 return 0;
3393
3394         va_start(ap, fmt);
3395         ret = trace_array_vprintk(tr, ip, fmt, ap);
3396         va_end(ap);
3397         return ret;
3398 }
3399 EXPORT_SYMBOL_GPL(trace_array_printk);
3400
3401 /**
3402  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3403  * @tr: The trace array to initialize the buffers for
3404  *
3405  * As trace_array_printk() only writes into instances, they are OK to
3406  * have in the kernel (unlike trace_printk()). This needs to be called
3407  * before trace_array_printk() can be used on a trace_array.
3408  */
3409 int trace_array_init_printk(struct trace_array *tr)
3410 {
3411         if (!tr)
3412                 return -ENOENT;
3413
3414         /* This is only allowed for created instances */
3415         if (tr == &global_trace)
3416                 return -EINVAL;
3417
3418         return alloc_percpu_trace_buffer();
3419 }
3420 EXPORT_SYMBOL_GPL(trace_array_init_printk);
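
/*
 * Example (illustrative sketch): a subsystem printing into its own
 * instance with the two exports above.  The instance name and the caller
 * are hypothetical; error handling is kept minimal.
 */
static __maybe_unused void example_instance_printk(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_subsys");
	if (!tr)
		return;

	if (!trace_array_init_printk(tr))
		trace_array_printk(tr, _THIS_IP_,
				   "example event: state=%d\n", 1);

	/* Drop the reference taken by trace_array_get_by_name() */
	trace_array_put(tr);
}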
3421
3422 __printf(3, 4)
3423 int trace_array_printk_buf(struct trace_buffer *buffer,
3424                            unsigned long ip, const char *fmt, ...)
3425 {
3426         int ret;
3427         va_list ap;
3428
3429         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3430                 return 0;
3431
3432         va_start(ap, fmt);
3433         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3434         va_end(ap);
3435         return ret;
3436 }
3437
3438 __printf(2, 0)
3439 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3440 {
3441         return trace_array_vprintk(&global_trace, ip, fmt, args);
3442 }
3443 EXPORT_SYMBOL_GPL(trace_vprintk);
3444
3445 static void trace_iterator_increment(struct trace_iterator *iter)
3446 {
3447         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3448
3449         iter->idx++;
3450         if (buf_iter)
3451                 ring_buffer_iter_advance(buf_iter);
3452 }
3453
3454 static struct trace_entry *
3455 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3456                 unsigned long *lost_events)
3457 {
3458         struct ring_buffer_event *event;
3459         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3460
3461         if (buf_iter) {
3462                 event = ring_buffer_iter_peek(buf_iter, ts);
3463                 if (lost_events)
3464                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3465                                 (unsigned long)-1 : 0;
3466         } else {
3467                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3468                                          lost_events);
3469         }
3470
3471         if (event) {
3472                 iter->ent_size = ring_buffer_event_length(event);
3473                 return ring_buffer_event_data(event);
3474         }
3475         iter->ent_size = 0;
3476         return NULL;
3477 }
3478
3479 static struct trace_entry *
3480 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3481                   unsigned long *missing_events, u64 *ent_ts)
3482 {
3483         struct trace_buffer *buffer = iter->array_buffer->buffer;
3484         struct trace_entry *ent, *next = NULL;
3485         unsigned long lost_events = 0, next_lost = 0;
3486         int cpu_file = iter->cpu_file;
3487         u64 next_ts = 0, ts;
3488         int next_cpu = -1;
3489         int next_size = 0;
3490         int cpu;
3491
3492         /*
3493          * If we are in a per_cpu trace file, don't bother iterating over
3494          * all CPUs; peek at that CPU directly.
3495          */
3496         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3497                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3498                         return NULL;
3499                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3500                 if (ent_cpu)
3501                         *ent_cpu = cpu_file;
3502
3503                 return ent;
3504         }
3505
3506         for_each_tracing_cpu(cpu) {
3507
3508                 if (ring_buffer_empty_cpu(buffer, cpu))
3509                         continue;
3510
3511                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3512
3513                 /*
3514                  * Pick the entry with the smallest timestamp:
3515                  */
3516                 if (ent && (!next || ts < next_ts)) {
3517                         next = ent;
3518                         next_cpu = cpu;
3519                         next_ts = ts;
3520                         next_lost = lost_events;
3521                         next_size = iter->ent_size;
3522                 }
3523         }
3524
3525         iter->ent_size = next_size;
3526
3527         if (ent_cpu)
3528                 *ent_cpu = next_cpu;
3529
3530         if (ent_ts)
3531                 *ent_ts = next_ts;
3532
3533         if (missing_events)
3534                 *missing_events = next_lost;
3535
3536         return next;
3537 }
3538
3539 #define STATIC_TEMP_BUF_SIZE    128
3540 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3541
3542 /* Find the next real entry, without updating the iterator itself */
3543 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3544                                           int *ent_cpu, u64 *ent_ts)
3545 {
3546         /* __find_next_entry will reset ent_size */
3547         int ent_size = iter->ent_size;
3548         struct trace_entry *entry;
3549
3550         /*
3551          * If called from ftrace_dump(), then the iter->temp buffer
3552          * will be the static_temp_buf and not created from kmalloc.
3553          * If the entry size is greater than the buffer, we can
3554          * If the entry size is greater than the buffer, we cannot
3555          * save it. Just return NULL in that case. This is only
3556          * used to add markers when two consecutive events' time
3557          * stamps have a large delta. See trace_print_lat_context().
3558         if (iter->temp == static_temp_buf &&
3559             STATIC_TEMP_BUF_SIZE < ent_size)
3560                 return NULL;
3561
3562         /*
3563          * __find_next_entry() may call peek_next_entry(), which may call
3564          * ring_buffer_peek(), and that can make the contents of iter->ent
3565          * undefined. Copy iter->ent now.
3566          */
3567         if (iter->ent && iter->ent != iter->temp) {
3568                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3569                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3570                         void *temp;
3571                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3572                         if (!temp)
3573                                 return NULL;
3574                         kfree(iter->temp);
3575                         iter->temp = temp;
3576                         iter->temp_size = iter->ent_size;
3577                 }
3578                 memcpy(iter->temp, iter->ent, iter->ent_size);
3579                 iter->ent = iter->temp;
3580         }
3581         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3582         /* Put back the original ent_size */
3583         iter->ent_size = ent_size;
3584
3585         return entry;
3586 }
3587
3588 /* Find the next real entry, and increment the iterator to the next entry */
3589 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3590 {
3591         iter->ent = __find_next_entry(iter, &iter->cpu,
3592                                       &iter->lost_events, &iter->ts);
3593
3594         if (iter->ent)
3595                 trace_iterator_increment(iter);
3596
3597         return iter->ent ? iter : NULL;
3598 }
3599
3600 static void trace_consume(struct trace_iterator *iter)
3601 {
3602         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3603                             &iter->lost_events);
3604 }
3605
3606 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3607 {
3608         struct trace_iterator *iter = m->private;
3609         int i = (int)*pos;
3610         void *ent;
3611
3612         WARN_ON_ONCE(iter->leftover);
3613
3614         (*pos)++;
3615
3616         /* can't go backwards */
3617         if (iter->idx > i)
3618                 return NULL;
3619
3620         if (iter->idx < 0)
3621                 ent = trace_find_next_entry_inc(iter);
3622         else
3623                 ent = iter;
3624
3625         while (ent && iter->idx < i)
3626                 ent = trace_find_next_entry_inc(iter);
3627
3628         iter->pos = *pos;
3629
3630         return ent;
3631 }
3632
3633 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3634 {
3635         struct ring_buffer_iter *buf_iter;
3636         unsigned long entries = 0;
3637         u64 ts;
3638
3639         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3640
3641         buf_iter = trace_buffer_iter(iter, cpu);
3642         if (!buf_iter)
3643                 return;
3644
3645         ring_buffer_iter_reset(buf_iter);
3646
3647         /*
3648          * With the max latency tracers, it can happen that a reset
3649          * never took place on a CPU. This is evident from a timestamp
3650          * that is before the start of the buffer.
3651          */
3652         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3653                 if (ts >= iter->array_buffer->time_start)
3654                         break;
3655                 entries++;
3656                 ring_buffer_iter_advance(buf_iter);
3657         }
3658
3659         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3660 }
3661
3662 /*
3663  * The current tracer is copied to avoid taking a global lock
3664  * all around.
3665  */
3666 static void *s_start(struct seq_file *m, loff_t *pos)
3667 {
3668         struct trace_iterator *iter = m->private;
3669         struct trace_array *tr = iter->tr;
3670         int cpu_file = iter->cpu_file;
3671         void *p = NULL;
3672         loff_t l = 0;
3673         int cpu;
3674
3675         /*
3676          * copy the tracer to avoid using a global lock all around.
3677          * iter->trace is a copy of current_trace, the pointer to the
3678          * name may be used instead of a strcmp(), as iter->trace->name
3679          * will point to the same string as current_trace->name.
3680          */
3681         mutex_lock(&trace_types_lock);
3682         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3683                 *iter->trace = *tr->current_trace;
3684         mutex_unlock(&trace_types_lock);
3685
3686 #ifdef CONFIG_TRACER_MAX_TRACE
3687         if (iter->snapshot && iter->trace->use_max_tr)
3688                 return ERR_PTR(-EBUSY);
3689 #endif
3690
3691         if (!iter->snapshot)
3692                 atomic_inc(&trace_record_taskinfo_disabled);
3693
3694         if (*pos != iter->pos) {
3695                 iter->ent = NULL;
3696                 iter->cpu = 0;
3697                 iter->idx = -1;
3698
3699                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3700                         for_each_tracing_cpu(cpu)
3701                                 tracing_iter_reset(iter, cpu);
3702                 } else
3703                         tracing_iter_reset(iter, cpu_file);
3704
3705                 iter->leftover = 0;
3706                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3707                         ;
3708
3709         } else {
3710                 /*
3711                  * If we overflowed the seq_file before, then we want
3712                  * to just reuse the trace_seq buffer again.
3713                  */
3714                 if (iter->leftover)
3715                         p = iter;
3716                 else {
3717                         l = *pos - 1;
3718                         p = s_next(m, p, &l);
3719                 }
3720         }
3721
3722         trace_event_read_lock();
3723         trace_access_lock(cpu_file);
3724         return p;
3725 }
3726
3727 static void s_stop(struct seq_file *m, void *p)
3728 {
3729         struct trace_iterator *iter = m->private;
3730
3731 #ifdef CONFIG_TRACER_MAX_TRACE
3732         if (iter->snapshot && iter->trace->use_max_tr)
3733                 return;
3734 #endif
3735
3736         if (!iter->snapshot)
3737                 atomic_dec(&trace_record_taskinfo_disabled);
3738
3739         trace_access_unlock(iter->cpu_file);
3740         trace_event_read_unlock();
3741 }
3742
3743 static void
3744 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3745                       unsigned long *entries, int cpu)
3746 {
3747         unsigned long count;
3748
3749         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3750         /*
3751          * If this buffer has skipped entries, then we hold all
3752          * entries for the trace and we need to ignore the
3753          * ones before the time stamp.
3754          */
3755         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3756                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3757                 /* total is the same as the entries */
3758                 *total = count;
3759         } else
3760                 *total = count +
3761                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3762         *entries = count;
3763 }
3764
3765 static void
3766 get_total_entries(struct array_buffer *buf,
3767                   unsigned long *total, unsigned long *entries)
3768 {
3769         unsigned long t, e;
3770         int cpu;
3771
3772         *total = 0;
3773         *entries = 0;
3774
3775         for_each_tracing_cpu(cpu) {
3776                 get_total_entries_cpu(buf, &t, &e, cpu);
3777                 *total += t;
3778                 *entries += e;
3779         }
3780 }
3781
3782 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3783 {
3784         unsigned long total, entries;
3785
3786         if (!tr)
3787                 tr = &global_trace;
3788
3789         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3790
3791         return entries;
3792 }
3793
3794 unsigned long trace_total_entries(struct trace_array *tr)
3795 {
3796         unsigned long total, entries;
3797
3798         if (!tr)
3799                 tr = &global_trace;
3800
3801         get_total_entries(&tr->array_buffer, &total, &entries);
3802
3803         return entries;
3804 }
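
/*
 * Example (illustrative sketch): a caller summarizing how much data is
 * buffered.  Passing NULL selects global_trace, as the helpers above
 * show; the message itself is hypothetical.
 */
static __maybe_unused void example_report_entries(void)
{
	pr_info("ftrace: %lu events buffered (cpu0: %lu)\n",
		trace_total_entries(NULL),
		trace_total_entries_cpu(NULL, 0));
}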
3805
3806 static void print_lat_help_header(struct seq_file *m)
3807 {
3808         seq_puts(m, "#                    _------=> CPU#            \n"
3809                     "#                   / _-----=> irqs-off        \n"
3810                     "#                  | / _----=> need-resched    \n"
3811                     "#                  || / _---=> hardirq/softirq \n"
3812                     "#                  ||| / _--=> preempt-depth   \n"
3813                     "#                  |||| /     delay            \n"
3814                     "#  cmd     pid     ||||| time  |   caller      \n"
3815                     "#     \\   /        |||||  \\    |   /         \n");
3816 }
3817
3818 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3819 {
3820         unsigned long total;
3821         unsigned long entries;
3822
3823         get_total_entries(buf, &total, &entries);
3824         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3825                    entries, total, num_online_cpus());
3826         seq_puts(m, "#\n");
3827 }
3828
3829 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3830                                    unsigned int flags)
3831 {
3832         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3833
3834         print_event_info(buf, m);
3835
3836         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3837         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3838 }
3839
3840 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3841                                        unsigned int flags)
3842 {
3843         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3844         const char *space = "            ";
3845         int prec = tgid ? 12 : 2;
3846
3847         print_event_info(buf, m);
3848
3849         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3850         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3851         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3852         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3853         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3854         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3855         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3856 }
3857
3858 void
3859 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3860 {
3861         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3862         struct array_buffer *buf = iter->array_buffer;
3863         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3864         struct tracer *type = iter->trace;
3865         unsigned long entries;
3866         unsigned long total;
3867         const char *name = "preemption";
3868
3869         name = type->name;
3870
3871         get_total_entries(buf, &total, &entries);
3872
3873         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3874                    name, UTS_RELEASE);
3875         seq_puts(m, "# -----------------------------------"
3876                  "---------------------------------\n");
3877         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3878                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3879                    nsecs_to_usecs(data->saved_latency),
3880                    entries,
3881                    total,
3882                    buf->cpu,
3883 #if defined(CONFIG_PREEMPT_NONE)
3884                    "server",
3885 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3886                    "desktop",
3887 #elif defined(CONFIG_PREEMPT)
3888                    "preempt",
3889 #elif defined(CONFIG_PREEMPT_RT)
3890                    "preempt_rt",
3891 #else
3892                    "unknown",
3893 #endif
3894                    /* These are reserved for later use */
3895                    0, 0, 0, 0);
3896 #ifdef CONFIG_SMP
3897         seq_printf(m, " #P:%d)\n", num_online_cpus());
3898 #else
3899         seq_puts(m, ")\n");
3900 #endif
3901         seq_puts(m, "#    -----------------\n");
3902         seq_printf(m, "#    | task: %.16s-%d "
3903                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3904                    data->comm, data->pid,
3905                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3906                    data->policy, data->rt_priority);
3907         seq_puts(m, "#    -----------------\n");
3908
3909         if (data->critical_start) {
3910                 seq_puts(m, "#  => started at: ");
3911                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3912                 trace_print_seq(m, &iter->seq);
3913                 seq_puts(m, "\n#  => ended at:   ");
3914                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3915                 trace_print_seq(m, &iter->seq);
3916                 seq_puts(m, "\n#\n");
3917         }
3918
3919         seq_puts(m, "#\n");
3920 }
3921
3922 static void test_cpu_buff_start(struct trace_iterator *iter)
3923 {
3924         struct trace_seq *s = &iter->seq;
3925         struct trace_array *tr = iter->tr;
3926
3927         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3928                 return;
3929
3930         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3931                 return;
3932
3933         if (cpumask_available(iter->started) &&
3934             cpumask_test_cpu(iter->cpu, iter->started))
3935                 return;
3936
3937         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3938                 return;
3939
3940         if (cpumask_available(iter->started))
3941                 cpumask_set_cpu(iter->cpu, iter->started);
3942
3943         /* Don't print started cpu buffer for the first entry of the trace */
3944         if (iter->idx > 1)
3945                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3946                                 iter->cpu);
3947 }
3948
3949 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3950 {
3951         struct trace_array *tr = iter->tr;
3952         struct trace_seq *s = &iter->seq;
3953         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3954         struct trace_entry *entry;
3955         struct trace_event *event;
3956
3957         entry = iter->ent;
3958
3959         test_cpu_buff_start(iter);
3960
3961         event = ftrace_find_event(entry->type);
3962
3963         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3964                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3965                         trace_print_lat_context(iter);
3966                 else
3967                         trace_print_context(iter);
3968         }
3969
3970         if (trace_seq_has_overflowed(s))
3971                 return TRACE_TYPE_PARTIAL_LINE;
3972
3973         if (event)
3974                 return event->funcs->trace(iter, sym_flags, event);
3975
3976         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3977
3978         return trace_handle_return(s);
3979 }
3980
3981 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3982 {
3983         struct trace_array *tr = iter->tr;
3984         struct trace_seq *s = &iter->seq;
3985         struct trace_entry *entry;
3986         struct trace_event *event;
3987
3988         entry = iter->ent;
3989
3990         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3991                 trace_seq_printf(s, "%d %d %llu ",
3992                                  entry->pid, iter->cpu, iter->ts);
3993
3994         if (trace_seq_has_overflowed(s))
3995                 return TRACE_TYPE_PARTIAL_LINE;
3996
3997         event = ftrace_find_event(entry->type);
3998         if (event)
3999                 return event->funcs->raw(iter, 0, event);
4000
4001         trace_seq_printf(s, "%d ?\n", entry->type);
4002
4003         return trace_handle_return(s);
4004 }
4005
4006 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4007 {
4008         struct trace_array *tr = iter->tr;
4009         struct trace_seq *s = &iter->seq;
4010         unsigned char newline = '\n';
4011         struct trace_entry *entry;
4012         struct trace_event *event;
4013
4014         entry = iter->ent;
4015
4016         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4017                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4018                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4019                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4020                 if (trace_seq_has_overflowed(s))
4021                         return TRACE_TYPE_PARTIAL_LINE;
4022         }
4023
4024         event = ftrace_find_event(entry->type);
4025         if (event) {
4026                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4027                 if (ret != TRACE_TYPE_HANDLED)
4028                         return ret;
4029         }
4030
4031         SEQ_PUT_FIELD(s, newline);
4032
4033         return trace_handle_return(s);
4034 }
4035
4036 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4037 {
4038         struct trace_array *tr = iter->tr;
4039         struct trace_seq *s = &iter->seq;
4040         struct trace_entry *entry;
4041         struct trace_event *event;
4042
4043         entry = iter->ent;
4044
4045         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4046                 SEQ_PUT_FIELD(s, entry->pid);
4047                 SEQ_PUT_FIELD(s, iter->cpu);
4048                 SEQ_PUT_FIELD(s, iter->ts);
4049                 if (trace_seq_has_overflowed(s))
4050                         return TRACE_TYPE_PARTIAL_LINE;
4051         }
4052
4053         event = ftrace_find_event(entry->type);
4054         return event ? event->funcs->binary(iter, 0, event) :
4055                 TRACE_TYPE_HANDLED;
4056 }
4057
4058 int trace_empty(struct trace_iterator *iter)
4059 {
4060         struct ring_buffer_iter *buf_iter;
4061         int cpu;
4062
4063         /* If we are looking at one CPU buffer, only check that one */
4064         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4065                 cpu = iter->cpu_file;
4066                 buf_iter = trace_buffer_iter(iter, cpu);
4067                 if (buf_iter) {
4068                         if (!ring_buffer_iter_empty(buf_iter))
4069                                 return 0;
4070                 } else {
4071                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4072                                 return 0;
4073                 }
4074                 return 1;
4075         }
4076
4077         for_each_tracing_cpu(cpu) {
4078                 buf_iter = trace_buffer_iter(iter, cpu);
4079                 if (buf_iter) {
4080                         if (!ring_buffer_iter_empty(buf_iter))
4081                                 return 0;
4082                 } else {
4083                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4084                                 return 0;
4085                 }
4086         }
4087
4088         return 1;
4089 }
4090
4091 /*  Called with trace_event_read_lock() held. */
4092 enum print_line_t print_trace_line(struct trace_iterator *iter)
4093 {
4094         struct trace_array *tr = iter->tr;
4095         unsigned long trace_flags = tr->trace_flags;
4096         enum print_line_t ret;
4097
4098         if (iter->lost_events) {
4099                 if (iter->lost_events == (unsigned long)-1)
4100                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4101                                          iter->cpu);
4102                 else
4103                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4104                                          iter->cpu, iter->lost_events);
4105                 if (trace_seq_has_overflowed(&iter->seq))
4106                         return TRACE_TYPE_PARTIAL_LINE;
4107         }
4108
4109         if (iter->trace && iter->trace->print_line) {
4110                 ret = iter->trace->print_line(iter);
4111                 if (ret != TRACE_TYPE_UNHANDLED)
4112                         return ret;
4113         }
4114
4115         if (iter->ent->type == TRACE_BPUTS &&
4116                         trace_flags & TRACE_ITER_PRINTK &&
4117                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4118                 return trace_print_bputs_msg_only(iter);
4119
4120         if (iter->ent->type == TRACE_BPRINT &&
4121                         trace_flags & TRACE_ITER_PRINTK &&
4122                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4123                 return trace_print_bprintk_msg_only(iter);
4124
4125         if (iter->ent->type == TRACE_PRINT &&
4126                         trace_flags & TRACE_ITER_PRINTK &&
4127                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4128                 return trace_print_printk_msg_only(iter);
4129
4130         if (trace_flags & TRACE_ITER_BIN)
4131                 return print_bin_fmt(iter);
4132
4133         if (trace_flags & TRACE_ITER_HEX)
4134                 return print_hex_fmt(iter);
4135
4136         if (trace_flags & TRACE_ITER_RAW)
4137                 return print_raw_fmt(iter);
4138
4139         return print_trace_fmt(iter);
4140 }
4141
4142 void trace_latency_header(struct seq_file *m)
4143 {
4144         struct trace_iterator *iter = m->private;
4145         struct trace_array *tr = iter->tr;
4146
4147         /* print nothing if the buffers are empty */
4148         if (trace_empty(iter))
4149                 return;
4150
4151         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4152                 print_trace_header(m, iter);
4153
4154         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4155                 print_lat_help_header(m);
4156 }
4157
4158 void trace_default_header(struct seq_file *m)
4159 {
4160         struct trace_iterator *iter = m->private;
4161         struct trace_array *tr = iter->tr;
4162         unsigned long trace_flags = tr->trace_flags;
4163
4164         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4165                 return;
4166
4167         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4168                 /* print nothing if the buffers are empty */
4169                 if (trace_empty(iter))
4170                         return;
4171                 print_trace_header(m, iter);
4172                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4173                         print_lat_help_header(m);
4174         } else {
4175                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4176                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4177                                 print_func_help_header_irq(iter->array_buffer,
4178                                                            m, trace_flags);
4179                         else
4180                                 print_func_help_header(iter->array_buffer, m,
4181                                                        trace_flags);
4182                 }
4183         }
4184 }
4185
4186 static void test_ftrace_alive(struct seq_file *m)
4187 {
4188         if (!ftrace_is_dead())
4189                 return;
4190         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4191                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4192 }
4193
4194 #ifdef CONFIG_TRACER_MAX_TRACE
4195 static void show_snapshot_main_help(struct seq_file *m)
4196 {
4197         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4198                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4199                     "#                      Takes a snapshot of the main buffer.\n"
4200                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4201                     "#                      (Doesn't have to be '2'; works with any number that\n"
4202                     "#                       is not a '0' or '1')\n");
4203 }
4204
4205 static void show_snapshot_percpu_help(struct seq_file *m)
4206 {
4207         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4208 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4209         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4210                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4211 #else
4212         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4213                     "#                     Must use main snapshot file to allocate.\n");
4214 #endif
4215         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4216                     "#                      (Doesn't have to be '2'; works with any number that\n"
4217                     "#                       is not a '0' or '1')\n");
4218 }
4219
4220 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4221 {
4222         if (iter->tr->allocated_snapshot)
4223                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4224         else
4225                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4226
4227         seq_puts(m, "# Snapshot commands:\n");
4228         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4229                 show_snapshot_main_help(m);
4230         else
4231                 show_snapshot_percpu_help(m);
4232 }
4233 #else
4234 /* Should never be called */
4235 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4236 #endif
4237
4238 static int s_show(struct seq_file *m, void *v)
4239 {
4240         struct trace_iterator *iter = v;
4241         int ret;
4242
4243         if (iter->ent == NULL) {
4244                 if (iter->tr) {
4245                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4246                         seq_puts(m, "#\n");
4247                         test_ftrace_alive(m);
4248                 }
4249                 if (iter->snapshot && trace_empty(iter))
4250                         print_snapshot_help(m, iter);
4251                 else if (iter->trace && iter->trace->print_header)
4252                         iter->trace->print_header(m);
4253                 else
4254                         trace_default_header(m);
4255
4256         } else if (iter->leftover) {
4257                 /*
4258                  * If we filled the seq_file buffer earlier, we
4259                  * want to just show it now.
4260                  */
4261                 ret = trace_print_seq(m, &iter->seq);
4262
4263                 /* ret should this time be zero, but you never know */
4264                 iter->leftover = ret;
4265
4266         } else {
4267                 print_trace_line(iter);
4268                 ret = trace_print_seq(m, &iter->seq);
4269                 /*
4270                  * If we overflow the seq_file buffer, then it will
4271                  * ask us for this data again at start up.
4272                  * Use that instead.
4273                  *  ret is 0 if seq_file write succeeded.
4274                  *        -1 otherwise.
4275                  */
4276                 iter->leftover = ret;
4277         }
4278
4279         return 0;
4280 }
4281
4282 /*
4283  * Should be used after trace_array_get(), trace_types_lock
4284  * ensures that i_cdev was already initialized.
4285  */
4286 static inline int tracing_get_cpu(struct inode *inode)
4287 {
4288         if (inode->i_cdev) /* See trace_create_cpu_file() */
4289                 return (long)inode->i_cdev - 1;
4290         return RING_BUFFER_ALL_CPUS;
4291 }
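
/*
 * Illustrative sketch of the encoding referred to above: the per-CPU file
 * creation path stores "cpu + 1" in i_cdev, so a NULL i_cdev (0) can keep
 * meaning "all CPUs".  The helper below is hypothetical and only restates
 * that idea; it is not a copy of trace_create_cpu_file().
 */
static __maybe_unused void example_tag_cpu_inode(struct inode *inode, long cpu)
{
	/* decoded by tracing_get_cpu() as (long)i_cdev - 1 */
	inode->i_cdev = (void *)(cpu + 1);
}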
4292
4293 static const struct seq_operations tracer_seq_ops = {
4294         .start          = s_start,
4295         .next           = s_next,
4296         .stop           = s_stop,
4297         .show           = s_show,
4298 };
4299
4300 static struct trace_iterator *
4301 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4302 {
4303         struct trace_array *tr = inode->i_private;
4304         struct trace_iterator *iter;
4305         int cpu;
4306
4307         if (tracing_disabled)
4308                 return ERR_PTR(-ENODEV);
4309
4310         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4311         if (!iter)
4312                 return ERR_PTR(-ENOMEM);
4313
4314         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4315                                     GFP_KERNEL);
4316         if (!iter->buffer_iter)
4317                 goto release;
4318
4319         /*
4320          * trace_find_next_entry() may need to save off iter->ent.
4321          * It will place it into the iter->temp buffer. As most
4322          * events are less than 128 bytes, allocate a buffer of that size.
4323          * If one is greater, then trace_find_next_entry() will
4324          * allocate a new buffer to adjust for the bigger iter->ent.
4325          * It's not critical if it fails to get allocated here.
4326          */
4327         iter->temp = kmalloc(128, GFP_KERNEL);
4328         if (iter->temp)
4329                 iter->temp_size = 128;
4330
4331         /*
4332          * We make a copy of the current tracer to avoid concurrent
4333          * changes on it while we are reading.
4334          */
4335         mutex_lock(&trace_types_lock);
4336         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4337         if (!iter->trace)
4338                 goto fail;
4339
4340         *iter->trace = *tr->current_trace;
4341
4342         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4343                 goto fail;
4344
4345         iter->tr = tr;
4346
4347 #ifdef CONFIG_TRACER_MAX_TRACE
4348         /* Currently only the top directory has a snapshot */
4349         if (tr->current_trace->print_max || snapshot)
4350                 iter->array_buffer = &tr->max_buffer;
4351         else
4352 #endif
4353                 iter->array_buffer = &tr->array_buffer;
4354         iter->snapshot = snapshot;
4355         iter->pos = -1;
4356         iter->cpu_file = tracing_get_cpu(inode);
4357         mutex_init(&iter->mutex);
4358
4359         /* Notify the tracer early; before we stop tracing. */
4360         if (iter->trace->open)
4361                 iter->trace->open(iter);
4362
4363         /* Annotate start of buffers if we had overruns */
4364         if (ring_buffer_overruns(iter->array_buffer->buffer))
4365                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4366
4367         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4368         if (trace_clocks[tr->clock_id].in_ns)
4369                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4370
4371         /*
4372          * If pause-on-trace is enabled, then stop the trace while
4373          * dumping, unless this is the "snapshot" file
4374          */
4375         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4376                 tracing_stop_tr(tr);
4377
4378         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4379                 for_each_tracing_cpu(cpu) {
4380                         iter->buffer_iter[cpu] =
4381                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4382                                                          cpu, GFP_KERNEL);
4383                 }
4384                 ring_buffer_read_prepare_sync();
4385                 for_each_tracing_cpu(cpu) {
4386                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4387                         tracing_iter_reset(iter, cpu);
4388                 }
4389         } else {
4390                 cpu = iter->cpu_file;
4391                 iter->buffer_iter[cpu] =
4392                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4393                                                  cpu, GFP_KERNEL);
4394                 ring_buffer_read_prepare_sync();
4395                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4396                 tracing_iter_reset(iter, cpu);
4397         }
4398
4399         mutex_unlock(&trace_types_lock);
4400
4401         return iter;
4402
4403  fail:
4404         mutex_unlock(&trace_types_lock);
4405         kfree(iter->trace);
4406         kfree(iter->temp);
4407         kfree(iter->buffer_iter);
4408 release:
4409         seq_release_private(inode, file);
4410         return ERR_PTR(-ENOMEM);
4411 }
4412
4413 int tracing_open_generic(struct inode *inode, struct file *filp)
4414 {
4415         int ret;
4416
4417         ret = tracing_check_open_get_tr(NULL);
4418         if (ret)
4419                 return ret;
4420
4421         filp->private_data = inode->i_private;
4422         return 0;
4423 }
4424
4425 bool tracing_is_disabled(void)
4426 {
4427         return tracing_disabled ? true : false;
4428 }
4429
4430 /*
4431  * Open and update trace_array ref count.
4432  * Must have the current trace_array passed to it.
4433  */
4434 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4435 {
4436         struct trace_array *tr = inode->i_private;
4437         int ret;
4438
4439         ret = tracing_check_open_get_tr(tr);
4440         if (ret)
4441                 return ret;
4442
4443         filp->private_data = inode->i_private;
4444
4445         return 0;
4446 }
4447
4448 static int tracing_release(struct inode *inode, struct file *file)
4449 {
4450         struct trace_array *tr = inode->i_private;
4451         struct seq_file *m = file->private_data;
4452         struct trace_iterator *iter;
4453         int cpu;
4454
4455         if (!(file->f_mode & FMODE_READ)) {
4456                 trace_array_put(tr);
4457                 return 0;
4458         }
4459
4460         /* Writes do not use seq_file */
4461         iter = m->private;
4462         mutex_lock(&trace_types_lock);
4463
4464         for_each_tracing_cpu(cpu) {
4465                 if (iter->buffer_iter[cpu])
4466                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4467         }
4468
4469         if (iter->trace && iter->trace->close)
4470                 iter->trace->close(iter);
4471
4472         if (!iter->snapshot && tr->stop_count)
4473                 /* reenable tracing if it was previously enabled */
4474                 tracing_start_tr(tr);
4475
4476         __trace_array_put(tr);
4477
4478         mutex_unlock(&trace_types_lock);
4479
4480         mutex_destroy(&iter->mutex);
4481         free_cpumask_var(iter->started);
4482         kfree(iter->temp);
4483         kfree(iter->trace);
4484         kfree(iter->buffer_iter);
4485         seq_release_private(inode, file);
4486
4487         return 0;
4488 }
4489
4490 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4491 {
4492         struct trace_array *tr = inode->i_private;
4493
4494         trace_array_put(tr);
4495         return 0;
4496 }
4497
4498 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4499 {
4500         struct trace_array *tr = inode->i_private;
4501
4502         trace_array_put(tr);
4503
4504         return single_release(inode, file);
4505 }
4506
4507 static int tracing_open(struct inode *inode, struct file *file)
4508 {
4509         struct trace_array *tr = inode->i_private;
4510         struct trace_iterator *iter;
4511         int ret;
4512
4513         ret = tracing_check_open_get_tr(tr);
4514         if (ret)
4515                 return ret;
4516
4517         /* If this file was open for write, then erase contents */
4518         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4519                 int cpu = tracing_get_cpu(inode);
4520                 struct array_buffer *trace_buf = &tr->array_buffer;
4521
4522 #ifdef CONFIG_TRACER_MAX_TRACE
4523                 if (tr->current_trace->print_max)
4524                         trace_buf = &tr->max_buffer;
4525 #endif
4526
4527                 if (cpu == RING_BUFFER_ALL_CPUS)
4528                         tracing_reset_online_cpus(trace_buf);
4529                 else
4530                         tracing_reset_cpu(trace_buf, cpu);
4531         }
4532
4533         if (file->f_mode & FMODE_READ) {
4534                 iter = __tracing_open(inode, file, false);
4535                 if (IS_ERR(iter))
4536                         ret = PTR_ERR(iter);
4537                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4538                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4539         }
4540
4541         if (ret < 0)
4542                 trace_array_put(tr);
4543
4544         return ret;
4545 }
4546
4547 /*
4548  * Some tracers are not suitable for instance buffers.
4549  * A tracer is always available for the global array (toplevel)
4550  * or if it explicitly states that it is.
4551  */
4552 static bool
4553 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4554 {
4555         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4556 }
4557
4558 /* Find the next tracer that this trace array may use */
4559 static struct tracer *
4560 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4561 {
4562         while (t && !trace_ok_for_array(t, tr))
4563                 t = t->next;
4564
4565         return t;
4566 }
4567
4568 static void *
4569 t_next(struct seq_file *m, void *v, loff_t *pos)
4570 {
4571         struct trace_array *tr = m->private;
4572         struct tracer *t = v;
4573
4574         (*pos)++;
4575
4576         if (t)
4577                 t = get_tracer_for_array(tr, t->next);
4578
4579         return t;
4580 }
4581
4582 static void *t_start(struct seq_file *m, loff_t *pos)
4583 {
4584         struct trace_array *tr = m->private;
4585         struct tracer *t;
4586         loff_t l = 0;
4587
4588         mutex_lock(&trace_types_lock);
4589
4590         t = get_tracer_for_array(tr, trace_types);
4591         for (; t && l < *pos; t = t_next(m, t, &l))
4592                         ;
4593
4594         return t;
4595 }
4596
4597 static void t_stop(struct seq_file *m, void *p)
4598 {
4599         mutex_unlock(&trace_types_lock);
4600 }
4601
4602 static int t_show(struct seq_file *m, void *v)
4603 {
4604         struct tracer *t = v;
4605
4606         if (!t)
4607                 return 0;
4608
4609         seq_puts(m, t->name);
4610         if (t->next)
4611                 seq_putc(m, ' ');
4612         else
4613                 seq_putc(m, '\n');
4614
4615         return 0;
4616 }
4617
4618 static const struct seq_operations show_traces_seq_ops = {
4619         .start          = t_start,
4620         .next           = t_next,
4621         .stop           = t_stop,
4622         .show           = t_show,
4623 };
4624
4625 static int show_traces_open(struct inode *inode, struct file *file)
4626 {
4627         struct trace_array *tr = inode->i_private;
4628         struct seq_file *m;
4629         int ret;
4630
4631         ret = tracing_check_open_get_tr(tr);
4632         if (ret)
4633                 return ret;
4634
4635         ret = seq_open(file, &show_traces_seq_ops);
4636         if (ret) {
4637                 trace_array_put(tr);
4638                 return ret;
4639         }
4640
4641         m = file->private_data;
4642         m->private = tr;
4643
4644         return 0;
4645 }
4646
4647 static int show_traces_release(struct inode *inode, struct file *file)
4648 {
4649         struct trace_array *tr = inode->i_private;
4650
4651         trace_array_put(tr);
4652         return seq_release(inode, file);
4653 }
4654
4655 static ssize_t
4656 tracing_write_stub(struct file *filp, const char __user *ubuf,
4657                    size_t count, loff_t *ppos)
4658 {
4659         return count;
4660 }
4661
4662 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4663 {
4664         int ret;
4665
4666         if (file->f_mode & FMODE_READ)
4667                 ret = seq_lseek(file, offset, whence);
4668         else
4669                 file->f_pos = ret = 0;
4670
4671         return ret;
4672 }
4673
4674 static const struct file_operations tracing_fops = {
4675         .open           = tracing_open,
4676         .read           = seq_read,
4677         .write          = tracing_write_stub,
4678         .llseek         = tracing_lseek,
4679         .release        = tracing_release,
4680 };
4681
4682 static const struct file_operations show_traces_fops = {
4683         .open           = show_traces_open,
4684         .read           = seq_read,
4685         .llseek         = seq_lseek,
4686         .release        = show_traces_release,
4687 };
4688
4689 static ssize_t
4690 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4691                      size_t count, loff_t *ppos)
4692 {
4693         struct trace_array *tr = file_inode(filp)->i_private;
4694         char *mask_str;
4695         int len;
4696
4697         len = snprintf(NULL, 0, "%*pb\n",
4698                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4699         mask_str = kmalloc(len, GFP_KERNEL);
4700         if (!mask_str)
4701                 return -ENOMEM;
4702
4703         len = snprintf(mask_str, len, "%*pb\n",
4704                        cpumask_pr_args(tr->tracing_cpumask));
4705         if (len >= count) {
4706                 count = -EINVAL;
4707                 goto out_err;
4708         }
4709         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4710
4711 out_err:
4712         kfree(mask_str);
4713
4714         return count;
4715 }
4716
4717 int tracing_set_cpumask(struct trace_array *tr,
4718                         cpumask_var_t tracing_cpumask_new)
4719 {
4720         int cpu;
4721
4722         if (!tr)
4723                 return -EINVAL;
4724
4725         local_irq_disable();
4726         arch_spin_lock(&tr->max_lock);
4727         for_each_tracing_cpu(cpu) {
4728                 /*
4729                  * Increase/decrease the disabled counter if we are
4730                  * about to flip a bit in the cpumask:
4731                  */
4732                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4733                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4734                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4735                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4736                 }
4737                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4738                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4739                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4740                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4741                 }
4742         }
4743         arch_spin_unlock(&tr->max_lock);
4744         local_irq_enable();
4745
4746         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4747
4748         return 0;
4749 }
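
/*
 * Example (illustrative sketch): an in-kernel caller restricting an
 * instance to a single CPU via tracing_set_cpumask().  The helper name
 * and the chosen CPU are hypothetical.
 */
static __maybe_unused int example_trace_only_cpu(struct trace_array *tr, int cpu)
{
	cpumask_var_t new_mask;
	int err;

	if (!zalloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_set_cpu(cpu, new_mask);
	err = tracing_set_cpumask(tr, new_mask);

	/* tracing_set_cpumask() copies the mask, so it can be freed here */
	free_cpumask_var(new_mask);
	return err;
}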
4750
4751 static ssize_t
4752 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4753                       size_t count, loff_t *ppos)
4754 {
4755         struct trace_array *tr = file_inode(filp)->i_private;
4756         cpumask_var_t tracing_cpumask_new;
4757         int err;
4758
4759         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4760                 return -ENOMEM;
4761
4762         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4763         if (err)
4764                 goto err_free;
4765
4766         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4767         if (err)
4768                 goto err_free;
4769
4770         free_cpumask_var(tracing_cpumask_new);
4771
4772         return count;
4773
4774 err_free:
4775         free_cpumask_var(tracing_cpumask_new);
4776
4777         return err;
4778 }
4779
4780 static const struct file_operations tracing_cpumask_fops = {
4781         .open           = tracing_open_generic_tr,
4782         .read           = tracing_cpumask_read,
4783         .write          = tracing_cpumask_write,
4784         .release        = tracing_release_generic_tr,
4785         .llseek         = generic_file_llseek,
4786 };
4787
4788 static int tracing_trace_options_show(struct seq_file *m, void *v)
4789 {
4790         struct tracer_opt *trace_opts;
4791         struct trace_array *tr = m->private;
4792         u32 tracer_flags;
4793         int i;
4794
4795         mutex_lock(&trace_types_lock);
4796         tracer_flags = tr->current_trace->flags->val;
4797         trace_opts = tr->current_trace->flags->opts;
4798
4799         for (i = 0; trace_options[i]; i++) {
4800                 if (tr->trace_flags & (1 << i))
4801                         seq_printf(m, "%s\n", trace_options[i]);
4802                 else
4803                         seq_printf(m, "no%s\n", trace_options[i]);
4804         }
4805
4806         for (i = 0; trace_opts[i].name; i++) {
4807                 if (tracer_flags & trace_opts[i].bit)
4808                         seq_printf(m, "%s\n", trace_opts[i].name);
4809                 else
4810                         seq_printf(m, "no%s\n", trace_opts[i].name);
4811         }
4812         mutex_unlock(&trace_types_lock);
4813
4814         return 0;
4815 }
4816
4817 static int __set_tracer_option(struct trace_array *tr,
4818                                struct tracer_flags *tracer_flags,
4819                                struct tracer_opt *opts, int neg)
4820 {
4821         struct tracer *trace = tracer_flags->trace;
4822         int ret;
4823
4824         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4825         if (ret)
4826                 return ret;
4827
4828         if (neg)
4829                 tracer_flags->val &= ~opts->bit;
4830         else
4831                 tracer_flags->val |= opts->bit;
4832         return 0;
4833 }
4834
4835 /* Try to assign a tracer specific option */
4836 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4837 {
4838         struct tracer *trace = tr->current_trace;
4839         struct tracer_flags *tracer_flags = trace->flags;
4840         struct tracer_opt *opts = NULL;
4841         int i;
4842
4843         for (i = 0; tracer_flags->opts[i].name; i++) {
4844                 opts = &tracer_flags->opts[i];
4845
4846                 if (strcmp(cmp, opts->name) == 0)
4847                         return __set_tracer_option(tr, trace->flags, opts, neg);
4848         }
4849
4850         return -EINVAL;
4851 }
4852
4853 /* Some tracers require overwrite to stay enabled */
4854 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4855 {
4856         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4857                 return -1;
4858
4859         return 0;
4860 }
4861
4862 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4863 {
4864         if ((mask == TRACE_ITER_RECORD_TGID) ||
4865             (mask == TRACE_ITER_RECORD_CMD))
4866                 lockdep_assert_held(&event_mutex);
4867
4868         /* do nothing if flag is already set */
4869         if (!!(tr->trace_flags & mask) == !!enabled)
4870                 return 0;
4871
4872         /* Give the tracer a chance to approve the change */
4873         if (tr->current_trace->flag_changed)
4874                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4875                         return -EINVAL;
4876
4877         if (enabled)
4878                 tr->trace_flags |= mask;
4879         else
4880                 tr->trace_flags &= ~mask;
4881
4882         if (mask == TRACE_ITER_RECORD_CMD)
4883                 trace_event_enable_cmd_record(enabled);
4884
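	/*
	 * The PID-to-TGID map is allocated lazily, the first time the
	 * record-tgid option is switched on, and is indexed directly by
	 * PID (hence PID_MAX_DEFAULT + 1 entries).
	 */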
4885         if (mask == TRACE_ITER_RECORD_TGID) {
4886                 if (!tgid_map)
4887                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4888                                            sizeof(*tgid_map),
4889                                            GFP_KERNEL);
4890                 if (!tgid_map) {
4891                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4892                         return -ENOMEM;
4893                 }
4894
4895                 trace_event_enable_tgid_record(enabled);
4896         }
4897
4898         if (mask == TRACE_ITER_EVENT_FORK)
4899                 trace_event_follow_fork(tr, enabled);
4900
4901         if (mask == TRACE_ITER_FUNC_FORK)
4902                 ftrace_pid_follow_fork(tr, enabled);
4903
4904         if (mask == TRACE_ITER_OVERWRITE) {
4905                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4906 #ifdef CONFIG_TRACER_MAX_TRACE
4907                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4908 #endif
4909         }
4910
4911         if (mask == TRACE_ITER_PRINTK) {
4912                 trace_printk_start_stop_comm(enabled);
4913                 trace_printk_control(enabled);
4914         }
4915
4916         return 0;
4917 }
4918
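/*
 * Apply one option string, using the same syntax as writes to the
 * trace_options file: a bare name sets a flag, a "no" prefix clears it,
 * and names that do not match a global option fall through to the
 * current tracer's own flags. Illustrative calls (option availability
 * depends on the kernel configuration and the current tracer):
 *
 *	trace_set_options(&global_trace, "sym-offset");
 *	trace_set_options(&global_trace, "nooverwrite");
 */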
4919 int trace_set_options(struct trace_array *tr, char *option)
4920 {
4921         char *cmp;
4922         int neg = 0;
4923         int ret;
4924         size_t orig_len = strlen(option);
4925         int len;
4926
4927         cmp = strstrip(option);
4928
4929         len = str_has_prefix(cmp, "no");
4930         if (len)
4931                 neg = 1;
4932
4933         cmp += len;
4934
4935         mutex_lock(&event_mutex);
4936         mutex_lock(&trace_types_lock);
4937
4938         ret = match_string(trace_options, -1, cmp);
4939         /* If no option could be set, test the specific tracer options */
4940         if (ret < 0)
4941                 ret = set_tracer_option(tr, cmp, neg);
4942         else
4943                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4944
4945         mutex_unlock(&trace_types_lock);
4946         mutex_unlock(&event_mutex);
4947
4948         /*
4949          * If the first trailing whitespace is replaced with '\0' by strstrip,
4950          * turn it back into a space.
4951          */
4952         if (orig_len > strlen(option))
4953                 option[strlen(option)] = ' ';
4954
4955         return ret;
4956 }
4957
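/*
 * Walk the comma-separated list captured from the "trace_options=" boot
 * parameter (stored in trace_boot_options_buf earlier in this file) and
 * apply each entry. For example, booting with:
 *
 *	trace_options=sym-offset,nooverwrite
 *
 * behaves like writing those two options to trace_options at run time.
 */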
4958 static void __init apply_trace_boot_options(void)
4959 {
4960         char *buf = trace_boot_options_buf;
4961         char *option;
4962
4963         while (true) {
4964                 option = strsep(&buf, ",");
4965
4966                 if (!option)
4967                         break;
4968
4969                 if (*option)
4970                         trace_set_options(&global_trace, option);
4971
4972                 /* Put back the comma to allow this to be called again */
4973                 if (buf)
4974                         *(buf - 1) = ',';
4975         }
4976 }
4977
4978 static ssize_t
4979 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4980                         size_t cnt, loff_t *ppos)
4981 {
4982         struct seq_file *m = filp->private_data;
4983         struct trace_array *tr = m->private;
4984         char buf[64];
4985         int ret;
4986
4987         if (cnt >= sizeof(buf))
4988                 return -EINVAL;
4989
4990         if (copy_from_user(buf, ubuf, cnt))
4991                 return -EFAULT;
4992
4993         buf[cnt] = 0;
4994
4995         ret = trace_set_options(tr, buf);
4996         if (ret < 0)
4997                 return ret;
4998
4999         *ppos += cnt;
5000
5001         return cnt;
5002 }
5003
5004 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5005 {
5006         struct trace_array *tr = inode->i_private;
5007         int ret;
5008
5009         ret = tracing_check_open_get_tr(tr);
5010         if (ret)
5011                 return ret;
5012
5013         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5014         if (ret < 0)
5015                 trace_array_put(tr);
5016
5017         return ret;
5018 }
5019
5020 static const struct file_operations tracing_iter_fops = {
5021         .open           = tracing_trace_options_open,
5022         .read           = seq_read,
5023         .llseek         = seq_lseek,
5024         .release        = tracing_single_release_tr,
5025         .write          = tracing_trace_options_write,
5026 };
5027
5028 static const char readme_msg[] =
5029         "tracing mini-HOWTO:\n\n"
5030         "# echo 0 > tracing_on : quick way to disable tracing\n"
5031         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5032         " Important files:\n"
5033         "  trace\t\t\t- The static contents of the buffer\n"
5034         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5035         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5036         "  current_tracer\t- function and latency tracers\n"
5037         "  available_tracers\t- list of configured tracers for current_tracer\n"
5038         "  error_log\t- error log for failed commands (that support it)\n"
5039         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5040         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5041         "  trace_clock\t\t- change the clock used to order events\n"
5042         "       local:   Per cpu clock but may not be synced across CPUs\n"
5043         "      global:   Synced across CPUs but slows tracing down.\n"
5044         "     counter:   Not a clock, but just an increment\n"
5045         "      uptime:   Jiffy counter from time of boot\n"
5046         "        perf:   Same clock that perf events use\n"
5047 #ifdef CONFIG_X86_64
5048         "     x86-tsc:   TSC cycle counter\n"
5049 #endif
5050         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5051         "       delta:   Delta difference against a buffer-wide timestamp\n"
5052         "    absolute:   Absolute (standalone) timestamp\n"
5053         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5054         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5055         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5056         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5057         "\t\t\t  Remove sub-buffer with rmdir\n"
5058         "  trace_options\t\t- Set format or modify how tracing happens\n"
5059         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5060         "\t\t\t  option name\n"
5061         "  saved_cmdlines_size\t- echo the number of commands to cache in the comm-pid list\n"
5062 #ifdef CONFIG_DYNAMIC_FTRACE
5063         "\n  available_filter_functions - list of functions that can be filtered on\n"
5064         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5065         "\t\t\t  functions\n"
5066         "\t     accepts: func_full_name or glob-matching-pattern\n"
5067         "\t     modules: Can select a group via module\n"
5068         "\t      Format: :mod:<module-name>\n"
5069         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5070         "\t    triggers: a command to perform when function is hit\n"
5071         "\t      Format: <function>:<trigger>[:count]\n"
5072         "\t     trigger: traceon, traceoff\n"
5073         "\t\t      enable_event:<system>:<event>\n"
5074         "\t\t      disable_event:<system>:<event>\n"
5075 #ifdef CONFIG_STACKTRACE
5076         "\t\t      stacktrace\n"
5077 #endif
5078 #ifdef CONFIG_TRACER_SNAPSHOT
5079         "\t\t      snapshot\n"
5080 #endif
5081         "\t\t      dump\n"
5082         "\t\t      cpudump\n"
5083         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5084         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5085         "\t     The first one will disable tracing every time do_fault is hit\n"
5086         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5087         "\t       The first time do_trap is hit and it disables tracing, the\n"
5088         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5089         "\t       the counter will not decrement. It only decrements when the\n"
5090         "\t       trigger did work\n"
5091         "\t     To remove trigger without count:\n"
5092         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5093         "\t     To remove trigger with a count:\n"
5094         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5095         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5096         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5097         "\t    modules: Can select a group via module command :mod:\n"
5098         "\t    Does not accept triggers\n"
5099 #endif /* CONFIG_DYNAMIC_FTRACE */
5100 #ifdef CONFIG_FUNCTION_TRACER
5101         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5102         "\t\t    (function)\n"
5103         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5104         "\t\t    (function)\n"
5105 #endif
5106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5107         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5108         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5109         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5110 #endif
5111 #ifdef CONFIG_TRACER_SNAPSHOT
5112         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5113         "\t\t\t  snapshot buffer. Read the contents for more\n"
5114         "\t\t\t  information\n"
5115 #endif
5116 #ifdef CONFIG_STACK_TRACER
5117         "  stack_trace\t\t- Shows the max stack trace when active\n"
5118         "  stack_max_size\t- Shows current max stack size that was traced\n"
5119         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5120         "\t\t\t  new trace)\n"
5121 #ifdef CONFIG_DYNAMIC_FTRACE
5122         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5123         "\t\t\t  traces\n"
5124 #endif
5125 #endif /* CONFIG_STACK_TRACER */
5126 #ifdef CONFIG_DYNAMIC_EVENTS
5127         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5128         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5129 #endif
5130 #ifdef CONFIG_KPROBE_EVENTS
5131         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5132         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5133 #endif
5134 #ifdef CONFIG_UPROBE_EVENTS
5135         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5136         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5137 #endif
5138 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5139         "\t  accepts: event-definitions (one definition per line)\n"
5140         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5141         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5142 #ifdef CONFIG_HIST_TRIGGERS
5143         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5144 #endif
5145         "\t           -:[<group>/]<event>\n"
5146 #ifdef CONFIG_KPROBE_EVENTS
5147         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5148   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5149 #endif
5150 #ifdef CONFIG_UPROBE_EVENTS
5151   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5152 #endif
5153         "\t     args: <name>=fetcharg[:type]\n"
5154         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5155 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5156         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5157 #else
5158         "\t           $stack<index>, $stack, $retval, $comm,\n"
5159 #endif
5160         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5161         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5162         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5163         "\t           <type>\\[<array-size>\\]\n"
5164 #ifdef CONFIG_HIST_TRIGGERS
5165         "\t    field: <stype> <name>;\n"
5166         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5167         "\t           [unsigned] char/int/long\n"
5168 #endif
5169 #endif
5170         "  events/\t\t- Directory containing all trace event subsystems:\n"
5171         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5172         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5173         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5174         "\t\t\t  events\n"
5175         "      filter\t\t- If set, only events passing filter are traced\n"
5176         "  events/<system>/<event>/\t- Directory containing control files for\n"
5177         "\t\t\t  <event>:\n"
5178         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5179         "      filter\t\t- If set, only events passing filter are traced\n"
5180         "      trigger\t\t- If set, a command to perform when event is hit\n"
5181         "\t    Format: <trigger>[:count][if <filter>]\n"
5182         "\t   trigger: traceon, traceoff\n"
5183         "\t            enable_event:<system>:<event>\n"
5184         "\t            disable_event:<system>:<event>\n"
5185 #ifdef CONFIG_HIST_TRIGGERS
5186         "\t            enable_hist:<system>:<event>\n"
5187         "\t            disable_hist:<system>:<event>\n"
5188 #endif
5189 #ifdef CONFIG_STACKTRACE
5190         "\t\t    stacktrace\n"
5191 #endif
5192 #ifdef CONFIG_TRACER_SNAPSHOT
5193         "\t\t    snapshot\n"
5194 #endif
5195 #ifdef CONFIG_HIST_TRIGGERS
5196         "\t\t    hist (see below)\n"
5197 #endif
5198         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5199         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5200         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5201         "\t                  events/block/block_unplug/trigger\n"
5202         "\t   The first disables tracing every time block_unplug is hit.\n"
5203         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5204         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5205         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5206         "\t   Like function triggers, the counter is only decremented if it\n"
5207         "\t    enabled or disabled tracing.\n"
5208         "\t   To remove a trigger without a count:\n"
5209         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5210         "\t   To remove a trigger with a count:\n"
5211         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5212         "\t   Filters can be ignored when removing a trigger.\n"
5213 #ifdef CONFIG_HIST_TRIGGERS
5214         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5215         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5216         "\t            [:values=<field1[,field2,...]>]\n"
5217         "\t            [:sort=<field1[,field2,...]>]\n"
5218         "\t            [:size=#entries]\n"
5219         "\t            [:pause][:continue][:clear]\n"
5220         "\t            [:name=histname1]\n"
5221         "\t            [:<handler>.<action>]\n"
5222         "\t            [if <filter>]\n\n"
5223         "\t    When a matching event is hit, an entry is added to a hash\n"
5224         "\t    table using the key(s) and value(s) named, and the value of a\n"
5225         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5226         "\t    correspond to fields in the event's format description.  Keys\n"
5227         "\t    can be any field, or the special string 'stacktrace'.\n"
5228         "\t    Compound keys consisting of up to two fields can be specified\n"
5229         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5230         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5231         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5232         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5233         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5234         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5235         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5236         "\t    its histogram data will be shared with other triggers of the\n"
5237         "\t    same name, and trigger hits will update this common data.\n\n"
5238         "\t    Reading the 'hist' file for the event will dump the hash\n"
5239         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5240         "\t    triggers attached to an event, there will be a table for each\n"
5241         "\t    trigger in the output.  The table displayed for a named\n"
5242         "\t    trigger will be the same as any other instance having the\n"
5243         "\t    same name.  The default format used to display a given field\n"
5244         "\t    can be modified by appending any of the following modifiers\n"
5245         "\t    to the field name, as applicable:\n\n"
5246         "\t            .hex        display a number as a hex value\n"
5247         "\t            .sym        display an address as a symbol\n"
5248         "\t            .sym-offset display an address as a symbol and offset\n"
5249         "\t            .execname   display a common_pid as a program name\n"
5250         "\t            .syscall    display a syscall id as a syscall name\n"
5251         "\t            .log2       display log2 value rather than raw number\n"
5252         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5253         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5254         "\t    trigger or to start a hist trigger but not log any events\n"
5255         "\t    until told to do so.  'continue' can be used to start or\n"
5256         "\t    restart a paused hist trigger.\n\n"
5257         "\t    The 'clear' parameter will clear the contents of a running\n"
5258         "\t    hist trigger and leave its current paused/active state\n"
5259         "\t    unchanged.\n\n"
5260         "\t    The enable_hist and disable_hist triggers can be used to\n"
5261         "\t    have one event conditionally start and stop another event's\n"
5262         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5263         "\t    the enable_event and disable_event triggers.\n\n"
5264         "\t    Hist trigger handlers and actions are executed whenever a\n"
5265         "\t    histogram entry is added or updated.  They take the form:\n\n"
5266         "\t        <handler>.<action>\n\n"
5267         "\t    The available handlers are:\n\n"
5268         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5269         "\t        onmax(var)               - invoke if var exceeds current max\n"
5270         "\t        onchange(var)            - invoke action if var changes\n\n"
5271         "\t    The available actions are:\n\n"
5272         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5273         "\t        save(field,...)                      - save current event fields\n"
5274 #ifdef CONFIG_TRACER_SNAPSHOT
5275         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5276 #endif
5277 #ifdef CONFIG_SYNTH_EVENTS
5278         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5279         "\t  Write into this file to define/undefine new synthetic events.\n"
5280         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5281 #endif
5282 #endif
5283 ;
5284
5285 static ssize_t
5286 tracing_readme_read(struct file *filp, char __user *ubuf,
5287                        size_t cnt, loff_t *ppos)
5288 {
5289         return simple_read_from_buffer(ubuf, cnt, ppos,
5290                                         readme_msg, strlen(readme_msg));
5291 }
5292
5293 static const struct file_operations tracing_readme_fops = {
5294         .open           = tracing_open_generic,
5295         .read           = tracing_readme_read,
5296         .llseek         = generic_file_llseek,
5297 };
5298
5299 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5300 {
5301         int *ptr = v;
5302
5303         if (*pos || m->count)
5304                 ptr++;
5305
5306         (*pos)++;
5307
5308         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5309                 if (trace_find_tgid(*ptr))
5310                         return ptr;
5311         }
5312
5313         return NULL;
5314 }
5315
5316 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5317 {
5318         void *v;
5319         loff_t l = 0;
5320
5321         if (!tgid_map)
5322                 return NULL;
5323
5324         v = &tgid_map[0];
5325         while (l <= *pos) {
5326                 v = saved_tgids_next(m, v, &l);
5327                 if (!v)
5328                         return NULL;
5329         }
5330
5331         return v;
5332 }
5333
5334 static void saved_tgids_stop(struct seq_file *m, void *v)
5335 {
5336 }
5337
5338 static int saved_tgids_show(struct seq_file *m, void *v)
5339 {
5340         int pid = (int *)v - tgid_map;
5341
5342         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5343         return 0;
5344 }
5345
5346 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5347         .start          = saved_tgids_start,
5348         .stop           = saved_tgids_stop,
5349         .next           = saved_tgids_next,
5350         .show           = saved_tgids_show,
5351 };
5352
5353 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5354 {
5355         int ret;
5356
5357         ret = tracing_check_open_get_tr(NULL);
5358         if (ret)
5359                 return ret;
5360
5361         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5362 }
5363
5364
5365 static const struct file_operations tracing_saved_tgids_fops = {
5366         .open           = tracing_saved_tgids_open,
5367         .read           = seq_read,
5368         .llseek         = seq_lseek,
5369         .release        = seq_release,
5370 };
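/*
 * Illustrative output of the saved_tgids file: one "PID TGID" pair per
 * line, only for PIDs that currently have a recorded TGID (the values
 * below are made up):
 *
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1211 1211
 *	1213 1211
 */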
5371
5372 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5373 {
5374         unsigned int *ptr = v;
5375
5376         if (*pos || m->count)
5377                 ptr++;
5378
5379         (*pos)++;
5380
5381         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5382              ptr++) {
5383                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5384                         continue;
5385
5386                 return ptr;
5387         }
5388
5389         return NULL;
5390 }
5391
5392 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5393 {
5394         void *v;
5395         loff_t l = 0;
5396
5397         preempt_disable();
5398         arch_spin_lock(&trace_cmdline_lock);
5399
5400         v = &savedcmd->map_cmdline_to_pid[0];
5401         while (l <= *pos) {
5402                 v = saved_cmdlines_next(m, v, &l);
5403                 if (!v)
5404                         return NULL;
5405         }
5406
5407         return v;
5408 }
5409
5410 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5411 {
5412         arch_spin_unlock(&trace_cmdline_lock);
5413         preempt_enable();
5414 }
5415
5416 static int saved_cmdlines_show(struct seq_file *m, void *v)
5417 {
5418         char buf[TASK_COMM_LEN];
5419         unsigned int *pid = v;
5420
5421         __trace_find_cmdline(*pid, buf);
5422         seq_printf(m, "%d %s\n", *pid, buf);
5423         return 0;
5424 }
5425
5426 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5427         .start          = saved_cmdlines_start,
5428         .next           = saved_cmdlines_next,
5429         .stop           = saved_cmdlines_stop,
5430         .show           = saved_cmdlines_show,
5431 };
5432
5433 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5434 {
5435         int ret;
5436
5437         ret = tracing_check_open_get_tr(NULL);
5438         if (ret)
5439                 return ret;
5440
5441         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5442 }
5443
5444 static const struct file_operations tracing_saved_cmdlines_fops = {
5445         .open           = tracing_saved_cmdlines_open,
5446         .read           = seq_read,
5447         .llseek         = seq_lseek,
5448         .release        = seq_release,
5449 };
5450
5451 static ssize_t
5452 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5453                                  size_t cnt, loff_t *ppos)
5454 {
5455         char buf[64];
5456         int r;
5457
5458         arch_spin_lock(&trace_cmdline_lock);
5459         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5460         arch_spin_unlock(&trace_cmdline_lock);
5461
5462         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5463 }
5464
5465 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5466 {
5467         kfree(s->saved_cmdlines);
5468         kfree(s->map_cmdline_to_pid);
5469         kfree(s);
5470 }
5471
5472 static int tracing_resize_saved_cmdlines(unsigned int val)
5473 {
5474         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5475
5476         s = kmalloc(sizeof(*s), GFP_KERNEL);
5477         if (!s)
5478                 return -ENOMEM;
5479
5480         if (allocate_cmdlines_buffer(val, s) < 0) {
5481                 kfree(s);
5482                 return -ENOMEM;
5483         }
5484
5485         arch_spin_lock(&trace_cmdline_lock);
5486         savedcmd_temp = savedcmd;
5487         savedcmd = s;
5488         arch_spin_unlock(&trace_cmdline_lock);
5489         free_saved_cmdlines_buffer(savedcmd_temp);
5490
5491         return 0;
5492 }
5493
5494 static ssize_t
5495 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5496                                   size_t cnt, loff_t *ppos)
5497 {
5498         unsigned long val;
5499         int ret;
5500
5501         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5502         if (ret)
5503                 return ret;
5504
5505         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5506         if (!val || val > PID_MAX_DEFAULT)
5507                 return -EINVAL;
5508
5509         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5510         if (ret < 0)
5511                 return ret;
5512
5513         *ppos += cnt;
5514
5515         return cnt;
5516 }
5517
5518 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5519         .open           = tracing_open_generic,
5520         .read           = tracing_saved_cmdlines_size_read,
5521         .write          = tracing_saved_cmdlines_size_write,
5522 };
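/*
 * Illustrative shell usage of the saved_cmdlines files above, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *
 * Writes are bounded to 1..PID_MAX_DEFAULT by the write handler above.
 */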
5523
5524 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5525 static union trace_eval_map_item *
5526 update_eval_map(union trace_eval_map_item *ptr)
5527 {
5528         if (!ptr->map.eval_string) {
5529                 if (ptr->tail.next) {
5530                         ptr = ptr->tail.next;
5531                         /* Set ptr to the next real item (skip head) */
5532                         ptr++;
5533                 } else
5534                         return NULL;
5535         }
5536         return ptr;
5537 }
5538
5539 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5540 {
5541         union trace_eval_map_item *ptr = v;
5542
5543         /*
5544          * Paranoid! If ptr points to end, we don't want to increment past it.
5545          * This really should never happen.
5546          */
5547         (*pos)++;
5548         ptr = update_eval_map(ptr);
5549         if (WARN_ON_ONCE(!ptr))
5550                 return NULL;
5551
5552         ptr++;
5553         ptr = update_eval_map(ptr);
5554
5555         return ptr;
5556 }
5557
5558 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5559 {
5560         union trace_eval_map_item *v;
5561         loff_t l = 0;
5562
5563         mutex_lock(&trace_eval_mutex);
5564
5565         v = trace_eval_maps;
5566         if (v)
5567                 v++;
5568
5569         while (v && l < *pos) {
5570                 v = eval_map_next(m, v, &l);
5571         }
5572
5573         return v;
5574 }
5575
5576 static void eval_map_stop(struct seq_file *m, void *v)
5577 {
5578         mutex_unlock(&trace_eval_mutex);
5579 }
5580
5581 static int eval_map_show(struct seq_file *m, void *v)
5582 {
5583         union trace_eval_map_item *ptr = v;
5584
5585         seq_printf(m, "%s %ld (%s)\n",
5586                    ptr->map.eval_string, ptr->map.eval_value,
5587                    ptr->map.system);
5588
5589         return 0;
5590 }
5591
5592 static const struct seq_operations tracing_eval_map_seq_ops = {
5593         .start          = eval_map_start,
5594         .next           = eval_map_next,
5595         .stop           = eval_map_stop,
5596         .show           = eval_map_show,
5597 };
5598
5599 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5600 {
5601         int ret;
5602
5603         ret = tracing_check_open_get_tr(NULL);
5604         if (ret)
5605                 return ret;
5606
5607         return seq_open(filp, &tracing_eval_map_seq_ops);
5608 }
5609
5610 static const struct file_operations tracing_eval_map_fops = {
5611         .open           = tracing_eval_map_open,
5612         .read           = seq_read,
5613         .llseek         = seq_lseek,
5614         .release        = seq_release,
5615 };
5616
5617 static inline union trace_eval_map_item *
5618 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5619 {
5620         /* Return tail of array given the head */
5621         return ptr + ptr->head.length + 1;
5622 }
5623
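/*
 * Layout of one chunk appended to trace_eval_maps by the function below
 * (len + 2 entries: a head, the copied maps, then a zeroed tail that
 * either terminates the list or points to the next chunk):
 *
 *	map_array[0]		head { .mod = mod, .length = len }
 *	map_array[1 .. len]	copies of *start[0 .. len - 1]
 *	map_array[len + 1]	tail { .next = NULL }
 */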
5624 static void
5625 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5626                            int len)
5627 {
5628         struct trace_eval_map **stop;
5629         struct trace_eval_map **map;
5630         union trace_eval_map_item *map_array;
5631         union trace_eval_map_item *ptr;
5632
5633         stop = start + len;
5634
5635         /*
5636          * The trace_eval_maps contains the map plus a head and tail item,
5637          * where the head holds the module and length of array, and the
5638          * tail holds a pointer to the next list.
5639          */
5640         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5641         if (!map_array) {
5642                 pr_warn("Unable to allocate trace eval mapping\n");
5643                 return;
5644         }
5645
5646         mutex_lock(&trace_eval_mutex);
5647
5648         if (!trace_eval_maps)
5649                 trace_eval_maps = map_array;
5650         else {
5651                 ptr = trace_eval_maps;
5652                 for (;;) {
5653                         ptr = trace_eval_jmp_to_tail(ptr);
5654                         if (!ptr->tail.next)
5655                                 break;
5656                         ptr = ptr->tail.next;
5657
5658                 }
5659                 ptr->tail.next = map_array;
5660         }
5661         map_array->head.mod = mod;
5662         map_array->head.length = len;
5663         map_array++;
5664
5665         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5666                 map_array->map = **map;
5667                 map_array++;
5668         }
5669         memset(map_array, 0, sizeof(*map_array));
5670
5671         mutex_unlock(&trace_eval_mutex);
5672 }
5673
5674 static void trace_create_eval_file(struct dentry *d_tracer)
5675 {
5676         trace_create_file("eval_map", 0444, d_tracer,
5677                           NULL, &tracing_eval_map_fops);
5678 }
5679
5680 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5681 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5682 static inline void trace_insert_eval_map_file(struct module *mod,
5683                               struct trace_eval_map **start, int len) { }
5684 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5685
5686 static void trace_insert_eval_map(struct module *mod,
5687                                   struct trace_eval_map **start, int len)
5688 {
5689         struct trace_eval_map **map;
5690
5691         if (len <= 0)
5692                 return;
5693
5694         map = start;
5695
5696         trace_event_eval_update(map, len);
5697
5698         trace_insert_eval_map_file(mod, start, len);
5699 }
5700
5701 static ssize_t
5702 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5703                        size_t cnt, loff_t *ppos)
5704 {
5705         struct trace_array *tr = filp->private_data;
5706         char buf[MAX_TRACER_SIZE+2];
5707         int r;
5708
5709         mutex_lock(&trace_types_lock);
5710         r = sprintf(buf, "%s\n", tr->current_trace->name);
5711         mutex_unlock(&trace_types_lock);
5712
5713         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5714 }
5715
5716 int tracer_init(struct tracer *t, struct trace_array *tr)
5717 {
5718         tracing_reset_online_cpus(&tr->array_buffer);
5719         return t->init(tr);
5720 }
5721
5722 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5723 {
5724         int cpu;
5725
5726         for_each_tracing_cpu(cpu)
5727                 per_cpu_ptr(buf->data, cpu)->entries = val;
5728 }
5729
5730 #ifdef CONFIG_TRACER_MAX_TRACE
5731 /* resize @trace_buf's per-CPU entries to match those of @size_buf */
5732 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5733                                         struct array_buffer *size_buf, int cpu_id)
5734 {
5735         int cpu, ret = 0;
5736
5737         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5738                 for_each_tracing_cpu(cpu) {
5739                         ret = ring_buffer_resize(trace_buf->buffer,
5740                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5741                         if (ret < 0)
5742                                 break;
5743                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5744                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5745                 }
5746         } else {
5747                 ret = ring_buffer_resize(trace_buf->buffer,
5748                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5749                 if (ret == 0)
5750                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5751                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5752         }
5753
5754         return ret;
5755 }
5756 #endif /* CONFIG_TRACER_MAX_TRACE */
5757
5758 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5759                                         unsigned long size, int cpu)
5760 {
5761         int ret;
5762
5763         /*
5764          * If kernel or user changes the size of the ring buffer
5765          * we use the size that was given, and we can forget about
5766          * expanding it later.
5767          */
5768         ring_buffer_expanded = true;
5769
5770         /* May be called before buffers are initialized */
5771         if (!tr->array_buffer.buffer)
5772                 return 0;
5773
5774         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5775         if (ret < 0)
5776                 return ret;
5777
5778 #ifdef CONFIG_TRACER_MAX_TRACE
5779         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5780             !tr->current_trace->use_max_tr)
5781                 goto out;
5782
5783         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5784         if (ret < 0) {
5785                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5786                                                      &tr->array_buffer, cpu);
5787                 if (r < 0) {
5788                         /*
5789                          * AARGH! We are left with different
5790                          * size max buffer!!!!
5791                          * The max buffer is our "snapshot" buffer.
5792                          * When a tracer needs a snapshot (one of the
5793                          * latency tracers), it swaps the max buffer
5794                          * with the saved snap shot. We succeeded to
5795                          * with the saved snapshot. We succeeded in
5796                          * updating the size of the main buffer, but failed to
5797                          * to reset the main buffer to the original size, we
5798                          * failed there too. This is very unlikely to
5799                          * happen, but if it does, warn and kill all
5800                          * tracing.
5801                          */
5802                         WARN_ON(1);
5803                         tracing_disabled = 1;
5804                 }
5805                 return ret;
5806         }
5807
5808         if (cpu == RING_BUFFER_ALL_CPUS)
5809                 set_buffer_entries(&tr->max_buffer, size);
5810         else
5811                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5812
5813  out:
5814 #endif /* CONFIG_TRACER_MAX_TRACE */
5815
5816         if (cpu == RING_BUFFER_ALL_CPUS)
5817                 set_buffer_entries(&tr->array_buffer, size);
5818         else
5819                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5820
5821         return ret;
5822 }
5823
5824 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5825                                   unsigned long size, int cpu_id)
5826 {
5827         int ret = size;
5828
5829         mutex_lock(&trace_types_lock);
5830
5831         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5832                 /* make sure this cpu is enabled in the mask */
5833                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5834                         ret = -EINVAL;
5835                         goto out;
5836                 }
5837         }
5838
5839         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5840         if (ret < 0)
5841                 ret = -ENOMEM;
5842
5843 out:
5844         mutex_unlock(&trace_types_lock);
5845
5846         return ret;
5847 }
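/*
 * The buffer_size_kb files (global and per_cpu/cpu<N>/) funnel into
 * tracing_resize_ring_buffer(); their write handlers convert kilobytes
 * to bytes before calling in here. Illustrative shell usage, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1408 > /sys/kernel/tracing/per_cpu/cpu2/buffer_size_kb
 */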
5848
5849
5850 /**
5851  * tracing_update_buffers - used by tracing facility to expand ring buffers
5852  *
5853  * To save memory on systems where tracing is configured in but never
5854  * used, the ring buffers start out at a minimum size. Once a user
5855  * starts to use the tracing facility, they need to grow to their
5856  * default size.
5857  *
5858  * This function is to be called when a tracer is about to be used.
5859  */
5860 int tracing_update_buffers(void)
5861 {
5862         int ret = 0;
5863
5864         mutex_lock(&trace_types_lock);
5865         if (!ring_buffer_expanded)
5866                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5867                                                 RING_BUFFER_ALL_CPUS);
5868         mutex_unlock(&trace_types_lock);
5869
5870         return ret;
5871 }
5872
5873 struct trace_option_dentry;
5874
5875 static void
5876 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5877
5878 /*
5879  * Used to clear out the tracer before deletion of an instance.
5880  * Must have trace_types_lock held.
5881  */
5882 static void tracing_set_nop(struct trace_array *tr)
5883 {
5884         if (tr->current_trace == &nop_trace)
5885                 return;
5886
5887         tr->current_trace->enabled--;
5888
5889         if (tr->current_trace->reset)
5890                 tr->current_trace->reset(tr);
5891
5892         tr->current_trace = &nop_trace;
5893 }
5894
5895 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5896 {
5897         /* Only enable if the directory has been created already. */
5898         if (!tr->dir)
5899                 return;
5900
5901         create_trace_option_files(tr, t);
5902 }
5903
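/*
 * Switch the current tracer of @tr by name; writes to the current_tracer
 * file and the ftrace= boot parameter both end up here. A minimal sketch
 * of an in-kernel call (assumes the named tracer is compiled in):
 *
 *	if (tracing_set_tracer(&global_trace, "function_graph"))
 *		pr_warn("could not switch to the function_graph tracer\n");
 */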
5904 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5905 {
5906         struct tracer *t;
5907 #ifdef CONFIG_TRACER_MAX_TRACE
5908         bool had_max_tr;
5909 #endif
5910         int ret = 0;
5911
5912         mutex_lock(&trace_types_lock);
5913
5914         if (!ring_buffer_expanded) {
5915                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5916                                                 RING_BUFFER_ALL_CPUS);
5917                 if (ret < 0)
5918                         goto out;
5919                 ret = 0;
5920         }
5921
5922         for (t = trace_types; t; t = t->next) {
5923                 if (strcmp(t->name, buf) == 0)
5924                         break;
5925         }
5926         if (!t) {
5927                 ret = -EINVAL;
5928                 goto out;
5929         }
5930         if (t == tr->current_trace)
5931                 goto out;
5932
5933 #ifdef CONFIG_TRACER_SNAPSHOT
5934         if (t->use_max_tr) {
5935                 arch_spin_lock(&tr->max_lock);
5936                 if (tr->cond_snapshot)
5937                         ret = -EBUSY;
5938                 arch_spin_unlock(&tr->max_lock);
5939                 if (ret)
5940                         goto out;
5941         }
5942 #endif
5943         /* Some tracers won't work on kernel command line */
5944         if (system_state < SYSTEM_RUNNING && t->noboot) {
5945                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5946                         t->name);
5947                 goto out;
5948         }
5949
5950         /* Some tracers are only allowed for the top level buffer */
5951         if (!trace_ok_for_array(t, tr)) {
5952                 ret = -EINVAL;
5953                 goto out;
5954         }
5955
5956         /* If trace pipe files are being read, we can't change the tracer */
5957         if (tr->trace_ref) {
5958                 ret = -EBUSY;
5959                 goto out;
5960         }
5961
5962         trace_branch_disable();
5963
5964         tr->current_trace->enabled--;
5965
5966         if (tr->current_trace->reset)
5967                 tr->current_trace->reset(tr);
5968
5969         /* Current trace needs to be nop_trace before synchronize_rcu */
5970         tr->current_trace = &nop_trace;
5971
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5973         had_max_tr = tr->allocated_snapshot;
5974
5975         if (had_max_tr && !t->use_max_tr) {
5976                 /*
5977                  * We need to make sure that the update_max_tr sees that
5978                  * current_trace changed to nop_trace to keep it from
5979                  * swapping the buffers after we resize it.
5980                  * The update_max_tr is called with interrupts disabled,
5981                  * so a synchronize_rcu() is sufficient.
5982                  */
5983                 synchronize_rcu();
5984                 free_snapshot(tr);
5985         }
5986 #endif
5987
5988 #ifdef CONFIG_TRACER_MAX_TRACE
5989         if (t->use_max_tr && !had_max_tr) {
5990                 ret = tracing_alloc_snapshot_instance(tr);
5991                 if (ret < 0)
5992                         goto out;
5993         }
5994 #endif
5995
5996         if (t->init) {
5997                 ret = tracer_init(t, tr);
5998                 if (ret)
5999                         goto out;
6000         }
6001
6002         tr->current_trace = t;
6003         tr->current_trace->enabled++;
6004         trace_branch_enable(tr);
6005  out:
6006         mutex_unlock(&trace_types_lock);
6007
6008         return ret;
6009 }
6010
6011 static ssize_t
6012 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6013                         size_t cnt, loff_t *ppos)
6014 {
6015         struct trace_array *tr = filp->private_data;
6016         char buf[MAX_TRACER_SIZE+1];
6017         int i;
6018         size_t ret;
6019         int err;
6020
6021         ret = cnt;
6022
6023         if (cnt > MAX_TRACER_SIZE)
6024                 cnt = MAX_TRACER_SIZE;
6025
6026         if (copy_from_user(buf, ubuf, cnt))
6027                 return -EFAULT;
6028
6029         buf[cnt] = 0;
6030
6031         /* strip trailing whitespace. */
6032         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6033                 buf[i] = 0;
6034
6035         err = tracing_set_tracer(tr, buf);
6036         if (err)
6037                 return err;
6038
6039         *ppos += ret;
6040
6041         return ret;
6042 }
6043
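/*
 * The two helpers below back latency knobs such as tracing_thresh (and
 * tracing_max_latency where it exists): userspace reads and writes
 * microseconds while the stored value is in nanoseconds, hence the
 * "* 1000" on write and nsecs_to_usecs() on read. Illustrative:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh    (100 usecs)
 */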
6044 static ssize_t
6045 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6046                    size_t cnt, loff_t *ppos)
6047 {
6048         char buf[64];
6049         int r;
6050
6051         r = snprintf(buf, sizeof(buf), "%ld\n",
6052                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6053         if (r > sizeof(buf))
6054                 r = sizeof(buf);
6055         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6056 }
6057
6058 static ssize_t
6059 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6060                     size_t cnt, loff_t *ppos)
6061 {
6062         unsigned long val;
6063         int ret;
6064
6065         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6066         if (ret)
6067                 return ret;
6068
6069         *ptr = val * 1000;
6070
6071         return cnt;
6072 }
6073
6074 static ssize_t
6075 tracing_thresh_read(struct file *filp, char __user *ubuf,
6076                     size_t cnt, loff_t *ppos)
6077 {
6078         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6079 }
6080
6081 static ssize_t
6082 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6083                      size_t cnt, loff_t *ppos)
6084 {
6085         struct trace_array *tr = filp->private_data;
6086         int ret;
6087
6088         mutex_lock(&trace_types_lock);
6089         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6090         if (ret < 0)
6091                 goto out;
6092
6093         if (tr->current_trace->update_thresh) {
6094                 ret = tr->current_trace->update_thresh(tr);
6095                 if (ret < 0)
6096                         goto out;
6097         }
6098
6099         ret = cnt;
6100 out:
6101         mutex_unlock(&trace_types_lock);
6102
6103         return ret;
6104 }
6105
6106 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6107
6108 static ssize_t
6109 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6110                      size_t cnt, loff_t *ppos)
6111 {
6112         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6113 }
6114
6115 static ssize_t
6116 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6117                       size_t cnt, loff_t *ppos)
6118 {
6119         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6120 }
6121
6122 #endif
6123
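/*
 * trace_pipe is the consuming-read interface described in readme_msg
 * above: each read removes the returned events from the ring buffer,
 * and reads block until data is available unless O_NONBLOCK is set.
 * Illustrative shell usage:
 *
 *	# cat /sys/kernel/tracing/trace_pipe > /tmp/trace.txt &
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 */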
6124 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6125 {
6126         struct trace_array *tr = inode->i_private;
6127         struct trace_iterator *iter;
6128         int ret;
6129
6130         ret = tracing_check_open_get_tr(tr);
6131         if (ret)
6132                 return ret;
6133
6134         mutex_lock(&trace_types_lock);
6135
6136         /* create a buffer to store the information to pass to userspace */
6137         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6138         if (!iter) {
6139                 ret = -ENOMEM;
6140                 __trace_array_put(tr);
6141                 goto out;
6142         }
6143
6144         trace_seq_init(&iter->seq);
6145         iter->trace = tr->current_trace;
6146
6147         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6148                 ret = -ENOMEM;
6149                 goto fail;
6150         }
6151
6152         /* trace pipe does not show start of buffer */
6153         cpumask_setall(iter->started);
6154
6155         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6156                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6157
6158         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6159         if (trace_clocks[tr->clock_id].in_ns)
6160                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6161
6162         iter->tr = tr;
6163         iter->array_buffer = &tr->array_buffer;
6164         iter->cpu_file = tracing_get_cpu(inode);
6165         mutex_init(&iter->mutex);
6166         filp->private_data = iter;
6167
6168         if (iter->trace->pipe_open)
6169                 iter->trace->pipe_open(iter);
6170
6171         nonseekable_open(inode, filp);
6172
6173         tr->trace_ref++;
6174 out:
6175         mutex_unlock(&trace_types_lock);
6176         return ret;
6177
6178 fail:
6179         kfree(iter);
6180         __trace_array_put(tr);
6181         mutex_unlock(&trace_types_lock);
6182         return ret;
6183 }
6184
6185 static int tracing_release_pipe(struct inode *inode, struct file *file)
6186 {
6187         struct trace_iterator *iter = file->private_data;
6188         struct trace_array *tr = inode->i_private;
6189
6190         mutex_lock(&trace_types_lock);
6191
6192         tr->trace_ref--;
6193
6194         if (iter->trace->pipe_close)
6195                 iter->trace->pipe_close(iter);
6196
6197         mutex_unlock(&trace_types_lock);
6198
6199         free_cpumask_var(iter->started);
6200         mutex_destroy(&iter->mutex);
6201         kfree(iter);
6202
6203         trace_array_put(tr);
6204
6205         return 0;
6206 }
6207
6208 static __poll_t
6209 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6210 {
6211         struct trace_array *tr = iter->tr;
6212
6213         /* Iterators are static, they should be filled or empty */
6214         if (trace_buffer_iter(iter, iter->cpu_file))
6215                 return EPOLLIN | EPOLLRDNORM;
6216
6217         if (tr->trace_flags & TRACE_ITER_BLOCK)
6218                 /*
6219                  * Always select as readable when in blocking mode
6220                  */
6221                 return EPOLLIN | EPOLLRDNORM;
6222         else
6223                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6224                                              filp, poll_table);
6225 }
6226
6227 static __poll_t
6228 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6229 {
6230         struct trace_iterator *iter = filp->private_data;
6231
6232         return trace_poll(iter, filp, poll_table);
6233 }
6234
6235 /* Must be called with iter->mutex held. */
6236 static int tracing_wait_pipe(struct file *filp)
6237 {
6238         struct trace_iterator *iter = filp->private_data;
6239         int ret;
6240
6241         while (trace_empty(iter)) {
6242
6243                 if ((filp->f_flags & O_NONBLOCK)) {
6244                         return -EAGAIN;
6245                 }
6246
6247                 /*
6248                  * We block until we read something and tracing is disabled.
6249                  * We still block if tracing is disabled, but we have never
6250                  * read anything. This allows a user to cat this file, and
6251                  * then enable tracing. But after we have read something,
6252                  * we give an EOF when tracing is again disabled.
6253                  *
6254                  * iter->pos will be 0 if we haven't read anything.
6255                  */
6256                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6257                         break;
6258
6259                 mutex_unlock(&iter->mutex);
6260
6261                 ret = wait_on_pipe(iter, 0);
6262
6263                 mutex_lock(&iter->mutex);
6264
6265                 if (ret)
6266                         return ret;
6267         }
6268
6269         return 1;
6270 }
6271
6272 /*
6273  * Consumer reader.
6274  */
6275 static ssize_t
6276 tracing_read_pipe(struct file *filp, char __user *ubuf,
6277                   size_t cnt, loff_t *ppos)
6278 {
6279         struct trace_iterator *iter = filp->private_data;
6280         ssize_t sret;
6281
6282         /*
6283          * Avoid more than one consumer on a single file descriptor
6284          * This is just a matter of traces coherency, the ring buffer itself
6285          * is protected.
6286          */
6287         mutex_lock(&iter->mutex);
6288
6289         /* return any leftover data */
6290         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6291         if (sret != -EBUSY)
6292                 goto out;
6293
6294         trace_seq_init(&iter->seq);
6295
6296         if (iter->trace->read) {
6297                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6298                 if (sret)
6299                         goto out;
6300         }
6301
6302 waitagain:
6303         sret = tracing_wait_pipe(filp);
6304         if (sret <= 0)
6305                 goto out;
6306
6307         /* stop when tracing is finished */
6308         if (trace_empty(iter)) {
6309                 sret = 0;
6310                 goto out;
6311         }
6312
6313         if (cnt >= PAGE_SIZE)
6314                 cnt = PAGE_SIZE - 1;
6315
6316         /* reset all fields from iter->seq onward (tr, trace, etc. are kept) */
6317         memset(&iter->seq, 0,
6318                sizeof(struct trace_iterator) -
6319                offsetof(struct trace_iterator, seq));
6320         cpumask_clear(iter->started);
6321         trace_seq_init(&iter->seq);
6322         iter->pos = -1;
6323
6324         trace_event_read_lock();
6325         trace_access_lock(iter->cpu_file);
6326         while (trace_find_next_entry_inc(iter) != NULL) {
6327                 enum print_line_t ret;
6328                 int save_len = iter->seq.seq.len;
6329
6330                 ret = print_trace_line(iter);
6331                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6332                         /* don't print partial lines */
6333                         iter->seq.seq.len = save_len;
6334                         break;
6335                 }
6336                 if (ret != TRACE_TYPE_NO_CONSUME)
6337                         trace_consume(iter);
6338
6339                 if (trace_seq_used(&iter->seq) >= cnt)
6340                         break;
6341
6342                 /*
6343                  * The full flag being set means we hit the end of the trace_seq
6344                  * buffer and should have left via the partial-line check above;
6345                  * one of the trace_seq_* functions is not being used properly.
6346                  */
6347                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6348                           iter->ent->type);
6349         }
6350         trace_access_unlock(iter->cpu_file);
6351         trace_event_read_unlock();
6352
6353         /* Now copy what we have to the user */
6354         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6355         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6356                 trace_seq_init(&iter->seq);
6357
6358         /*
6359          * If there was nothing to send to user, in spite of consuming trace
6360          * entries, go back to wait for more entries.
6361          */
6362         if (sret == -EBUSY)
6363                 goto waitagain;
6364
6365 out:
6366         mutex_unlock(&iter->mutex);
6367
6368         return sret;
6369 }
6370
6371 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6372                                      unsigned int idx)
6373 {
6374         __free_page(spd->pages[idx]);
6375 }
6376
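/*
 * Render up to @rem bytes of formatted trace output into iter->seq
 * (at most one page) for the splice path, consuming entries as it
 * goes.  Returns the number of bytes still wanted after this page.
 */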
6377 static size_t
6378 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6379 {
6380         size_t count;
6381         int save_len;
6382         int ret;
6383
6384         /* Seq buffer is page-sized, exactly what we need. */
6385         for (;;) {
6386                 save_len = iter->seq.seq.len;
6387                 ret = print_trace_line(iter);
6388
6389                 if (trace_seq_has_overflowed(&iter->seq)) {
6390                         iter->seq.seq.len = save_len;
6391                         break;
6392                 }
6393
6394                 /*
6395                  * This should not be hit, because a partial line should
6396                  * only be returned if iter->seq overflowed, which is
6397                  * handled above. But check it anyway to be safe.
6398                  */
6399                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6400                         iter->seq.seq.len = save_len;
6401                         break;
6402                 }
6403
6404                 count = trace_seq_used(&iter->seq) - save_len;
6405                 if (rem < count) {
6406                         rem = 0;
6407                         iter->seq.seq.len = save_len;
6408                         break;
6409                 }
6410
6411                 if (ret != TRACE_TYPE_NO_CONSUME)
6412                         trace_consume(iter);
6413                 rem -= count;
6414                 if (!trace_find_next_entry_inc(iter))   {
6415                         rem = 0;
6416                         iter->ent = NULL;
6417                         break;
6418                 }
6419         }
6420
6421         return rem;
6422 }
6423
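/*
 * Splice implementation for trace_pipe: formatted trace lines are
 * rendered into freshly allocated pages, which are then handed to
 * splice_to_pipe().  Entries are consumed just as with a regular
 * read of trace_pipe.
 */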
6424 static ssize_t tracing_splice_read_pipe(struct file *filp,
6425                                         loff_t *ppos,
6426                                         struct pipe_inode_info *pipe,
6427                                         size_t len,
6428                                         unsigned int flags)
6429 {
6430         struct page *pages_def[PIPE_DEF_BUFFERS];
6431         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6432         struct trace_iterator *iter = filp->private_data;
6433         struct splice_pipe_desc spd = {
6434                 .pages          = pages_def,
6435                 .partial        = partial_def,
6436                 .nr_pages       = 0, /* This gets updated below. */
6437                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6438                 .ops            = &default_pipe_buf_ops,
6439                 .spd_release    = tracing_spd_release_pipe,
6440         };
6441         ssize_t ret;
6442         size_t rem;
6443         unsigned int i;
6444
6445         if (splice_grow_spd(pipe, &spd))
6446                 return -ENOMEM;
6447
6448         mutex_lock(&iter->mutex);
6449
6450         if (iter->trace->splice_read) {
6451                 ret = iter->trace->splice_read(iter, filp,
6452                                                ppos, pipe, len, flags);
6453                 if (ret)
6454                         goto out_err;
6455         }
6456
6457         ret = tracing_wait_pipe(filp);
6458         if (ret <= 0)
6459                 goto out_err;
6460
6461         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6462                 ret = -EFAULT;
6463                 goto out_err;
6464         }
6465
6466         trace_event_read_lock();
6467         trace_access_lock(iter->cpu_file);
6468
6469         /* Fill as many pages as possible. */
6470         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6471                 spd.pages[i] = alloc_page(GFP_KERNEL);
6472                 if (!spd.pages[i])
6473                         break;
6474
6475                 rem = tracing_fill_pipe_page(rem, iter);
6476
6477                 /* Copy the data into the page, so we can start over. */
6478                 ret = trace_seq_to_buffer(&iter->seq,
6479                                           page_address(spd.pages[i]),
6480                                           trace_seq_used(&iter->seq));
6481                 if (ret < 0) {
6482                         __free_page(spd.pages[i]);
6483                         break;
6484                 }
6485                 spd.partial[i].offset = 0;
6486                 spd.partial[i].len = trace_seq_used(&iter->seq);
6487
6488                 trace_seq_init(&iter->seq);
6489         }
6490
6491         trace_access_unlock(iter->cpu_file);
6492         trace_event_read_unlock();
6493         mutex_unlock(&iter->mutex);
6494
6495         spd.nr_pages = i;
6496
6497         if (i)
6498                 ret = splice_to_pipe(pipe, &spd);
6499         else
6500                 ret = 0;
6501 out:
6502         splice_shrink_spd(&spd);
6503         return ret;
6504
6505 out_err:
6506         mutex_unlock(&iter->mutex);
6507         goto out;
6508 }
6509
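/*
 * Read handler for buffer_size_kb: report the per-cpu ring buffer
 * size in KB.  For the "all CPUs" instance a single value is printed
 * when every CPU uses the same size, and "X" when the sizes differ.
 */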
6510 static ssize_t
6511 tracing_entries_read(struct file *filp, char __user *ubuf,
6512                      size_t cnt, loff_t *ppos)
6513 {
6514         struct inode *inode = file_inode(filp);
6515         struct trace_array *tr = inode->i_private;
6516         int cpu = tracing_get_cpu(inode);
6517         char buf[64];
6518         int r = 0;
6519         ssize_t ret;
6520
6521         mutex_lock(&trace_types_lock);
6522
6523         if (cpu == RING_BUFFER_ALL_CPUS) {
6524                 int cpu, buf_size_same;
6525                 unsigned long size;
6526
6527                 size = 0;
6528                 buf_size_same = 1;
6529                 /* check if all per-cpu buffer sizes are the same */
6530                 for_each_tracing_cpu(cpu) {
6531                         /* fill in the size from first enabled cpu */
6532                         if (size == 0)
6533                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6534                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6535                                 buf_size_same = 0;
6536                                 break;
6537                         }
6538                 }
6539
6540                 if (buf_size_same) {
6541                         if (!ring_buffer_expanded)
6542                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6543                                             size >> 10,
6544                                             trace_buf_size >> 10);
6545                         else
6546                                 r = sprintf(buf, "%lu\n", size >> 10);
6547                 } else
6548                         r = sprintf(buf, "X\n");
6549         } else
6550                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6551
6552         mutex_unlock(&trace_types_lock);
6553
6554         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555         return ret;
6556 }
6557
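/*
 * Write handler for buffer_size_kb: the value written is interpreted
 * as the new ring buffer size in KB and the buffer is resized
 * accordingly, either for one CPU or for all of them.
 */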
6558 static ssize_t
6559 tracing_entries_write(struct file *filp, const char __user *ubuf,
6560                       size_t cnt, loff_t *ppos)
6561 {
6562         struct inode *inode = file_inode(filp);
6563         struct trace_array *tr = inode->i_private;
6564         unsigned long val;
6565         int ret;
6566
6567         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6568         if (ret)
6569                 return ret;
6570
6571         /* must have at least 1 entry */
6572         if (!val)
6573                 return -EINVAL;
6574
6575         /* value is in KB */
6576         val <<= 10;
6577         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6578         if (ret < 0)
6579                 return ret;
6580
6581         *ppos += cnt;
6582
6583         return cnt;
6584 }
6585
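/*
 * Read handler for buffer_total_size_kb: the sum of all per-cpu ring
 * buffer sizes in KB, with the would-be expanded size shown while the
 * buffer is still at its boot-time minimum.
 */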
6586 static ssize_t
6587 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6588                                 size_t cnt, loff_t *ppos)
6589 {
6590         struct trace_array *tr = filp->private_data;
6591         char buf[64];
6592         int r, cpu;
6593         unsigned long size = 0, expanded_size = 0;
6594
6595         mutex_lock(&trace_types_lock);
6596         for_each_tracing_cpu(cpu) {
6597                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6598                 if (!ring_buffer_expanded)
6599                         expanded_size += trace_buf_size >> 10;
6600         }
6601         if (ring_buffer_expanded)
6602                 r = sprintf(buf, "%lu\n", size);
6603         else
6604                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6605         mutex_unlock(&trace_types_lock);
6606
6607         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6608 }
6609
6610 static ssize_t
6611 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6612                           size_t cnt, loff_t *ppos)
6613 {
6614         /*
6615          * There is no need to read what the user has written; this function
6616          * exists only so that using "echo" on this file does not return an error.
6617          */
6618
6619         *ppos += cnt;
6620
6621         return cnt;
6622 }
6623
6624 static int
6625 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6626 {
6627         struct trace_array *tr = inode->i_private;
6628
6629         /* Disable tracing? */
6630         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6631                 tracer_tracing_off(tr);
6632         /* resize the ring buffer to 0 */
6633         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6634
6635         trace_array_put(tr);
6636
6637         return 0;
6638 }
6639
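/*
 * Write handler for trace_marker: copy the user string into a
 * TRACE_PRINT event in the ring buffer.  A missing '\n' is appended,
 * "<faulted>" is recorded if the copy from user space faults, and any
 * triggers attached to the trace_marker event are invoked.
 */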
6640 static ssize_t
6641 tracing_mark_write(struct file *filp, const char __user *ubuf,
6642                                         size_t cnt, loff_t *fpos)
6643 {
6644         struct trace_array *tr = filp->private_data;
6645         struct ring_buffer_event *event;
6646         enum event_trigger_type tt = ETT_NONE;
6647         struct trace_buffer *buffer;
6648         struct print_entry *entry;
6649         unsigned long irq_flags;
6650         ssize_t written;
6651         int size;
6652         int len;
6653
6654 /* Used in tracing_mark_raw_write() as well */
6655 #define FAULTED_STR "<faulted>"
6656 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6657
6658         if (tracing_disabled)
6659                 return -EINVAL;
6660
6661         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6662                 return -EINVAL;
6663
6664         if (cnt > TRACE_BUF_SIZE)
6665                 cnt = TRACE_BUF_SIZE;
6666
6667         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6668
6669         local_save_flags(irq_flags);
6670         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6671
6672         /* If less than "<faulted>", then make sure we can still add that */
6673         if (cnt < FAULTED_SIZE)
6674                 size += FAULTED_SIZE - cnt;
6675
6676         buffer = tr->array_buffer.buffer;
6677         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6678                                             irq_flags, preempt_count());
6679         if (unlikely(!event))
6680                 /* Ring buffer disabled, return as if not open for write */
6681                 return -EBADF;
6682
6683         entry = ring_buffer_event_data(event);
6684         entry->ip = _THIS_IP_;
6685
6686         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6687         if (len) {
6688                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6689                 cnt = FAULTED_SIZE;
6690                 written = -EFAULT;
6691         } else
6692                 written = cnt;
6693
6694         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6695                 /* do not add \n before testing triggers, but add \0 */
6696                 entry->buf[cnt] = '\0';
6697                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6698         }
6699
6700         if (entry->buf[cnt - 1] != '\n') {
6701                 entry->buf[cnt] = '\n';
6702                 entry->buf[cnt + 1] = '\0';
6703         } else
6704                 entry->buf[cnt] = '\0';
6705
6706         if (static_branch_unlikely(&trace_marker_exports_enabled))
6707                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6708         __buffer_unlock_commit(buffer, event);
6709
6710         if (tt)
6711                 event_triggers_post_call(tr->trace_marker_file, tt);
6712
6713         if (written > 0)
6714                 *fpos += written;
6715
6716         return written;
6717 }
6718
6719 /* Limit it for now to 3K (including tag) */
6720 #define RAW_DATA_MAX_SIZE (1024*3)
6721
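/*
 * Write handler for trace_marker_raw: like tracing_mark_write(), but
 * records a TRACE_RAW_DATA event whose payload starts with a binary
 * tag id supplied by user space.
 */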
6722 static ssize_t
6723 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6724                                         size_t cnt, loff_t *fpos)
6725 {
6726         struct trace_array *tr = filp->private_data;
6727         struct ring_buffer_event *event;
6728         struct trace_buffer *buffer;
6729         struct raw_data_entry *entry;
6730         unsigned long irq_flags;
6731         ssize_t written;
6732         int size;
6733         int len;
6734
6735 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6736
6737         if (tracing_disabled)
6738                 return -EINVAL;
6739
6740         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6741                 return -EINVAL;
6742
6743         /* The marker must at least have a tag id */
6744         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6745                 return -EINVAL;
6746
6747         if (cnt > TRACE_BUF_SIZE)
6748                 cnt = TRACE_BUF_SIZE;
6749
6750         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6751
6752         local_save_flags(irq_flags);
6753         size = sizeof(*entry) + cnt;
6754         if (cnt < FAULT_SIZE_ID)
6755                 size += FAULT_SIZE_ID - cnt;
6756
6757         buffer = tr->array_buffer.buffer;
6758         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6759                                             irq_flags, preempt_count());
6760         if (!event)
6761                 /* Ring buffer disabled, return as if not open for write */
6762                 return -EBADF;
6763
6764         entry = ring_buffer_event_data(event);
6765
6766         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6767         if (len) {
6768                 entry->id = -1;
6769                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6770                 written = -EFAULT;
6771         } else
6772                 written = cnt;
6773
6774         __buffer_unlock_commit(buffer, event);
6775
6776         if (written > 0)
6777                 *fpos += written;
6778
6779         return written;
6780 }
6781
6782 static int tracing_clock_show(struct seq_file *m, void *v)
6783 {
6784         struct trace_array *tr = m->private;
6785         int i;
6786
6787         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6788                 seq_printf(m,
6789                         "%s%s%s%s", i ? " " : "",
6790                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6791                         i == tr->clock_id ? "]" : "");
6792         seq_putc(m, '\n');
6793
6794         return 0;
6795 }
6796
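/*
 * Switch the trace clock of @tr to the clock named @clockstr and reset
 * the buffers, since timestamps taken with different clocks cannot be
 * compared.
 */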
6797 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6798 {
6799         int i;
6800
6801         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6802                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6803                         break;
6804         }
6805         if (i == ARRAY_SIZE(trace_clocks))
6806                 return -EINVAL;
6807
6808         mutex_lock(&trace_types_lock);
6809
6810         tr->clock_id = i;
6811
6812         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6813
6814         /*
6815          * New clock may not be consistent with the previous clock.
6816          * Reset the buffer so that it doesn't have incomparable timestamps.
6817          */
6818         tracing_reset_online_cpus(&tr->array_buffer);
6819
6820 #ifdef CONFIG_TRACER_MAX_TRACE
6821         if (tr->max_buffer.buffer)
6822                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6823         tracing_reset_online_cpus(&tr->max_buffer);
6824 #endif
6825
6826         mutex_unlock(&trace_types_lock);
6827
6828         return 0;
6829 }
6830
6831 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6832                                    size_t cnt, loff_t *fpos)
6833 {
6834         struct seq_file *m = filp->private_data;
6835         struct trace_array *tr = m->private;
6836         char buf[64];
6837         const char *clockstr;
6838         int ret;
6839
6840         if (cnt >= sizeof(buf))
6841                 return -EINVAL;
6842
6843         if (copy_from_user(buf, ubuf, cnt))
6844                 return -EFAULT;
6845
6846         buf[cnt] = 0;
6847
6848         clockstr = strstrip(buf);
6849
6850         ret = tracing_set_clock(tr, clockstr);
6851         if (ret)
6852                 return ret;
6853
6854         *fpos += cnt;
6855
6856         return cnt;
6857 }
6858
6859 static int tracing_clock_open(struct inode *inode, struct file *file)
6860 {
6861         struct trace_array *tr = inode->i_private;
6862         int ret;
6863
6864         ret = tracing_check_open_get_tr(tr);
6865         if (ret)
6866                 return ret;
6867
6868         ret = single_open(file, tracing_clock_show, inode->i_private);
6869         if (ret < 0)
6870                 trace_array_put(tr);
6871
6872         return ret;
6873 }
6874
6875 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6876 {
6877         struct trace_array *tr = m->private;
6878
6879         mutex_lock(&trace_types_lock);
6880
6881         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6882                 seq_puts(m, "delta [absolute]\n");
6883         else
6884                 seq_puts(m, "[delta] absolute\n");
6885
6886         mutex_unlock(&trace_types_lock);
6887
6888         return 0;
6889 }
6890
6891 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6892 {
6893         struct trace_array *tr = inode->i_private;
6894         int ret;
6895
6896         ret = tracing_check_open_get_tr(tr);
6897         if (ret)
6898                 return ret;
6899
6900         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6901         if (ret < 0)
6902                 trace_array_put(tr);
6903
6904         return ret;
6905 }
6906
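/*
 * Switch @tr between delta and absolute timestamps.  Absolute mode is
 * reference counted so that multiple users can request it
 * independently.
 */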
6907 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6908 {
6909         int ret = 0;
6910
6911         mutex_lock(&trace_types_lock);
6912
6913         if (abs && tr->time_stamp_abs_ref++)
6914                 goto out;
6915
6916         if (!abs) {
6917                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6918                         ret = -EINVAL;
6919                         goto out;
6920                 }
6921
6922                 if (--tr->time_stamp_abs_ref)
6923                         goto out;
6924         }
6925
6926         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6927
6928 #ifdef CONFIG_TRACER_MAX_TRACE
6929         if (tr->max_buffer.buffer)
6930                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6931 #endif
6932  out:
6933         mutex_unlock(&trace_types_lock);
6934
6935         return ret;
6936 }
6937
6938 struct ftrace_buffer_info {
6939         struct trace_iterator   iter;
6940         void                    *spare;
6941         unsigned int            spare_cpu;
6942         unsigned int            read;
6943 };
6944
6945 #ifdef CONFIG_TRACER_SNAPSHOT
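/*
 * Open handler for the snapshot file: readers get a full trace
 * iterator over the max (snapshot) buffer, while writers only get a
 * stub seq_file carrying the iterator in ->private.
 */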
6946 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6947 {
6948         struct trace_array *tr = inode->i_private;
6949         struct trace_iterator *iter;
6950         struct seq_file *m;
6951         int ret;
6952
6953         ret = tracing_check_open_get_tr(tr);
6954         if (ret)
6955                 return ret;
6956
6957         if (file->f_mode & FMODE_READ) {
6958                 iter = __tracing_open(inode, file, true);
6959                 if (IS_ERR(iter))
6960                         ret = PTR_ERR(iter);
6961         } else {
6962                 /* Writes still need the seq_file to hold the private data */
6963                 ret = -ENOMEM;
6964                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6965                 if (!m)
6966                         goto out;
6967                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6968                 if (!iter) {
6969                         kfree(m);
6970                         goto out;
6971                 }
6972                 ret = 0;
6973
6974                 iter->tr = tr;
6975                 iter->array_buffer = &tr->max_buffer;
6976                 iter->cpu_file = tracing_get_cpu(inode);
6977                 m->private = iter;
6978                 file->private_data = m;
6979         }
6980 out:
6981         if (ret < 0)
6982                 trace_array_put(tr);
6983
6984         return ret;
6985 }
6986
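/*
 * Write handler for the snapshot file:
 *   0  - free the snapshot buffer (all-CPUs file only)
 *   1  - allocate the snapshot buffer if necessary and swap it with
 *        the live buffer
 *   >1 - clear the snapshot buffer without taking a new snapshot
 */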
6987 static ssize_t
6988 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6989                        loff_t *ppos)
6990 {
6991         struct seq_file *m = filp->private_data;
6992         struct trace_iterator *iter = m->private;
6993         struct trace_array *tr = iter->tr;
6994         unsigned long val;
6995         int ret;
6996
6997         ret = tracing_update_buffers();
6998         if (ret < 0)
6999                 return ret;
7000
7001         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002         if (ret)
7003                 return ret;
7004
7005         mutex_lock(&trace_types_lock);
7006
7007         if (tr->current_trace->use_max_tr) {
7008                 ret = -EBUSY;
7009                 goto out;
7010         }
7011
7012         arch_spin_lock(&tr->max_lock);
7013         if (tr->cond_snapshot)
7014                 ret = -EBUSY;
7015         arch_spin_unlock(&tr->max_lock);
7016         if (ret)
7017                 goto out;
7018
7019         switch (val) {
7020         case 0:
7021                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7022                         ret = -EINVAL;
7023                         break;
7024                 }
7025                 if (tr->allocated_snapshot)
7026                         free_snapshot(tr);
7027                 break;
7028         case 1:
7029 /* Only allow per-cpu swap if the ring buffer supports it */
7030 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7031                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7032                         ret = -EINVAL;
7033                         break;
7034                 }
7035 #endif
7036                 if (tr->allocated_snapshot)
7037                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7038                                         &tr->array_buffer, iter->cpu_file);
7039                 else
7040                         ret = tracing_alloc_snapshot_instance(tr);
7041                 if (ret < 0)
7042                         break;
7043                 local_irq_disable();
7044                 /* Now, we're going to swap */
7045                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7046                         update_max_tr(tr, current, smp_processor_id(), NULL);
7047                 else
7048                         update_max_tr_single(tr, current, iter->cpu_file);
7049                 local_irq_enable();
7050                 break;
7051         default:
7052                 if (tr->allocated_snapshot) {
7053                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7054                                 tracing_reset_online_cpus(&tr->max_buffer);
7055                         else
7056                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7057                 }
7058                 break;
7059         }
7060
7061         if (ret >= 0) {
7062                 *ppos += cnt;
7063                 ret = cnt;
7064         }
7065 out:
7066         mutex_unlock(&trace_types_lock);
7067         return ret;
7068 }
7069
7070 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7071 {
7072         struct seq_file *m = file->private_data;
7073         int ret;
7074
7075         ret = tracing_release(inode, file);
7076
7077         if (file->f_mode & FMODE_READ)
7078                 return ret;
7079
7080         /* If write only, the seq_file is just a stub */
7081         if (m)
7082                 kfree(m->private);
7083         kfree(m);
7084
7085         return 0;
7086 }
7087
7088 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7089 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7090                                     size_t count, loff_t *ppos);
7091 static int tracing_buffers_release(struct inode *inode, struct file *file);
7092 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7093                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7094
7095 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7096 {
7097         struct ftrace_buffer_info *info;
7098         int ret;
7099
7100         /* The following checks for tracefs lockdown */
7101         ret = tracing_buffers_open(inode, filp);
7102         if (ret < 0)
7103                 return ret;
7104
7105         info = filp->private_data;
7106
7107         if (info->iter.trace->use_max_tr) {
7108                 tracing_buffers_release(inode, filp);
7109                 return -EBUSY;
7110         }
7111
7112         info->iter.snapshot = true;
7113         info->iter.array_buffer = &info->iter.tr->max_buffer;
7114
7115         return ret;
7116 }
7117
7118 #endif /* CONFIG_TRACER_SNAPSHOT */
7119
7120
7121 static const struct file_operations tracing_thresh_fops = {
7122         .open           = tracing_open_generic,
7123         .read           = tracing_thresh_read,
7124         .write          = tracing_thresh_write,
7125         .llseek         = generic_file_llseek,
7126 };
7127
7128 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7129 static const struct file_operations tracing_max_lat_fops = {
7130         .open           = tracing_open_generic,
7131         .read           = tracing_max_lat_read,
7132         .write          = tracing_max_lat_write,
7133         .llseek         = generic_file_llseek,
7134 };
7135 #endif
7136
7137 static const struct file_operations set_tracer_fops = {
7138         .open           = tracing_open_generic,
7139         .read           = tracing_set_trace_read,
7140         .write          = tracing_set_trace_write,
7141         .llseek         = generic_file_llseek,
7142 };
7143
7144 static const struct file_operations tracing_pipe_fops = {
7145         .open           = tracing_open_pipe,
7146         .poll           = tracing_poll_pipe,
7147         .read           = tracing_read_pipe,
7148         .splice_read    = tracing_splice_read_pipe,
7149         .release        = tracing_release_pipe,
7150         .llseek         = no_llseek,
7151 };
7152
7153 static const struct file_operations tracing_entries_fops = {
7154         .open           = tracing_open_generic_tr,
7155         .read           = tracing_entries_read,
7156         .write          = tracing_entries_write,
7157         .llseek         = generic_file_llseek,
7158         .release        = tracing_release_generic_tr,
7159 };
7160
7161 static const struct file_operations tracing_total_entries_fops = {
7162         .open           = tracing_open_generic_tr,
7163         .read           = tracing_total_entries_read,
7164         .llseek         = generic_file_llseek,
7165         .release        = tracing_release_generic_tr,
7166 };
7167
7168 static const struct file_operations tracing_free_buffer_fops = {
7169         .open           = tracing_open_generic_tr,
7170         .write          = tracing_free_buffer_write,
7171         .release        = tracing_free_buffer_release,
7172 };
7173
7174 static const struct file_operations tracing_mark_fops = {
7175         .open           = tracing_open_generic_tr,
7176         .write          = tracing_mark_write,
7177         .llseek         = generic_file_llseek,
7178         .release        = tracing_release_generic_tr,
7179 };
7180
7181 static const struct file_operations tracing_mark_raw_fops = {
7182         .open           = tracing_open_generic_tr,
7183         .write          = tracing_mark_raw_write,
7184         .llseek         = generic_file_llseek,
7185         .release        = tracing_release_generic_tr,
7186 };
7187
7188 static const struct file_operations trace_clock_fops = {
7189         .open           = tracing_clock_open,
7190         .read           = seq_read,
7191         .llseek         = seq_lseek,
7192         .release        = tracing_single_release_tr,
7193         .write          = tracing_clock_write,
7194 };
7195
7196 static const struct file_operations trace_time_stamp_mode_fops = {
7197         .open           = tracing_time_stamp_mode_open,
7198         .read           = seq_read,
7199         .llseek         = seq_lseek,
7200         .release        = tracing_single_release_tr,
7201 };
7202
7203 #ifdef CONFIG_TRACER_SNAPSHOT
7204 static const struct file_operations snapshot_fops = {
7205         .open           = tracing_snapshot_open,
7206         .read           = seq_read,
7207         .write          = tracing_snapshot_write,
7208         .llseek         = tracing_lseek,
7209         .release        = tracing_snapshot_release,
7210 };
7211
7212 static const struct file_operations snapshot_raw_fops = {
7213         .open           = snapshot_raw_open,
7214         .read           = tracing_buffers_read,
7215         .release        = tracing_buffers_release,
7216         .splice_read    = tracing_buffers_splice_read,
7217         .llseek         = no_llseek,
7218 };
7219
7220 #endif /* CONFIG_TRACER_SNAPSHOT */
7221
7222 #define TRACING_LOG_ERRS_MAX    8
7223 #define TRACING_LOG_LOC_MAX     128
7224
7225 #define CMD_PREFIX "  Command: "
7226
7227 struct err_info {
7228         const char      **errs; /* ptr to loc-specific array of err strings */
7229         u8              type;   /* index into errs -> specific err string */
7230         u8              pos;    /* caret position; fits in a u8 since MAX_FILTER_STR_VAL = 256 */
7231         u64             ts;
7232 };
7233
7234 struct tracing_log_err {
7235         struct list_head        list;
7236         struct err_info         info;
7237         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7238         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7239 };
7240
7241 static DEFINE_MUTEX(tracing_err_log_lock);
7242
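/*
 * Get a tracing_log_err slot to fill in: allocate a new entry until
 * TRACING_LOG_ERRS_MAX of them exist, then recycle the oldest one.
 * Called with tracing_err_log_lock held.
 */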
7243 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7244 {
7245         struct tracing_log_err *err;
7246
7247         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7248                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7249                 if (!err)
7250                         return ERR_PTR(-ENOMEM);
7251                 tr->n_err_log_entries++;
7252
7253                 return err;
7254         }
7255
7256         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7257         list_del(&err->list);
7258
7259         return err;
7260 }
7261
7262 /**
7263  * err_pos - find the position of a string within a command for error careting
7264  * @cmd: The tracing command that caused the error
7265  * @str: The string to position the caret at within @cmd
7266  *
7267  * Finds the position of the first occurrence of @str within @cmd.  The
7268  * return value can be passed to tracing_log_err() for caret placement
7269  * within @cmd.
7270  *
7271  * Returns the index within @cmd of the first occurrence of @str or 0
7272  * if @str was not found.
7273  */
7274 unsigned int err_pos(char *cmd, const char *str)
7275 {
7276         char *found;
7277
7278         if (WARN_ON(!strlen(cmd)))
7279                 return 0;
7280
7281         found = strstr(cmd, str);
7282         if (found)
7283                 return found - cmd;
7284
7285         return 0;
7286 }
7287
7288 /**
7289  * tracing_log_err - write an error to the tracing error log
7290  * @tr: The associated trace array for the error (NULL for top level array)
7291  * @loc: A string describing where the error occurred
7292  * @cmd: The tracing command that caused the error
7293  * @errs: The array of loc-specific static error strings
7294  * @type: The index into errs[], which produces the specific static err string
7295  * @pos: The position the caret should be placed in the cmd
7296  *
7297  * Writes an error into tracing/error_log of the form:
7298  *
7299  * <loc>: error: <text>
7300  *   Command: <cmd>
7301  *              ^
7302  *
7303  * tracing/error_log is a small log file containing the last
7304  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7305  * unless there has been a tracing error, and the error log can be
7306  * cleared and have its memory freed by writing the empty string in
7307  * truncation mode to it, i.e. echo > tracing/error_log.
7308  *
7309  * NOTE: the @errs array and the @type param are used to
7310  * produce a static error string - this string is not copied and saved
7311  * when the error is logged - only a pointer to it is saved.  See
7312  * existing callers for examples of how static strings are typically
7313  * defined for use with tracing_log_err().
7314  */
7315 void tracing_log_err(struct trace_array *tr,
7316                      const char *loc, const char *cmd,
7317                      const char **errs, u8 type, u8 pos)
7318 {
7319         struct tracing_log_err *err;
7320
7321         if (!tr)
7322                 tr = &global_trace;
7323
7324         mutex_lock(&tracing_err_log_lock);
7325         err = get_tracing_log_err(tr);
7326         if (PTR_ERR(err) == -ENOMEM) {
7327                 mutex_unlock(&tracing_err_log_lock);
7328                 return;
7329         }
7330
7331         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7332         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7333
7334         err->info.errs = errs;
7335         err->info.type = type;
7336         err->info.pos = pos;
7337         err->info.ts = local_clock();
7338
7339         list_add_tail(&err->list, &tr->err_log);
7340         mutex_unlock(&tracing_err_log_lock);
7341 }
7342
7343 static void clear_tracing_err_log(struct trace_array *tr)
7344 {
7345         struct tracing_log_err *err, *next;
7346
7347         mutex_lock(&tracing_err_log_lock);
7348         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7349                 list_del(&err->list);
7350                 kfree(err);
7351         }
7352
7353         tr->n_err_log_entries = 0;
7354         mutex_unlock(&tracing_err_log_lock);
7355 }
7356
7357 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7358 {
7359         struct trace_array *tr = m->private;
7360
7361         mutex_lock(&tracing_err_log_lock);
7362
7363         return seq_list_start(&tr->err_log, *pos);
7364 }
7365
7366 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7367 {
7368         struct trace_array *tr = m->private;
7369
7370         return seq_list_next(v, &tr->err_log, pos);
7371 }
7372
7373 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7374 {
7375         mutex_unlock(&tracing_err_log_lock);
7376 }
7377
7378 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7379 {
7380         u8 i;
7381
7382         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7383                 seq_putc(m, ' ');
7384         for (i = 0; i < pos; i++)
7385                 seq_putc(m, ' ');
7386         seq_puts(m, "^\n");
7387 }
7388
7389 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7390 {
7391         struct tracing_log_err *err = v;
7392
7393         if (err) {
7394                 const char *err_text = err->info.errs[err->info.type];
7395                 u64 sec = err->info.ts;
7396                 u32 nsec;
7397
7398                 nsec = do_div(sec, NSEC_PER_SEC);
7399                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7400                            err->loc, err_text);
7401                 seq_printf(m, "%s", err->cmd);
7402                 tracing_err_log_show_pos(m, err->info.pos);
7403         }
7404
7405         return 0;
7406 }
7407
7408 static const struct seq_operations tracing_err_log_seq_ops = {
7409         .start  = tracing_err_log_seq_start,
7410         .next   = tracing_err_log_seq_next,
7411         .stop   = tracing_err_log_seq_stop,
7412         .show   = tracing_err_log_seq_show
7413 };
7414
7415 static int tracing_err_log_open(struct inode *inode, struct file *file)
7416 {
7417         struct trace_array *tr = inode->i_private;
7418         int ret = 0;
7419
7420         ret = tracing_check_open_get_tr(tr);
7421         if (ret)
7422                 return ret;
7423
7424         /* If this file was opened for write, then erase contents */
7425         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7426                 clear_tracing_err_log(tr);
7427
7428         if (file->f_mode & FMODE_READ) {
7429                 ret = seq_open(file, &tracing_err_log_seq_ops);
7430                 if (!ret) {
7431                         struct seq_file *m = file->private_data;
7432                         m->private = tr;
7433                 } else {
7434                         trace_array_put(tr);
7435                 }
7436         }
7437         return ret;
7438 }
7439
7440 static ssize_t tracing_err_log_write(struct file *file,
7441                                      const char __user *buffer,
7442                                      size_t count, loff_t *ppos)
7443 {
7444         return count;
7445 }
7446
7447 static int tracing_err_log_release(struct inode *inode, struct file *file)
7448 {
7449         struct trace_array *tr = inode->i_private;
7450
7451         trace_array_put(tr);
7452
7453         if (file->f_mode & FMODE_READ)
7454                 seq_release(inode, file);
7455
7456         return 0;
7457 }
7458
7459 static const struct file_operations tracing_err_log_fops = {
7460         .open           = tracing_err_log_open,
7461         .write          = tracing_err_log_write,
7462         .read           = seq_read,
7463         .llseek         = seq_lseek,
7464         .release        = tracing_err_log_release,
7465 };
7466
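/*
 * Open handler for the per-cpu trace_pipe_raw files: set up an
 * ftrace_buffer_info through which raw ring buffer pages of the
 * selected CPU can be read.
 */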
7467 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7468 {
7469         struct trace_array *tr = inode->i_private;
7470         struct ftrace_buffer_info *info;
7471         int ret;
7472
7473         ret = tracing_check_open_get_tr(tr);
7474         if (ret)
7475                 return ret;
7476
7477         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7478         if (!info) {
7479                 trace_array_put(tr);
7480                 return -ENOMEM;
7481         }
7482
7483         mutex_lock(&trace_types_lock);
7484
7485         info->iter.tr           = tr;
7486         info->iter.cpu_file     = tracing_get_cpu(inode);
7487         info->iter.trace        = tr->current_trace;
7488         info->iter.array_buffer = &tr->array_buffer;
7489         info->spare             = NULL;
7490         /* Force reading ring buffer for first read */
7491         info->read              = (unsigned int)-1;
7492
7493         filp->private_data = info;
7494
7495         tr->trace_ref++;
7496
7497         mutex_unlock(&trace_types_lock);
7498
7499         ret = nonseekable_open(inode, filp);
7500         if (ret < 0)
7501                 trace_array_put(tr);
7502
7503         return ret;
7504 }
7505
7506 static __poll_t
7507 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7508 {
7509         struct ftrace_buffer_info *info = filp->private_data;
7510         struct trace_iterator *iter = &info->iter;
7511
7512         return trace_poll(iter, filp, poll_table);
7513 }
7514
7515 static ssize_t
7516 tracing_buffers_read(struct file *filp, char __user *ubuf,
7517                      size_t count, loff_t *ppos)
7518 {
7519         struct ftrace_buffer_info *info = filp->private_data;
7520         struct trace_iterator *iter = &info->iter;
7521         ssize_t ret = 0;
7522         ssize_t size;
7523
7524         if (!count)
7525                 return 0;
7526
7527 #ifdef CONFIG_TRACER_MAX_TRACE
7528         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7529                 return -EBUSY;
7530 #endif
7531
7532         if (!info->spare) {
7533                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7534                                                           iter->cpu_file);
7535                 if (IS_ERR(info->spare)) {
7536                         ret = PTR_ERR(info->spare);
7537                         info->spare = NULL;
7538                 } else {
7539                         info->spare_cpu = iter->cpu_file;
7540                 }
7541         }
7542         if (!info->spare)
7543                 return ret;
7544
7545         /* Do we have previous read data to read? */
7546         if (info->read < PAGE_SIZE)
7547                 goto read;
7548
7549  again:
7550         trace_access_lock(iter->cpu_file);
7551         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7552                                     &info->spare,
7553                                     count,
7554                                     iter->cpu_file, 0);
7555         trace_access_unlock(iter->cpu_file);
7556
7557         if (ret < 0) {
7558                 if (trace_empty(iter)) {
7559                         if ((filp->f_flags & O_NONBLOCK))
7560                                 return -EAGAIN;
7561
7562                         ret = wait_on_pipe(iter, 0);
7563                         if (ret)
7564                                 return ret;
7565
7566                         goto again;
7567                 }
7568                 return 0;
7569         }
7570
7571         info->read = 0;
7572  read:
7573         size = PAGE_SIZE - info->read;
7574         if (size > count)
7575                 size = count;
7576
7577         ret = copy_to_user(ubuf, info->spare + info->read, size);
7578         if (ret == size)
7579                 return -EFAULT;
7580
7581         size -= ret;
7582
7583         *ppos += size;
7584         info->read += size;
7585
7586         return size;
7587 }
7588
7589 static int tracing_buffers_release(struct inode *inode, struct file *file)
7590 {
7591         struct ftrace_buffer_info *info = file->private_data;
7592         struct trace_iterator *iter = &info->iter;
7593
7594         mutex_lock(&trace_types_lock);
7595
7596         iter->tr->trace_ref--;
7597
7598         __trace_array_put(iter->tr);
7599
7600         if (info->spare)
7601                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7602                                            info->spare_cpu, info->spare);
7603         kvfree(info);
7604
7605         mutex_unlock(&trace_types_lock);
7606
7607         return 0;
7608 }
7609
7610 struct buffer_ref {
7611         struct trace_buffer     *buffer;
7612         void                    *page;
7613         int                     cpu;
7614         refcount_t              refcount;
7615 };
7616
7617 static void buffer_ref_release(struct buffer_ref *ref)
7618 {
7619         if (!refcount_dec_and_test(&ref->refcount))
7620                 return;
7621         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7622         kfree(ref);
7623 }
7624
7625 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7626                                     struct pipe_buffer *buf)
7627 {
7628         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7629
7630         buffer_ref_release(ref);
7631         buf->private = 0;
7632 }
7633
7634 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7635                                 struct pipe_buffer *buf)
7636 {
7637         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7638
7639         if (refcount_read(&ref->refcount) > INT_MAX/2)
7640                 return false;
7641
7642         refcount_inc(&ref->refcount);
7643         return true;
7644 }
7645
7646 /* Pipe buffer operations for a buffer. */
7647 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7648         .release                = buffer_pipe_buf_release,
7649         .get                    = buffer_pipe_buf_get,
7650 };
7651
7652 /*
7653  * Callback from splice_to_pipe(): release the pages left in the spd
7654  * if we errored out while filling the pipe.
7655  */
7656 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7657 {
7658         struct buffer_ref *ref =
7659                 (struct buffer_ref *)spd->partial[i].private;
7660
7661         buffer_ref_release(ref);
7662         spd->partial[i].private = 0;
7663 }
7664
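/*
 * Splice implementation for trace_pipe_raw: whole ring buffer pages
 * are handed to the pipe and reference counted via buffer_ref, so a
 * page is only freed once every consumer is done with it.
 */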
7665 static ssize_t
7666 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7667                             struct pipe_inode_info *pipe, size_t len,
7668                             unsigned int flags)
7669 {
7670         struct ftrace_buffer_info *info = file->private_data;
7671         struct trace_iterator *iter = &info->iter;
7672         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7673         struct page *pages_def[PIPE_DEF_BUFFERS];
7674         struct splice_pipe_desc spd = {
7675                 .pages          = pages_def,
7676                 .partial        = partial_def,
7677                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7678                 .ops            = &buffer_pipe_buf_ops,
7679                 .spd_release    = buffer_spd_release,
7680         };
7681         struct buffer_ref *ref;
7682         int entries, i;
7683         ssize_t ret = 0;
7684
7685 #ifdef CONFIG_TRACER_MAX_TRACE
7686         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7687                 return -EBUSY;
7688 #endif
7689
7690         if (*ppos & (PAGE_SIZE - 1))
7691                 return -EINVAL;
7692
7693         if (len & (PAGE_SIZE - 1)) {
7694                 if (len < PAGE_SIZE)
7695                         return -EINVAL;
7696                 len &= PAGE_MASK;
7697         }
7698
7699         if (splice_grow_spd(pipe, &spd))
7700                 return -ENOMEM;
7701
7702  again:
7703         trace_access_lock(iter->cpu_file);
7704         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7705
7706         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7707                 struct page *page;
7708                 int r;
7709
7710                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7711                 if (!ref) {
7712                         ret = -ENOMEM;
7713                         break;
7714                 }
7715
7716                 refcount_set(&ref->refcount, 1);
7717                 ref->buffer = iter->array_buffer->buffer;
7718                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7719                 if (IS_ERR(ref->page)) {
7720                         ret = PTR_ERR(ref->page);
7721                         ref->page = NULL;
7722                         kfree(ref);
7723                         break;
7724                 }
7725                 ref->cpu = iter->cpu_file;
7726
7727                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7728                                           len, iter->cpu_file, 1);
7729                 if (r < 0) {
7730                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7731                                                    ref->page);
7732                         kfree(ref);
7733                         break;
7734                 }
7735
7736                 page = virt_to_page(ref->page);
7737
7738                 spd.pages[i] = page;
7739                 spd.partial[i].len = PAGE_SIZE;
7740                 spd.partial[i].offset = 0;
7741                 spd.partial[i].private = (unsigned long)ref;
7742                 spd.nr_pages++;
7743                 *ppos += PAGE_SIZE;
7744
7745                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7746         }
7747
7748         trace_access_unlock(iter->cpu_file);
7749         spd.nr_pages = i;
7750
7751         /* did we read anything? */
7752         if (!spd.nr_pages) {
7753                 if (ret)
7754                         goto out;
7755
7756                 ret = -EAGAIN;
7757                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7758                         goto out;
7759
7760                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7761                 if (ret)
7762                         goto out;
7763
7764                 goto again;
7765         }
7766
7767         ret = splice_to_pipe(pipe, &spd);
7768 out:
7769         splice_shrink_spd(&spd);
7770
7771         return ret;
7772 }
7773
7774 static const struct file_operations tracing_buffers_fops = {
7775         .open           = tracing_buffers_open,
7776         .read           = tracing_buffers_read,
7777         .poll           = tracing_buffers_poll,
7778         .release        = tracing_buffers_release,
7779         .splice_read    = tracing_buffers_splice_read,
7780         .llseek         = no_llseek,
7781 };
7782
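/*
 * Read handler for the per-cpu stats file: report entry, overrun,
 * byte, dropped and read-event counters for one CPU's ring buffer,
 * along with its oldest and current timestamps.
 */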
7783 static ssize_t
7784 tracing_stats_read(struct file *filp, char __user *ubuf,
7785                    size_t count, loff_t *ppos)
7786 {
7787         struct inode *inode = file_inode(filp);
7788         struct trace_array *tr = inode->i_private;
7789         struct array_buffer *trace_buf = &tr->array_buffer;
7790         int cpu = tracing_get_cpu(inode);
7791         struct trace_seq *s;
7792         unsigned long cnt;
7793         unsigned long long t;
7794         unsigned long usec_rem;
7795
7796         s = kmalloc(sizeof(*s), GFP_KERNEL);
7797         if (!s)
7798                 return -ENOMEM;
7799
7800         trace_seq_init(s);
7801
7802         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7803         trace_seq_printf(s, "entries: %ld\n", cnt);
7804
7805         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7806         trace_seq_printf(s, "overrun: %ld\n", cnt);
7807
7808         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7809         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7810
7811         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7812         trace_seq_printf(s, "bytes: %ld\n", cnt);
7813
7814         if (trace_clocks[tr->clock_id].in_ns) {
7815                 /* local or global for trace_clock */
7816                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7817                 usec_rem = do_div(t, USEC_PER_SEC);
7818                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7819                                                                 t, usec_rem);
7820
7821                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7822                 usec_rem = do_div(t, USEC_PER_SEC);
7823                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7824         } else {
7825                 /* counter or tsc mode for trace_clock */
7826                 trace_seq_printf(s, "oldest event ts: %llu\n",
7827                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7828
7829                 trace_seq_printf(s, "now ts: %llu\n",
7830                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7831         }
7832
7833         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7834         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7835
7836         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7837         trace_seq_printf(s, "read events: %ld\n", cnt);
7838
7839         count = simple_read_from_buffer(ubuf, count, ppos,
7840                                         s->buffer, trace_seq_used(s));
7841
7842         kfree(s);
7843
7844         return count;
7845 }
7846
7847 static const struct file_operations tracing_stats_fops = {
7848         .open           = tracing_open_generic_tr,
7849         .read           = tracing_stats_read,
7850         .llseek         = generic_file_llseek,
7851         .release        = tracing_release_generic_tr,
7852 };
7853
7854 #ifdef CONFIG_DYNAMIC_FTRACE
7855
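/*
 * Read handler for dyn_ftrace_total_info: report the number of
 * functions recorded by dynamic ftrace (ftrace_update_tot_cnt) and the
 * number of pages and groups used to hold their records.
 */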
7856 static ssize_t
7857 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7858                   size_t cnt, loff_t *ppos)
7859 {
7860         ssize_t ret;
7861         char *buf;
7862         int r;
7863
7864         /* 256 should be plenty to hold the amount needed */
7865         buf = kmalloc(256, GFP_KERNEL);
7866         if (!buf)
7867                 return -ENOMEM;
7868
7869         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7870                       ftrace_update_tot_cnt,
7871                       ftrace_number_of_pages,
7872                       ftrace_number_of_groups);
7873
7874         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7875         kfree(buf);
7876         return ret;
7877 }
7878
7879 static const struct file_operations tracing_dyn_info_fops = {
7880         .open           = tracing_open_generic,
7881         .read           = tracing_read_dyn_info,
7882         .llseek         = generic_file_llseek,
7883 };
7884 #endif /* CONFIG_DYNAMIC_FTRACE */
7885
7886 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7887 static void
7888 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7889                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7890                 void *data)
7891 {
7892         tracing_snapshot_instance(tr);
7893 }
7894
7895 static void
7896 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7897                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7898                       void *data)
7899 {
7900         struct ftrace_func_mapper *mapper = data;
7901         long *count = NULL;
7902
7903         if (mapper)
7904                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7905
7906         if (count) {
7907
7908                 if (*count <= 0)
7909                         return;
7910
7911                 (*count)--;
7912         }
7913
7914         tracing_snapshot_instance(tr);
7915 }
7916
7917 static int
7918 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7919                       struct ftrace_probe_ops *ops, void *data)
7920 {
7921         struct ftrace_func_mapper *mapper = data;
7922         long *count = NULL;
7923
7924         seq_printf(m, "%ps:", (void *)ip);
7925
7926         seq_puts(m, "snapshot");
7927
7928         if (mapper)
7929                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7930
7931         if (count)
7932                 seq_printf(m, ":count=%ld\n", *count);
7933         else
7934                 seq_puts(m, ":unlimited\n");
7935
7936         return 0;
7937 }
7938
7939 static int
7940 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7941                      unsigned long ip, void *init_data, void **data)
7942 {
7943         struct ftrace_func_mapper *mapper = *data;
7944
7945         if (!mapper) {
7946                 mapper = allocate_ftrace_func_mapper();
7947                 if (!mapper)
7948                         return -ENOMEM;
7949                 *data = mapper;
7950         }
7951
7952         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7953 }
7954
7955 static void
7956 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7957                      unsigned long ip, void *data)
7958 {
7959         struct ftrace_func_mapper *mapper = data;
7960
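	/* A zero ip means the probe itself is being removed: free the whole mapper */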
7961         if (!ip) {
7962                 if (!mapper)
7963                         return;
7964                 free_ftrace_func_mapper(mapper, NULL);
7965                 return;
7966         }
7967
7968         ftrace_func_mapper_remove_ip(mapper, ip);
7969 }
7970
7971 static struct ftrace_probe_ops snapshot_probe_ops = {
7972         .func                   = ftrace_snapshot,
7973         .print                  = ftrace_snapshot_print,
7974 };
7975
7976 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7977         .func                   = ftrace_count_snapshot,
7978         .print                  = ftrace_snapshot_print,
7979         .init                   = ftrace_snapshot_init,
7980         .free                   = ftrace_snapshot_free,
7981 };
7982
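/*
 * Handler for the "snapshot" command used via set_ftrace_filter, e.g.:
 *
 *   echo 'some_func:snapshot'   > set_ftrace_filter
 *   echo 'some_func:snapshot:3' > set_ftrace_filter
 *   echo '!some_func:snapshot'  > set_ftrace_filter
 *
 * An optional :count limits how many snapshots the probe may take, and a
 * leading '!' removes the probe again ('some_func' is only an example name).
 */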
7983 static int
7984 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7985                                char *glob, char *cmd, char *param, int enable)
7986 {
7987         struct ftrace_probe_ops *ops;
7988         void *count = (void *)-1;
7989         char *number;
7990         int ret;
7991
7992         if (!tr)
7993                 return -ENODEV;
7994
7995         /* hash funcs only work with set_ftrace_filter */
7996         if (!enable)
7997                 return -EINVAL;
7998
7999         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8000
8001         if (glob[0] == '!')
8002                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8003
8004         if (!param)
8005                 goto out_reg;
8006
8007         number = strsep(&param, ":");
8008
8009         if (!strlen(number))
8010                 goto out_reg;
8011
8012         /*
8013          * We use the callback data field (which is a pointer)
8014          * as our counter.
8015          */
8016         ret = kstrtoul(number, 0, (unsigned long *)&count);
8017         if (ret)
8018                 return ret;
8019
8020  out_reg:
8021         ret = tracing_alloc_snapshot_instance(tr);
8022         if (ret < 0)
8023                 goto out;
8024
8025         ret = register_ftrace_function_probe(glob, tr, ops, count);
8026
8027  out:
8028         return ret < 0 ? ret : 0;
8029 }
8030
8031 static struct ftrace_func_command ftrace_snapshot_cmd = {
8032         .name                   = "snapshot",
8033         .func                   = ftrace_trace_snapshot_callback,
8034 };
8035
8036 static __init int register_snapshot_cmd(void)
8037 {
8038         return register_ftrace_command(&ftrace_snapshot_cmd);
8039 }
8040 #else
8041 static inline __init int register_snapshot_cmd(void) { return 0; }
8042 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8043
8044 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8045 {
8046         if (WARN_ON(!tr->dir))
8047                 return ERR_PTR(-ENODEV);
8048
8049         /* Top directory uses NULL as the parent */
8050         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8051                 return NULL;
8052
8053         /* All sub buffers have a descriptor */
8054         return tr->dir;
8055 }
8056
8057 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8058 {
8059         struct dentry *d_tracer;
8060
8061         if (tr->percpu_dir)
8062                 return tr->percpu_dir;
8063
8064         d_tracer = tracing_get_dentry(tr);
8065         if (IS_ERR(d_tracer))
8066                 return NULL;
8067
8068         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8069
8070         MEM_FAIL(!tr->percpu_dir,
8071                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8072
8073         return tr->percpu_dir;
8074 }
8075
8076 static struct dentry *
8077 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8078                       void *data, long cpu, const struct file_operations *fops)
8079 {
8080         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8081
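	/* Store cpu + 1 so that CPU 0 can be told apart from a NULL i_cdev */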
8082         if (ret) /* See tracing_get_cpu() */
8083                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8084         return ret;
8085 }
8086
8087 static void
8088 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8089 {
8090         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8091         struct dentry *d_cpu;
8092         char cpu_dir[30]; /* 30 characters should be more than enough */
8093
8094         if (!d_percpu)
8095                 return;
8096
8097         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8098         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8099         if (!d_cpu) {
8100                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8101                 return;
8102         }
8103
8104         /* per cpu trace_pipe */
8105         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8106                                 tr, cpu, &tracing_pipe_fops);
8107
8108         /* per cpu trace */
8109         trace_create_cpu_file("trace", 0644, d_cpu,
8110                                 tr, cpu, &tracing_fops);
8111
8112         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8113                                 tr, cpu, &tracing_buffers_fops);
8114
8115         trace_create_cpu_file("stats", 0444, d_cpu,
8116                                 tr, cpu, &tracing_stats_fops);
8117
8118         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8119                                 tr, cpu, &tracing_entries_fops);
8120
8121 #ifdef CONFIG_TRACER_SNAPSHOT
8122         trace_create_cpu_file("snapshot", 0644, d_cpu,
8123                                 tr, cpu, &snapshot_fops);
8124
8125         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8126                                 tr, cpu, &snapshot_raw_fops);
8127 #endif
8128 }
8129
8130 #ifdef CONFIG_FTRACE_SELFTEST
8131 /* Let selftest have access to static functions in this file */
8132 #include "trace_selftest.c"
8133 #endif
8134
8135 static ssize_t
8136 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8137                         loff_t *ppos)
8138 {
8139         struct trace_option_dentry *topt = filp->private_data;
8140         char *buf;
8141
8142         if (topt->flags->val & topt->opt->bit)
8143                 buf = "1\n";
8144         else
8145                 buf = "0\n";
8146
8147         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8148 }
8149
8150 static ssize_t
8151 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8152                          loff_t *ppos)
8153 {
8154         struct trace_option_dentry *topt = filp->private_data;
8155         unsigned long val;
8156         int ret;
8157
8158         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8159         if (ret)
8160                 return ret;
8161
8162         if (val != 0 && val != 1)
8163                 return -EINVAL;
8164
8165         if (!!(topt->flags->val & topt->opt->bit) != val) {
8166                 mutex_lock(&trace_types_lock);
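		/* __set_tracer_option() takes a "neg" argument, hence the !val */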
8167                 ret = __set_tracer_option(topt->tr, topt->flags,
8168                                           topt->opt, !val);
8169                 mutex_unlock(&trace_types_lock);
8170                 if (ret)
8171                         return ret;
8172         }
8173
8174         *ppos += cnt;
8175
8176         return cnt;
8177 }
8178
8179
8180 static const struct file_operations trace_options_fops = {
8181         .open = tracing_open_generic,
8182         .read = trace_options_read,
8183         .write = trace_options_write,
8184         .llseek = generic_file_llseek,
8185 };
8186
8187 /*
8188  * In order to pass in both the trace_array descriptor as well as the index
8189  * to the flag that the trace option file represents, the trace_array
8190  * has a character array of trace_flags_index[], which holds the index
8191  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8192  * The address of this character array is passed to the flag option file
8193  * read/write callbacks.
8194  *
8195  * In order to extract both the index and the trace_array descriptor,
8196  * get_tr_index() uses the following algorithm.
8197  *
8198  *   idx = *ptr;
8199  *
8200  * The pointer itself is the address of an index entry whose value is
8201  * its own position (remember index[1] == 1), so dereferencing it
8202  * yields the index.
8203  *
8204  * Then, subtracting that index from the pointer lands on the start
8205  * of the index array itself:
8206  *   ptr - idx == &index[0]
8207  *
8208  * Then a simple container_of() from that pointer gets us to the
8209  * trace_array descriptor.
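 *
 * For example, the option file for bit 3 is handed
 * &tr->trace_flags_index[3], which holds the value 3; subtracting 3
 * from that address gives &tr->trace_flags_index[0], and container_of()
 * on that address gives the enclosing trace_array.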
8210  */
8211 static void get_tr_index(void *data, struct trace_array **ptr,
8212                          unsigned int *pindex)
8213 {
8214         *pindex = *(unsigned char *)data;
8215
8216         *ptr = container_of(data - *pindex, struct trace_array,
8217                             trace_flags_index);
8218 }
8219
8220 static ssize_t
8221 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8222                         loff_t *ppos)
8223 {
8224         void *tr_index = filp->private_data;
8225         struct trace_array *tr;
8226         unsigned int index;
8227         char *buf;
8228
8229         get_tr_index(tr_index, &tr, &index);
8230
8231         if (tr->trace_flags & (1 << index))
8232                 buf = "1\n";
8233         else
8234                 buf = "0\n";
8235
8236         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8237 }
8238
8239 static ssize_t
8240 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8241                          loff_t *ppos)
8242 {
8243         void *tr_index = filp->private_data;
8244         struct trace_array *tr;
8245         unsigned int index;
8246         unsigned long val;
8247         int ret;
8248
8249         get_tr_index(tr_index, &tr, &index);
8250
8251         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8252         if (ret)
8253                 return ret;
8254
8255         if (val != 0 && val != 1)
8256                 return -EINVAL;
8257
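	/* Lock ordering: event_mutex must be taken before trace_types_lock */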
8258         mutex_lock(&event_mutex);
8259         mutex_lock(&trace_types_lock);
8260         ret = set_tracer_flag(tr, 1 << index, val);
8261         mutex_unlock(&trace_types_lock);
8262         mutex_unlock(&event_mutex);
8263
8264         if (ret < 0)
8265                 return ret;
8266
8267         *ppos += cnt;
8268
8269         return cnt;
8270 }
8271
8272 static const struct file_operations trace_options_core_fops = {
8273         .open = tracing_open_generic,
8274         .read = trace_options_core_read,
8275         .write = trace_options_core_write,
8276         .llseek = generic_file_llseek,
8277 };
8278
8279 struct dentry *trace_create_file(const char *name,
8280                                  umode_t mode,
8281                                  struct dentry *parent,
8282                                  void *data,
8283                                  const struct file_operations *fops)
8284 {
8285         struct dentry *ret;
8286
8287         ret = tracefs_create_file(name, mode, parent, data, fops);
8288         if (!ret)
8289                 pr_warn("Could not create tracefs '%s' entry\n", name);
8290
8291         return ret;
8292 }
8293
8294
8295 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8296 {
8297         struct dentry *d_tracer;
8298
8299         if (tr->options)
8300                 return tr->options;
8301
8302         d_tracer = tracing_get_dentry(tr);
8303         if (IS_ERR(d_tracer))
8304                 return NULL;
8305
8306         tr->options = tracefs_create_dir("options", d_tracer);
8307         if (!tr->options) {
8308                 pr_warn("Could not create tracefs directory 'options'\n");
8309                 return NULL;
8310         }
8311
8312         return tr->options;
8313 }
8314
8315 static void
8316 create_trace_option_file(struct trace_array *tr,
8317                          struct trace_option_dentry *topt,
8318                          struct tracer_flags *flags,
8319                          struct tracer_opt *opt)
8320 {
8321         struct dentry *t_options;
8322
8323         t_options = trace_options_init_dentry(tr);
8324         if (!t_options)
8325                 return;
8326
8327         topt->flags = flags;
8328         topt->opt = opt;
8329         topt->tr = tr;
8330
8331         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8332                                     &trace_options_fops);
8333
8334 }
8335
8336 static void
8337 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8338 {
8339         struct trace_option_dentry *topts;
8340         struct trace_options *tr_topts;
8341         struct tracer_flags *flags;
8342         struct tracer_opt *opts;
8343         int cnt;
8344         int i;
8345
8346         if (!tracer)
8347                 return;
8348
8349         flags = tracer->flags;
8350
8351         if (!flags || !flags->opts)
8352                 return;
8353
8354         /*
8355          * If this is an instance, only create flags for tracers
8356          * the instance may have.
8357          */
8358         if (!trace_ok_for_array(tracer, tr))
8359                 return;
8360
8361         for (i = 0; i < tr->nr_topts; i++) {
8362                 /* Make sure there are no duplicate flags. */
8363                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8364                         return;
8365         }
8366
8367         opts = flags->opts;
8368
8369         for (cnt = 0; opts[cnt].name; cnt++)
8370                 ;
8371
8372         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8373         if (!topts)
8374                 return;
8375
8376         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8377                             GFP_KERNEL);
8378         if (!tr_topts) {
8379                 kfree(topts);
8380                 return;
8381         }
8382
8383         tr->topts = tr_topts;
8384         tr->topts[tr->nr_topts].tracer = tracer;
8385         tr->topts[tr->nr_topts].topts = topts;
8386         tr->nr_topts++;
8387
8388         for (cnt = 0; opts[cnt].name; cnt++) {
8389                 create_trace_option_file(tr, &topts[cnt], flags,
8390                                          &opts[cnt]);
8391                 MEM_FAIL(topts[cnt].entry == NULL,
8392                           "Failed to create trace option: %s",
8393                           opts[cnt].name);
8394         }
8395 }
8396
8397 static struct dentry *
8398 create_trace_option_core_file(struct trace_array *tr,
8399                               const char *option, long index)
8400 {
8401         struct dentry *t_options;
8402
8403         t_options = trace_options_init_dentry(tr);
8404         if (!t_options)
8405                 return NULL;
8406
8407         return trace_create_file(option, 0644, t_options,
8408                                  (void *)&tr->trace_flags_index[index],
8409                                  &trace_options_core_fops);
8410 }
8411
8412 static void create_trace_options_dir(struct trace_array *tr)
8413 {
8414         struct dentry *t_options;
8415         bool top_level = tr == &global_trace;
8416         int i;
8417
8418         t_options = trace_options_init_dentry(tr);
8419         if (!t_options)
8420                 return;
8421
8422         for (i = 0; trace_options[i]; i++) {
8423                 if (top_level ||
8424                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8425                         create_trace_option_core_file(tr, trace_options[i], i);
8426         }
8427 }
8428
8429 static ssize_t
8430 rb_simple_read(struct file *filp, char __user *ubuf,
8431                size_t cnt, loff_t *ppos)
8432 {
8433         struct trace_array *tr = filp->private_data;
8434         char buf[64];
8435         int r;
8436
8437         r = tracer_tracing_is_on(tr);
8438         r = sprintf(buf, "%d\n", r);
8439
8440         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8441 }
8442
8443 static ssize_t
8444 rb_simple_write(struct file *filp, const char __user *ubuf,
8445                 size_t cnt, loff_t *ppos)
8446 {
8447         struct trace_array *tr = filp->private_data;
8448         struct trace_buffer *buffer = tr->array_buffer.buffer;
8449         unsigned long val;
8450         int ret;
8451
8452         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8453         if (ret)
8454                 return ret;
8455
8456         if (buffer) {
8457                 mutex_lock(&trace_types_lock);
8458                 if (!!val == tracer_tracing_is_on(tr)) {
8459                         val = 0; /* do nothing */
8460                 } else if (val) {
8461                         tracer_tracing_on(tr);
8462                         if (tr->current_trace->start)
8463                                 tr->current_trace->start(tr);
8464                 } else {
8465                         tracer_tracing_off(tr);
8466                         if (tr->current_trace->stop)
8467                                 tr->current_trace->stop(tr);
8468                 }
8469                 mutex_unlock(&trace_types_lock);
8470         }
8471
8472         (*ppos)++;
8473
8474         return cnt;
8475 }
8476
8477 static const struct file_operations rb_simple_fops = {
8478         .open           = tracing_open_generic_tr,
8479         .read           = rb_simple_read,
8480         .write          = rb_simple_write,
8481         .release        = tracing_release_generic_tr,
8482         .llseek         = default_llseek,
8483 };
8484
8485 static ssize_t
8486 buffer_percent_read(struct file *filp, char __user *ubuf,
8487                     size_t cnt, loff_t *ppos)
8488 {
8489         struct trace_array *tr = filp->private_data;
8490         char buf[64];
8491         int r;
8492
8493         r = tr->buffer_percent;
8494         r = sprintf(buf, "%d\n", r);
8495
8496         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8497 }
8498
8499 static ssize_t
8500 buffer_percent_write(struct file *filp, const char __user *ubuf,
8501                      size_t cnt, loff_t *ppos)
8502 {
8503         struct trace_array *tr = filp->private_data;
8504         unsigned long val;
8505         int ret;
8506
8507         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8508         if (ret)
8509                 return ret;
8510
8511         if (val > 100)
8512                 return -EINVAL;
8513
8514         if (!val)
8515                 val = 1;
8516
8517         tr->buffer_percent = val;
8518
8519         (*ppos)++;
8520
8521         return cnt;
8522 }
8523
8524 static const struct file_operations buffer_percent_fops = {
8525         .open           = tracing_open_generic_tr,
8526         .read           = buffer_percent_read,
8527         .write          = buffer_percent_write,
8528         .release        = tracing_release_generic_tr,
8529         .llseek         = default_llseek,
8530 };
8531
8532 static struct dentry *trace_instance_dir;
8533
8534 static void
8535 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8536
8537 static int
8538 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8539 {
8540         enum ring_buffer_flags rb_flags;
8541
8542         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8543
8544         buf->tr = tr;
8545
8546         buf->buffer = ring_buffer_alloc(size, rb_flags);
8547         if (!buf->buffer)
8548                 return -ENOMEM;
8549
8550         buf->data = alloc_percpu(struct trace_array_cpu);
8551         if (!buf->data) {
8552                 ring_buffer_free(buf->buffer);
8553                 buf->buffer = NULL;
8554                 return -ENOMEM;
8555         }
8556
8557         /* Allocate the first page for all buffers */
8558         set_buffer_entries(&tr->array_buffer,
8559                            ring_buffer_size(tr->array_buffer.buffer, 0));
8560
8561         return 0;
8562 }
8563
8564 static int allocate_trace_buffers(struct trace_array *tr, int size)
8565 {
8566         int ret;
8567
8568         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8569         if (ret)
8570                 return ret;
8571
8572 #ifdef CONFIG_TRACER_MAX_TRACE
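	/*
	 * Unless a boot-time snapshot was requested, the max (snapshot)
	 * buffer starts out at a minimal size and is resized when a
	 * snapshot is first enabled.
	 */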
8573         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8574                                     allocate_snapshot ? size : 1);
8575         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8576                 ring_buffer_free(tr->array_buffer.buffer);
8577                 tr->array_buffer.buffer = NULL;
8578                 free_percpu(tr->array_buffer.data);
8579                 tr->array_buffer.data = NULL;
8580                 return -ENOMEM;
8581         }
8582         tr->allocated_snapshot = allocate_snapshot;
8583
8584         /*
8585          * Only the top level trace array gets its snapshot allocated
8586          * from the kernel command line.
8587          */
8588         allocate_snapshot = false;
8589 #endif
8590
8591         return 0;
8592 }
8593
8594 static void free_trace_buffer(struct array_buffer *buf)
8595 {
8596         if (buf->buffer) {
8597                 ring_buffer_free(buf->buffer);
8598                 buf->buffer = NULL;
8599                 free_percpu(buf->data);
8600                 buf->data = NULL;
8601         }
8602 }
8603
8604 static void free_trace_buffers(struct trace_array *tr)
8605 {
8606         if (!tr)
8607                 return;
8608
8609         free_trace_buffer(&tr->array_buffer);
8610
8611 #ifdef CONFIG_TRACER_MAX_TRACE
8612         free_trace_buffer(&tr->max_buffer);
8613 #endif
8614 }
8615
8616 static void init_trace_flags_index(struct trace_array *tr)
8617 {
8618         int i;
8619
8620         /* Used by the trace options files */
8621         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8622                 tr->trace_flags_index[i] = i;
8623 }
8624
8625 static void __update_tracer_options(struct trace_array *tr)
8626 {
8627         struct tracer *t;
8628
8629         for (t = trace_types; t; t = t->next)
8630                 add_tracer_options(tr, t);
8631 }
8632
8633 static void update_tracer_options(struct trace_array *tr)
8634 {
8635         mutex_lock(&trace_types_lock);
8636         __update_tracer_options(tr);
8637         mutex_unlock(&trace_types_lock);
8638 }
8639
8640 /* Must have trace_types_lock held */
8641 struct trace_array *trace_array_find(const char *instance)
8642 {
8643         struct trace_array *tr, *found = NULL;
8644
8645         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8646                 if (tr->name && strcmp(tr->name, instance) == 0) {
8647                         found = tr;
8648                         break;
8649                 }
8650         }
8651
8652         return found;
8653 }
8654
8655 struct trace_array *trace_array_find_get(const char *instance)
8656 {
8657         struct trace_array *tr;
8658
8659         mutex_lock(&trace_types_lock);
8660         tr = trace_array_find(instance);
8661         if (tr)
8662                 tr->ref++;
8663         mutex_unlock(&trace_types_lock);
8664
8665         return tr;
8666 }
8667
8668 static int trace_array_create_dir(struct trace_array *tr)
8669 {
8670         int ret;
8671
8672         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8673         if (!tr->dir)
8674                 return -EINVAL;
8675
8676         ret = event_trace_add_tracer(tr->dir, tr);
8677         if (ret)
8678                 tracefs_remove(tr->dir);
8679
8680         init_tracer_tracefs(tr, tr->dir);
8681         __update_tracer_options(tr);
8682
8683         return ret;
8684 }
8685
8686 static struct trace_array *trace_array_create(const char *name)
8687 {
8688         struct trace_array *tr;
8689         int ret;
8690
8691         ret = -ENOMEM;
8692         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8693         if (!tr)
8694                 return ERR_PTR(ret);
8695
8696         tr->name = kstrdup(name, GFP_KERNEL);
8697         if (!tr->name)
8698                 goto out_free_tr;
8699
8700         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8701                 goto out_free_tr;
8702
8703         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8704
8705         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8706
8707         raw_spin_lock_init(&tr->start_lock);
8708
8709         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8710
8711         tr->current_trace = &nop_trace;
8712
8713         INIT_LIST_HEAD(&tr->systems);
8714         INIT_LIST_HEAD(&tr->events);
8715         INIT_LIST_HEAD(&tr->hist_vars);
8716         INIT_LIST_HEAD(&tr->err_log);
8717
8718         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8719                 goto out_free_tr;
8720
8721         if (ftrace_allocate_ftrace_ops(tr) < 0)
8722                 goto out_free_tr;
8723
8724         ftrace_init_trace_array(tr);
8725
8726         init_trace_flags_index(tr);
8727
8728         if (trace_instance_dir) {
8729                 ret = trace_array_create_dir(tr);
8730                 if (ret)
8731                         goto out_free_tr;
8732         } else
8733                 __trace_early_add_events(tr);
8734
8735         list_add(&tr->list, &ftrace_trace_arrays);
8736
8737         tr->ref++;
8738
8739         return tr;
8740
8741  out_free_tr:
8742         ftrace_free_ftrace_ops(tr);
8743         free_trace_buffers(tr);
8744         free_cpumask_var(tr->tracing_cpumask);
8745         kfree(tr->name);
8746         kfree(tr);
8747
8748         return ERR_PTR(ret);
8749 }
8750
8751 static int instance_mkdir(const char *name)
8752 {
8753         struct trace_array *tr;
8754         int ret;
8755
8756         mutex_lock(&event_mutex);
8757         mutex_lock(&trace_types_lock);
8758
8759         ret = -EEXIST;
8760         if (trace_array_find(name))
8761                 goto out_unlock;
8762
8763         tr = trace_array_create(name);
8764
8765         ret = PTR_ERR_OR_ZERO(tr);
8766
8767 out_unlock:
8768         mutex_unlock(&trace_types_lock);
8769         mutex_unlock(&event_mutex);
8770         return ret;
8771 }
8772
8773 /**
8774  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8775  * @name: The name of the trace array to be looked up/created.
8776  *
8777  * Returns a pointer to the trace array with the given name, or
8778  * NULL if it cannot be created.
8779  *
8780  * NOTE: This function increments the reference counter associated with the
8781  * trace array returned. This makes sure it cannot be freed while in use.
8782  * Use trace_array_put() once the trace array is no longer needed.
8783  * If the trace_array is to be freed, trace_array_destroy() needs to
8784  * be called after the trace_array_put(), or simply let user space delete
8785  * it from the tracefs instances directory. But until the
8786  * trace_array_put() is called, user space cannot delete it.
8787  *
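 * Example (illustrative module code; "foo" is an arbitrary instance name):
 *
 *	struct trace_array *tr = trace_array_get_by_name("foo");
 *	if (tr) {
 *		...
 *		trace_array_put(tr);
 *	}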
8788  */
8789 struct trace_array *trace_array_get_by_name(const char *name)
8790 {
8791         struct trace_array *tr;
8792
8793         mutex_lock(&event_mutex);
8794         mutex_lock(&trace_types_lock);
8795
8796         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8797                 if (tr->name && strcmp(tr->name, name) == 0)
8798                         goto out_unlock;
8799         }
8800
8801         tr = trace_array_create(name);
8802
8803         if (IS_ERR(tr))
8804                 tr = NULL;
8805 out_unlock:
8806         if (tr)
8807                 tr->ref++;
8808
8809         mutex_unlock(&trace_types_lock);
8810         mutex_unlock(&event_mutex);
8811         return tr;
8812 }
8813 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8814
8815 static int __remove_instance(struct trace_array *tr)
8816 {
8817         int i;
8818
8819         /* Reference counter for a newly created trace array = 1. */
8820         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8821                 return -EBUSY;
8822
8823         list_del(&tr->list);
8824
8825         /* Disable all the flags that were enabled coming in */
8826         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8827                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8828                         set_tracer_flag(tr, 1 << i, 0);
8829         }
8830
8831         tracing_set_nop(tr);
8832         clear_ftrace_function_probes(tr);
8833         event_trace_del_tracer(tr);
8834         ftrace_clear_pids(tr);
8835         ftrace_destroy_function_files(tr);
8836         tracefs_remove(tr->dir);
8837         free_trace_buffers(tr);
8838
8839         for (i = 0; i < tr->nr_topts; i++) {
8840                 kfree(tr->topts[i].topts);
8841         }
8842         kfree(tr->topts);
8843
8844         free_cpumask_var(tr->tracing_cpumask);
8845         kfree(tr->name);
8846         kfree(tr);
8847
8848         return 0;
8849 }
8850
8851 int trace_array_destroy(struct trace_array *this_tr)
8852 {
8853         struct trace_array *tr;
8854         int ret;
8855
8856         if (!this_tr)
8857                 return -EINVAL;
8858
8859         mutex_lock(&event_mutex);
8860         mutex_lock(&trace_types_lock);
8861
8862         ret = -ENODEV;
8863
8864         /* Make sure the trace array exists before destroying it. */
8865         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8866                 if (tr == this_tr) {
8867                         ret = __remove_instance(tr);
8868                         break;
8869                 }
8870         }
8871
8872         mutex_unlock(&trace_types_lock);
8873         mutex_unlock(&event_mutex);
8874
8875         return ret;
8876 }
8877 EXPORT_SYMBOL_GPL(trace_array_destroy);
8878
8879 static int instance_rmdir(const char *name)
8880 {
8881         struct trace_array *tr;
8882         int ret;
8883
8884         mutex_lock(&event_mutex);
8885         mutex_lock(&trace_types_lock);
8886
8887         ret = -ENODEV;
8888         tr = trace_array_find(name);
8889         if (tr)
8890                 ret = __remove_instance(tr);
8891
8892         mutex_unlock(&trace_types_lock);
8893         mutex_unlock(&event_mutex);
8894
8895         return ret;
8896 }
8897
8898 static __init void create_trace_instances(struct dentry *d_tracer)
8899 {
8900         struct trace_array *tr;
8901
8902         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8903                                                          instance_mkdir,
8904                                                          instance_rmdir);
8905         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8906                 return;
8907
8908         mutex_lock(&event_mutex);
8909         mutex_lock(&trace_types_lock);
8910
8911         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8912                 if (!tr->name)
8913                         continue;
8914                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8915                              "Failed to create instance directory\n"))
8916                         break;
8917         }
8918
8919         mutex_unlock(&trace_types_lock);
8920         mutex_unlock(&event_mutex);
8921 }
8922
8923 static void
8924 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8925 {
8926         struct trace_event_file *file;
8927         int cpu;
8928
8929         trace_create_file("available_tracers", 0444, d_tracer,
8930                         tr, &show_traces_fops);
8931
8932         trace_create_file("current_tracer", 0644, d_tracer,
8933                         tr, &set_tracer_fops);
8934
8935         trace_create_file("tracing_cpumask", 0644, d_tracer,
8936                           tr, &tracing_cpumask_fops);
8937
8938         trace_create_file("trace_options", 0644, d_tracer,
8939                           tr, &tracing_iter_fops);
8940
8941         trace_create_file("trace", 0644, d_tracer,
8942                           tr, &tracing_fops);
8943
8944         trace_create_file("trace_pipe", 0444, d_tracer,
8945                           tr, &tracing_pipe_fops);
8946
8947         trace_create_file("buffer_size_kb", 0644, d_tracer,
8948                           tr, &tracing_entries_fops);
8949
8950         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8951                           tr, &tracing_total_entries_fops);
8952
8953         trace_create_file("free_buffer", 0200, d_tracer,
8954                           tr, &tracing_free_buffer_fops);
8955
8956         trace_create_file("trace_marker", 0220, d_tracer,
8957                           tr, &tracing_mark_fops);
8958
8959         file = __find_event_file(tr, "ftrace", "print");
8960         if (file && file->dir)
8961                 trace_create_file("trigger", 0644, file->dir, file,
8962                                   &event_trigger_fops);
8963         tr->trace_marker_file = file;
8964
8965         trace_create_file("trace_marker_raw", 0220, d_tracer,
8966                           tr, &tracing_mark_raw_fops);
8967
8968         trace_create_file("trace_clock", 0644, d_tracer, tr,
8969                           &trace_clock_fops);
8970
8971         trace_create_file("tracing_on", 0644, d_tracer,
8972                           tr, &rb_simple_fops);
8973
8974         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8975                           &trace_time_stamp_mode_fops);
8976
8977         tr->buffer_percent = 50;
8978
8979         trace_create_file("buffer_percent", 0444, d_tracer,
8980                         tr, &buffer_percent_fops);
8981
8982         create_trace_options_dir(tr);
8983
8984 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8985         trace_create_maxlat_file(tr, d_tracer);
8986 #endif
8987
8988         if (ftrace_create_function_files(tr, d_tracer))
8989                 MEM_FAIL(1, "Could not allocate function filter files");
8990
8991 #ifdef CONFIG_TRACER_SNAPSHOT
8992         trace_create_file("snapshot", 0644, d_tracer,
8993                           tr, &snapshot_fops);
8994 #endif
8995
8996         trace_create_file("error_log", 0644, d_tracer,
8997                           tr, &tracing_err_log_fops);
8998
8999         for_each_tracing_cpu(cpu)
9000                 tracing_init_tracefs_percpu(tr, cpu);
9001
9002         ftrace_init_tracefs(tr, d_tracer);
9003 }
9004
9005 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9006 {
9007         struct vfsmount *mnt;
9008         struct file_system_type *type;
9009
9010         /*
9011          * To maintain backward compatibility for tools that mount
9012          * debugfs to get to the tracing facility, tracefs is automatically
9013          * mounted to the debugfs/tracing directory.
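	 * That mount point is typically /sys/kernel/debug/tracing.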
9014          */
9015         type = get_fs_type("tracefs");
9016         if (!type)
9017                 return NULL;
9018         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9019         put_filesystem(type);
9020         if (IS_ERR(mnt))
9021                 return NULL;
9022         mntget(mnt);
9023
9024         return mnt;
9025 }
9026
9027 /**
9028  * tracing_init_dentry - initialize top level trace array
9029  *
9030  * This is called when creating files or directories in the tracing
9031  * directory. It is called via fs_initcall() by any of the boot up code
9032  * and makes sure the top level tracing directory exists, returning 0 on success.
9033  */
9034 int tracing_init_dentry(void)
9035 {
9036         struct trace_array *tr = &global_trace;
9037
9038         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9039                 pr_warn("Tracing disabled due to lockdown\n");
9040                 return -EPERM;
9041         }
9042
9043         /* The top level trace array uses NULL as parent */
9044         if (tr->dir)
9045                 return 0;
9046
9047         if (WARN_ON(!tracefs_initialized()))
9048                 return -ENODEV;
9049
9050         /*
9051          * As there may still be users that expect the tracing
9052          * files to exist in debugfs/tracing, we must automount
9053          * the tracefs file system there, so older tools still
9054          * work with the newer kernel.
9055          */
9056         tr->dir = debugfs_create_automount("tracing", NULL,
9057                                            trace_automount, NULL);
9058
9059         return 0;
9060 }
9061
9062 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9063 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9064
9065 static void __init trace_eval_init(void)
9066 {
9067         int len;
9068
9069         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9070         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9071 }
9072
9073 #ifdef CONFIG_MODULES
9074 static void trace_module_add_evals(struct module *mod)
9075 {
9076         if (!mod->num_trace_evals)
9077                 return;
9078
9079         /*
9080          * Modules with bad taint do not have events created; do
9081          * not bother with their enums either.
9082          */
9083         if (trace_module_has_bad_taint(mod))
9084                 return;
9085
9086         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9087 }
9088
9089 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9090 static void trace_module_remove_evals(struct module *mod)
9091 {
9092         union trace_eval_map_item *map;
9093         union trace_eval_map_item **last = &trace_eval_maps;
9094
9095         if (!mod->num_trace_evals)
9096                 return;
9097
9098         mutex_lock(&trace_eval_mutex);
9099
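	/*
	 * Each saved map is an array bracketed by a "head" item (holding the
	 * module and length) and a "tail" item whose ->next links to the
	 * following array; walk those links looking for this module's array.
	 */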
9100         map = trace_eval_maps;
9101
9102         while (map) {
9103                 if (map->head.mod == mod)
9104                         break;
9105                 map = trace_eval_jmp_to_tail(map);
9106                 last = &map->tail.next;
9107                 map = map->tail.next;
9108         }
9109         if (!map)
9110                 goto out;
9111
9112         *last = trace_eval_jmp_to_tail(map)->tail.next;
9113         kfree(map);
9114  out:
9115         mutex_unlock(&trace_eval_mutex);
9116 }
9117 #else
9118 static inline void trace_module_remove_evals(struct module *mod) { }
9119 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9120
9121 static int trace_module_notify(struct notifier_block *self,
9122                                unsigned long val, void *data)
9123 {
9124         struct module *mod = data;
9125
9126         switch (val) {
9127         case MODULE_STATE_COMING:
9128                 trace_module_add_evals(mod);
9129                 break;
9130         case MODULE_STATE_GOING:
9131                 trace_module_remove_evals(mod);
9132                 break;
9133         }
9134
9135         return NOTIFY_OK;
9136 }
9137
9138 static struct notifier_block trace_module_nb = {
9139         .notifier_call = trace_module_notify,
9140         .priority = 0,
9141 };
9142 #endif /* CONFIG_MODULES */
9143
9144 static __init int tracer_init_tracefs(void)
9145 {
9146         int ret;
9147
9148         trace_access_lock_init();
9149
9150         ret = tracing_init_dentry();
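	/* Tracing is unavailable (lockdown or no tracefs); don't fail the initcall */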
9151         if (ret)
9152                 return 0;
9153
9154         event_trace_init();
9155
9156         init_tracer_tracefs(&global_trace, NULL);
9157         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9158
9159         trace_create_file("tracing_thresh", 0644, NULL,
9160                         &global_trace, &tracing_thresh_fops);
9161
9162         trace_create_file("README", 0444, NULL,
9163                         NULL, &tracing_readme_fops);
9164
9165         trace_create_file("saved_cmdlines", 0444, NULL,
9166                         NULL, &tracing_saved_cmdlines_fops);
9167
9168         trace_create_file("saved_cmdlines_size", 0644, NULL,
9169                           NULL, &tracing_saved_cmdlines_size_fops);
9170
9171         trace_create_file("saved_tgids", 0444, NULL,
9172                         NULL, &tracing_saved_tgids_fops);
9173
9174         trace_eval_init();
9175
9176         trace_create_eval_file(NULL);
9177
9178 #ifdef CONFIG_MODULES
9179         register_module_notifier(&trace_module_nb);
9180 #endif
9181
9182 #ifdef CONFIG_DYNAMIC_FTRACE
9183         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9184                         NULL, &tracing_dyn_info_fops);
9185 #endif
9186
9187         create_trace_instances(NULL);
9188
9189         update_tracer_options(&global_trace);
9190
9191         return 0;
9192 }
9193
9194 static int trace_panic_handler(struct notifier_block *this,
9195                                unsigned long event, void *unused)
9196 {
9197         if (ftrace_dump_on_oops)
9198                 ftrace_dump(ftrace_dump_on_oops);
9199         return NOTIFY_OK;
9200 }
9201
9202 static struct notifier_block trace_panic_notifier = {
9203         .notifier_call  = trace_panic_handler,
9204         .next           = NULL,
9205         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9206 };
9207
9208 static int trace_die_handler(struct notifier_block *self,
9209                              unsigned long val,
9210                              void *data)
9211 {
9212         switch (val) {
9213         case DIE_OOPS:
9214                 if (ftrace_dump_on_oops)
9215                         ftrace_dump(ftrace_dump_on_oops);
9216                 break;
9217         default:
9218                 break;
9219         }
9220         return NOTIFY_OK;
9221 }
9222
9223 static struct notifier_block trace_die_notifier = {
9224         .notifier_call = trace_die_handler,
9225         .priority = 200
9226 };
9227
9228 /*
9229  * printk is set to a max of 1024; we really don't need it that big.
9230  * Nothing should be printing 1000 characters anyway.
9231  */
9232 #define TRACE_MAX_PRINT         1000
9233
9234 /*
9235  * Define here KERN_TRACE so that we have one place to modify
9236  * it if we decide to change what log level the ftrace dump
9237  * should be at.
9238  */
9239 #define KERN_TRACE              KERN_EMERG
9240
9241 void
9242 trace_printk_seq(struct trace_seq *s)
9243 {
9244         /* Probably should print a warning here. */
9245         if (s->seq.len >= TRACE_MAX_PRINT)
9246                 s->seq.len = TRACE_MAX_PRINT;
9247
9248         /*
9249          * More paranoid code. Although the buffer size is set to
9250          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9251          * an extra layer of protection.
9252          */
9253         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9254                 s->seq.len = s->seq.size - 1;
9255
9256         /* Should be zero-terminated, but we are paranoid. */
9257         s->buffer[s->seq.len] = 0;
9258
9259         printk(KERN_TRACE "%s", s->buffer);
9260
9261         trace_seq_init(s);
9262 }
9263
9264 void trace_init_global_iter(struct trace_iterator *iter)
9265 {
9266         iter->tr = &global_trace;
9267         iter->trace = iter->tr->current_trace;
9268         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9269         iter->array_buffer = &global_trace.array_buffer;
9270
9271         if (iter->trace && iter->trace->open)
9272                 iter->trace->open(iter);
9273
9274         /* Annotate start of buffers if we had overruns */
9275         if (ring_buffer_overruns(iter->array_buffer->buffer))
9276                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9277
9278         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9279         if (trace_clocks[iter->tr->clock_id].in_ns)
9280                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9281 }
9282
9283 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9284 {
9285         /* use static because iter can be a bit big for the stack */
9286         static struct trace_iterator iter;
9287         static atomic_t dump_running;
9288         struct trace_array *tr = &global_trace;
9289         unsigned int old_userobj;
9290         unsigned long flags;
9291         int cnt = 0, cpu;
9292
9293         /* Only allow one dump user at a time. */
9294         if (atomic_inc_return(&dump_running) != 1) {
9295                 atomic_dec(&dump_running);
9296                 return;
9297         }
9298
9299         /*
9300          * Always turn off tracing when we dump.
9301          * We don't need to show trace output of what happens
9302          * between multiple crashes.
9303          *
9304          * If the user does a sysrq-z, then they can re-enable
9305          * tracing with echo 1 > tracing_on.
9306          */
9307         tracing_off();
9308
9309         local_irq_save(flags);
9310         printk_nmi_direct_enter();
9311
9312         /* Simulate the iterator */
9313         trace_init_global_iter(&iter);
9314         /* Cannot use kmalloc for iter.temp (this may run in NMI/oops context) */
9315         iter.temp = static_temp_buf;
9316         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9317
9318         for_each_tracing_cpu(cpu) {
9319                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9320         }
9321
9322         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9323
9324         /* don't look at user memory in panic mode */
9325         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9326
9327         switch (oops_dump_mode) {
9328         case DUMP_ALL:
9329                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9330                 break;
9331         case DUMP_ORIG:
9332                 iter.cpu_file = raw_smp_processor_id();
9333                 break;
9334         case DUMP_NONE:
9335                 goto out_enable;
9336         default:
9337                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9338                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9339         }
9340
9341         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9342
9343         /* Did function tracer already get disabled? */
9344         if (ftrace_is_dead()) {
9345                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9346                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9347         }
9348
9349         /*
9350          * We need to stop all tracing on all CPUs to read
9351          * the next buffer. This is a bit expensive, but is
9352          * not done often. We read all that we can,
9353          * and then release the locks again.
9354          */
9355
9356         while (!trace_empty(&iter)) {
9357
9358                 if (!cnt)
9359                         printk(KERN_TRACE "---------------------------------\n");
9360
9361                 cnt++;
9362
9363                 trace_iterator_reset(&iter);
9364                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9365
9366                 if (trace_find_next_entry_inc(&iter) != NULL) {
9367                         int ret;
9368
9369                         ret = print_trace_line(&iter);
9370                         if (ret != TRACE_TYPE_NO_CONSUME)
9371                                 trace_consume(&iter);
9372                 }
9373                 touch_nmi_watchdog();
9374
9375                 trace_printk_seq(&iter.seq);
9376         }
9377
9378         if (!cnt)
9379                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9380         else
9381                 printk(KERN_TRACE "---------------------------------\n");
9382
9383  out_enable:
9384         tr->trace_flags |= old_userobj;
9385
9386         for_each_tracing_cpu(cpu) {
9387                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9388         }
9389         atomic_dec(&dump_running);
9390         printk_nmi_direct_exit();
9391         local_irq_restore(flags);
9392 }
9393 EXPORT_SYMBOL_GPL(ftrace_dump);
9394
9395 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9396 {
9397         char **argv;
9398         int argc, ret;
9399
9400         argc = 0;
9401         ret = 0;
9402         argv = argv_split(GFP_KERNEL, buf, &argc);
9403         if (!argv)
9404                 return -ENOMEM;
9405
9406         if (argc)
9407                 ret = createfn(argc, argv);
9408
9409         argv_free(argv);
9410
9411         return ret;
9412 }
9413
9414 #define WRITE_BUFSIZE  4096
9415
9416 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9417                                 size_t count, loff_t *ppos,
9418                                 int (*createfn)(int, char **))
9419 {
9420         char *kbuf, *buf, *tmp;
9421         int ret = 0;
9422         size_t done = 0;
9423         size_t size;
9424
9425         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9426         if (!kbuf)
9427                 return -ENOMEM;
9428
9429         while (done < count) {
9430                 size = count - done;
9431
9432                 if (size >= WRITE_BUFSIZE)
9433                         size = WRITE_BUFSIZE - 1;
9434
9435                 if (copy_from_user(kbuf, buffer + done, size)) {
9436                         ret = -EFAULT;
9437                         goto out;
9438                 }
9439                 kbuf[size] = '\0';
9440                 buf = kbuf;
9441                 do {
9442                         tmp = strchr(buf, '\n');
9443                         if (tmp) {
9444                                 *tmp = '\0';
9445                                 size = tmp - buf + 1;
9446                         } else {
9447                                 size = strlen(buf);
9448                                 if (done + size < count) {
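					/* Partial line: re-read it in the next chunk, unless it alone fills the buffer */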
9449                                         if (buf != kbuf)
9450                                                 break;
9451                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9452                                         pr_warn("Line length is too long: Should be less than %d\n",
9453                                                 WRITE_BUFSIZE - 2);
9454                                         ret = -EINVAL;
9455                                         goto out;
9456                                 }
9457                         }
9458                         done += size;
9459
9460                         /* Remove comments */
9461                         tmp = strchr(buf, '#');
9462
9463                         if (tmp)
9464                                 *tmp = '\0';
9465
9466                         ret = trace_run_command(buf, createfn);
9467                         if (ret)
9468                                 goto out;
9469                         buf += size;
9470
9471                 } while (done < count);
9472         }
9473         ret = done;
9474
9475 out:
9476         kfree(kbuf);
9477
9478         return ret;
9479 }
9480
9481 __init static int tracer_alloc_buffers(void)
9482 {
9483         int ring_buf_size;
9484         int ret = -ENOMEM;
9485
9487         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9488                 pr_warn("Tracing disabled due to lockdown\n");
9489                 return -EPERM;
9490         }
9491
9492         /*
9493          * Make sure we don't accidentally add more trace options
9494          * than we have bits for.
9495          */
9496         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9497
9498         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9499                 goto out;
9500
9501         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9502                 goto out_free_buffer_mask;
9503
9504         /* Only allocate trace_printk buffers if a trace_printk exists */
9505         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9506                 /* Must be called before global_trace.buffer is allocated */
9507                 trace_printk_init_buffers();
9508
9509         /* To save memory, keep the ring buffer size to its minimum */
9510         if (ring_buffer_expanded)
9511                 ring_buf_size = trace_buf_size;
9512         else
9513                 ring_buf_size = 1;
9514
9515         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9516         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9517
9518         raw_spin_lock_init(&global_trace.start_lock);
9519
9520         /*
9521          * The prepare callback allocates some memory for the ring buffer. We
9522          * don't free the buffer if the CPU goes down. If we were to free
9523          * the buffer, then the user would lose any trace that was in the
9524          * buffer. The memory will be removed once the "instance" is removed.
9525          */
9526         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9527                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9528                                       NULL);
9529         if (ret < 0)
9530                 goto out_free_cpumask;
9531         /* Used for event triggers */
9532         ret = -ENOMEM;
9533         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9534         if (!temp_buffer)
9535                 goto out_rm_hp_state;
9536
9537         if (trace_create_savedcmd() < 0)
9538                 goto out_free_temp_buffer;
9539
9540         /* TODO: make the number of buffers hot pluggable with CPUs */
9541         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9542                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9543                 goto out_free_savedcmd;
9544         }
9545
9546         if (global_trace.buffer_disabled)
9547                 tracing_off();
9548
9549         if (trace_boot_clock) {
9550                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9551                 if (ret < 0)
9552                         pr_warn("Trace clock %s not defined, going back to default\n",
9553                                 trace_boot_clock);
9554         }
9555
9556         /*
9557          * register_tracer() might reference current_trace, so it
9558          * needs to be set before we register anything. This is
9559          * just a bootstrap of current_trace anyway.
9560          */
9561         global_trace.current_trace = &nop_trace;
9562
9563         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9564
9565         ftrace_init_global_array_ops(&global_trace);
9566
9567         init_trace_flags_index(&global_trace);
9568
9569         register_tracer(&nop_trace);
9570
9571         /* Function tracing may start here (via kernel command line) */
9572         init_function_trace();
9573
9574         /* All seems OK, enable tracing */
9575         tracing_disabled = 0;
9576
9577         atomic_notifier_chain_register(&panic_notifier_list,
9578                                        &trace_panic_notifier);
9579
9580         register_die_notifier(&trace_die_notifier);
9581
9582         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9583
9584         INIT_LIST_HEAD(&global_trace.systems);
9585         INIT_LIST_HEAD(&global_trace.events);
9586         INIT_LIST_HEAD(&global_trace.hist_vars);
9587         INIT_LIST_HEAD(&global_trace.err_log);
9588         list_add(&global_trace.list, &ftrace_trace_arrays);
9589
9590         apply_trace_boot_options();
9591
9592         register_snapshot_cmd();
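             /*
              * (apply_trace_boot_options() handles any trace_options= given on
              * the kernel command line; register_snapshot_cmd() is what makes
              * function triggers such as
              *   echo 'some_function:snapshot' > set_ftrace_filter
              * work, assuming CONFIG_TRACER_SNAPSHOT is enabled.)
              */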
9593
9594         return 0;
9595
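             /*
              * Error unwind: each label below releases what was set up before
              * the corresponding failure point, in reverse order of setup.
              */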
9596 out_free_savedcmd:
9597         free_saved_cmdlines_buffer(savedcmd);
9598 out_free_temp_buffer:
9599         ring_buffer_free(temp_buffer);
9600 out_rm_hp_state:
9601         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9602 out_free_cpumask:
9603         free_cpumask_var(global_trace.tracing_cpumask);
9604 out_free_buffer_mask:
9605         free_cpumask_var(tracing_buffer_mask);
9606 out:
9607         return ret;
9608 }
9609
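/*
 * Both early_trace_init() and trace_init() are called from start_kernel():
 * early_trace_init() runs first so that the ring buffers (and, when the
 * "tp_printk" boot parameter is given, the tracepoint print iterator) exist
 * before trace events are initialized by trace_init().
 */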
9610 void __init early_trace_init(void)
9611 {
9612         if (tracepoint_printk) {
9613                 tracepoint_print_iter =
9614                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9615                 if (MEM_FAIL(!tracepoint_print_iter,
9616                              "Failed to allocate trace iterator\n"))
9617                         tracepoint_printk = 0;
9618                 else
9619                         static_key_enable(&tracepoint_printk_key.key);
9620         }
9621         tracer_alloc_buffers();
9622 }
9623
9624 void __init trace_init(void)
9625 {
9626         trace_event_init();
9627 }
9628
9629 __init static int clear_boot_tracer(void)
9630 {
9631         /*
9632          * The default boot-up tracer name points into an init section.
9633          * This function runs as a late initcall. If the boot tracer was
9634          * never registered by then, clear the pointer to prevent a
9635          * later registration from accessing memory that is about
9636          * to be freed.
9637          */
9638         if (!default_bootup_tracer)
9639                 return 0;
9640
9641         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9642                default_bootup_tracer);
9643         default_bootup_tracer = NULL;
9644
9645         return 0;
9646 }
9647
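/*
 * tracer_init_tracefs() creates the tracefs files, hence the fs_initcall;
 * clear_boot_tracer() uses late_initcall_sync() so it runs only after every
 * built-in tracer has had its chance to register.
 */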
9648 fs_initcall(tracer_init_tracefs);
9649 late_initcall_sync(clear_boot_tracer);
9650
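/*
 * (The "local" clock is the fastest but may not be monotonic across CPUs when
 * sched_clock is unstable; "global" is cross-CPU monotonic at some extra cost,
 * which is why it becomes the default below. This can still be overridden with
 * trace_clock=<clock> on the kernel command line.)
 */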
9651 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9652 __init static int tracing_set_default_clock(void)
9653 {
9654         /* sched_clock_stable() is determined in late_initcall */
9655         if (!trace_boot_clock && !sched_clock_stable()) {
9656                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9657                         pr_warn("Can not set tracing clock due to lockdown\n");
9658                         return -EPERM;
9659                 }
9660
9661                 printk(KERN_WARNING
9662                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9663                        "If you want to keep using the local clock, then add:\n"
9664                        "  \"trace_clock=local\"\n"
9665                        "on the kernel command line\n");
9666                 tracing_set_clock(&global_trace, "global");
9667         }
9668
9669         return 0;
9670 }
9671 late_initcall_sync(tracing_set_default_clock);
9672 #endif