Merge tag 'sound-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai...
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputing it to a
115  * serial console.
116  *
117  * It is default off, but you can enable it with either specifying
118  * "ftrace_dump_on_oops" in the kernel command line, or setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 static void
255 trace_process_export(struct trace_export *export,
256                struct ring_buffer_event *event, int flag)
257 {
258         struct trace_entry *entry;
259         unsigned int size = 0;
260
261         if (export->flags & flag) {
262                 entry = ring_buffer_event_data(event);
263                 size = ring_buffer_event_length(event);
264                 export->write(export, entry, size);
265         }
266 }
267
268 static DEFINE_MUTEX(ftrace_export_lock);
269
270 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
271
272 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
273 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
275
276 static inline void ftrace_exports_enable(struct trace_export *export)
277 {
278         if (export->flags & TRACE_EXPORT_FUNCTION)
279                 static_branch_inc(&trace_function_exports_enabled);
280
281         if (export->flags & TRACE_EXPORT_EVENT)
282                 static_branch_inc(&trace_event_exports_enabled);
283
284         if (export->flags & TRACE_EXPORT_MARKER)
285                 static_branch_inc(&trace_marker_exports_enabled);
286 }
287
288 static inline void ftrace_exports_disable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_dec(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_dec(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_dec(&trace_marker_exports_enabled);
298 }
299
300 static void ftrace_exports(struct ring_buffer_event *event, int flag)
301 {
302         struct trace_export *export;
303
304         preempt_disable_notrace();
305
306         export = rcu_dereference_raw_check(ftrace_exports_list);
307         while (export) {
308                 trace_process_export(export, event, flag);
309                 export = rcu_dereference_raw_check(export->next);
310         }
311
312         preempt_enable_notrace();
313 }
314
315 static inline void
316 add_trace_export(struct trace_export **list, struct trace_export *export)
317 {
318         rcu_assign_pointer(export->next, *list);
319         /*
320          * We are entering export into the list but another
321          * CPU might be walking that list. We need to make sure
322          * the export->next pointer is valid before another CPU sees
323          * the export pointer included into the list.
324          */
325         rcu_assign_pointer(*list, export);
326 }
327
328 static inline int
329 rm_trace_export(struct trace_export **list, struct trace_export *export)
330 {
331         struct trace_export **p;
332
333         for (p = list; *p != NULL; p = &(*p)->next)
334                 if (*p == export)
335                         break;
336
337         if (*p != export)
338                 return -1;
339
340         rcu_assign_pointer(*p, (*p)->next);
341
342         return 0;
343 }
344
345 static inline void
346 add_ftrace_export(struct trace_export **list, struct trace_export *export)
347 {
348         ftrace_exports_enable(export);
349
350         add_trace_export(list, export);
351 }
352
353 static inline int
354 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
355 {
356         int ret;
357
358         ret = rm_trace_export(list, export);
359         ftrace_exports_disable(export);
360
361         return ret;
362 }
363
364 int register_ftrace_export(struct trace_export *export)
365 {
366         if (WARN_ON_ONCE(!export->write))
367                 return -1;
368
369         mutex_lock(&ftrace_export_lock);
370
371         add_ftrace_export(&ftrace_exports_list, export);
372
373         mutex_unlock(&ftrace_export_lock);
374
375         return 0;
376 }
377 EXPORT_SYMBOL_GPL(register_ftrace_export);
378
379 int unregister_ftrace_export(struct trace_export *export)
380 {
381         int ret;
382
383         mutex_lock(&ftrace_export_lock);
384
385         ret = rm_ftrace_export(&ftrace_exports_list, export);
386
387         mutex_unlock(&ftrace_export_lock);
388
389         return ret;
390 }
391 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
392
393 /* trace_flags holds trace_options default values */
394 #define TRACE_DEFAULT_FLAGS                                             \
395         (FUNCTION_DEFAULT_FLAGS |                                       \
396          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
397          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
398          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
399          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
400
401 /* trace_options that are only supported by global_trace */
402 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
403                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
404
405 /* trace_flags that are default zero for instances */
406 #define ZEROED_TRACE_FLAGS \
407         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
408
409 /*
410  * The global_trace is the descriptor that holds the top-level tracing
411  * buffers for the live tracing.
412  */
413 static struct trace_array global_trace = {
414         .trace_flags = TRACE_DEFAULT_FLAGS,
415 };
416
417 LIST_HEAD(ftrace_trace_arrays);
418
419 int trace_array_get(struct trace_array *this_tr)
420 {
421         struct trace_array *tr;
422         int ret = -ENODEV;
423
424         mutex_lock(&trace_types_lock);
425         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
426                 if (tr == this_tr) {
427                         tr->ref++;
428                         ret = 0;
429                         break;
430                 }
431         }
432         mutex_unlock(&trace_types_lock);
433
434         return ret;
435 }
436
437 static void __trace_array_put(struct trace_array *this_tr)
438 {
439         WARN_ON(!this_tr->ref);
440         this_tr->ref--;
441 }
442
443 /**
444  * trace_array_put - Decrement the reference counter for this trace array.
445  *
446  * NOTE: Use this when we no longer need the trace array returned by
447  * trace_array_get_by_name(). This ensures the trace array can be later
448  * destroyed.
449  *
450  */
451 void trace_array_put(struct trace_array *this_tr)
452 {
453         if (!this_tr)
454                 return;
455
456         mutex_lock(&trace_types_lock);
457         __trace_array_put(this_tr);
458         mutex_unlock(&trace_types_lock);
459 }
460 EXPORT_SYMBOL_GPL(trace_array_put);
461
462 int tracing_check_open_get_tr(struct trace_array *tr)
463 {
464         int ret;
465
466         ret = security_locked_down(LOCKDOWN_TRACEFS);
467         if (ret)
468                 return ret;
469
470         if (tracing_disabled)
471                 return -ENODEV;
472
473         if (tr && trace_array_get(tr) < 0)
474                 return -ENODEV;
475
476         return 0;
477 }
478
479 int call_filter_check_discard(struct trace_event_call *call, void *rec,
480                               struct trace_buffer *buffer,
481                               struct ring_buffer_event *event)
482 {
483         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
484             !filter_match_preds(call->filter, rec)) {
485                 __trace_event_discard_commit(buffer, event);
486                 return 1;
487         }
488
489         return 0;
490 }
491
492 void trace_free_pid_list(struct trace_pid_list *pid_list)
493 {
494         vfree(pid_list->pids);
495         kfree(pid_list);
496 }
497
498 /**
499  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
500  * @filtered_pids: The list of pids to check
501  * @search_pid: The PID to find in @filtered_pids
502  *
503  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
504  */
505 bool
506 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
507 {
508         /*
509          * If pid_max changed after filtered_pids was created, we
510          * by default ignore all pids greater than the previous pid_max.
511          */
512         if (search_pid >= filtered_pids->pid_max)
513                 return false;
514
515         return test_bit(search_pid, filtered_pids->pids);
516 }
517
518 /**
519  * trace_ignore_this_task - should a task be ignored for tracing
520  * @filtered_pids: The list of pids to check
521  * @task: The task that should be ignored if not filtered
522  *
523  * Checks if @task should be traced or not from @filtered_pids.
524  * Returns true if @task should *NOT* be traced.
525  * Returns false if @task should be traced.
526  */
527 bool
528 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
529                        struct trace_pid_list *filtered_no_pids,
530                        struct task_struct *task)
531 {
532         /*
533          * If filterd_no_pids is not empty, and the task's pid is listed
534          * in filtered_no_pids, then return true.
535          * Otherwise, if filtered_pids is empty, that means we can
536          * trace all tasks. If it has content, then only trace pids
537          * within filtered_pids.
538          */
539
540         return (filtered_pids &&
541                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
542                 (filtered_no_pids &&
543                  trace_find_filtered_pid(filtered_no_pids, task->pid));
544 }
545
546 /**
547  * trace_filter_add_remove_task - Add or remove a task from a pid_list
548  * @pid_list: The list to modify
549  * @self: The current task for fork or NULL for exit
550  * @task: The task to add or remove
551  *
552  * If adding a task, if @self is defined, the task is only added if @self
553  * is also included in @pid_list. This happens on fork and tasks should
554  * only be added when the parent is listed. If @self is NULL, then the
555  * @task pid will be removed from the list, which would happen on exit
556  * of a task.
557  */
558 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
559                                   struct task_struct *self,
560                                   struct task_struct *task)
561 {
562         if (!pid_list)
563                 return;
564
565         /* For forks, we only add if the forking task is listed */
566         if (self) {
567                 if (!trace_find_filtered_pid(pid_list, self->pid))
568                         return;
569         }
570
571         /* Sorry, but we don't support pid_max changing after setting */
572         if (task->pid >= pid_list->pid_max)
573                 return;
574
575         /* "self" is set for forks, and NULL for exits */
576         if (self)
577                 set_bit(task->pid, pid_list->pids);
578         else
579                 clear_bit(task->pid, pid_list->pids);
580 }
581
582 /**
583  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
584  * @pid_list: The pid list to show
585  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
586  * @pos: The position of the file
587  *
588  * This is used by the seq_file "next" operation to iterate the pids
589  * listed in a trace_pid_list structure.
590  *
591  * Returns the pid+1 as we want to display pid of zero, but NULL would
592  * stop the iteration.
593  */
594 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
595 {
596         unsigned long pid = (unsigned long)v;
597
598         (*pos)++;
599
600         /* pid already is +1 of the actual prevous bit */
601         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
602
603         /* Return pid + 1 to allow zero to be represented */
604         if (pid < pid_list->pid_max)
605                 return (void *)(pid + 1);
606
607         return NULL;
608 }
609
610 /**
611  * trace_pid_start - Used for seq_file to start reading pid lists
612  * @pid_list: The pid list to show
613  * @pos: The position of the file
614  *
615  * This is used by seq_file "start" operation to start the iteration
616  * of listing pids.
617  *
618  * Returns the pid+1 as we want to display pid of zero, but NULL would
619  * stop the iteration.
620  */
621 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
622 {
623         unsigned long pid;
624         loff_t l = 0;
625
626         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
627         if (pid >= pid_list->pid_max)
628                 return NULL;
629
630         /* Return pid + 1 so that zero can be the exit value */
631         for (pid++; pid && l < *pos;
632              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
633                 ;
634         return (void *)pid;
635 }
636
637 /**
638  * trace_pid_show - show the current pid in seq_file processing
639  * @m: The seq_file structure to write into
640  * @v: A void pointer of the pid (+1) value to display
641  *
642  * Can be directly used by seq_file operations to display the current
643  * pid value.
644  */
645 int trace_pid_show(struct seq_file *m, void *v)
646 {
647         unsigned long pid = (unsigned long)v - 1;
648
649         seq_printf(m, "%lu\n", pid);
650         return 0;
651 }
652
653 /* 128 should be much more than enough */
654 #define PID_BUF_SIZE            127
655
656 int trace_pid_write(struct trace_pid_list *filtered_pids,
657                     struct trace_pid_list **new_pid_list,
658                     const char __user *ubuf, size_t cnt)
659 {
660         struct trace_pid_list *pid_list;
661         struct trace_parser parser;
662         unsigned long val;
663         int nr_pids = 0;
664         ssize_t read = 0;
665         ssize_t ret = 0;
666         loff_t pos;
667         pid_t pid;
668
669         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
670                 return -ENOMEM;
671
672         /*
673          * Always recreate a new array. The write is an all or nothing
674          * operation. Always create a new array when adding new pids by
675          * the user. If the operation fails, then the current list is
676          * not modified.
677          */
678         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
679         if (!pid_list) {
680                 trace_parser_put(&parser);
681                 return -ENOMEM;
682         }
683
684         pid_list->pid_max = READ_ONCE(pid_max);
685
686         /* Only truncating will shrink pid_max */
687         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
688                 pid_list->pid_max = filtered_pids->pid_max;
689
690         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
691         if (!pid_list->pids) {
692                 trace_parser_put(&parser);
693                 kfree(pid_list);
694                 return -ENOMEM;
695         }
696
697         if (filtered_pids) {
698                 /* copy the current bits to the new max */
699                 for_each_set_bit(pid, filtered_pids->pids,
700                                  filtered_pids->pid_max) {
701                         set_bit(pid, pid_list->pids);
702                         nr_pids++;
703                 }
704         }
705
706         while (cnt > 0) {
707
708                 pos = 0;
709
710                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
711                 if (ret < 0 || !trace_parser_loaded(&parser))
712                         break;
713
714                 read += ret;
715                 ubuf += ret;
716                 cnt -= ret;
717
718                 ret = -EINVAL;
719                 if (kstrtoul(parser.buffer, 0, &val))
720                         break;
721                 if (val >= pid_list->pid_max)
722                         break;
723
724                 pid = (pid_t)val;
725
726                 set_bit(pid, pid_list->pids);
727                 nr_pids++;
728
729                 trace_parser_clear(&parser);
730                 ret = 0;
731         }
732         trace_parser_put(&parser);
733
734         if (ret < 0) {
735                 trace_free_pid_list(pid_list);
736                 return ret;
737         }
738
739         if (!nr_pids) {
740                 /* Cleared the list of pids */
741                 trace_free_pid_list(pid_list);
742                 read = ret;
743                 pid_list = NULL;
744         }
745
746         *new_pid_list = pid_list;
747
748         return read;
749 }
750
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753         u64 ts;
754
755         /* Early boot up does not have a buffer yet */
756         if (!buf->buffer)
757                 return trace_clock_local();
758
759         ts = ring_buffer_time_stamp(buf->buffer, cpu);
760         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761
762         return ts;
763 }
764
765 u64 ftrace_now(int cpu)
766 {
767         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769
770 /**
771  * tracing_is_enabled - Show if global_trace has been disabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781         /*
782          * For quick access (irqsoff uses this in fast path), just
783          * return the mirror variable of the state of the ring buffer.
784          * It's a little racy, but we don't really care.
785          */
786         smp_rmb();
787         return !global_trace.buffer_disabled;
788 }
789
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low number of 16384.
796  * If the dump on oops happens, it will be much appreciated
797  * to not have to wait for all that output. Anyway this can be
798  * boot time and run time configurable.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
801
802 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer            *trace_types __read_mostly;
806
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811
812 /*
813  * serialize the access of the ring buffer
814  *
815  * ring buffer serializes readers, but it is low level protection.
816  * The validity of the events (which returns by ring_buffer_peek() ..etc)
817  * are not protected by ring buffer.
818  *
819  * The content of events may become garbage if we allow other process consumes
820  * these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not reader page) in ring buffer, and this page will be rewrited
823  *      by events producer.
824  *   B) The page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to system.
826  *
827  * These primitives allow multi process access to different cpu ring buffer
828  * concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multi read-only access are also serialized.
832  */
833
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837
838 static inline void trace_access_lock(int cpu)
839 {
840         if (cpu == RING_BUFFER_ALL_CPUS) {
841                 /* gain it for accessing the whole ring buffer. */
842                 down_write(&all_cpu_access_lock);
843         } else {
844                 /* gain it for accessing a cpu ring buffer. */
845
846                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847                 down_read(&all_cpu_access_lock);
848
849                 /* Secondly block other access to this @cpu ring buffer. */
850                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
851         }
852 }
853
854 static inline void trace_access_unlock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 up_write(&all_cpu_access_lock);
858         } else {
859                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860                 up_read(&all_cpu_access_lock);
861         }
862 }
863
864 static inline void trace_access_lock_init(void)
865 {
866         int cpu;
867
868         for_each_possible_cpu(cpu)
869                 mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871
872 #else
873
874 static DEFINE_MUTEX(access_lock);
875
876 static inline void trace_access_lock(int cpu)
877 {
878         (void)cpu;
879         mutex_lock(&access_lock);
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884         (void)cpu;
885         mutex_unlock(&access_lock);
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890 }
891
892 #endif
893
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896                                  unsigned long flags,
897                                  int skip, int pc, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899                                       struct trace_buffer *buffer,
900                                       unsigned long flags,
901                                       int skip, int pc, struct pt_regs *regs);
902
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905                                         unsigned long flags,
906                                         int skip, int pc, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910                                       struct trace_buffer *buffer,
911                                       unsigned long flags,
912                                       int skip, int pc, struct pt_regs *regs)
913 {
914 }
915
916 #endif
917
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920                   int type, unsigned long flags, int pc)
921 {
922         struct trace_entry *ent = ring_buffer_event_data(event);
923
924         tracing_generic_entry_update(ent, type, flags, pc);
925 }
926
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929                           int type,
930                           unsigned long len,
931                           unsigned long flags, int pc)
932 {
933         struct ring_buffer_event *event;
934
935         event = ring_buffer_lock_reserve(buffer, len);
936         if (event != NULL)
937                 trace_event_setup(event, type, flags, pc);
938
939         return event;
940 }
941
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944         if (tr->array_buffer.buffer)
945                 ring_buffer_record_on(tr->array_buffer.buffer);
946         /*
947          * This flag is looked at when buffers haven't been allocated
948          * yet, or by some tracers (like irqsoff), that just want to
949          * know if the ring buffer has been disabled, but it can handle
950          * races of where it gets disabled but we still do a record.
951          * As the check is in the fast path of the tracers, it is more
952          * important to be fast than accurate.
953          */
954         tr->buffer_disabled = 0;
955         /* Make the flag seen by readers */
956         smp_wmb();
957 }
958
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967         tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970
971
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975         __this_cpu_write(trace_taskinfo_save, true);
976
977         /* If this is the temp buffer, we need to commit fully */
978         if (this_cpu_read(trace_buffered_event) == event) {
979                 /* Length is in event->array[0] */
980                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
981                 /* Release the temp buffer */
982                 this_cpu_dec(trace_buffered_event_cnt);
983         } else
984                 ring_buffer_unlock_commit(buffer, event);
985 }
986
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:    The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995         struct ring_buffer_event *event;
996         struct trace_buffer *buffer;
997         struct print_entry *entry;
998         unsigned long irq_flags;
999         int alloc;
1000         int pc;
1001
1002         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1003                 return 0;
1004
1005         pc = preempt_count();
1006
1007         if (unlikely(tracing_selftest_running || tracing_disabled))
1008                 return 0;
1009
1010         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011
1012         local_save_flags(irq_flags);
1013         buffer = global_trace.array_buffer.buffer;
1014         ring_buffer_nest_start(buffer);
1015         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
1016                                             irq_flags, pc);
1017         if (!event) {
1018                 size = 0;
1019                 goto out;
1020         }
1021
1022         entry = ring_buffer_event_data(event);
1023         entry->ip = ip;
1024
1025         memcpy(&entry->buf, str, size);
1026
1027         /* Add a newline if necessary */
1028         if (entry->buf[size - 1] != '\n') {
1029                 entry->buf[size] = '\n';
1030                 entry->buf[size + 1] = '\0';
1031         } else
1032                 entry->buf[size] = '\0';
1033
1034         __buffer_unlock_commit(buffer, event);
1035         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1036  out:
1037         ring_buffer_nest_end(buffer);
1038         return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_puts);
1041
1042 /**
1043  * __trace_bputs - write the pointer to a constant string into trace buffer
1044  * @ip:    The address of the caller
1045  * @str:   The constant string to write to the buffer to
1046  */
1047 int __trace_bputs(unsigned long ip, const char *str)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct bputs_entry *entry;
1052         unsigned long irq_flags;
1053         int size = sizeof(struct bputs_entry);
1054         int ret = 0;
1055         int pc;
1056
1057         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1058                 return 0;
1059
1060         pc = preempt_count();
1061
1062         if (unlikely(tracing_selftest_running || tracing_disabled))
1063                 return 0;
1064
1065         local_save_flags(irq_flags);
1066         buffer = global_trace.array_buffer.buffer;
1067
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070                                             irq_flags, pc);
1071         if (!event)
1072                 goto out;
1073
1074         entry = ring_buffer_event_data(event);
1075         entry->ip                       = ip;
1076         entry->str                      = str;
1077
1078         __buffer_unlock_commit(buffer, event);
1079         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1080
1081         ret = 1;
1082  out:
1083         ring_buffer_nest_end(buffer);
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
1087
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090                                            void *cond_data)
1091 {
1092         struct tracer *tracer = tr->current_trace;
1093         unsigned long flags;
1094
1095         if (in_nmi()) {
1096                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1098                 return;
1099         }
1100
1101         if (!tr->allocated_snapshot) {
1102                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103                 internal_trace_puts("*** stopping trace here!   ***\n");
1104                 tracing_off();
1105                 return;
1106         }
1107
1108         /* Note, snapshot can not be used when the tracer uses it */
1109         if (tracer->use_max_tr) {
1110                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112                 return;
1113         }
1114
1115         local_irq_save(flags);
1116         update_max_tr(tr, current, smp_processor_id(), cond_data);
1117         local_irq_restore(flags);
1118 }
1119
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122         tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot with either
1133  * a tracing_snapshot_alloc(), or by doing it manually
1134  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, it will stop tracing.
1137  * Basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141         struct trace_array *tr = &global_trace;
1142
1143         tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:         The tracing instance to snapshot
1150  * @cond_data:  The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162         tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:         The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already done.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182         void *cond_data = NULL;
1183
1184         arch_spin_lock(&tr->max_lock);
1185
1186         if (tr->cond_snapshot)
1187                 cond_data = tr->cond_snapshot->cond_data;
1188
1189         arch_spin_unlock(&tr->max_lock);
1190
1191         return cond_data;
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1194
1195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1196                                         struct array_buffer *size_buf, int cpu_id);
1197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1198
1199 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1200 {
1201         int ret;
1202
1203         if (!tr->allocated_snapshot) {
1204
1205                 /* allocate spare buffer */
1206                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1207                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1208                 if (ret < 0)
1209                         return ret;
1210
1211                 tr->allocated_snapshot = true;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static void free_snapshot(struct trace_array *tr)
1218 {
1219         /*
1220          * We don't free the ring buffer. instead, resize it because
1221          * The max_tr ring buffer has some state (e.g. ring->clock) and
1222          * we want preserve it.
1223          */
1224         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1225         set_buffer_entries(&tr->max_buffer, 1);
1226         tracing_reset_online_cpus(&tr->max_buffer);
1227         tr->allocated_snapshot = false;
1228 }
1229
1230 /**
1231  * tracing_alloc_snapshot - allocate snapshot buffer.
1232  *
1233  * This only allocates the snapshot buffer if it isn't already
1234  * allocated - it doesn't also take a snapshot.
1235  *
1236  * This is meant to be used in cases where the snapshot buffer needs
1237  * to be set up for events that can't sleep but need to be able to
1238  * trigger a snapshot.
1239  */
1240 int tracing_alloc_snapshot(void)
1241 {
1242         struct trace_array *tr = &global_trace;
1243         int ret;
1244
1245         ret = tracing_alloc_snapshot_instance(tr);
1246         WARN_ON(ret < 0);
1247
1248         return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1251
1252 /**
1253  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1254  *
1255  * This is similar to tracing_snapshot(), but it will allocate the
1256  * snapshot buffer if it isn't already allocated. Use this only
1257  * where it is safe to sleep, as the allocation may sleep.
1258  *
1259  * This causes a swap between the snapshot buffer and the current live
1260  * tracing buffer. You can use this to take snapshots of the live
1261  * trace when some condition is triggered, but continue to trace.
1262  */
1263 void tracing_snapshot_alloc(void)
1264 {
1265         int ret;
1266
1267         ret = tracing_alloc_snapshot();
1268         if (ret < 0)
1269                 return;
1270
1271         tracing_snapshot();
1272 }
1273 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1274
1275 /**
1276  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1277  * @tr:         The tracing instance
1278  * @cond_data:  User data to associate with the snapshot
1279  * @update:     Implementation of the cond_snapshot update function
1280  *
1281  * Check whether the conditional snapshot for the given instance has
1282  * already been enabled, or if the current tracer is already using a
1283  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1284  * save the cond_data and update function inside.
1285  *
1286  * Returns 0 if successful, error otherwise.
1287  */
1288 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1289                                  cond_update_fn_t update)
1290 {
1291         struct cond_snapshot *cond_snapshot;
1292         int ret = 0;
1293
1294         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1295         if (!cond_snapshot)
1296                 return -ENOMEM;
1297
1298         cond_snapshot->cond_data = cond_data;
1299         cond_snapshot->update = update;
1300
1301         mutex_lock(&trace_types_lock);
1302
1303         ret = tracing_alloc_snapshot_instance(tr);
1304         if (ret)
1305                 goto fail_unlock;
1306
1307         if (tr->current_trace->use_max_tr) {
1308                 ret = -EBUSY;
1309                 goto fail_unlock;
1310         }
1311
1312         /*
1313          * The cond_snapshot can only change to NULL without the
1314          * trace_types_lock. We don't care if we race with it going
1315          * to NULL, but we want to make sure that it's not set to
1316          * something other than NULL when we get here, which we can
1317          * do safely with only holding the trace_types_lock and not
1318          * having to take the max_lock.
1319          */
1320         if (tr->cond_snapshot) {
1321                 ret = -EBUSY;
1322                 goto fail_unlock;
1323         }
1324
1325         arch_spin_lock(&tr->max_lock);
1326         tr->cond_snapshot = cond_snapshot;
1327         arch_spin_unlock(&tr->max_lock);
1328
1329         mutex_unlock(&trace_types_lock);
1330
1331         return ret;
1332
1333  fail_unlock:
1334         mutex_unlock(&trace_types_lock);
1335         kfree(cond_snapshot);
1336         return ret;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1339
1340 /**
1341  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  *
1344  * Check whether the conditional snapshot for the given instance is
1345  * enabled; if so, free the cond_snapshot associated with it,
1346  * otherwise return -EINVAL.
1347  *
1348  * Returns 0 if successful, error otherwise.
1349  */
1350 int tracing_snapshot_cond_disable(struct trace_array *tr)
1351 {
1352         int ret = 0;
1353
1354         arch_spin_lock(&tr->max_lock);
1355
1356         if (!tr->cond_snapshot)
1357                 ret = -EINVAL;
1358         else {
1359                 kfree(tr->cond_snapshot);
1360                 tr->cond_snapshot = NULL;
1361         }
1362
1363         arch_spin_unlock(&tr->max_lock);
1364
1365         return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1368 #else
1369 void tracing_snapshot(void)
1370 {
1371         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot);
1374 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1375 {
1376         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1379 int tracing_alloc_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1382         return -ENODEV;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1385 void tracing_snapshot_alloc(void)
1386 {
1387         /* Give warning */
1388         tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 void *tracing_cond_snapshot_data(struct trace_array *tr)
1392 {
1393         return NULL;
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1396 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1397 {
1398         return -ENODEV;
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1401 int tracing_snapshot_cond_disable(struct trace_array *tr)
1402 {
1403         return false;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1406 #endif /* CONFIG_TRACER_SNAPSHOT */
1407
1408 void tracer_tracing_off(struct trace_array *tr)
1409 {
1410         if (tr->array_buffer.buffer)
1411                 ring_buffer_record_off(tr->array_buffer.buffer);
1412         /*
1413          * This flag is looked at when buffers haven't been allocated
1414          * yet, or by some tracers (like irqsoff), that just want to
1415          * know if the ring buffer has been disabled, but it can handle
1416          * races of where it gets disabled but we still do a record.
1417          * As the check is in the fast path of the tracers, it is more
1418          * important to be fast than accurate.
1419          */
1420         tr->buffer_disabled = 1;
1421         /* Make the flag seen by readers */
1422         smp_wmb();
1423 }
1424
1425 /**
1426  * tracing_off - turn off tracing buffers
1427  *
1428  * This function stops the tracing buffers from recording data.
1429  * It does not disable any overhead the tracers themselves may
1430  * be causing. This function simply causes all recording to
1431  * the ring buffers to fail.
1432  */
1433 void tracing_off(void)
1434 {
1435         tracer_tracing_off(&global_trace);
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_off);
1438
1439 void disable_trace_on_warning(void)
1440 {
1441         if (__disable_trace_on_warning) {
1442                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1443                         "Disabling tracing due to warning\n");
1444                 tracing_off();
1445         }
1446 }
1447
1448 /**
1449  * tracer_tracing_is_on - show real state of ring buffer enabled
1450  * @tr : the trace array to know if ring buffer is enabled
1451  *
1452  * Shows real state of the ring buffer if it is enabled or not.
1453  */
1454 bool tracer_tracing_is_on(struct trace_array *tr)
1455 {
1456         if (tr->array_buffer.buffer)
1457                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1458         return !tr->buffer_disabled;
1459 }
1460
1461 /**
1462  * tracing_is_on - show state of ring buffers enabled
1463  */
1464 int tracing_is_on(void)
1465 {
1466         return tracer_tracing_is_on(&global_trace);
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_is_on);
1469
1470 static int __init set_buf_size(char *str)
1471 {
1472         unsigned long buf_size;
1473
1474         if (!str)
1475                 return 0;
1476         buf_size = memparse(str, &str);
1477         /* nr_entries can not be zero */
1478         if (buf_size == 0)
1479                 return 0;
1480         trace_buf_size = buf_size;
1481         return 1;
1482 }
1483 __setup("trace_buf_size=", set_buf_size);
1484
1485 static int __init set_tracing_thresh(char *str)
1486 {
1487         unsigned long threshold;
1488         int ret;
1489
1490         if (!str)
1491                 return 0;
1492         ret = kstrtoul(str, 0, &threshold);
1493         if (ret < 0)
1494                 return 0;
1495         tracing_thresh = threshold * 1000;
1496         return 1;
1497 }
1498 __setup("tracing_thresh=", set_tracing_thresh);
1499
1500 unsigned long nsecs_to_usecs(unsigned long nsecs)
1501 {
1502         return nsecs / 1000;
1503 }
1504
1505 /*
1506  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1507  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1508  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1509  * of strings in the order that the evals (enum) were defined.
1510  */
1511 #undef C
1512 #define C(a, b) b
1513
1514 /* These must match the bit postions in trace_iterator_flags */
1515 static const char *trace_options[] = {
1516         TRACE_FLAGS
1517         NULL
1518 };
1519
1520 static struct {
1521         u64 (*func)(void);
1522         const char *name;
1523         int in_ns;              /* is this clock in nanoseconds? */
1524 } trace_clocks[] = {
1525         { trace_clock_local,            "local",        1 },
1526         { trace_clock_global,           "global",       1 },
1527         { trace_clock_counter,          "counter",      0 },
1528         { trace_clock_jiffies,          "uptime",       0 },
1529         { trace_clock,                  "perf",         1 },
1530         { ktime_get_mono_fast_ns,       "mono",         1 },
1531         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1532         { ktime_get_boot_fast_ns,       "boot",         1 },
1533         ARCH_TRACE_CLOCKS
1534 };
1535
1536 bool trace_clock_in_ns(struct trace_array *tr)
1537 {
1538         if (trace_clocks[tr->clock_id].in_ns)
1539                 return true;
1540
1541         return false;
1542 }
1543
1544 /*
1545  * trace_parser_get_init - gets the buffer for trace parser
1546  */
1547 int trace_parser_get_init(struct trace_parser *parser, int size)
1548 {
1549         memset(parser, 0, sizeof(*parser));
1550
1551         parser->buffer = kmalloc(size, GFP_KERNEL);
1552         if (!parser->buffer)
1553                 return 1;
1554
1555         parser->size = size;
1556         return 0;
1557 }
1558
1559 /*
1560  * trace_parser_put - frees the buffer for trace parser
1561  */
1562 void trace_parser_put(struct trace_parser *parser)
1563 {
1564         kfree(parser->buffer);
1565         parser->buffer = NULL;
1566 }
1567
1568 /*
1569  * trace_get_user - reads the user input string separated by  space
1570  * (matched by isspace(ch))
1571  *
1572  * For each string found the 'struct trace_parser' is updated,
1573  * and the function returns.
1574  *
1575  * Returns number of bytes read.
1576  *
1577  * See kernel/trace/trace.h for 'struct trace_parser' details.
1578  */
1579 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1580         size_t cnt, loff_t *ppos)
1581 {
1582         char ch;
1583         size_t read = 0;
1584         ssize_t ret;
1585
1586         if (!*ppos)
1587                 trace_parser_clear(parser);
1588
1589         ret = get_user(ch, ubuf++);
1590         if (ret)
1591                 goto out;
1592
1593         read++;
1594         cnt--;
1595
1596         /*
1597          * The parser is not finished with the last write,
1598          * continue reading the user input without skipping spaces.
1599          */
1600         if (!parser->cont) {
1601                 /* skip white space */
1602                 while (cnt && isspace(ch)) {
1603                         ret = get_user(ch, ubuf++);
1604                         if (ret)
1605                                 goto out;
1606                         read++;
1607                         cnt--;
1608                 }
1609
1610                 parser->idx = 0;
1611
1612                 /* only spaces were written */
1613                 if (isspace(ch) || !ch) {
1614                         *ppos += read;
1615                         ret = read;
1616                         goto out;
1617                 }
1618         }
1619
1620         /* read the non-space input */
1621         while (cnt && !isspace(ch) && ch) {
1622                 if (parser->idx < parser->size - 1)
1623                         parser->buffer[parser->idx++] = ch;
1624                 else {
1625                         ret = -EINVAL;
1626                         goto out;
1627                 }
1628                 ret = get_user(ch, ubuf++);
1629                 if (ret)
1630                         goto out;
1631                 read++;
1632                 cnt--;
1633         }
1634
1635         /* We either got finished input or we have to wait for another call. */
1636         if (isspace(ch) || !ch) {
1637                 parser->buffer[parser->idx] = 0;
1638                 parser->cont = false;
1639         } else if (parser->idx < parser->size - 1) {
1640                 parser->cont = true;
1641                 parser->buffer[parser->idx++] = ch;
1642                 /* Make sure the parsed string always terminates with '\0'. */
1643                 parser->buffer[parser->idx] = 0;
1644         } else {
1645                 ret = -EINVAL;
1646                 goto out;
1647         }
1648
1649         *ppos += read;
1650         ret = read;
1651
1652 out:
1653         return ret;
1654 }
1655
1656 /* TODO add a seq_buf_to_buffer() */
1657 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1658 {
1659         int len;
1660
1661         if (trace_seq_used(s) <= s->seq.readpos)
1662                 return -EBUSY;
1663
1664         len = trace_seq_used(s) - s->seq.readpos;
1665         if (cnt > len)
1666                 cnt = len;
1667         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1668
1669         s->seq.readpos += cnt;
1670         return cnt;
1671 }
1672
1673 unsigned long __read_mostly     tracing_thresh;
1674 static const struct file_operations tracing_max_lat_fops;
1675
1676 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1677         defined(CONFIG_FSNOTIFY)
1678
1679 static struct workqueue_struct *fsnotify_wq;
1680
1681 static void latency_fsnotify_workfn(struct work_struct *work)
1682 {
1683         struct trace_array *tr = container_of(work, struct trace_array,
1684                                               fsnotify_work);
1685         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1686 }
1687
1688 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1689 {
1690         struct trace_array *tr = container_of(iwork, struct trace_array,
1691                                               fsnotify_irqwork);
1692         queue_work(fsnotify_wq, &tr->fsnotify_work);
1693 }
1694
1695 static void trace_create_maxlat_file(struct trace_array *tr,
1696                                      struct dentry *d_tracer)
1697 {
1698         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1699         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1700         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1701                                               d_tracer, &tr->max_latency,
1702                                               &tracing_max_lat_fops);
1703 }
1704
1705 __init static int latency_fsnotify_init(void)
1706 {
1707         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1708                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1709         if (!fsnotify_wq) {
1710                 pr_err("Unable to allocate tr_max_lat_wq\n");
1711                 return -ENOMEM;
1712         }
1713         return 0;
1714 }
1715
1716 late_initcall_sync(latency_fsnotify_init);
1717
1718 void latency_fsnotify(struct trace_array *tr)
1719 {
1720         if (!fsnotify_wq)
1721                 return;
1722         /*
1723          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1724          * possible that we are called from __schedule() or do_idle(), which
1725          * could cause a deadlock.
1726          */
1727         irq_work_queue(&tr->fsnotify_irqwork);
1728 }
1729
1730 /*
1731  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1732  *  defined(CONFIG_FSNOTIFY)
1733  */
1734 #else
1735
1736 #define trace_create_maxlat_file(tr, d_tracer)                          \
1737         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1738                           &tr->max_latency, &tracing_max_lat_fops)
1739
1740 #endif
1741
1742 #ifdef CONFIG_TRACER_MAX_TRACE
1743 /*
1744  * Copy the new maximum trace into the separate maximum-trace
1745  * structure. (this way the maximum trace is permanently saved,
1746  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1747  */
1748 static void
1749 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1750 {
1751         struct array_buffer *trace_buf = &tr->array_buffer;
1752         struct array_buffer *max_buf = &tr->max_buffer;
1753         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1754         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1755
1756         max_buf->cpu = cpu;
1757         max_buf->time_start = data->preempt_timestamp;
1758
1759         max_data->saved_latency = tr->max_latency;
1760         max_data->critical_start = data->critical_start;
1761         max_data->critical_end = data->critical_end;
1762
1763         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1764         max_data->pid = tsk->pid;
1765         /*
1766          * If tsk == current, then use current_uid(), as that does not use
1767          * RCU. The irq tracer can be called out of RCU scope.
1768          */
1769         if (tsk == current)
1770                 max_data->uid = current_uid();
1771         else
1772                 max_data->uid = task_uid(tsk);
1773
1774         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1775         max_data->policy = tsk->policy;
1776         max_data->rt_priority = tsk->rt_priority;
1777
1778         /* record this tasks comm */
1779         tracing_record_cmdline(tsk);
1780         latency_fsnotify(tr);
1781 }
1782
1783 /**
1784  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1785  * @tr: tracer
1786  * @tsk: the task with the latency
1787  * @cpu: The cpu that initiated the trace.
1788  * @cond_data: User data associated with a conditional snapshot
1789  *
1790  * Flip the buffers between the @tr and the max_tr and record information
1791  * about which task was the cause of this latency.
1792  */
1793 void
1794 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1795               void *cond_data)
1796 {
1797         if (tr->stop_count)
1798                 return;
1799
1800         WARN_ON_ONCE(!irqs_disabled());
1801
1802         if (!tr->allocated_snapshot) {
1803                 /* Only the nop tracer should hit this when disabling */
1804                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1805                 return;
1806         }
1807
1808         arch_spin_lock(&tr->max_lock);
1809
1810         /* Inherit the recordable setting from array_buffer */
1811         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1812                 ring_buffer_record_on(tr->max_buffer.buffer);
1813         else
1814                 ring_buffer_record_off(tr->max_buffer.buffer);
1815
1816 #ifdef CONFIG_TRACER_SNAPSHOT
1817         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1818                 goto out_unlock;
1819 #endif
1820         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1821
1822         __update_max_tr(tr, tsk, cpu);
1823
1824  out_unlock:
1825         arch_spin_unlock(&tr->max_lock);
1826 }
1827
1828 /**
1829  * update_max_tr_single - only copy one trace over, and reset the rest
1830  * @tr: tracer
1831  * @tsk: task with the latency
1832  * @cpu: the cpu of the buffer to copy.
1833  *
1834  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1835  */
1836 void
1837 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1838 {
1839         int ret;
1840
1841         if (tr->stop_count)
1842                 return;
1843
1844         WARN_ON_ONCE(!irqs_disabled());
1845         if (!tr->allocated_snapshot) {
1846                 /* Only the nop tracer should hit this when disabling */
1847                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1848                 return;
1849         }
1850
1851         arch_spin_lock(&tr->max_lock);
1852
1853         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1854
1855         if (ret == -EBUSY) {
1856                 /*
1857                  * We failed to swap the buffer due to a commit taking
1858                  * place on this CPU. We fail to record, but we reset
1859                  * the max trace buffer (no one writes directly to it)
1860                  * and flag that it failed.
1861                  */
1862                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1863                         "Failed to swap buffers due to commit in progress\n");
1864         }
1865
1866         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1867
1868         __update_max_tr(tr, tsk, cpu);
1869         arch_spin_unlock(&tr->max_lock);
1870 }
1871 #endif /* CONFIG_TRACER_MAX_TRACE */
1872
1873 static int wait_on_pipe(struct trace_iterator *iter, int full)
1874 {
1875         /* Iterators are static, they should be filled or empty */
1876         if (trace_buffer_iter(iter, iter->cpu_file))
1877                 return 0;
1878
1879         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1880                                 full);
1881 }
1882
1883 #ifdef CONFIG_FTRACE_STARTUP_TEST
1884 static bool selftests_can_run;
1885
1886 struct trace_selftests {
1887         struct list_head                list;
1888         struct tracer                   *type;
1889 };
1890
1891 static LIST_HEAD(postponed_selftests);
1892
1893 static int save_selftest(struct tracer *type)
1894 {
1895         struct trace_selftests *selftest;
1896
1897         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1898         if (!selftest)
1899                 return -ENOMEM;
1900
1901         selftest->type = type;
1902         list_add(&selftest->list, &postponed_selftests);
1903         return 0;
1904 }
1905
1906 static int run_tracer_selftest(struct tracer *type)
1907 {
1908         struct trace_array *tr = &global_trace;
1909         struct tracer *saved_tracer = tr->current_trace;
1910         int ret;
1911
1912         if (!type->selftest || tracing_selftest_disabled)
1913                 return 0;
1914
1915         /*
1916          * If a tracer registers early in boot up (before scheduling is
1917          * initialized and such), then do not run its selftests yet.
1918          * Instead, run it a little later in the boot process.
1919          */
1920         if (!selftests_can_run)
1921                 return save_selftest(type);
1922
1923         /*
1924          * Run a selftest on this tracer.
1925          * Here we reset the trace buffer, and set the current
1926          * tracer to be this tracer. The tracer can then run some
1927          * internal tracing to verify that everything is in order.
1928          * If we fail, we do not register this tracer.
1929          */
1930         tracing_reset_online_cpus(&tr->array_buffer);
1931
1932         tr->current_trace = type;
1933
1934 #ifdef CONFIG_TRACER_MAX_TRACE
1935         if (type->use_max_tr) {
1936                 /* If we expanded the buffers, make sure the max is expanded too */
1937                 if (ring_buffer_expanded)
1938                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1939                                            RING_BUFFER_ALL_CPUS);
1940                 tr->allocated_snapshot = true;
1941         }
1942 #endif
1943
1944         /* the test is responsible for initializing and enabling */
1945         pr_info("Testing tracer %s: ", type->name);
1946         ret = type->selftest(type, tr);
1947         /* the test is responsible for resetting too */
1948         tr->current_trace = saved_tracer;
1949         if (ret) {
1950                 printk(KERN_CONT "FAILED!\n");
1951                 /* Add the warning after printing 'FAILED' */
1952                 WARN_ON(1);
1953                 return -1;
1954         }
1955         /* Only reset on passing, to avoid touching corrupted buffers */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959         if (type->use_max_tr) {
1960                 tr->allocated_snapshot = false;
1961
1962                 /* Shrink the max buffer again */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1965                                            RING_BUFFER_ALL_CPUS);
1966         }
1967 #endif
1968
1969         printk(KERN_CONT "PASSED\n");
1970         return 0;
1971 }
1972
1973 static __init int init_trace_selftests(void)
1974 {
1975         struct trace_selftests *p, *n;
1976         struct tracer *t, **last;
1977         int ret;
1978
1979         selftests_can_run = true;
1980
1981         mutex_lock(&trace_types_lock);
1982
1983         if (list_empty(&postponed_selftests))
1984                 goto out;
1985
1986         pr_info("Running postponed tracer tests:\n");
1987
1988         tracing_selftest_running = true;
1989         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1990                 /* This loop can take minutes when sanitizers are enabled, so
1991                  * lets make sure we allow RCU processing.
1992                  */
1993                 cond_resched();
1994                 ret = run_tracer_selftest(p->type);
1995                 /* If the test fails, then warn and remove from available_tracers */
1996                 if (ret < 0) {
1997                         WARN(1, "tracer: %s failed selftest, disabling\n",
1998                              p->type->name);
1999                         last = &trace_types;
2000                         for (t = trace_types; t; t = t->next) {
2001                                 if (t == p->type) {
2002                                         *last = t->next;
2003                                         break;
2004                                 }
2005                                 last = &t->next;
2006                         }
2007                 }
2008                 list_del(&p->list);
2009                 kfree(p);
2010         }
2011         tracing_selftest_running = false;
2012
2013  out:
2014         mutex_unlock(&trace_types_lock);
2015
2016         return 0;
2017 }
2018 core_initcall(init_trace_selftests);
2019 #else
2020 static inline int run_tracer_selftest(struct tracer *type)
2021 {
2022         return 0;
2023 }
2024 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2025
2026 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2027
2028 static void __init apply_trace_boot_options(void);
2029
2030 /**
2031  * register_tracer - register a tracer with the ftrace system.
2032  * @type: the plugin for the tracer
2033  *
2034  * Register a new plugin tracer.
2035  */
2036 int __init register_tracer(struct tracer *type)
2037 {
2038         struct tracer *t;
2039         int ret = 0;
2040
2041         if (!type->name) {
2042                 pr_info("Tracer must have a name\n");
2043                 return -1;
2044         }
2045
2046         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2047                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2048                 return -1;
2049         }
2050
2051         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2052                 pr_warn("Can not register tracer %s due to lockdown\n",
2053                            type->name);
2054                 return -EPERM;
2055         }
2056
2057         mutex_lock(&trace_types_lock);
2058
2059         tracing_selftest_running = true;
2060
2061         for (t = trace_types; t; t = t->next) {
2062                 if (strcmp(type->name, t->name) == 0) {
2063                         /* already found */
2064                         pr_info("Tracer %s already registered\n",
2065                                 type->name);
2066                         ret = -1;
2067                         goto out;
2068                 }
2069         }
2070
2071         if (!type->set_flag)
2072                 type->set_flag = &dummy_set_flag;
2073         if (!type->flags) {
2074                 /*allocate a dummy tracer_flags*/
2075                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2076                 if (!type->flags) {
2077                         ret = -ENOMEM;
2078                         goto out;
2079                 }
2080                 type->flags->val = 0;
2081                 type->flags->opts = dummy_tracer_opt;
2082         } else
2083                 if (!type->flags->opts)
2084                         type->flags->opts = dummy_tracer_opt;
2085
2086         /* store the tracer for __set_tracer_option */
2087         type->flags->trace = type;
2088
2089         ret = run_tracer_selftest(type);
2090         if (ret < 0)
2091                 goto out;
2092
2093         type->next = trace_types;
2094         trace_types = type;
2095         add_tracer_options(&global_trace, type);
2096
2097  out:
2098         tracing_selftest_running = false;
2099         mutex_unlock(&trace_types_lock);
2100
2101         if (ret || !default_bootup_tracer)
2102                 goto out_unlock;
2103
2104         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2105                 goto out_unlock;
2106
2107         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2108         /* Do we want this tracer to start on bootup? */
2109         tracing_set_tracer(&global_trace, type->name);
2110         default_bootup_tracer = NULL;
2111
2112         apply_trace_boot_options();
2113
2114         /* disable other selftests, since this will break it. */
2115         tracing_selftest_disabled = true;
2116 #ifdef CONFIG_FTRACE_STARTUP_TEST
2117         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2118                type->name);
2119 #endif
2120
2121  out_unlock:
2122         return ret;
2123 }
2124
2125 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2126 {
2127         struct trace_buffer *buffer = buf->buffer;
2128
2129         if (!buffer)
2130                 return;
2131
2132         ring_buffer_record_disable(buffer);
2133
2134         /* Make sure all commits have finished */
2135         synchronize_rcu();
2136         ring_buffer_reset_cpu(buffer, cpu);
2137
2138         ring_buffer_record_enable(buffer);
2139 }
2140
2141 void tracing_reset_online_cpus(struct array_buffer *buf)
2142 {
2143         struct trace_buffer *buffer = buf->buffer;
2144
2145         if (!buffer)
2146                 return;
2147
2148         ring_buffer_record_disable(buffer);
2149
2150         /* Make sure all commits have finished */
2151         synchronize_rcu();
2152
2153         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2154
2155         ring_buffer_reset_online_cpus(buffer);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 /* Must have trace_types_lock held */
2161 void tracing_reset_all_online_cpus(void)
2162 {
2163         struct trace_array *tr;
2164
2165         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2166                 if (!tr->clear_trace)
2167                         continue;
2168                 tr->clear_trace = false;
2169                 tracing_reset_online_cpus(&tr->array_buffer);
2170 #ifdef CONFIG_TRACER_MAX_TRACE
2171                 tracing_reset_online_cpus(&tr->max_buffer);
2172 #endif
2173         }
2174 }
2175
2176 static int *tgid_map;
2177
2178 #define SAVED_CMDLINES_DEFAULT 128
2179 #define NO_CMDLINE_MAP UINT_MAX
2180 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2181 struct saved_cmdlines_buffer {
2182         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2183         unsigned *map_cmdline_to_pid;
2184         unsigned cmdline_num;
2185         int cmdline_idx;
2186         char *saved_cmdlines;
2187 };
2188 static struct saved_cmdlines_buffer *savedcmd;
2189
2190 /* temporary disable recording */
2191 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2192
2193 static inline char *get_saved_cmdlines(int idx)
2194 {
2195         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2196 }
2197
2198 static inline void set_cmdline(int idx, const char *cmdline)
2199 {
2200         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2201 }
2202
2203 static int allocate_cmdlines_buffer(unsigned int val,
2204                                     struct saved_cmdlines_buffer *s)
2205 {
2206         s->map_cmdline_to_pid = kmalloc_array(val,
2207                                               sizeof(*s->map_cmdline_to_pid),
2208                                               GFP_KERNEL);
2209         if (!s->map_cmdline_to_pid)
2210                 return -ENOMEM;
2211
2212         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2213         if (!s->saved_cmdlines) {
2214                 kfree(s->map_cmdline_to_pid);
2215                 return -ENOMEM;
2216         }
2217
2218         s->cmdline_idx = 0;
2219         s->cmdline_num = val;
2220         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2221                sizeof(s->map_pid_to_cmdline));
2222         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2223                val * sizeof(*s->map_cmdline_to_pid));
2224
2225         return 0;
2226 }
2227
2228 static int trace_create_savedcmd(void)
2229 {
2230         int ret;
2231
2232         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2233         if (!savedcmd)
2234                 return -ENOMEM;
2235
2236         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2237         if (ret < 0) {
2238                 kfree(savedcmd);
2239                 savedcmd = NULL;
2240                 return -ENOMEM;
2241         }
2242
2243         return 0;
2244 }
2245
2246 int is_tracing_stopped(void)
2247 {
2248         return global_trace.stop_count;
2249 }
2250
2251 /**
2252  * tracing_start - quick start of the tracer
2253  *
2254  * If tracing is enabled but was stopped by tracing_stop,
2255  * this will start the tracer back up.
2256  */
2257 void tracing_start(void)
2258 {
2259         struct trace_buffer *buffer;
2260         unsigned long flags;
2261
2262         if (tracing_disabled)
2263                 return;
2264
2265         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2266         if (--global_trace.stop_count) {
2267                 if (global_trace.stop_count < 0) {
2268                         /* Someone screwed up their debugging */
2269                         WARN_ON_ONCE(1);
2270                         global_trace.stop_count = 0;
2271                 }
2272                 goto out;
2273         }
2274
2275         /* Prevent the buffers from switching */
2276         arch_spin_lock(&global_trace.max_lock);
2277
2278         buffer = global_trace.array_buffer.buffer;
2279         if (buffer)
2280                 ring_buffer_record_enable(buffer);
2281
2282 #ifdef CONFIG_TRACER_MAX_TRACE
2283         buffer = global_trace.max_buffer.buffer;
2284         if (buffer)
2285                 ring_buffer_record_enable(buffer);
2286 #endif
2287
2288         arch_spin_unlock(&global_trace.max_lock);
2289
2290  out:
2291         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2292 }
2293
2294 static void tracing_start_tr(struct trace_array *tr)
2295 {
2296         struct trace_buffer *buffer;
2297         unsigned long flags;
2298
2299         if (tracing_disabled)
2300                 return;
2301
2302         /* If global, we need to also start the max tracer */
2303         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2304                 return tracing_start();
2305
2306         raw_spin_lock_irqsave(&tr->start_lock, flags);
2307
2308         if (--tr->stop_count) {
2309                 if (tr->stop_count < 0) {
2310                         /* Someone screwed up their debugging */
2311                         WARN_ON_ONCE(1);
2312                         tr->stop_count = 0;
2313                 }
2314                 goto out;
2315         }
2316
2317         buffer = tr->array_buffer.buffer;
2318         if (buffer)
2319                 ring_buffer_record_enable(buffer);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2323 }
2324
2325 /**
2326  * tracing_stop - quick stop of the tracer
2327  *
2328  * Light weight way to stop tracing. Use in conjunction with
2329  * tracing_start.
2330  */
2331 void tracing_stop(void)
2332 {
2333         struct trace_buffer *buffer;
2334         unsigned long flags;
2335
2336         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2337         if (global_trace.stop_count++)
2338                 goto out;
2339
2340         /* Prevent the buffers from switching */
2341         arch_spin_lock(&global_trace.max_lock);
2342
2343         buffer = global_trace.array_buffer.buffer;
2344         if (buffer)
2345                 ring_buffer_record_disable(buffer);
2346
2347 #ifdef CONFIG_TRACER_MAX_TRACE
2348         buffer = global_trace.max_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_disable(buffer);
2351 #endif
2352
2353         arch_spin_unlock(&global_trace.max_lock);
2354
2355  out:
2356         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2357 }
2358
2359 static void tracing_stop_tr(struct trace_array *tr)
2360 {
2361         struct trace_buffer *buffer;
2362         unsigned long flags;
2363
2364         /* If global, we need to also stop the max tracer */
2365         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366                 return tracing_stop();
2367
2368         raw_spin_lock_irqsave(&tr->start_lock, flags);
2369         if (tr->stop_count++)
2370                 goto out;
2371
2372         buffer = tr->array_buffer.buffer;
2373         if (buffer)
2374                 ring_buffer_record_disable(buffer);
2375
2376  out:
2377         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2378 }
2379
2380 static int trace_save_cmdline(struct task_struct *tsk)
2381 {
2382         unsigned pid, idx;
2383
2384         /* treat recording of idle task as a success */
2385         if (!tsk->pid)
2386                 return 1;
2387
2388         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2389                 return 0;
2390
2391         /*
2392          * It's not the end of the world if we don't get
2393          * the lock, but we also don't want to spin
2394          * nor do we want to disable interrupts,
2395          * so if we miss here, then better luck next time.
2396          */
2397         if (!arch_spin_trylock(&trace_cmdline_lock))
2398                 return 0;
2399
2400         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2401         if (idx == NO_CMDLINE_MAP) {
2402                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2403
2404                 /*
2405                  * Check whether the cmdline buffer at idx has a pid
2406                  * mapped. We are going to overwrite that entry so we
2407                  * need to clear the map_pid_to_cmdline. Otherwise we
2408                  * would read the new comm for the old pid.
2409                  */
2410                 pid = savedcmd->map_cmdline_to_pid[idx];
2411                 if (pid != NO_CMDLINE_MAP)
2412                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2413
2414                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2415                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2416
2417                 savedcmd->cmdline_idx = idx;
2418         }
2419
2420         set_cmdline(idx, tsk->comm);
2421
2422         arch_spin_unlock(&trace_cmdline_lock);
2423
2424         return 1;
2425 }
2426
2427 static void __trace_find_cmdline(int pid, char comm[])
2428 {
2429         unsigned map;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         if (pid > PID_MAX_DEFAULT) {
2442                 strcpy(comm, "<...>");
2443                 return;
2444         }
2445
2446         map = savedcmd->map_pid_to_cmdline[pid];
2447         if (map != NO_CMDLINE_MAP)
2448                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2449         else
2450                 strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry again soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry again soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record a specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578
2579 void
2580 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2581                              unsigned long flags, int pc)
2582 {
2583         struct task_struct *tsk = current;
2584
2585         entry->preempt_count            = pc & 0xff;
2586         entry->pid                      = (tsk) ? tsk->pid : 0;
2587         entry->type                     = type;
2588         entry->flags =
2589 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2590                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2591 #else
2592                 TRACE_FLAG_IRQS_NOSUPPORT |
2593 #endif
2594                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2595                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2596                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2597                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2598                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2599 }
2600 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2601
2602 struct ring_buffer_event *
2603 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2604                           int type,
2605                           unsigned long len,
2606                           unsigned long flags, int pc)
2607 {
2608         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2609 }
2610
2611 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2612 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2613 static int trace_buffered_event_ref;
2614
2615 /**
2616  * trace_buffered_event_enable - enable buffering events
2617  *
2618  * When events are being filtered, it is quicker to use a temporary
2619  * buffer to write the event data into if there's a likely chance
2620  * that it will not be committed. The discard of the ring buffer
2621  * is not as fast as committing, and is much slower than copying
2622  * a commit.
2623  *
2624  * When an event is to be filtered, allocate per cpu buffers to
2625  * write the event data into, and if the event is filtered and discarded
2626  * it is simply dropped, otherwise, the entire data is to be committed
2627  * in one shot.
2628  */
2629 void trace_buffered_event_enable(void)
2630 {
2631         struct ring_buffer_event *event;
2632         struct page *page;
2633         int cpu;
2634
2635         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2636
2637         if (trace_buffered_event_ref++)
2638                 return;
2639
2640         for_each_tracing_cpu(cpu) {
2641                 page = alloc_pages_node(cpu_to_node(cpu),
2642                                         GFP_KERNEL | __GFP_NORETRY, 0);
2643                 if (!page)
2644                         goto failed;
2645
2646                 event = page_address(page);
2647                 memset(event, 0, sizeof(*event));
2648
2649                 per_cpu(trace_buffered_event, cpu) = event;
2650
2651                 preempt_disable();
2652                 if (cpu == smp_processor_id() &&
2653                     this_cpu_read(trace_buffered_event) !=
2654                     per_cpu(trace_buffered_event, cpu))
2655                         WARN_ON_ONCE(1);
2656                 preempt_enable();
2657         }
2658
2659         return;
2660  failed:
2661         trace_buffered_event_disable();
2662 }
2663
2664 static void enable_trace_buffered_event(void *data)
2665 {
2666         /* Probably not needed, but do it anyway */
2667         smp_rmb();
2668         this_cpu_dec(trace_buffered_event_cnt);
2669 }
2670
2671 static void disable_trace_buffered_event(void *data)
2672 {
2673         this_cpu_inc(trace_buffered_event_cnt);
2674 }
2675
2676 /**
2677  * trace_buffered_event_disable - disable buffering events
2678  *
2679  * When a filter is removed, it is faster to not use the buffered
2680  * events, and to commit directly into the ring buffer. Free up
2681  * the temp buffers when there are no more users. This requires
2682  * special synchronization with current events.
2683  */
2684 void trace_buffered_event_disable(void)
2685 {
2686         int cpu;
2687
2688         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2689
2690         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2691                 return;
2692
2693         if (--trace_buffered_event_ref)
2694                 return;
2695
2696         preempt_disable();
2697         /* For each CPU, set the buffer as used. */
2698         smp_call_function_many(tracing_buffer_mask,
2699                                disable_trace_buffered_event, NULL, 1);
2700         preempt_enable();
2701
2702         /* Wait for all current users to finish */
2703         synchronize_rcu();
2704
2705         for_each_tracing_cpu(cpu) {
2706                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2707                 per_cpu(trace_buffered_event, cpu) = NULL;
2708         }
2709         /*
2710          * Make sure trace_buffered_event is NULL before clearing
2711          * trace_buffered_event_cnt.
2712          */
2713         smp_wmb();
2714
2715         preempt_disable();
2716         /* Do the work on each cpu */
2717         smp_call_function_many(tracing_buffer_mask,
2718                                enable_trace_buffered_event, NULL, 1);
2719         preempt_enable();
2720 }
2721
2722 static struct trace_buffer *temp_buffer;
2723
2724 struct ring_buffer_event *
2725 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2726                           struct trace_event_file *trace_file,
2727                           int type, unsigned long len,
2728                           unsigned long flags, int pc)
2729 {
2730         struct ring_buffer_event *entry;
2731         int val;
2732
2733         *current_rb = trace_file->tr->array_buffer.buffer;
2734
2735         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2736              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2737             (entry = this_cpu_read(trace_buffered_event))) {
2738                 /* Try to use the per cpu buffer first */
2739                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2740                 if (val == 1) {
2741                         trace_event_setup(entry, type, flags, pc);
2742                         entry->array[0] = len;
2743                         return entry;
2744                 }
2745                 this_cpu_dec(trace_buffered_event_cnt);
2746         }
2747
2748         entry = __trace_buffer_lock_reserve(*current_rb,
2749                                             type, len, flags, pc);
2750         /*
2751          * If tracing is off, but we have triggers enabled
2752          * we still need to look at the event data. Use the temp_buffer
2753          * to store the trace event for the tigger to use. It's recusive
2754          * safe and will not be recorded anywhere.
2755          */
2756         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757                 *current_rb = temp_buffer;
2758                 entry = __trace_buffer_lock_reserve(*current_rb,
2759                                                     type, len, flags, pc);
2760         }
2761         return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764
2765 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770         struct trace_event_call *event_call;
2771         struct trace_event_file *file;
2772         struct trace_event *event;
2773         unsigned long flags;
2774         struct trace_iterator *iter = tracepoint_print_iter;
2775
2776         /* We should never get here if iter is NULL */
2777         if (WARN_ON_ONCE(!iter))
2778                 return;
2779
2780         event_call = fbuffer->trace_file->event_call;
2781         if (!event_call || !event_call->event.funcs ||
2782             !event_call->event.funcs->trace)
2783                 return;
2784
2785         file = fbuffer->trace_file;
2786         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788              !filter_match_preds(file->filter, fbuffer->entry)))
2789                 return;
2790
2791         event = &fbuffer->trace_file->event_call->event;
2792
2793         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794         trace_seq_init(&iter->seq);
2795         iter->ent = fbuffer->entry;
2796         event_call->event.funcs->trace(iter, 0, event);
2797         trace_seq_putc(&iter->seq, 0);
2798         printk("%s", iter->seq.buffer);
2799
2800         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802
2803 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2804                              void *buffer, size_t *lenp,
2805                              loff_t *ppos)
2806 {
2807         int save_tracepoint_printk;
2808         int ret;
2809
2810         mutex_lock(&tracepoint_printk_mutex);
2811         save_tracepoint_printk = tracepoint_printk;
2812
2813         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814
2815         /*
2816          * This will force exiting early, as tracepoint_printk
2817          * is always zero when tracepoint_printk_iter is not allocated
2818          */
2819         if (!tracepoint_print_iter)
2820                 tracepoint_printk = 0;
2821
2822         if (save_tracepoint_printk == tracepoint_printk)
2823                 goto out;
2824
2825         if (tracepoint_printk)
2826                 static_key_enable(&tracepoint_printk_key.key);
2827         else
2828                 static_key_disable(&tracepoint_printk_key.key);
2829
2830  out:
2831         mutex_unlock(&tracepoint_printk_mutex);
2832
2833         return ret;
2834 }
2835
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838         if (static_key_false(&tracepoint_printk_key.key))
2839                 output_printk(fbuffer);
2840
2841         if (static_branch_unlikely(&trace_event_exports_enabled))
2842                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2843         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2844                                     fbuffer->event, fbuffer->entry,
2845                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2846 }
2847 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2848
2849 /*
2850  * Skip 3:
2851  *
2852  *   trace_buffer_unlock_commit_regs()
2853  *   trace_event_buffer_commit()
2854  *   trace_event_raw_event_xxx()
2855  */
2856 # define STACK_SKIP 3
2857
2858 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2859                                      struct trace_buffer *buffer,
2860                                      struct ring_buffer_event *event,
2861                                      unsigned long flags, int pc,
2862                                      struct pt_regs *regs)
2863 {
2864         __buffer_unlock_commit(buffer, event);
2865
2866         /*
2867          * If regs is not set, then skip the necessary functions.
2868          * Note, we can still get here via blktrace, wakeup tracer
2869          * and mmiotrace, but that's ok if they lose a function or
2870          * two. They are not that meaningful.
2871          */
2872         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2873         ftrace_trace_userstack(buffer, flags, pc);
2874 }
2875
2876 /*
2877  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2878  */
2879 void
2880 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2881                                    struct ring_buffer_event *event)
2882 {
2883         __buffer_unlock_commit(buffer, event);
2884 }
2885
2886 void
2887 trace_function(struct trace_array *tr,
2888                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2889                int pc)
2890 {
2891         struct trace_event_call *call = &event_function;
2892         struct trace_buffer *buffer = tr->array_buffer.buffer;
2893         struct ring_buffer_event *event;
2894         struct ftrace_entry *entry;
2895
2896         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2897                                             flags, pc);
2898         if (!event)
2899                 return;
2900         entry   = ring_buffer_event_data(event);
2901         entry->ip                       = ip;
2902         entry->parent_ip                = parent_ip;
2903
2904         if (!call_filter_check_discard(call, entry, buffer, event)) {
2905                 if (static_branch_unlikely(&trace_function_exports_enabled))
2906                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2907                 __buffer_unlock_commit(buffer, event);
2908         }
2909 }
2910
2911 #ifdef CONFIG_STACKTRACE
2912
2913 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2914 #define FTRACE_KSTACK_NESTING   4
2915
2916 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2917
2918 struct ftrace_stack {
2919         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2920 };
2921
2922
2923 struct ftrace_stacks {
2924         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2925 };
2926
2927 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2928 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2929
2930 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2931                                  unsigned long flags,
2932                                  int skip, int pc, struct pt_regs *regs)
2933 {
2934         struct trace_event_call *call = &event_kernel_stack;
2935         struct ring_buffer_event *event;
2936         unsigned int size, nr_entries;
2937         struct ftrace_stack *fstack;
2938         struct stack_entry *entry;
2939         int stackidx;
2940
2941         /*
2942          * Add one, for this function and the call to save_stack_trace()
2943          * If regs is set, then these functions will not be in the way.
2944          */
2945 #ifndef CONFIG_UNWINDER_ORC
2946         if (!regs)
2947                 skip++;
2948 #endif
2949
2950         preempt_disable_notrace();
2951
2952         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2953
2954         /* This should never happen. If it does, yell once and skip */
2955         if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2956                 goto out;
2957
2958         /*
2959          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2960          * interrupt will either see the value pre increment or post
2961          * increment. If the interrupt happens pre increment it will have
2962          * restored the counter when it returns.  We just need a barrier to
2963          * keep gcc from moving things around.
2964          */
2965         barrier();
2966
2967         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2968         size = ARRAY_SIZE(fstack->calls);
2969
2970         if (regs) {
2971                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2972                                                    size, skip);
2973         } else {
2974                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2975         }
2976
2977         size = nr_entries * sizeof(unsigned long);
2978         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2979                                             sizeof(*entry) + size, flags, pc);
2980         if (!event)
2981                 goto out;
2982         entry = ring_buffer_event_data(event);
2983
2984         memcpy(&entry->caller, fstack->calls, size);
2985         entry->size = nr_entries;
2986
2987         if (!call_filter_check_discard(call, entry, buffer, event))
2988                 __buffer_unlock_commit(buffer, event);
2989
2990  out:
2991         /* Again, don't let gcc optimize things here */
2992         barrier();
2993         __this_cpu_dec(ftrace_stack_reserve);
2994         preempt_enable_notrace();
2995
2996 }
2997
2998 static inline void ftrace_trace_stack(struct trace_array *tr,
2999                                       struct trace_buffer *buffer,
3000                                       unsigned long flags,
3001                                       int skip, int pc, struct pt_regs *regs)
3002 {
3003         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3004                 return;
3005
3006         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3007 }
3008
3009 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3010                    int pc)
3011 {
3012         struct trace_buffer *buffer = tr->array_buffer.buffer;
3013
3014         if (rcu_is_watching()) {
3015                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3016                 return;
3017         }
3018
3019         /*
3020          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3021          * but if the above rcu_is_watching() failed, then the NMI
3022          * triggered someplace critical, and rcu_irq_enter() should
3023          * not be called from NMI.
3024          */
3025         if (unlikely(in_nmi()))
3026                 return;
3027
3028         rcu_irq_enter_irqson();
3029         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3030         rcu_irq_exit_irqson();
3031 }
3032
3033 /**
3034  * trace_dump_stack - record a stack back trace in the trace buffer
3035  * @skip: Number of functions to skip (helper handlers)
3036  */
3037 void trace_dump_stack(int skip)
3038 {
3039         unsigned long flags;
3040
3041         if (tracing_disabled || tracing_selftest_running)
3042                 return;
3043
3044         local_save_flags(flags);
3045
3046 #ifndef CONFIG_UNWINDER_ORC
3047         /* Skip 1 to skip this function. */
3048         skip++;
3049 #endif
3050         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3051                              flags, skip, preempt_count(), NULL);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_dump_stack);
3054
3055 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3056 static DEFINE_PER_CPU(int, user_stack_count);
3057
3058 static void
3059 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3060 {
3061         struct trace_event_call *call = &event_user_stack;
3062         struct ring_buffer_event *event;
3063         struct userstack_entry *entry;
3064
3065         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3066                 return;
3067
3068         /*
3069          * NMIs can not handle page faults, even with fix ups.
3070          * The save user stack can (and often does) fault.
3071          */
3072         if (unlikely(in_nmi()))
3073                 return;
3074
3075         /*
3076          * prevent recursion, since the user stack tracing may
3077          * trigger other kernel events.
3078          */
3079         preempt_disable();
3080         if (__this_cpu_read(user_stack_count))
3081                 goto out;
3082
3083         __this_cpu_inc(user_stack_count);
3084
3085         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3086                                             sizeof(*entry), flags, pc);
3087         if (!event)
3088                 goto out_drop_count;
3089         entry   = ring_buffer_event_data(event);
3090
3091         entry->tgid             = current->tgid;
3092         memset(&entry->caller, 0, sizeof(entry->caller));
3093
3094         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3095         if (!call_filter_check_discard(call, entry, buffer, event))
3096                 __buffer_unlock_commit(buffer, event);
3097
3098  out_drop_count:
3099         __this_cpu_dec(user_stack_count);
3100  out:
3101         preempt_enable();
3102 }
3103 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3104 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3105                                    unsigned long flags, int pc)
3106 {
3107 }
3108 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3109
3110 #endif /* CONFIG_STACKTRACE */
3111
3112 /* created for use with alloc_percpu */
3113 struct trace_buffer_struct {
3114         int nesting;
3115         char buffer[4][TRACE_BUF_SIZE];
3116 };
3117
3118 static struct trace_buffer_struct *trace_percpu_buffer;
3119
3120 /*
3121  * Thise allows for lockless recording.  If we're nested too deeply, then
3122  * this returns NULL.
3123  */
3124 static char *get_trace_buf(void)
3125 {
3126         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3127
3128         if (!buffer || buffer->nesting >= 4)
3129                 return NULL;
3130
3131         buffer->nesting++;
3132
3133         /* Interrupts must see nesting incremented before we use the buffer */
3134         barrier();
3135         return &buffer->buffer[buffer->nesting][0];
3136 }
3137
3138 static void put_trace_buf(void)
3139 {
3140         /* Don't let the decrement of nesting leak before this */
3141         barrier();
3142         this_cpu_dec(trace_percpu_buffer->nesting);
3143 }
3144
3145 static int alloc_percpu_trace_buffer(void)
3146 {
3147         struct trace_buffer_struct *buffers;
3148
3149         if (trace_percpu_buffer)
3150                 return 0;
3151
3152         buffers = alloc_percpu(struct trace_buffer_struct);
3153         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3154                 return -ENOMEM;
3155
3156         trace_percpu_buffer = buffers;
3157         return 0;
3158 }
3159
3160 static int buffers_allocated;
3161
3162 void trace_printk_init_buffers(void)
3163 {
3164         if (buffers_allocated)
3165                 return;
3166
3167         if (alloc_percpu_trace_buffer())
3168                 return;
3169
3170         /* trace_printk() is for debug use only. Don't use it in production. */
3171
3172         pr_warn("\n");
3173         pr_warn("**********************************************************\n");
3174         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3175         pr_warn("**                                                      **\n");
3176         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3177         pr_warn("**                                                      **\n");
3178         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3179         pr_warn("** unsafe for production use.                           **\n");
3180         pr_warn("**                                                      **\n");
3181         pr_warn("** If you see this message and you are not debugging    **\n");
3182         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3183         pr_warn("**                                                      **\n");
3184         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185         pr_warn("**********************************************************\n");
3186
3187         /* Expand the buffers to set size */
3188         tracing_update_buffers();
3189
3190         buffers_allocated = 1;
3191
3192         /*
3193          * trace_printk_init_buffers() can be called by modules.
3194          * If that happens, then we need to start cmdline recording
3195          * directly here. If the global_trace.buffer is already
3196          * allocated here, then this was called by module code.
3197          */
3198         if (global_trace.array_buffer.buffer)
3199                 tracing_start_cmdline_record();
3200 }
3201 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3202
3203 void trace_printk_start_comm(void)
3204 {
3205         /* Start tracing comms if trace printk is set */
3206         if (!buffers_allocated)
3207                 return;
3208         tracing_start_cmdline_record();
3209 }
3210
3211 static void trace_printk_start_stop_comm(int enabled)
3212 {
3213         if (!buffers_allocated)
3214                 return;
3215
3216         if (enabled)
3217                 tracing_start_cmdline_record();
3218         else
3219                 tracing_stop_cmdline_record();
3220 }
3221
3222 /**
3223  * trace_vbprintk - write binary msg to tracing buffer
3224  * @ip:    The address of the caller
3225  * @fmt:   The string format to write to the buffer
3226  * @args:  Arguments for @fmt
3227  */
3228 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3229 {
3230         struct trace_event_call *call = &event_bprint;
3231         struct ring_buffer_event *event;
3232         struct trace_buffer *buffer;
3233         struct trace_array *tr = &global_trace;
3234         struct bprint_entry *entry;
3235         unsigned long flags;
3236         char *tbuffer;
3237         int len = 0, size, pc;
3238
3239         if (unlikely(tracing_selftest_running || tracing_disabled))
3240                 return 0;
3241
3242         /* Don't pollute graph traces with trace_vprintk internals */
3243         pause_graph_tracing();
3244
3245         pc = preempt_count();
3246         preempt_disable_notrace();
3247
3248         tbuffer = get_trace_buf();
3249         if (!tbuffer) {
3250                 len = 0;
3251                 goto out_nobuffer;
3252         }
3253
3254         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3255
3256         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3257                 goto out_put;
3258
3259         local_save_flags(flags);
3260         size = sizeof(*entry) + sizeof(u32) * len;
3261         buffer = tr->array_buffer.buffer;
3262         ring_buffer_nest_start(buffer);
3263         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3264                                             flags, pc);
3265         if (!event)
3266                 goto out;
3267         entry = ring_buffer_event_data(event);
3268         entry->ip                       = ip;
3269         entry->fmt                      = fmt;
3270
3271         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3272         if (!call_filter_check_discard(call, entry, buffer, event)) {
3273                 __buffer_unlock_commit(buffer, event);
3274                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3275         }
3276
3277 out:
3278         ring_buffer_nest_end(buffer);
3279 out_put:
3280         put_trace_buf();
3281
3282 out_nobuffer:
3283         preempt_enable_notrace();
3284         unpause_graph_tracing();
3285
3286         return len;
3287 }
3288 EXPORT_SYMBOL_GPL(trace_vbprintk);
3289
3290 __printf(3, 0)
3291 static int
3292 __trace_array_vprintk(struct trace_buffer *buffer,
3293                       unsigned long ip, const char *fmt, va_list args)
3294 {
3295         struct trace_event_call *call = &event_print;
3296         struct ring_buffer_event *event;
3297         int len = 0, size, pc;
3298         struct print_entry *entry;
3299         unsigned long flags;
3300         char *tbuffer;
3301
3302         if (tracing_disabled || tracing_selftest_running)
3303                 return 0;
3304
3305         /* Don't pollute graph traces with trace_vprintk internals */
3306         pause_graph_tracing();
3307
3308         pc = preempt_count();
3309         preempt_disable_notrace();
3310
3311
3312         tbuffer = get_trace_buf();
3313         if (!tbuffer) {
3314                 len = 0;
3315                 goto out_nobuffer;
3316         }
3317
3318         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3319
3320         local_save_flags(flags);
3321         size = sizeof(*entry) + len + 1;
3322         ring_buffer_nest_start(buffer);
3323         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3324                                             flags, pc);
3325         if (!event)
3326                 goto out;
3327         entry = ring_buffer_event_data(event);
3328         entry->ip = ip;
3329
3330         memcpy(&entry->buf, tbuffer, len + 1);
3331         if (!call_filter_check_discard(call, entry, buffer, event)) {
3332                 __buffer_unlock_commit(buffer, event);
3333                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3334         }
3335
3336 out:
3337         ring_buffer_nest_end(buffer);
3338         put_trace_buf();
3339
3340 out_nobuffer:
3341         preempt_enable_notrace();
3342         unpause_graph_tracing();
3343
3344         return len;
3345 }
3346
3347 __printf(3, 0)
3348 int trace_array_vprintk(struct trace_array *tr,
3349                         unsigned long ip, const char *fmt, va_list args)
3350 {
3351         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3352 }
3353
3354 /**
3355  * trace_array_printk - Print a message to a specific instance
3356  * @tr: The instance trace_array descriptor
3357  * @ip: The instruction pointer that this is called from.
3358  * @fmt: The format to print (printf format)
3359  *
3360  * If a subsystem sets up its own instance, they have the right to
3361  * printk strings into their tracing instance buffer using this
3362  * function. Note, this function will not write into the top level
3363  * buffer (use trace_printk() for that), as writing into the top level
3364  * buffer should only have events that can be individually disabled.
3365  * trace_printk() is only used for debugging a kernel, and should not
3366  * be ever encorporated in normal use.
3367  *
3368  * trace_array_printk() can be used, as it will not add noise to the
3369  * top level tracing buffer.
3370  *
3371  * Note, trace_array_init_printk() must be called on @tr before this
3372  * can be used.
3373  */
3374 __printf(3, 0)
3375 int trace_array_printk(struct trace_array *tr,
3376                        unsigned long ip, const char *fmt, ...)
3377 {
3378         int ret;
3379         va_list ap;
3380
3381         if (!tr)
3382                 return -ENOENT;
3383
3384         /* This is only allowed for created instances */
3385         if (tr == &global_trace)
3386                 return 0;
3387
3388         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3389                 return 0;
3390
3391         va_start(ap, fmt);
3392         ret = trace_array_vprintk(tr, ip, fmt, ap);
3393         va_end(ap);
3394         return ret;
3395 }
3396 EXPORT_SYMBOL_GPL(trace_array_printk);
3397
3398 /**
3399  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3400  * @tr: The trace array to initialize the buffers for
3401  *
3402  * As trace_array_printk() only writes into instances, they are OK to
3403  * have in the kernel (unlike trace_printk()). This needs to be called
3404  * before trace_array_printk() can be used on a trace_array.
3405  */
3406 int trace_array_init_printk(struct trace_array *tr)
3407 {
3408         if (!tr)
3409                 return -ENOENT;
3410
3411         /* This is only allowed for created instances */
3412         if (tr == &global_trace)
3413                 return -EINVAL;
3414
3415         return alloc_percpu_trace_buffer();
3416 }
3417 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3418
3419 __printf(3, 4)
3420 int trace_array_printk_buf(struct trace_buffer *buffer,
3421                            unsigned long ip, const char *fmt, ...)
3422 {
3423         int ret;
3424         va_list ap;
3425
3426         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3427                 return 0;
3428
3429         va_start(ap, fmt);
3430         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3431         va_end(ap);
3432         return ret;
3433 }
3434
3435 __printf(2, 0)
3436 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3437 {
3438         return trace_array_vprintk(&global_trace, ip, fmt, args);
3439 }
3440 EXPORT_SYMBOL_GPL(trace_vprintk);
3441
3442 static void trace_iterator_increment(struct trace_iterator *iter)
3443 {
3444         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3445
3446         iter->idx++;
3447         if (buf_iter)
3448                 ring_buffer_iter_advance(buf_iter);
3449 }
3450
3451 static struct trace_entry *
3452 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3453                 unsigned long *lost_events)
3454 {
3455         struct ring_buffer_event *event;
3456         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3457
3458         if (buf_iter) {
3459                 event = ring_buffer_iter_peek(buf_iter, ts);
3460                 if (lost_events)
3461                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3462                                 (unsigned long)-1 : 0;
3463         } else {
3464                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3465                                          lost_events);
3466         }
3467
3468         if (event) {
3469                 iter->ent_size = ring_buffer_event_length(event);
3470                 return ring_buffer_event_data(event);
3471         }
3472         iter->ent_size = 0;
3473         return NULL;
3474 }
3475
3476 static struct trace_entry *
3477 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3478                   unsigned long *missing_events, u64 *ent_ts)
3479 {
3480         struct trace_buffer *buffer = iter->array_buffer->buffer;
3481         struct trace_entry *ent, *next = NULL;
3482         unsigned long lost_events = 0, next_lost = 0;
3483         int cpu_file = iter->cpu_file;
3484         u64 next_ts = 0, ts;
3485         int next_cpu = -1;
3486         int next_size = 0;
3487         int cpu;
3488
3489         /*
3490          * If we are in a per_cpu trace file, don't bother by iterating over
3491          * all cpu and peek directly.
3492          */
3493         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3494                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3495                         return NULL;
3496                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3497                 if (ent_cpu)
3498                         *ent_cpu = cpu_file;
3499
3500                 return ent;
3501         }
3502
3503         for_each_tracing_cpu(cpu) {
3504
3505                 if (ring_buffer_empty_cpu(buffer, cpu))
3506                         continue;
3507
3508                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3509
3510                 /*
3511                  * Pick the entry with the smallest timestamp:
3512                  */
3513                 if (ent && (!next || ts < next_ts)) {
3514                         next = ent;
3515                         next_cpu = cpu;
3516                         next_ts = ts;
3517                         next_lost = lost_events;
3518                         next_size = iter->ent_size;
3519                 }
3520         }
3521
3522         iter->ent_size = next_size;
3523
3524         if (ent_cpu)
3525                 *ent_cpu = next_cpu;
3526
3527         if (ent_ts)
3528                 *ent_ts = next_ts;
3529
3530         if (missing_events)
3531                 *missing_events = next_lost;
3532
3533         return next;
3534 }
3535
3536 #define STATIC_TEMP_BUF_SIZE    128
3537 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3538
3539 /* Find the next real entry, without updating the iterator itself */
3540 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3541                                           int *ent_cpu, u64 *ent_ts)
3542 {
3543         /* __find_next_entry will reset ent_size */
3544         int ent_size = iter->ent_size;
3545         struct trace_entry *entry;
3546
3547         /*
3548          * If called from ftrace_dump(), then the iter->temp buffer
3549          * will be the static_temp_buf and not created from kmalloc.
3550          * If the entry size is greater than the buffer, we can
3551          * not save it. Just return NULL in that case. This is only
3552          * used to add markers when two consecutive events' time
3553          * stamps have a large delta. See trace_print_lat_context()
3554          */
3555         if (iter->temp == static_temp_buf &&
3556             STATIC_TEMP_BUF_SIZE < ent_size)
3557                 return NULL;
3558
3559         /*
3560          * The __find_next_entry() may call peek_next_entry(), which may
3561          * call ring_buffer_peek() that may make the contents of iter->ent
3562          * undefined. Need to copy iter->ent now.
3563          */
3564         if (iter->ent && iter->ent != iter->temp) {
3565                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3566                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3567                         void *temp;
3568                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3569                         if (!temp)
3570                                 return NULL;
3571                         kfree(iter->temp);
3572                         iter->temp = temp;
3573                         iter->temp_size = iter->ent_size;
3574                 }
3575                 memcpy(iter->temp, iter->ent, iter->ent_size);
3576                 iter->ent = iter->temp;
3577         }
3578         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3579         /* Put back the original ent_size */
3580         iter->ent_size = ent_size;
3581
3582         return entry;
3583 }
3584
3585 /* Find the next real entry, and increment the iterator to the next entry */
3586 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3587 {
3588         iter->ent = __find_next_entry(iter, &iter->cpu,
3589                                       &iter->lost_events, &iter->ts);
3590
3591         if (iter->ent)
3592                 trace_iterator_increment(iter);
3593
3594         return iter->ent ? iter : NULL;
3595 }
3596
3597 static void trace_consume(struct trace_iterator *iter)
3598 {
3599         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3600                             &iter->lost_events);
3601 }
3602
3603 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3604 {
3605         struct trace_iterator *iter = m->private;
3606         int i = (int)*pos;
3607         void *ent;
3608
3609         WARN_ON_ONCE(iter->leftover);
3610
3611         (*pos)++;
3612
3613         /* can't go backwards */
3614         if (iter->idx > i)
3615                 return NULL;
3616
3617         if (iter->idx < 0)
3618                 ent = trace_find_next_entry_inc(iter);
3619         else
3620                 ent = iter;
3621
3622         while (ent && iter->idx < i)
3623                 ent = trace_find_next_entry_inc(iter);
3624
3625         iter->pos = *pos;
3626
3627         return ent;
3628 }
3629
3630 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3631 {
3632         struct ring_buffer_iter *buf_iter;
3633         unsigned long entries = 0;
3634         u64 ts;
3635
3636         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3637
3638         buf_iter = trace_buffer_iter(iter, cpu);
3639         if (!buf_iter)
3640                 return;
3641
3642         ring_buffer_iter_reset(buf_iter);
3643
3644         /*
3645          * We could have the case with the max latency tracers
3646          * that a reset never took place on a cpu. This is evident
3647          * by the timestamp being before the start of the buffer.
3648          */
3649         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3650                 if (ts >= iter->array_buffer->time_start)
3651                         break;
3652                 entries++;
3653                 ring_buffer_iter_advance(buf_iter);
3654         }
3655
3656         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3657 }
3658
3659 /*
3660  * The current tracer is copied to avoid a global locking
3661  * all around.
3662  */
3663 static void *s_start(struct seq_file *m, loff_t *pos)
3664 {
3665         struct trace_iterator *iter = m->private;
3666         struct trace_array *tr = iter->tr;
3667         int cpu_file = iter->cpu_file;
3668         void *p = NULL;
3669         loff_t l = 0;
3670         int cpu;
3671
3672         /*
3673          * copy the tracer to avoid using a global lock all around.
3674          * iter->trace is a copy of current_trace, the pointer to the
3675          * name may be used instead of a strcmp(), as iter->trace->name
3676          * will point to the same string as current_trace->name.
3677          */
3678         mutex_lock(&trace_types_lock);
3679         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3680                 *iter->trace = *tr->current_trace;
3681         mutex_unlock(&trace_types_lock);
3682
3683 #ifdef CONFIG_TRACER_MAX_TRACE
3684         if (iter->snapshot && iter->trace->use_max_tr)
3685                 return ERR_PTR(-EBUSY);
3686 #endif
3687
3688         if (!iter->snapshot)
3689                 atomic_inc(&trace_record_taskinfo_disabled);
3690
3691         if (*pos != iter->pos) {
3692                 iter->ent = NULL;
3693                 iter->cpu = 0;
3694                 iter->idx = -1;
3695
3696                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3697                         for_each_tracing_cpu(cpu)
3698                                 tracing_iter_reset(iter, cpu);
3699                 } else
3700                         tracing_iter_reset(iter, cpu_file);
3701
3702                 iter->leftover = 0;
3703                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3704                         ;
3705
3706         } else {
3707                 /*
3708                  * If we overflowed the seq_file before, then we want
3709                  * to just reuse the trace_seq buffer again.
3710                  */
3711                 if (iter->leftover)
3712                         p = iter;
3713                 else {
3714                         l = *pos - 1;
3715                         p = s_next(m, p, &l);
3716                 }
3717         }
3718
3719         trace_event_read_lock();
3720         trace_access_lock(cpu_file);
3721         return p;
3722 }
3723
3724 static void s_stop(struct seq_file *m, void *p)
3725 {
3726         struct trace_iterator *iter = m->private;
3727
3728 #ifdef CONFIG_TRACER_MAX_TRACE
3729         if (iter->snapshot && iter->trace->use_max_tr)
3730                 return;
3731 #endif
3732
3733         if (!iter->snapshot)
3734                 atomic_dec(&trace_record_taskinfo_disabled);
3735
3736         trace_access_unlock(iter->cpu_file);
3737         trace_event_read_unlock();
3738 }
3739
3740 static void
3741 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3742                       unsigned long *entries, int cpu)
3743 {
3744         unsigned long count;
3745
3746         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3747         /*
3748          * If this buffer has skipped entries, then we hold all
3749          * entries for the trace and we need to ignore the
3750          * ones before the time stamp.
3751          */
3752         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3753                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3754                 /* total is the same as the entries */
3755                 *total = count;
3756         } else
3757                 *total = count +
3758                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3759         *entries = count;
3760 }
3761
3762 static void
3763 get_total_entries(struct array_buffer *buf,
3764                   unsigned long *total, unsigned long *entries)
3765 {
3766         unsigned long t, e;
3767         int cpu;
3768
3769         *total = 0;
3770         *entries = 0;
3771
3772         for_each_tracing_cpu(cpu) {
3773                 get_total_entries_cpu(buf, &t, &e, cpu);
3774                 *total += t;
3775                 *entries += e;
3776         }
3777 }
3778
3779 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3780 {
3781         unsigned long total, entries;
3782
3783         if (!tr)
3784                 tr = &global_trace;
3785
3786         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3787
3788         return entries;
3789 }
3790
3791 unsigned long trace_total_entries(struct trace_array *tr)
3792 {
3793         unsigned long total, entries;
3794
3795         if (!tr)
3796                 tr = &global_trace;
3797
3798         get_total_entries(&tr->array_buffer, &total, &entries);
3799
3800         return entries;
3801 }
3802
3803 static void print_lat_help_header(struct seq_file *m)
3804 {
3805         seq_puts(m, "#                    _------=> CPU#            \n"
3806                     "#                   / _-----=> irqs-off        \n"
3807                     "#                  | / _----=> need-resched    \n"
3808                     "#                  || / _---=> hardirq/softirq \n"
3809                     "#                  ||| / _--=> preempt-depth   \n"
3810                     "#                  |||| /     delay            \n"
3811                     "#  cmd     pid     ||||| time  |   caller      \n"
3812                     "#     \\   /        |||||  \\    |   /         \n");
3813 }
3814
3815 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3816 {
3817         unsigned long total;
3818         unsigned long entries;
3819
3820         get_total_entries(buf, &total, &entries);
3821         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3822                    entries, total, num_online_cpus());
3823         seq_puts(m, "#\n");
3824 }
3825
3826 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3827                                    unsigned int flags)
3828 {
3829         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3830
3831         print_event_info(buf, m);
3832
3833         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3834         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3835 }
3836
3837 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3838                                        unsigned int flags)
3839 {
3840         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3841         const char *space = "            ";
3842         int prec = tgid ? 12 : 2;
3843
3844         print_event_info(buf, m);
3845
3846         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3847         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3848         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3849         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3850         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3851         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3852         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3853 }
3854
3855 void
3856 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3857 {
3858         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3859         struct array_buffer *buf = iter->array_buffer;
3860         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3861         struct tracer *type = iter->trace;
3862         unsigned long entries;
3863         unsigned long total;
3864         const char *name = "preemption";
3865
3866         name = type->name;
3867
3868         get_total_entries(buf, &total, &entries);
3869
3870         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3871                    name, UTS_RELEASE);
3872         seq_puts(m, "# -----------------------------------"
3873                  "---------------------------------\n");
3874         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3875                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3876                    nsecs_to_usecs(data->saved_latency),
3877                    entries,
3878                    total,
3879                    buf->cpu,
3880 #if defined(CONFIG_PREEMPT_NONE)
3881                    "server",
3882 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3883                    "desktop",
3884 #elif defined(CONFIG_PREEMPT)
3885                    "preempt",
3886 #elif defined(CONFIG_PREEMPT_RT)
3887                    "preempt_rt",
3888 #else
3889                    "unknown",
3890 #endif
3891                    /* These are reserved for later use */
3892                    0, 0, 0, 0);
3893 #ifdef CONFIG_SMP
3894         seq_printf(m, " #P:%d)\n", num_online_cpus());
3895 #else
3896         seq_puts(m, ")\n");
3897 #endif
3898         seq_puts(m, "#    -----------------\n");
3899         seq_printf(m, "#    | task: %.16s-%d "
3900                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3901                    data->comm, data->pid,
3902                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3903                    data->policy, data->rt_priority);
3904         seq_puts(m, "#    -----------------\n");
3905
3906         if (data->critical_start) {
3907                 seq_puts(m, "#  => started at: ");
3908                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3909                 trace_print_seq(m, &iter->seq);
3910                 seq_puts(m, "\n#  => ended at:   ");
3911                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3912                 trace_print_seq(m, &iter->seq);
3913                 seq_puts(m, "\n#\n");
3914         }
3915
3916         seq_puts(m, "#\n");
3917 }
3918
3919 static void test_cpu_buff_start(struct trace_iterator *iter)
3920 {
3921         struct trace_seq *s = &iter->seq;
3922         struct trace_array *tr = iter->tr;
3923
3924         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3925                 return;
3926
3927         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3928                 return;
3929
3930         if (cpumask_available(iter->started) &&
3931             cpumask_test_cpu(iter->cpu, iter->started))
3932                 return;
3933
3934         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3935                 return;
3936
3937         if (cpumask_available(iter->started))
3938                 cpumask_set_cpu(iter->cpu, iter->started);
3939
3940         /* Don't print started cpu buffer for the first entry of the trace */
3941         if (iter->idx > 1)
3942                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3943                                 iter->cpu);
3944 }
3945
3946 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3947 {
3948         struct trace_array *tr = iter->tr;
3949         struct trace_seq *s = &iter->seq;
3950         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3951         struct trace_entry *entry;
3952         struct trace_event *event;
3953
3954         entry = iter->ent;
3955
3956         test_cpu_buff_start(iter);
3957
3958         event = ftrace_find_event(entry->type);
3959
3960         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3961                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3962                         trace_print_lat_context(iter);
3963                 else
3964                         trace_print_context(iter);
3965         }
3966
3967         if (trace_seq_has_overflowed(s))
3968                 return TRACE_TYPE_PARTIAL_LINE;
3969
3970         if (event)
3971                 return event->funcs->trace(iter, sym_flags, event);
3972
3973         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3974
3975         return trace_handle_return(s);
3976 }
3977
3978 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3979 {
3980         struct trace_array *tr = iter->tr;
3981         struct trace_seq *s = &iter->seq;
3982         struct trace_entry *entry;
3983         struct trace_event *event;
3984
3985         entry = iter->ent;
3986
3987         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3988                 trace_seq_printf(s, "%d %d %llu ",
3989                                  entry->pid, iter->cpu, iter->ts);
3990
3991         if (trace_seq_has_overflowed(s))
3992                 return TRACE_TYPE_PARTIAL_LINE;
3993
3994         event = ftrace_find_event(entry->type);
3995         if (event)
3996                 return event->funcs->raw(iter, 0, event);
3997
3998         trace_seq_printf(s, "%d ?\n", entry->type);
3999
4000         return trace_handle_return(s);
4001 }
4002
4003 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4004 {
4005         struct trace_array *tr = iter->tr;
4006         struct trace_seq *s = &iter->seq;
4007         unsigned char newline = '\n';
4008         struct trace_entry *entry;
4009         struct trace_event *event;
4010
4011         entry = iter->ent;
4012
4013         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4014                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4015                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4016                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4017                 if (trace_seq_has_overflowed(s))
4018                         return TRACE_TYPE_PARTIAL_LINE;
4019         }
4020
4021         event = ftrace_find_event(entry->type);
4022         if (event) {
4023                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4024                 if (ret != TRACE_TYPE_HANDLED)
4025                         return ret;
4026         }
4027
4028         SEQ_PUT_FIELD(s, newline);
4029
4030         return trace_handle_return(s);
4031 }
4032
4033 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4034 {
4035         struct trace_array *tr = iter->tr;
4036         struct trace_seq *s = &iter->seq;
4037         struct trace_entry *entry;
4038         struct trace_event *event;
4039
4040         entry = iter->ent;
4041
4042         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4043                 SEQ_PUT_FIELD(s, entry->pid);
4044                 SEQ_PUT_FIELD(s, iter->cpu);
4045                 SEQ_PUT_FIELD(s, iter->ts);
4046                 if (trace_seq_has_overflowed(s))
4047                         return TRACE_TYPE_PARTIAL_LINE;
4048         }
4049
4050         event = ftrace_find_event(entry->type);
4051         return event ? event->funcs->binary(iter, 0, event) :
4052                 TRACE_TYPE_HANDLED;
4053 }
4054
4055 int trace_empty(struct trace_iterator *iter)
4056 {
4057         struct ring_buffer_iter *buf_iter;
4058         int cpu;
4059
4060         /* If we are looking at one CPU buffer, only check that one */
4061         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4062                 cpu = iter->cpu_file;
4063                 buf_iter = trace_buffer_iter(iter, cpu);
4064                 if (buf_iter) {
4065                         if (!ring_buffer_iter_empty(buf_iter))
4066                                 return 0;
4067                 } else {
4068                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4069                                 return 0;
4070                 }
4071                 return 1;
4072         }
4073
4074         for_each_tracing_cpu(cpu) {
4075                 buf_iter = trace_buffer_iter(iter, cpu);
4076                 if (buf_iter) {
4077                         if (!ring_buffer_iter_empty(buf_iter))
4078                                 return 0;
4079                 } else {
4080                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4081                                 return 0;
4082                 }
4083         }
4084
4085         return 1;
4086 }
4087
4088 /*  Called with trace_event_read_lock() held. */
4089 enum print_line_t print_trace_line(struct trace_iterator *iter)
4090 {
4091         struct trace_array *tr = iter->tr;
4092         unsigned long trace_flags = tr->trace_flags;
4093         enum print_line_t ret;
4094
4095         if (iter->lost_events) {
4096                 if (iter->lost_events == (unsigned long)-1)
4097                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4098                                          iter->cpu);
4099                 else
4100                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4101                                          iter->cpu, iter->lost_events);
4102                 if (trace_seq_has_overflowed(&iter->seq))
4103                         return TRACE_TYPE_PARTIAL_LINE;
4104         }
4105
4106         if (iter->trace && iter->trace->print_line) {
4107                 ret = iter->trace->print_line(iter);
4108                 if (ret != TRACE_TYPE_UNHANDLED)
4109                         return ret;
4110         }
4111
4112         if (iter->ent->type == TRACE_BPUTS &&
4113                         trace_flags & TRACE_ITER_PRINTK &&
4114                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4115                 return trace_print_bputs_msg_only(iter);
4116
4117         if (iter->ent->type == TRACE_BPRINT &&
4118                         trace_flags & TRACE_ITER_PRINTK &&
4119                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4120                 return trace_print_bprintk_msg_only(iter);
4121
4122         if (iter->ent->type == TRACE_PRINT &&
4123                         trace_flags & TRACE_ITER_PRINTK &&
4124                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4125                 return trace_print_printk_msg_only(iter);
4126
4127         if (trace_flags & TRACE_ITER_BIN)
4128                 return print_bin_fmt(iter);
4129
4130         if (trace_flags & TRACE_ITER_HEX)
4131                 return print_hex_fmt(iter);
4132
4133         if (trace_flags & TRACE_ITER_RAW)
4134                 return print_raw_fmt(iter);
4135
4136         return print_trace_fmt(iter);
4137 }
4138
4139 void trace_latency_header(struct seq_file *m)
4140 {
4141         struct trace_iterator *iter = m->private;
4142         struct trace_array *tr = iter->tr;
4143
4144         /* print nothing if the buffers are empty */
4145         if (trace_empty(iter))
4146                 return;
4147
4148         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4149                 print_trace_header(m, iter);
4150
4151         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4152                 print_lat_help_header(m);
4153 }
4154
4155 void trace_default_header(struct seq_file *m)
4156 {
4157         struct trace_iterator *iter = m->private;
4158         struct trace_array *tr = iter->tr;
4159         unsigned long trace_flags = tr->trace_flags;
4160
4161         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4162                 return;
4163
4164         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4165                 /* print nothing if the buffers are empty */
4166                 if (trace_empty(iter))
4167                         return;
4168                 print_trace_header(m, iter);
4169                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4170                         print_lat_help_header(m);
4171         } else {
4172                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4173                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4174                                 print_func_help_header_irq(iter->array_buffer,
4175                                                            m, trace_flags);
4176                         else
4177                                 print_func_help_header(iter->array_buffer, m,
4178                                                        trace_flags);
4179                 }
4180         }
4181 }
4182
4183 static void test_ftrace_alive(struct seq_file *m)
4184 {
4185         if (!ftrace_is_dead())
4186                 return;
4187         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4188                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4189 }
4190
4191 #ifdef CONFIG_TRACER_MAX_TRACE
4192 static void show_snapshot_main_help(struct seq_file *m)
4193 {
4194         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4195                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4196                     "#                      Takes a snapshot of the main buffer.\n"
4197                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4198                     "#                      (Doesn't have to be '2' works with any number that\n"
4199                     "#                       is not a '0' or '1')\n");
4200 }
4201
4202 static void show_snapshot_percpu_help(struct seq_file *m)
4203 {
4204         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4205 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4206         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4207                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4208 #else
4209         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4210                     "#                     Must use main snapshot file to allocate.\n");
4211 #endif
4212         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4213                     "#                      (Doesn't have to be '2' works with any number that\n"
4214                     "#                       is not a '0' or '1')\n");
4215 }
4216
4217 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4218 {
4219         if (iter->tr->allocated_snapshot)
4220                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4221         else
4222                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4223
4224         seq_puts(m, "# Snapshot commands:\n");
4225         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4226                 show_snapshot_main_help(m);
4227         else
4228                 show_snapshot_percpu_help(m);
4229 }
4230 #else
4231 /* Should never be called */
4232 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4233 #endif
4234
4235 static int s_show(struct seq_file *m, void *v)
4236 {
4237         struct trace_iterator *iter = v;
4238         int ret;
4239
4240         if (iter->ent == NULL) {
4241                 if (iter->tr) {
4242                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4243                         seq_puts(m, "#\n");
4244                         test_ftrace_alive(m);
4245                 }
4246                 if (iter->snapshot && trace_empty(iter))
4247                         print_snapshot_help(m, iter);
4248                 else if (iter->trace && iter->trace->print_header)
4249                         iter->trace->print_header(m);
4250                 else
4251                         trace_default_header(m);
4252
4253         } else if (iter->leftover) {
4254                 /*
4255                  * If we filled the seq_file buffer earlier, we
4256                  * want to just show it now.
4257                  */
4258                 ret = trace_print_seq(m, &iter->seq);
4259
4260                 /* ret should this time be zero, but you never know */
4261                 iter->leftover = ret;
4262
4263         } else {
4264                 print_trace_line(iter);
4265                 ret = trace_print_seq(m, &iter->seq);
4266                 /*
4267                  * If we overflow the seq_file buffer, then it will
4268                  * ask us for this data again at start up.
4269                  * Use that instead.
4270                  *  ret is 0 if seq_file write succeeded.
4271                  *        -1 otherwise.
4272                  */
4273                 iter->leftover = ret;
4274         }
4275
4276         return 0;
4277 }
4278
4279 /*
4280  * Should be used after trace_array_get(), trace_types_lock
4281  * ensures that i_cdev was already initialized.
4282  */
4283 static inline int tracing_get_cpu(struct inode *inode)
4284 {
4285         if (inode->i_cdev) /* See trace_create_cpu_file() */
4286                 return (long)inode->i_cdev - 1;
4287         return RING_BUFFER_ALL_CPUS;
4288 }
4289
4290 static const struct seq_operations tracer_seq_ops = {
4291         .start          = s_start,
4292         .next           = s_next,
4293         .stop           = s_stop,
4294         .show           = s_show,
4295 };
4296
4297 static struct trace_iterator *
4298 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4299 {
4300         struct trace_array *tr = inode->i_private;
4301         struct trace_iterator *iter;
4302         int cpu;
4303
4304         if (tracing_disabled)
4305                 return ERR_PTR(-ENODEV);
4306
4307         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4308         if (!iter)
4309                 return ERR_PTR(-ENOMEM);
4310
4311         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4312                                     GFP_KERNEL);
4313         if (!iter->buffer_iter)
4314                 goto release;
4315
4316         /*
4317          * trace_find_next_entry() may need to save off iter->ent.
4318          * It will place it into the iter->temp buffer. As most
4319          * events are less than 128, allocate a buffer of that size.
4320          * If one is greater, then trace_find_next_entry() will
4321          * allocate a new buffer to adjust for the bigger iter->ent.
4322          * It's not critical if it fails to get allocated here.
4323          */
4324         iter->temp = kmalloc(128, GFP_KERNEL);
4325         if (iter->temp)
4326                 iter->temp_size = 128;
4327
4328         /*
4329          * We make a copy of the current tracer to avoid concurrent
4330          * changes on it while we are reading.
4331          */
4332         mutex_lock(&trace_types_lock);
4333         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4334         if (!iter->trace)
4335                 goto fail;
4336
4337         *iter->trace = *tr->current_trace;
4338
4339         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4340                 goto fail;
4341
4342         iter->tr = tr;
4343
4344 #ifdef CONFIG_TRACER_MAX_TRACE
4345         /* Currently only the top directory has a snapshot */
4346         if (tr->current_trace->print_max || snapshot)
4347                 iter->array_buffer = &tr->max_buffer;
4348         else
4349 #endif
4350                 iter->array_buffer = &tr->array_buffer;
4351         iter->snapshot = snapshot;
4352         iter->pos = -1;
4353         iter->cpu_file = tracing_get_cpu(inode);
4354         mutex_init(&iter->mutex);
4355
4356         /* Notify the tracer early; before we stop tracing. */
4357         if (iter->trace->open)
4358                 iter->trace->open(iter);
4359
4360         /* Annotate start of buffers if we had overruns */
4361         if (ring_buffer_overruns(iter->array_buffer->buffer))
4362                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4363
4364         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4365         if (trace_clocks[tr->clock_id].in_ns)
4366                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4367
4368         /*
4369          * If pause-on-trace is enabled, then stop the trace while
4370          * dumping, unless this is the "snapshot" file
4371          */
4372         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4373                 tracing_stop_tr(tr);
4374
4375         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4376                 for_each_tracing_cpu(cpu) {
4377                         iter->buffer_iter[cpu] =
4378                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4379                                                          cpu, GFP_KERNEL);
4380                 }
4381                 ring_buffer_read_prepare_sync();
4382                 for_each_tracing_cpu(cpu) {
4383                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4384                         tracing_iter_reset(iter, cpu);
4385                 }
4386         } else {
4387                 cpu = iter->cpu_file;
4388                 iter->buffer_iter[cpu] =
4389                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4390                                                  cpu, GFP_KERNEL);
4391                 ring_buffer_read_prepare_sync();
4392                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4393                 tracing_iter_reset(iter, cpu);
4394         }
4395
4396         mutex_unlock(&trace_types_lock);
4397
4398         return iter;
4399
4400  fail:
4401         mutex_unlock(&trace_types_lock);
4402         kfree(iter->trace);
4403         kfree(iter->temp);
4404         kfree(iter->buffer_iter);
4405 release:
4406         seq_release_private(inode, file);
4407         return ERR_PTR(-ENOMEM);
4408 }
4409
4410 int tracing_open_generic(struct inode *inode, struct file *filp)
4411 {
4412         int ret;
4413
4414         ret = tracing_check_open_get_tr(NULL);
4415         if (ret)
4416                 return ret;
4417
4418         filp->private_data = inode->i_private;
4419         return 0;
4420 }
4421
4422 bool tracing_is_disabled(void)
4423 {
4424         return (tracing_disabled) ? true: false;
4425 }
4426
4427 /*
4428  * Open and update trace_array ref count.
4429  * Must have the current trace_array passed to it.
4430  */
4431 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4432 {
4433         struct trace_array *tr = inode->i_private;
4434         int ret;
4435
4436         ret = tracing_check_open_get_tr(tr);
4437         if (ret)
4438                 return ret;
4439
4440         filp->private_data = inode->i_private;
4441
4442         return 0;
4443 }
4444
4445 static int tracing_release(struct inode *inode, struct file *file)
4446 {
4447         struct trace_array *tr = inode->i_private;
4448         struct seq_file *m = file->private_data;
4449         struct trace_iterator *iter;
4450         int cpu;
4451
4452         if (!(file->f_mode & FMODE_READ)) {
4453                 trace_array_put(tr);
4454                 return 0;
4455         }
4456
4457         /* Writes do not use seq_file */
4458         iter = m->private;
4459         mutex_lock(&trace_types_lock);
4460
4461         for_each_tracing_cpu(cpu) {
4462                 if (iter->buffer_iter[cpu])
4463                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4464         }
4465
4466         if (iter->trace && iter->trace->close)
4467                 iter->trace->close(iter);
4468
4469         if (!iter->snapshot && tr->stop_count)
4470                 /* reenable tracing if it was previously enabled */
4471                 tracing_start_tr(tr);
4472
4473         __trace_array_put(tr);
4474
4475         mutex_unlock(&trace_types_lock);
4476
4477         mutex_destroy(&iter->mutex);
4478         free_cpumask_var(iter->started);
4479         kfree(iter->temp);
4480         kfree(iter->trace);
4481         kfree(iter->buffer_iter);
4482         seq_release_private(inode, file);
4483
4484         return 0;
4485 }
4486
4487 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4488 {
4489         struct trace_array *tr = inode->i_private;
4490
4491         trace_array_put(tr);
4492         return 0;
4493 }
4494
4495 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4496 {
4497         struct trace_array *tr = inode->i_private;
4498
4499         trace_array_put(tr);
4500
4501         return single_release(inode, file);
4502 }
4503
4504 static int tracing_open(struct inode *inode, struct file *file)
4505 {
4506         struct trace_array *tr = inode->i_private;
4507         struct trace_iterator *iter;
4508         int ret;
4509
4510         ret = tracing_check_open_get_tr(tr);
4511         if (ret)
4512                 return ret;
4513
4514         /* If this file was open for write, then erase contents */
4515         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4516                 int cpu = tracing_get_cpu(inode);
4517                 struct array_buffer *trace_buf = &tr->array_buffer;
4518
4519 #ifdef CONFIG_TRACER_MAX_TRACE
4520                 if (tr->current_trace->print_max)
4521                         trace_buf = &tr->max_buffer;
4522 #endif
4523
4524                 if (cpu == RING_BUFFER_ALL_CPUS)
4525                         tracing_reset_online_cpus(trace_buf);
4526                 else
4527                         tracing_reset_cpu(trace_buf, cpu);
4528         }
4529
4530         if (file->f_mode & FMODE_READ) {
4531                 iter = __tracing_open(inode, file, false);
4532                 if (IS_ERR(iter))
4533                         ret = PTR_ERR(iter);
4534                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4535                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4536         }
4537
4538         if (ret < 0)
4539                 trace_array_put(tr);
4540
4541         return ret;
4542 }
4543
4544 /*
4545  * Some tracers are not suitable for instance buffers.
4546  * A tracer is always available for the global array (toplevel)
4547  * or if it explicitly states that it is.
4548  */
4549 static bool
4550 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4551 {
4552         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4553 }
4554
4555 /* Find the next tracer that this trace array may use */
4556 static struct tracer *
4557 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4558 {
4559         while (t && !trace_ok_for_array(t, tr))
4560                 t = t->next;
4561
4562         return t;
4563 }
4564
4565 static void *
4566 t_next(struct seq_file *m, void *v, loff_t *pos)
4567 {
4568         struct trace_array *tr = m->private;
4569         struct tracer *t = v;
4570
4571         (*pos)++;
4572
4573         if (t)
4574                 t = get_tracer_for_array(tr, t->next);
4575
4576         return t;
4577 }
4578
4579 static void *t_start(struct seq_file *m, loff_t *pos)
4580 {
4581         struct trace_array *tr = m->private;
4582         struct tracer *t;
4583         loff_t l = 0;
4584
4585         mutex_lock(&trace_types_lock);
4586
4587         t = get_tracer_for_array(tr, trace_types);
4588         for (; t && l < *pos; t = t_next(m, t, &l))
4589                         ;
4590
4591         return t;
4592 }
4593
4594 static void t_stop(struct seq_file *m, void *p)
4595 {
4596         mutex_unlock(&trace_types_lock);
4597 }
4598
4599 static int t_show(struct seq_file *m, void *v)
4600 {
4601         struct tracer *t = v;
4602
4603         if (!t)
4604                 return 0;
4605
4606         seq_puts(m, t->name);
4607         if (t->next)
4608                 seq_putc(m, ' ');
4609         else
4610                 seq_putc(m, '\n');
4611
4612         return 0;
4613 }
4614
4615 static const struct seq_operations show_traces_seq_ops = {
4616         .start          = t_start,
4617         .next           = t_next,
4618         .stop           = t_stop,
4619         .show           = t_show,
4620 };
4621
4622 static int show_traces_open(struct inode *inode, struct file *file)
4623 {
4624         struct trace_array *tr = inode->i_private;
4625         struct seq_file *m;
4626         int ret;
4627
4628         ret = tracing_check_open_get_tr(tr);
4629         if (ret)
4630                 return ret;
4631
4632         ret = seq_open(file, &show_traces_seq_ops);
4633         if (ret) {
4634                 trace_array_put(tr);
4635                 return ret;
4636         }
4637
4638         m = file->private_data;
4639         m->private = tr;
4640
4641         return 0;
4642 }
4643
4644 static int show_traces_release(struct inode *inode, struct file *file)
4645 {
4646         struct trace_array *tr = inode->i_private;
4647
4648         trace_array_put(tr);
4649         return seq_release(inode, file);
4650 }
4651
4652 static ssize_t
4653 tracing_write_stub(struct file *filp, const char __user *ubuf,
4654                    size_t count, loff_t *ppos)
4655 {
4656         return count;
4657 }
4658
4659 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4660 {
4661         int ret;
4662
4663         if (file->f_mode & FMODE_READ)
4664                 ret = seq_lseek(file, offset, whence);
4665         else
4666                 file->f_pos = ret = 0;
4667
4668         return ret;
4669 }
4670
4671 static const struct file_operations tracing_fops = {
4672         .open           = tracing_open,
4673         .read           = seq_read,
4674         .write          = tracing_write_stub,
4675         .llseek         = tracing_lseek,
4676         .release        = tracing_release,
4677 };
4678
4679 static const struct file_operations show_traces_fops = {
4680         .open           = show_traces_open,
4681         .read           = seq_read,
4682         .llseek         = seq_lseek,
4683         .release        = show_traces_release,
4684 };
4685
4686 static ssize_t
4687 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4688                      size_t count, loff_t *ppos)
4689 {
4690         struct trace_array *tr = file_inode(filp)->i_private;
4691         char *mask_str;
4692         int len;
4693
4694         len = snprintf(NULL, 0, "%*pb\n",
4695                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4696         mask_str = kmalloc(len, GFP_KERNEL);
4697         if (!mask_str)
4698                 return -ENOMEM;
4699
4700         len = snprintf(mask_str, len, "%*pb\n",
4701                        cpumask_pr_args(tr->tracing_cpumask));
4702         if (len >= count) {
4703                 count = -EINVAL;
4704                 goto out_err;
4705         }
4706         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4707
4708 out_err:
4709         kfree(mask_str);
4710
4711         return count;
4712 }
4713
4714 int tracing_set_cpumask(struct trace_array *tr,
4715                         cpumask_var_t tracing_cpumask_new)
4716 {
4717         int cpu;
4718
4719         if (!tr)
4720                 return -EINVAL;
4721
4722         local_irq_disable();
4723         arch_spin_lock(&tr->max_lock);
4724         for_each_tracing_cpu(cpu) {
4725                 /*
4726                  * Increase/decrease the disabled counter if we are
4727                  * about to flip a bit in the cpumask:
4728                  */
4729                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4730                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4731                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4732                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4733                 }
4734                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4735                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4736                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4737                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4738                 }
4739         }
4740         arch_spin_unlock(&tr->max_lock);
4741         local_irq_enable();
4742
4743         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4744
4745         return 0;
4746 }
4747
4748 static ssize_t
4749 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4750                       size_t count, loff_t *ppos)
4751 {
4752         struct trace_array *tr = file_inode(filp)->i_private;
4753         cpumask_var_t tracing_cpumask_new;
4754         int err;
4755
4756         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4757                 return -ENOMEM;
4758
4759         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4760         if (err)
4761                 goto err_free;
4762
4763         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4764         if (err)
4765                 goto err_free;
4766
4767         free_cpumask_var(tracing_cpumask_new);
4768
4769         return count;
4770
4771 err_free:
4772         free_cpumask_var(tracing_cpumask_new);
4773
4774         return err;
4775 }
4776
4777 static const struct file_operations tracing_cpumask_fops = {
4778         .open           = tracing_open_generic_tr,
4779         .read           = tracing_cpumask_read,
4780         .write          = tracing_cpumask_write,
4781         .release        = tracing_release_generic_tr,
4782         .llseek         = generic_file_llseek,
4783 };
4784
4785 static int tracing_trace_options_show(struct seq_file *m, void *v)
4786 {
4787         struct tracer_opt *trace_opts;
4788         struct trace_array *tr = m->private;
4789         u32 tracer_flags;
4790         int i;
4791
4792         mutex_lock(&trace_types_lock);
4793         tracer_flags = tr->current_trace->flags->val;
4794         trace_opts = tr->current_trace->flags->opts;
4795
4796         for (i = 0; trace_options[i]; i++) {
4797                 if (tr->trace_flags & (1 << i))
4798                         seq_printf(m, "%s\n", trace_options[i]);
4799                 else
4800                         seq_printf(m, "no%s\n", trace_options[i]);
4801         }
4802
4803         for (i = 0; trace_opts[i].name; i++) {
4804                 if (tracer_flags & trace_opts[i].bit)
4805                         seq_printf(m, "%s\n", trace_opts[i].name);
4806                 else
4807                         seq_printf(m, "no%s\n", trace_opts[i].name);
4808         }
4809         mutex_unlock(&trace_types_lock);
4810
4811         return 0;
4812 }
4813
4814 static int __set_tracer_option(struct trace_array *tr,
4815                                struct tracer_flags *tracer_flags,
4816                                struct tracer_opt *opts, int neg)
4817 {
4818         struct tracer *trace = tracer_flags->trace;
4819         int ret;
4820
4821         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4822         if (ret)
4823                 return ret;
4824
4825         if (neg)
4826                 tracer_flags->val &= ~opts->bit;
4827         else
4828                 tracer_flags->val |= opts->bit;
4829         return 0;
4830 }
4831
4832 /* Try to assign a tracer specific option */
4833 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4834 {
4835         struct tracer *trace = tr->current_trace;
4836         struct tracer_flags *tracer_flags = trace->flags;
4837         struct tracer_opt *opts = NULL;
4838         int i;
4839
4840         for (i = 0; tracer_flags->opts[i].name; i++) {
4841                 opts = &tracer_flags->opts[i];
4842
4843                 if (strcmp(cmp, opts->name) == 0)
4844                         return __set_tracer_option(tr, trace->flags, opts, neg);
4845         }
4846
4847         return -EINVAL;
4848 }
4849
4850 /* Some tracers require overwrite to stay enabled */
4851 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4852 {
4853         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4854                 return -1;
4855
4856         return 0;
4857 }
4858
4859 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4860 {
4861         if ((mask == TRACE_ITER_RECORD_TGID) ||
4862             (mask == TRACE_ITER_RECORD_CMD))
4863                 lockdep_assert_held(&event_mutex);
4864
4865         /* do nothing if flag is already set */
4866         if (!!(tr->trace_flags & mask) == !!enabled)
4867                 return 0;
4868
4869         /* Give the tracer a chance to approve the change */
4870         if (tr->current_trace->flag_changed)
4871                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4872                         return -EINVAL;
4873
4874         if (enabled)
4875                 tr->trace_flags |= mask;
4876         else
4877                 tr->trace_flags &= ~mask;
4878
4879         if (mask == TRACE_ITER_RECORD_CMD)
4880                 trace_event_enable_cmd_record(enabled);
4881
4882         if (mask == TRACE_ITER_RECORD_TGID) {
4883                 if (!tgid_map)
4884                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4885                                            sizeof(*tgid_map),
4886                                            GFP_KERNEL);
4887                 if (!tgid_map) {
4888                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4889                         return -ENOMEM;
4890                 }
4891
4892                 trace_event_enable_tgid_record(enabled);
4893         }
4894
4895         if (mask == TRACE_ITER_EVENT_FORK)
4896                 trace_event_follow_fork(tr, enabled);
4897
4898         if (mask == TRACE_ITER_FUNC_FORK)
4899                 ftrace_pid_follow_fork(tr, enabled);
4900
4901         if (mask == TRACE_ITER_OVERWRITE) {
4902                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4903 #ifdef CONFIG_TRACER_MAX_TRACE
4904                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4905 #endif
4906         }
4907
4908         if (mask == TRACE_ITER_PRINTK) {
4909                 trace_printk_start_stop_comm(enabled);
4910                 trace_printk_control(enabled);
4911         }
4912
4913         return 0;
4914 }
4915
4916 int trace_set_options(struct trace_array *tr, char *option)
4917 {
4918         char *cmp;
4919         int neg = 0;
4920         int ret;
4921         size_t orig_len = strlen(option);
4922         int len;
4923
4924         cmp = strstrip(option);
4925
4926         len = str_has_prefix(cmp, "no");
4927         if (len)
4928                 neg = 1;
4929
4930         cmp += len;
4931
4932         mutex_lock(&event_mutex);
4933         mutex_lock(&trace_types_lock);
4934
4935         ret = match_string(trace_options, -1, cmp);
4936         /* If no option could be set, test the specific tracer options */
4937         if (ret < 0)
4938                 ret = set_tracer_option(tr, cmp, neg);
4939         else
4940                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4941
4942         mutex_unlock(&trace_types_lock);
4943         mutex_unlock(&event_mutex);
4944
4945         /*
4946          * If the first trailing whitespace is replaced with '\0' by strstrip,
4947          * turn it back into a space.
4948          */
4949         if (orig_len > strlen(option))
4950                 option[strlen(option)] = ' ';
4951
4952         return ret;
4953 }
4954
4955 static void __init apply_trace_boot_options(void)
4956 {
4957         char *buf = trace_boot_options_buf;
4958         char *option;
4959
4960         while (true) {
4961                 option = strsep(&buf, ",");
4962
4963                 if (!option)
4964                         break;
4965
4966                 if (*option)
4967                         trace_set_options(&global_trace, option);
4968
4969                 /* Put back the comma to allow this to be called again */
4970                 if (buf)
4971                         *(buf - 1) = ',';
4972         }
4973 }
4974
4975 static ssize_t
4976 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4977                         size_t cnt, loff_t *ppos)
4978 {
4979         struct seq_file *m = filp->private_data;
4980         struct trace_array *tr = m->private;
4981         char buf[64];
4982         int ret;
4983
4984         if (cnt >= sizeof(buf))
4985                 return -EINVAL;
4986
4987         if (copy_from_user(buf, ubuf, cnt))
4988                 return -EFAULT;
4989
4990         buf[cnt] = 0;
4991
4992         ret = trace_set_options(tr, buf);
4993         if (ret < 0)
4994                 return ret;
4995
4996         *ppos += cnt;
4997
4998         return cnt;
4999 }
5000
5001 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5002 {
5003         struct trace_array *tr = inode->i_private;
5004         int ret;
5005
5006         ret = tracing_check_open_get_tr(tr);
5007         if (ret)
5008                 return ret;
5009
5010         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5011         if (ret < 0)
5012                 trace_array_put(tr);
5013
5014         return ret;
5015 }
5016
5017 static const struct file_operations tracing_iter_fops = {
5018         .open           = tracing_trace_options_open,
5019         .read           = seq_read,
5020         .llseek         = seq_lseek,
5021         .release        = tracing_single_release_tr,
5022         .write          = tracing_trace_options_write,
5023 };
5024
5025 static const char readme_msg[] =
5026         "tracing mini-HOWTO:\n\n"
5027         "# echo 0 > tracing_on : quick way to disable tracing\n"
5028         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5029         " Important files:\n"
5030         "  trace\t\t\t- The static contents of the buffer\n"
5031         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5032         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5033         "  current_tracer\t- function and latency tracers\n"
5034         "  available_tracers\t- list of configured tracers for current_tracer\n"
5035         "  error_log\t- error log for failed commands (that support it)\n"
5036         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5037         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5038         "  trace_clock\t\t-change the clock used to order events\n"
5039         "       local:   Per cpu clock but may not be synced across CPUs\n"
5040         "      global:   Synced across CPUs but slows tracing down.\n"
5041         "     counter:   Not a clock, but just an increment\n"
5042         "      uptime:   Jiffy counter from time of boot\n"
5043         "        perf:   Same clock that perf events use\n"
5044 #ifdef CONFIG_X86_64
5045         "     x86-tsc:   TSC cycle counter\n"
5046 #endif
5047         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5048         "       delta:   Delta difference against a buffer-wide timestamp\n"
5049         "    absolute:   Absolute (standalone) timestamp\n"
5050         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5051         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5052         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5053         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5054         "\t\t\t  Remove sub-buffer with rmdir\n"
5055         "  trace_options\t\t- Set format or modify how tracing happens\n"
5056         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5057         "\t\t\t  option name\n"
5058         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5059 #ifdef CONFIG_DYNAMIC_FTRACE
5060         "\n  available_filter_functions - list of functions that can be filtered on\n"
5061         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5062         "\t\t\t  functions\n"
5063         "\t     accepts: func_full_name or glob-matching-pattern\n"
5064         "\t     modules: Can select a group via module\n"
5065         "\t      Format: :mod:<module-name>\n"
5066         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5067         "\t    triggers: a command to perform when function is hit\n"
5068         "\t      Format: <function>:<trigger>[:count]\n"
5069         "\t     trigger: traceon, traceoff\n"
5070         "\t\t      enable_event:<system>:<event>\n"
5071         "\t\t      disable_event:<system>:<event>\n"
5072 #ifdef CONFIG_STACKTRACE
5073         "\t\t      stacktrace\n"
5074 #endif
5075 #ifdef CONFIG_TRACER_SNAPSHOT
5076         "\t\t      snapshot\n"
5077 #endif
5078         "\t\t      dump\n"
5079         "\t\t      cpudump\n"
5080         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5081         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5082         "\t     The first one will disable tracing every time do_fault is hit\n"
5083         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5084         "\t       The first time do trap is hit and it disables tracing, the\n"
5085         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5086         "\t       the counter will not decrement. It only decrements when the\n"
5087         "\t       trigger did work\n"
5088         "\t     To remove trigger without count:\n"
5089         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5090         "\t     To remove trigger with a count:\n"
5091         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5092         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5093         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5094         "\t    modules: Can select a group via module command :mod:\n"
5095         "\t    Does not accept triggers\n"
5096 #endif /* CONFIG_DYNAMIC_FTRACE */
5097 #ifdef CONFIG_FUNCTION_TRACER
5098         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5099         "\t\t    (function)\n"
5100         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5101         "\t\t    (function)\n"
5102 #endif
5103 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5104         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5105         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5106         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5107 #endif
5108 #ifdef CONFIG_TRACER_SNAPSHOT
5109         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5110         "\t\t\t  snapshot buffer. Read the contents for more\n"
5111         "\t\t\t  information\n"
5112 #endif
5113 #ifdef CONFIG_STACK_TRACER
5114         "  stack_trace\t\t- Shows the max stack trace when active\n"
5115         "  stack_max_size\t- Shows current max stack size that was traced\n"
5116         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5117         "\t\t\t  new trace)\n"
5118 #ifdef CONFIG_DYNAMIC_FTRACE
5119         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5120         "\t\t\t  traces\n"
5121 #endif
5122 #endif /* CONFIG_STACK_TRACER */
5123 #ifdef CONFIG_DYNAMIC_EVENTS
5124         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5125         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5126 #endif
5127 #ifdef CONFIG_KPROBE_EVENTS
5128         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5129         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5130 #endif
5131 #ifdef CONFIG_UPROBE_EVENTS
5132         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5133         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5134 #endif
5135 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5136         "\t  accepts: event-definitions (one definition per line)\n"
5137         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5138         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5139 #ifdef CONFIG_HIST_TRIGGERS
5140         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5141 #endif
5142         "\t           -:[<group>/]<event>\n"
5143 #ifdef CONFIG_KPROBE_EVENTS
5144         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5145   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5146 #endif
5147 #ifdef CONFIG_UPROBE_EVENTS
5148   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5149 #endif
5150         "\t     args: <name>=fetcharg[:type]\n"
5151         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5152 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5153         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5154 #else
5155         "\t           $stack<index>, $stack, $retval, $comm,\n"
5156 #endif
5157         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5158         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5159         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5160         "\t           <type>\\[<array-size>\\]\n"
5161 #ifdef CONFIG_HIST_TRIGGERS
5162         "\t    field: <stype> <name>;\n"
5163         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5164         "\t           [unsigned] char/int/long\n"
5165 #endif
5166 #endif
5167         "  events/\t\t- Directory containing all trace event subsystems:\n"
5168         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5169         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5170         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5171         "\t\t\t  events\n"
5172         "      filter\t\t- If set, only events passing filter are traced\n"
5173         "  events/<system>/<event>/\t- Directory containing control files for\n"
5174         "\t\t\t  <event>:\n"
5175         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5176         "      filter\t\t- If set, only events passing filter are traced\n"
5177         "      trigger\t\t- If set, a command to perform when event is hit\n"
5178         "\t    Format: <trigger>[:count][if <filter>]\n"
5179         "\t   trigger: traceon, traceoff\n"
5180         "\t            enable_event:<system>:<event>\n"
5181         "\t            disable_event:<system>:<event>\n"
5182 #ifdef CONFIG_HIST_TRIGGERS
5183         "\t            enable_hist:<system>:<event>\n"
5184         "\t            disable_hist:<system>:<event>\n"
5185 #endif
5186 #ifdef CONFIG_STACKTRACE
5187         "\t\t    stacktrace\n"
5188 #endif
5189 #ifdef CONFIG_TRACER_SNAPSHOT
5190         "\t\t    snapshot\n"
5191 #endif
5192 #ifdef CONFIG_HIST_TRIGGERS
5193         "\t\t    hist (see below)\n"
5194 #endif
5195         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5196         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5197         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5198         "\t                  events/block/block_unplug/trigger\n"
5199         "\t   The first disables tracing every time block_unplug is hit.\n"
5200         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5201         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5202         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5203         "\t   Like function triggers, the counter is only decremented if it\n"
5204         "\t    enabled or disabled tracing.\n"
5205         "\t   To remove a trigger without a count:\n"
5206         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5207         "\t   To remove a trigger with a count:\n"
5208         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5209         "\t   Filters can be ignored when removing a trigger.\n"
5210 #ifdef CONFIG_HIST_TRIGGERS
5211         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5212         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5213         "\t            [:values=<field1[,field2,...]>]\n"
5214         "\t            [:sort=<field1[,field2,...]>]\n"
5215         "\t            [:size=#entries]\n"
5216         "\t            [:pause][:continue][:clear]\n"
5217         "\t            [:name=histname1]\n"
5218         "\t            [:<handler>.<action>]\n"
5219         "\t            [if <filter>]\n\n"
5220         "\t    When a matching event is hit, an entry is added to a hash\n"
5221         "\t    table using the key(s) and value(s) named, and the value of a\n"
5222         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5223         "\t    correspond to fields in the event's format description.  Keys\n"
5224         "\t    can be any field, or the special string 'stacktrace'.\n"
5225         "\t    Compound keys consisting of up to two fields can be specified\n"
5226         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5227         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5228         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5229         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5230         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5231         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5232         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5233         "\t    its histogram data will be shared with other triggers of the\n"
5234         "\t    same name, and trigger hits will update this common data.\n\n"
5235         "\t    Reading the 'hist' file for the event will dump the hash\n"
5236         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5237         "\t    triggers attached to an event, there will be a table for each\n"
5238         "\t    trigger in the output.  The table displayed for a named\n"
5239         "\t    trigger will be the same as any other instance having the\n"
5240         "\t    same name.  The default format used to display a given field\n"
5241         "\t    can be modified by appending any of the following modifiers\n"
5242         "\t    to the field name, as applicable:\n\n"
5243         "\t            .hex        display a number as a hex value\n"
5244         "\t            .sym        display an address as a symbol\n"
5245         "\t            .sym-offset display an address as a symbol and offset\n"
5246         "\t            .execname   display a common_pid as a program name\n"
5247         "\t            .syscall    display a syscall id as a syscall name\n"
5248         "\t            .log2       display log2 value rather than raw number\n"
5249         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5250         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5251         "\t    trigger or to start a hist trigger but not log any events\n"
5252         "\t    until told to do so.  'continue' can be used to start or\n"
5253         "\t    restart a paused hist trigger.\n\n"
5254         "\t    The 'clear' parameter will clear the contents of a running\n"
5255         "\t    hist trigger and leave its current paused/active state\n"
5256         "\t    unchanged.\n\n"
5257         "\t    The enable_hist and disable_hist triggers can be used to\n"
5258         "\t    have one event conditionally start and stop another event's\n"
5259         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5260         "\t    the enable_event and disable_event triggers.\n\n"
5261         "\t    Hist trigger handlers and actions are executed whenever a\n"
5262         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5263         "\t        <handler>.<action>\n\n"
5264         "\t    The available handlers are:\n\n"
5265         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5266         "\t        onmax(var)               - invoke if var exceeds current max\n"
5267         "\t        onchange(var)            - invoke action if var changes\n\n"
5268         "\t    The available actions are:\n\n"
5269         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5270         "\t        save(field,...)                      - save current event fields\n"
5271 #ifdef CONFIG_TRACER_SNAPSHOT
5272         "\t        snapshot()                           - snapshot the trace buffer\n"
5273 #endif
5274 #endif
5275 ;
5276
5277 static ssize_t
5278 tracing_readme_read(struct file *filp, char __user *ubuf,
5279                        size_t cnt, loff_t *ppos)
5280 {
5281         return simple_read_from_buffer(ubuf, cnt, ppos,
5282                                         readme_msg, strlen(readme_msg));
5283 }
5284
5285 static const struct file_operations tracing_readme_fops = {
5286         .open           = tracing_open_generic,
5287         .read           = tracing_readme_read,
5288         .llseek         = generic_file_llseek,
5289 };
5290
5291 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5292 {
5293         int *ptr = v;
5294
5295         if (*pos || m->count)
5296                 ptr++;
5297
5298         (*pos)++;
5299
5300         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5301                 if (trace_find_tgid(*ptr))
5302                         return ptr;
5303         }
5304
5305         return NULL;
5306 }
5307
5308 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5309 {
5310         void *v;
5311         loff_t l = 0;
5312
5313         if (!tgid_map)
5314                 return NULL;
5315
5316         v = &tgid_map[0];
5317         while (l <= *pos) {
5318                 v = saved_tgids_next(m, v, &l);
5319                 if (!v)
5320                         return NULL;
5321         }
5322
5323         return v;
5324 }
5325
5326 static void saved_tgids_stop(struct seq_file *m, void *v)
5327 {
5328 }
5329
5330 static int saved_tgids_show(struct seq_file *m, void *v)
5331 {
5332         int pid = (int *)v - tgid_map;
5333
5334         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5335         return 0;
5336 }
5337
5338 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5339         .start          = saved_tgids_start,
5340         .stop           = saved_tgids_stop,
5341         .next           = saved_tgids_next,
5342         .show           = saved_tgids_show,
5343 };
5344
5345 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5346 {
5347         int ret;
5348
5349         ret = tracing_check_open_get_tr(NULL);
5350         if (ret)
5351                 return ret;
5352
5353         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5354 }
5355
5356
5357 static const struct file_operations tracing_saved_tgids_fops = {
5358         .open           = tracing_saved_tgids_open,
5359         .read           = seq_read,
5360         .llseek         = seq_lseek,
5361         .release        = seq_release,
5362 };
5363
5364 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5365 {
5366         unsigned int *ptr = v;
5367
5368         if (*pos || m->count)
5369                 ptr++;
5370
5371         (*pos)++;
5372
5373         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5374              ptr++) {
5375                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5376                         continue;
5377
5378                 return ptr;
5379         }
5380
5381         return NULL;
5382 }
5383
5384 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5385 {
5386         void *v;
5387         loff_t l = 0;
5388
5389         preempt_disable();
5390         arch_spin_lock(&trace_cmdline_lock);
5391
5392         v = &savedcmd->map_cmdline_to_pid[0];
5393         while (l <= *pos) {
5394                 v = saved_cmdlines_next(m, v, &l);
5395                 if (!v)
5396                         return NULL;
5397         }
5398
5399         return v;
5400 }
5401
5402 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5403 {
5404         arch_spin_unlock(&trace_cmdline_lock);
5405         preempt_enable();
5406 }
5407
5408 static int saved_cmdlines_show(struct seq_file *m, void *v)
5409 {
5410         char buf[TASK_COMM_LEN];
5411         unsigned int *pid = v;
5412
5413         __trace_find_cmdline(*pid, buf);
5414         seq_printf(m, "%d %s\n", *pid, buf);
5415         return 0;
5416 }
5417
5418 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5419         .start          = saved_cmdlines_start,
5420         .next           = saved_cmdlines_next,
5421         .stop           = saved_cmdlines_stop,
5422         .show           = saved_cmdlines_show,
5423 };
5424
5425 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5426 {
5427         int ret;
5428
5429         ret = tracing_check_open_get_tr(NULL);
5430         if (ret)
5431                 return ret;
5432
5433         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5434 }
5435
5436 static const struct file_operations tracing_saved_cmdlines_fops = {
5437         .open           = tracing_saved_cmdlines_open,
5438         .read           = seq_read,
5439         .llseek         = seq_lseek,
5440         .release        = seq_release,
5441 };
5442
5443 static ssize_t
5444 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5445                                  size_t cnt, loff_t *ppos)
5446 {
5447         char buf[64];
5448         int r;
5449
5450         arch_spin_lock(&trace_cmdline_lock);
5451         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5452         arch_spin_unlock(&trace_cmdline_lock);
5453
5454         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5455 }
5456
5457 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5458 {
5459         kfree(s->saved_cmdlines);
5460         kfree(s->map_cmdline_to_pid);
5461         kfree(s);
5462 }
5463
5464 static int tracing_resize_saved_cmdlines(unsigned int val)
5465 {
5466         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5467
5468         s = kmalloc(sizeof(*s), GFP_KERNEL);
5469         if (!s)
5470                 return -ENOMEM;
5471
5472         if (allocate_cmdlines_buffer(val, s) < 0) {
5473                 kfree(s);
5474                 return -ENOMEM;
5475         }
5476
5477         arch_spin_lock(&trace_cmdline_lock);
5478         savedcmd_temp = savedcmd;
5479         savedcmd = s;
5480         arch_spin_unlock(&trace_cmdline_lock);
5481         free_saved_cmdlines_buffer(savedcmd_temp);
5482
5483         return 0;
5484 }
5485
5486 static ssize_t
5487 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5488                                   size_t cnt, loff_t *ppos)
5489 {
5490         unsigned long val;
5491         int ret;
5492
5493         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5494         if (ret)
5495                 return ret;
5496
5497         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5498         if (!val || val > PID_MAX_DEFAULT)
5499                 return -EINVAL;
5500
5501         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5502         if (ret < 0)
5503                 return ret;
5504
5505         *ppos += cnt;
5506
5507         return cnt;
5508 }
5509
5510 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5511         .open           = tracing_open_generic,
5512         .read           = tracing_saved_cmdlines_size_read,
5513         .write          = tracing_saved_cmdlines_size_write,
5514 };
5515
5516 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5517 static union trace_eval_map_item *
5518 update_eval_map(union trace_eval_map_item *ptr)
5519 {
5520         if (!ptr->map.eval_string) {
5521                 if (ptr->tail.next) {
5522                         ptr = ptr->tail.next;
5523                         /* Set ptr to the next real item (skip head) */
5524                         ptr++;
5525                 } else
5526                         return NULL;
5527         }
5528         return ptr;
5529 }
5530
5531 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5532 {
5533         union trace_eval_map_item *ptr = v;
5534
5535         /*
5536          * Paranoid! If ptr points to end, we don't want to increment past it.
5537          * This really should never happen.
5538          */
5539         (*pos)++;
5540         ptr = update_eval_map(ptr);
5541         if (WARN_ON_ONCE(!ptr))
5542                 return NULL;
5543
5544         ptr++;
5545         ptr = update_eval_map(ptr);
5546
5547         return ptr;
5548 }
5549
5550 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5551 {
5552         union trace_eval_map_item *v;
5553         loff_t l = 0;
5554
5555         mutex_lock(&trace_eval_mutex);
5556
5557         v = trace_eval_maps;
5558         if (v)
5559                 v++;
5560
5561         while (v && l < *pos) {
5562                 v = eval_map_next(m, v, &l);
5563         }
5564
5565         return v;
5566 }
5567
5568 static void eval_map_stop(struct seq_file *m, void *v)
5569 {
5570         mutex_unlock(&trace_eval_mutex);
5571 }
5572
5573 static int eval_map_show(struct seq_file *m, void *v)
5574 {
5575         union trace_eval_map_item *ptr = v;
5576
5577         seq_printf(m, "%s %ld (%s)\n",
5578                    ptr->map.eval_string, ptr->map.eval_value,
5579                    ptr->map.system);
5580
5581         return 0;
5582 }
5583
5584 static const struct seq_operations tracing_eval_map_seq_ops = {
5585         .start          = eval_map_start,
5586         .next           = eval_map_next,
5587         .stop           = eval_map_stop,
5588         .show           = eval_map_show,
5589 };
5590
5591 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5592 {
5593         int ret;
5594
5595         ret = tracing_check_open_get_tr(NULL);
5596         if (ret)
5597                 return ret;
5598
5599         return seq_open(filp, &tracing_eval_map_seq_ops);
5600 }
5601
5602 static const struct file_operations tracing_eval_map_fops = {
5603         .open           = tracing_eval_map_open,
5604         .read           = seq_read,
5605         .llseek         = seq_lseek,
5606         .release        = seq_release,
5607 };
5608
5609 static inline union trace_eval_map_item *
5610 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5611 {
5612         /* Return tail of array given the head */
5613         return ptr + ptr->head.length + 1;
5614 }
5615
5616 static void
5617 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5618                            int len)
5619 {
5620         struct trace_eval_map **stop;
5621         struct trace_eval_map **map;
5622         union trace_eval_map_item *map_array;
5623         union trace_eval_map_item *ptr;
5624
5625         stop = start + len;
5626
5627         /*
5628          * The trace_eval_maps contains the map plus a head and tail item,
5629          * where the head holds the module and length of array, and the
5630          * tail holds a pointer to the next list.
5631          */
5632         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5633         if (!map_array) {
5634                 pr_warn("Unable to allocate trace eval mapping\n");
5635                 return;
5636         }
5637
5638         mutex_lock(&trace_eval_mutex);
5639
5640         if (!trace_eval_maps)
5641                 trace_eval_maps = map_array;
5642         else {
5643                 ptr = trace_eval_maps;
5644                 for (;;) {
5645                         ptr = trace_eval_jmp_to_tail(ptr);
5646                         if (!ptr->tail.next)
5647                                 break;
5648                         ptr = ptr->tail.next;
5649
5650                 }
5651                 ptr->tail.next = map_array;
5652         }
5653         map_array->head.mod = mod;
5654         map_array->head.length = len;
5655         map_array++;
5656
5657         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5658                 map_array->map = **map;
5659                 map_array++;
5660         }
5661         memset(map_array, 0, sizeof(*map_array));
5662
5663         mutex_unlock(&trace_eval_mutex);
5664 }
5665
5666 static void trace_create_eval_file(struct dentry *d_tracer)
5667 {
5668         trace_create_file("eval_map", 0444, d_tracer,
5669                           NULL, &tracing_eval_map_fops);
5670 }
5671
5672 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5673 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5674 static inline void trace_insert_eval_map_file(struct module *mod,
5675                               struct trace_eval_map **start, int len) { }
5676 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5677
5678 static void trace_insert_eval_map(struct module *mod,
5679                                   struct trace_eval_map **start, int len)
5680 {
5681         struct trace_eval_map **map;
5682
5683         if (len <= 0)
5684                 return;
5685
5686         map = start;
5687
5688         trace_event_eval_update(map, len);
5689
5690         trace_insert_eval_map_file(mod, start, len);
5691 }
5692
5693 static ssize_t
5694 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5695                        size_t cnt, loff_t *ppos)
5696 {
5697         struct trace_array *tr = filp->private_data;
5698         char buf[MAX_TRACER_SIZE+2];
5699         int r;
5700
5701         mutex_lock(&trace_types_lock);
5702         r = sprintf(buf, "%s\n", tr->current_trace->name);
5703         mutex_unlock(&trace_types_lock);
5704
5705         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5706 }
5707
5708 int tracer_init(struct tracer *t, struct trace_array *tr)
5709 {
5710         tracing_reset_online_cpus(&tr->array_buffer);
5711         return t->init(tr);
5712 }
5713
5714 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5715 {
5716         int cpu;
5717
5718         for_each_tracing_cpu(cpu)
5719                 per_cpu_ptr(buf->data, cpu)->entries = val;
5720 }
5721
5722 #ifdef CONFIG_TRACER_MAX_TRACE
5723 /* resize @tr's buffer to the size of @size_tr's entries */
5724 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5725                                         struct array_buffer *size_buf, int cpu_id)
5726 {
5727         int cpu, ret = 0;
5728
5729         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5730                 for_each_tracing_cpu(cpu) {
5731                         ret = ring_buffer_resize(trace_buf->buffer,
5732                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5733                         if (ret < 0)
5734                                 break;
5735                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5736                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5737                 }
5738         } else {
5739                 ret = ring_buffer_resize(trace_buf->buffer,
5740                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5741                 if (ret == 0)
5742                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5743                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5744         }
5745
5746         return ret;
5747 }
5748 #endif /* CONFIG_TRACER_MAX_TRACE */
5749
5750 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5751                                         unsigned long size, int cpu)
5752 {
5753         int ret;
5754
5755         /*
5756          * If kernel or user changes the size of the ring buffer
5757          * we use the size that was given, and we can forget about
5758          * expanding it later.
5759          */
5760         ring_buffer_expanded = true;
5761
5762         /* May be called before buffers are initialized */
5763         if (!tr->array_buffer.buffer)
5764                 return 0;
5765
5766         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5767         if (ret < 0)
5768                 return ret;
5769
5770 #ifdef CONFIG_TRACER_MAX_TRACE
5771         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5772             !tr->current_trace->use_max_tr)
5773                 goto out;
5774
5775         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5776         if (ret < 0) {
5777                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5778                                                      &tr->array_buffer, cpu);
5779                 if (r < 0) {
5780                         /*
5781                          * AARGH! We are left with different
5782                          * size max buffer!!!!
5783                          * The max buffer is our "snapshot" buffer.
5784                          * When a tracer needs a snapshot (one of the
5785                          * latency tracers), it swaps the max buffer
5786                          * with the saved snap shot. We succeeded to
5787                          * update the size of the main buffer, but failed to
5788                          * update the size of the max buffer. But when we tried
5789                          * to reset the main buffer to the original size, we
5790                          * failed there too. This is very unlikely to
5791                          * happen, but if it does, warn and kill all
5792                          * tracing.
5793                          */
5794                         WARN_ON(1);
5795                         tracing_disabled = 1;
5796                 }
5797                 return ret;
5798         }
5799
5800         if (cpu == RING_BUFFER_ALL_CPUS)
5801                 set_buffer_entries(&tr->max_buffer, size);
5802         else
5803                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5804
5805  out:
5806 #endif /* CONFIG_TRACER_MAX_TRACE */
5807
5808         if (cpu == RING_BUFFER_ALL_CPUS)
5809                 set_buffer_entries(&tr->array_buffer, size);
5810         else
5811                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5812
5813         return ret;
5814 }
5815
5816 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5817                                   unsigned long size, int cpu_id)
5818 {
5819         int ret = size;
5820
5821         mutex_lock(&trace_types_lock);
5822
5823         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5824                 /* make sure, this cpu is enabled in the mask */
5825                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5826                         ret = -EINVAL;
5827                         goto out;
5828                 }
5829         }
5830
5831         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5832         if (ret < 0)
5833                 ret = -ENOMEM;
5834
5835 out:
5836         mutex_unlock(&trace_types_lock);
5837
5838         return ret;
5839 }
5840
5841
5842 /**
5843  * tracing_update_buffers - used by tracing facility to expand ring buffers
5844  *
5845  * To save on memory when the tracing is never used on a system with it
5846  * configured in. The ring buffers are set to a minimum size. But once
5847  * a user starts to use the tracing facility, then they need to grow
5848  * to their default size.
5849  *
5850  * This function is to be called when a tracer is about to be used.
5851  */
5852 int tracing_update_buffers(void)
5853 {
5854         int ret = 0;
5855
5856         mutex_lock(&trace_types_lock);
5857         if (!ring_buffer_expanded)
5858                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5859                                                 RING_BUFFER_ALL_CPUS);
5860         mutex_unlock(&trace_types_lock);
5861
5862         return ret;
5863 }
5864
5865 struct trace_option_dentry;
5866
5867 static void
5868 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5869
5870 /*
5871  * Used to clear out the tracer before deletion of an instance.
5872  * Must have trace_types_lock held.
5873  */
5874 static void tracing_set_nop(struct trace_array *tr)
5875 {
5876         if (tr->current_trace == &nop_trace)
5877                 return;
5878         
5879         tr->current_trace->enabled--;
5880
5881         if (tr->current_trace->reset)
5882                 tr->current_trace->reset(tr);
5883
5884         tr->current_trace = &nop_trace;
5885 }
5886
5887 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5888 {
5889         /* Only enable if the directory has been created already. */
5890         if (!tr->dir)
5891                 return;
5892
5893         create_trace_option_files(tr, t);
5894 }
5895
5896 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5897 {
5898         struct tracer *t;
5899 #ifdef CONFIG_TRACER_MAX_TRACE
5900         bool had_max_tr;
5901 #endif
5902         int ret = 0;
5903
5904         mutex_lock(&trace_types_lock);
5905
5906         if (!ring_buffer_expanded) {
5907                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5908                                                 RING_BUFFER_ALL_CPUS);
5909                 if (ret < 0)
5910                         goto out;
5911                 ret = 0;
5912         }
5913
5914         for (t = trace_types; t; t = t->next) {
5915                 if (strcmp(t->name, buf) == 0)
5916                         break;
5917         }
5918         if (!t) {
5919                 ret = -EINVAL;
5920                 goto out;
5921         }
5922         if (t == tr->current_trace)
5923                 goto out;
5924
5925 #ifdef CONFIG_TRACER_SNAPSHOT
5926         if (t->use_max_tr) {
5927                 arch_spin_lock(&tr->max_lock);
5928                 if (tr->cond_snapshot)
5929                         ret = -EBUSY;
5930                 arch_spin_unlock(&tr->max_lock);
5931                 if (ret)
5932                         goto out;
5933         }
5934 #endif
5935         /* Some tracers won't work on kernel command line */
5936         if (system_state < SYSTEM_RUNNING && t->noboot) {
5937                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5938                         t->name);
5939                 goto out;
5940         }
5941
5942         /* Some tracers are only allowed for the top level buffer */
5943         if (!trace_ok_for_array(t, tr)) {
5944                 ret = -EINVAL;
5945                 goto out;
5946         }
5947
5948         /* If trace pipe files are being read, we can't change the tracer */
5949         if (tr->trace_ref) {
5950                 ret = -EBUSY;
5951                 goto out;
5952         }
5953
5954         trace_branch_disable();
5955
5956         tr->current_trace->enabled--;
5957
5958         if (tr->current_trace->reset)
5959                 tr->current_trace->reset(tr);
5960
5961         /* Current trace needs to be nop_trace before synchronize_rcu */
5962         tr->current_trace = &nop_trace;
5963
5964 #ifdef CONFIG_TRACER_MAX_TRACE
5965         had_max_tr = tr->allocated_snapshot;
5966
5967         if (had_max_tr && !t->use_max_tr) {
5968                 /*
5969                  * We need to make sure that the update_max_tr sees that
5970                  * current_trace changed to nop_trace to keep it from
5971                  * swapping the buffers after we resize it.
5972                  * The update_max_tr is called from interrupts disabled
5973                  * so a synchronized_sched() is sufficient.
5974                  */
5975                 synchronize_rcu();
5976                 free_snapshot(tr);
5977         }
5978 #endif
5979
5980 #ifdef CONFIG_TRACER_MAX_TRACE
5981         if (t->use_max_tr && !had_max_tr) {
5982                 ret = tracing_alloc_snapshot_instance(tr);
5983                 if (ret < 0)
5984                         goto out;
5985         }
5986 #endif
5987
5988         if (t->init) {
5989                 ret = tracer_init(t, tr);
5990                 if (ret)
5991                         goto out;
5992         }
5993
5994         tr->current_trace = t;
5995         tr->current_trace->enabled++;
5996         trace_branch_enable(tr);
5997  out:
5998         mutex_unlock(&trace_types_lock);
5999
6000         return ret;
6001 }
6002
6003 static ssize_t
6004 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6005                         size_t cnt, loff_t *ppos)
6006 {
6007         struct trace_array *tr = filp->private_data;
6008         char buf[MAX_TRACER_SIZE+1];
6009         int i;
6010         size_t ret;
6011         int err;
6012
6013         ret = cnt;
6014
6015         if (cnt > MAX_TRACER_SIZE)
6016                 cnt = MAX_TRACER_SIZE;
6017
6018         if (copy_from_user(buf, ubuf, cnt))
6019                 return -EFAULT;
6020
6021         buf[cnt] = 0;
6022
6023         /* strip ending whitespace. */
6024         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6025                 buf[i] = 0;
6026
6027         err = tracing_set_tracer(tr, buf);
6028         if (err)
6029                 return err;
6030
6031         *ppos += ret;
6032
6033         return ret;
6034 }
6035
6036 static ssize_t
6037 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6038                    size_t cnt, loff_t *ppos)
6039 {
6040         char buf[64];
6041         int r;
6042
6043         r = snprintf(buf, sizeof(buf), "%ld\n",
6044                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6045         if (r > sizeof(buf))
6046                 r = sizeof(buf);
6047         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6048 }
6049
6050 static ssize_t
6051 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6052                     size_t cnt, loff_t *ppos)
6053 {
6054         unsigned long val;
6055         int ret;
6056
6057         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6058         if (ret)
6059                 return ret;
6060
6061         *ptr = val * 1000;
6062
6063         return cnt;
6064 }
6065
6066 static ssize_t
6067 tracing_thresh_read(struct file *filp, char __user *ubuf,
6068                     size_t cnt, loff_t *ppos)
6069 {
6070         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6071 }
6072
6073 static ssize_t
6074 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6075                      size_t cnt, loff_t *ppos)
6076 {
6077         struct trace_array *tr = filp->private_data;
6078         int ret;
6079
6080         mutex_lock(&trace_types_lock);
6081         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6082         if (ret < 0)
6083                 goto out;
6084
6085         if (tr->current_trace->update_thresh) {
6086                 ret = tr->current_trace->update_thresh(tr);
6087                 if (ret < 0)
6088                         goto out;
6089         }
6090
6091         ret = cnt;
6092 out:
6093         mutex_unlock(&trace_types_lock);
6094
6095         return ret;
6096 }
6097
6098 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6099
6100 static ssize_t
6101 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6102                      size_t cnt, loff_t *ppos)
6103 {
6104         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6105 }
6106
6107 static ssize_t
6108 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6109                       size_t cnt, loff_t *ppos)
6110 {
6111         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6112 }
6113
6114 #endif
6115
6116 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6117 {
6118         struct trace_array *tr = inode->i_private;
6119         struct trace_iterator *iter;
6120         int ret;
6121
6122         ret = tracing_check_open_get_tr(tr);
6123         if (ret)
6124                 return ret;
6125
6126         mutex_lock(&trace_types_lock);
6127
6128         /* create a buffer to store the information to pass to userspace */
6129         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6130         if (!iter) {
6131                 ret = -ENOMEM;
6132                 __trace_array_put(tr);
6133                 goto out;
6134         }
6135
6136         trace_seq_init(&iter->seq);
6137         iter->trace = tr->current_trace;
6138
6139         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6140                 ret = -ENOMEM;
6141                 goto fail;
6142         }
6143
6144         /* trace pipe does not show start of buffer */
6145         cpumask_setall(iter->started);
6146
6147         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6148                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6149
6150         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6151         if (trace_clocks[tr->clock_id].in_ns)
6152                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6153
6154         iter->tr = tr;
6155         iter->array_buffer = &tr->array_buffer;
6156         iter->cpu_file = tracing_get_cpu(inode);
6157         mutex_init(&iter->mutex);
6158         filp->private_data = iter;
6159
6160         if (iter->trace->pipe_open)
6161                 iter->trace->pipe_open(iter);
6162
6163         nonseekable_open(inode, filp);
6164
6165         tr->trace_ref++;
6166 out:
6167         mutex_unlock(&trace_types_lock);
6168         return ret;
6169
6170 fail:
6171         kfree(iter);
6172         __trace_array_put(tr);
6173         mutex_unlock(&trace_types_lock);
6174         return ret;
6175 }
6176
6177 static int tracing_release_pipe(struct inode *inode, struct file *file)
6178 {
6179         struct trace_iterator *iter = file->private_data;
6180         struct trace_array *tr = inode->i_private;
6181
6182         mutex_lock(&trace_types_lock);
6183
6184         tr->trace_ref--;
6185
6186         if (iter->trace->pipe_close)
6187                 iter->trace->pipe_close(iter);
6188
6189         mutex_unlock(&trace_types_lock);
6190
6191         free_cpumask_var(iter->started);
6192         mutex_destroy(&iter->mutex);
6193         kfree(iter);
6194
6195         trace_array_put(tr);
6196
6197         return 0;
6198 }
6199
6200 static __poll_t
6201 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6202 {
6203         struct trace_array *tr = iter->tr;
6204
6205         /* Iterators are static, they should be filled or empty */
6206         if (trace_buffer_iter(iter, iter->cpu_file))
6207                 return EPOLLIN | EPOLLRDNORM;
6208
6209         if (tr->trace_flags & TRACE_ITER_BLOCK)
6210                 /*
6211                  * Always select as readable when in blocking mode
6212                  */
6213                 return EPOLLIN | EPOLLRDNORM;
6214         else
6215                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6216                                              filp, poll_table);
6217 }
6218
6219 static __poll_t
6220 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6221 {
6222         struct trace_iterator *iter = filp->private_data;
6223
6224         return trace_poll(iter, filp, poll_table);
6225 }
6226
6227 /* Must be called with iter->mutex held. */
6228 static int tracing_wait_pipe(struct file *filp)
6229 {
6230         struct trace_iterator *iter = filp->private_data;
6231         int ret;
6232
6233         while (trace_empty(iter)) {
6234
6235                 if ((filp->f_flags & O_NONBLOCK)) {
6236                         return -EAGAIN;
6237                 }
6238
6239                 /*
6240                  * We block until we read something and tracing is disabled.
6241                  * We still block if tracing is disabled, but we have never
6242                  * read anything. This allows a user to cat this file, and
6243                  * then enable tracing. But after we have read something,
6244                  * we give an EOF when tracing is again disabled.
6245                  *
6246                  * iter->pos will be 0 if we haven't read anything.
6247                  */
6248                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6249                         break;
6250
6251                 mutex_unlock(&iter->mutex);
6252
6253                 ret = wait_on_pipe(iter, 0);
6254
6255                 mutex_lock(&iter->mutex);
6256
6257                 if (ret)
6258                         return ret;
6259         }
6260
6261         return 1;
6262 }
6263
6264 /*
6265  * Consumer reader.
6266  */
6267 static ssize_t
6268 tracing_read_pipe(struct file *filp, char __user *ubuf,
6269                   size_t cnt, loff_t *ppos)
6270 {
6271         struct trace_iterator *iter = filp->private_data;
6272         ssize_t sret;
6273
6274         /*
6275          * Avoid more than one consumer on a single file descriptor
6276          * This is just a matter of traces coherency, the ring buffer itself
6277          * is protected.
6278          */
6279         mutex_lock(&iter->mutex);
6280
6281         /* return any leftover data */
6282         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6283         if (sret != -EBUSY)
6284                 goto out;
6285
6286         trace_seq_init(&iter->seq);
6287
6288         if (iter->trace->read) {
6289                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6290                 if (sret)
6291                         goto out;
6292         }
6293
6294 waitagain:
6295         sret = tracing_wait_pipe(filp);
6296         if (sret <= 0)
6297                 goto out;
6298
6299         /* stop when tracing is finished */
6300         if (trace_empty(iter)) {
6301                 sret = 0;
6302                 goto out;
6303         }
6304
6305         if (cnt >= PAGE_SIZE)
6306                 cnt = PAGE_SIZE - 1;
6307
6308         /* reset all but tr, trace, and overruns */
6309         memset(&iter->seq, 0,
6310                sizeof(struct trace_iterator) -
6311                offsetof(struct trace_iterator, seq));
6312         cpumask_clear(iter->started);
6313         trace_seq_init(&iter->seq);
6314         iter->pos = -1;
6315
6316         trace_event_read_lock();
6317         trace_access_lock(iter->cpu_file);
6318         while (trace_find_next_entry_inc(iter) != NULL) {
6319                 enum print_line_t ret;
6320                 int save_len = iter->seq.seq.len;
6321
6322                 ret = print_trace_line(iter);
6323                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6324                         /* don't print partial lines */
6325                         iter->seq.seq.len = save_len;
6326                         break;
6327                 }
6328                 if (ret != TRACE_TYPE_NO_CONSUME)
6329                         trace_consume(iter);
6330
6331                 if (trace_seq_used(&iter->seq) >= cnt)
6332                         break;
6333
6334                 /*
6335                  * Setting the full flag means we reached the trace_seq buffer
6336                  * size and we should leave by partial output condition above.
6337                  * One of the trace_seq_* functions is not used properly.
6338                  */
6339                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6340                           iter->ent->type);
6341         }
6342         trace_access_unlock(iter->cpu_file);
6343         trace_event_read_unlock();
6344
6345         /* Now copy what we have to the user */
6346         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6347         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6348                 trace_seq_init(&iter->seq);
6349
6350         /*
6351          * If there was nothing to send to user, in spite of consuming trace
6352          * entries, go back to wait for more entries.
6353          */
6354         if (sret == -EBUSY)
6355                 goto waitagain;
6356
6357 out:
6358         mutex_unlock(&iter->mutex);
6359
6360         return sret;
6361 }
6362
6363 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6364                                      unsigned int idx)
6365 {
6366         __free_page(spd->pages[idx]);
6367 }
6368
6369 static size_t
6370 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6371 {
6372         size_t count;
6373         int save_len;
6374         int ret;
6375
6376         /* Seq buffer is page-sized, exactly what we need. */
6377         for (;;) {
6378                 save_len = iter->seq.seq.len;
6379                 ret = print_trace_line(iter);
6380
6381                 if (trace_seq_has_overflowed(&iter->seq)) {
6382                         iter->seq.seq.len = save_len;
6383                         break;
6384                 }
6385
6386                 /*
6387                  * This should not be hit, because it should only
6388                  * be set if the iter->seq overflowed. But check it
6389                  * anyway to be safe.
6390                  */
6391                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6392                         iter->seq.seq.len = save_len;
6393                         break;
6394                 }
6395
6396                 count = trace_seq_used(&iter->seq) - save_len;
6397                 if (rem < count) {
6398                         rem = 0;
6399                         iter->seq.seq.len = save_len;
6400                         break;
6401                 }
6402
6403                 if (ret != TRACE_TYPE_NO_CONSUME)
6404                         trace_consume(iter);
6405                 rem -= count;
6406                 if (!trace_find_next_entry_inc(iter))   {
6407                         rem = 0;
6408                         iter->ent = NULL;
6409                         break;
6410                 }
6411         }
6412
6413         return rem;
6414 }
6415
6416 static ssize_t tracing_splice_read_pipe(struct file *filp,
6417                                         loff_t *ppos,
6418                                         struct pipe_inode_info *pipe,
6419                                         size_t len,
6420                                         unsigned int flags)
6421 {
6422         struct page *pages_def[PIPE_DEF_BUFFERS];
6423         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6424         struct trace_iterator *iter = filp->private_data;
6425         struct splice_pipe_desc spd = {
6426                 .pages          = pages_def,
6427                 .partial        = partial_def,
6428                 .nr_pages       = 0, /* This gets updated below. */
6429                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6430                 .ops            = &default_pipe_buf_ops,
6431                 .spd_release    = tracing_spd_release_pipe,
6432         };
6433         ssize_t ret;
6434         size_t rem;
6435         unsigned int i;
6436
6437         if (splice_grow_spd(pipe, &spd))
6438                 return -ENOMEM;
6439
6440         mutex_lock(&iter->mutex);
6441
6442         if (iter->trace->splice_read) {
6443                 ret = iter->trace->splice_read(iter, filp,
6444                                                ppos, pipe, len, flags);
6445                 if (ret)
6446                         goto out_err;
6447         }
6448
6449         ret = tracing_wait_pipe(filp);
6450         if (ret <= 0)
6451                 goto out_err;
6452
6453         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6454                 ret = -EFAULT;
6455                 goto out_err;
6456         }
6457
6458         trace_event_read_lock();
6459         trace_access_lock(iter->cpu_file);
6460
6461         /* Fill as many pages as possible. */
6462         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6463                 spd.pages[i] = alloc_page(GFP_KERNEL);
6464                 if (!spd.pages[i])
6465                         break;
6466
6467                 rem = tracing_fill_pipe_page(rem, iter);
6468
6469                 /* Copy the data into the page, so we can start over. */
6470                 ret = trace_seq_to_buffer(&iter->seq,
6471                                           page_address(spd.pages[i]),
6472                                           trace_seq_used(&iter->seq));
6473                 if (ret < 0) {
6474                         __free_page(spd.pages[i]);
6475                         break;
6476                 }
6477                 spd.partial[i].offset = 0;
6478                 spd.partial[i].len = trace_seq_used(&iter->seq);
6479
6480                 trace_seq_init(&iter->seq);
6481         }
6482
6483         trace_access_unlock(iter->cpu_file);
6484         trace_event_read_unlock();
6485         mutex_unlock(&iter->mutex);
6486
6487         spd.nr_pages = i;
6488
6489         if (i)
6490                 ret = splice_to_pipe(pipe, &spd);
6491         else
6492                 ret = 0;
6493 out:
6494         splice_shrink_spd(&spd);
6495         return ret;
6496
6497 out_err:
6498         mutex_unlock(&iter->mutex);
6499         goto out;
6500 }
6501
6502 static ssize_t
6503 tracing_entries_read(struct file *filp, char __user *ubuf,
6504                      size_t cnt, loff_t *ppos)
6505 {
6506         struct inode *inode = file_inode(filp);
6507         struct trace_array *tr = inode->i_private;
6508         int cpu = tracing_get_cpu(inode);
6509         char buf[64];
6510         int r = 0;
6511         ssize_t ret;
6512
6513         mutex_lock(&trace_types_lock);
6514
6515         if (cpu == RING_BUFFER_ALL_CPUS) {
6516                 int cpu, buf_size_same;
6517                 unsigned long size;
6518
6519                 size = 0;
6520                 buf_size_same = 1;
6521                 /* check if all cpu sizes are same */
6522                 for_each_tracing_cpu(cpu) {
6523                         /* fill in the size from first enabled cpu */
6524                         if (size == 0)
6525                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6526                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6527                                 buf_size_same = 0;
6528                                 break;
6529                         }
6530                 }
6531
6532                 if (buf_size_same) {
6533                         if (!ring_buffer_expanded)
6534                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6535                                             size >> 10,
6536                                             trace_buf_size >> 10);
6537                         else
6538                                 r = sprintf(buf, "%lu\n", size >> 10);
6539                 } else
6540                         r = sprintf(buf, "X\n");
6541         } else
6542                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6543
6544         mutex_unlock(&trace_types_lock);
6545
6546         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6547         return ret;
6548 }
6549
6550 static ssize_t
6551 tracing_entries_write(struct file *filp, const char __user *ubuf,
6552                       size_t cnt, loff_t *ppos)
6553 {
6554         struct inode *inode = file_inode(filp);
6555         struct trace_array *tr = inode->i_private;
6556         unsigned long val;
6557         int ret;
6558
6559         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6560         if (ret)
6561                 return ret;
6562
6563         /* must have at least 1 entry */
6564         if (!val)
6565                 return -EINVAL;
6566
6567         /* value is in KB */
6568         val <<= 10;
6569         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6570         if (ret < 0)
6571                 return ret;
6572
6573         *ppos += cnt;
6574
6575         return cnt;
6576 }
6577
6578 static ssize_t
6579 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6580                                 size_t cnt, loff_t *ppos)
6581 {
6582         struct trace_array *tr = filp->private_data;
6583         char buf[64];
6584         int r, cpu;
6585         unsigned long size = 0, expanded_size = 0;
6586
6587         mutex_lock(&trace_types_lock);
6588         for_each_tracing_cpu(cpu) {
6589                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6590                 if (!ring_buffer_expanded)
6591                         expanded_size += trace_buf_size >> 10;
6592         }
6593         if (ring_buffer_expanded)
6594                 r = sprintf(buf, "%lu\n", size);
6595         else
6596                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6597         mutex_unlock(&trace_types_lock);
6598
6599         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6600 }
6601
6602 static ssize_t
6603 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6604                           size_t cnt, loff_t *ppos)
6605 {
6606         /*
6607          * There is no need to read what the user has written, this function
6608          * is just to make sure that there is no error when "echo" is used
6609          */
6610
6611         *ppos += cnt;
6612
6613         return cnt;
6614 }
6615
6616 static int
6617 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6618 {
6619         struct trace_array *tr = inode->i_private;
6620
6621         /* disable tracing ? */
6622         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6623                 tracer_tracing_off(tr);
6624         /* resize the ring buffer to 0 */
6625         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6626
6627         trace_array_put(tr);
6628
6629         return 0;
6630 }
6631
6632 static ssize_t
6633 tracing_mark_write(struct file *filp, const char __user *ubuf,
6634                                         size_t cnt, loff_t *fpos)
6635 {
6636         struct trace_array *tr = filp->private_data;
6637         struct ring_buffer_event *event;
6638         enum event_trigger_type tt = ETT_NONE;
6639         struct trace_buffer *buffer;
6640         struct print_entry *entry;
6641         unsigned long irq_flags;
6642         ssize_t written;
6643         int size;
6644         int len;
6645
6646 /* Used in tracing_mark_raw_write() as well */
6647 #define FAULTED_STR "<faulted>"
6648 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6649
6650         if (tracing_disabled)
6651                 return -EINVAL;
6652
6653         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6654                 return -EINVAL;
6655
6656         if (cnt > TRACE_BUF_SIZE)
6657                 cnt = TRACE_BUF_SIZE;
6658
6659         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6660
6661         local_save_flags(irq_flags);
6662         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6663
6664         /* If less than "<faulted>", then make sure we can still add that */
6665         if (cnt < FAULTED_SIZE)
6666                 size += FAULTED_SIZE - cnt;
6667
6668         buffer = tr->array_buffer.buffer;
6669         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6670                                             irq_flags, preempt_count());
6671         if (unlikely(!event))
6672                 /* Ring buffer disabled, return as if not open for write */
6673                 return -EBADF;
6674
6675         entry = ring_buffer_event_data(event);
6676         entry->ip = _THIS_IP_;
6677
6678         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6679         if (len) {
6680                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6681                 cnt = FAULTED_SIZE;
6682                 written = -EFAULT;
6683         } else
6684                 written = cnt;
6685         len = cnt;
6686
6687         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6688                 /* do not add \n before testing triggers, but add \0 */
6689                 entry->buf[cnt] = '\0';
6690                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6691         }
6692
6693         if (entry->buf[cnt - 1] != '\n') {
6694                 entry->buf[cnt] = '\n';
6695                 entry->buf[cnt + 1] = '\0';
6696         } else
6697                 entry->buf[cnt] = '\0';
6698
6699         if (static_branch_unlikely(&trace_marker_exports_enabled))
6700                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6701         __buffer_unlock_commit(buffer, event);
6702
6703         if (tt)
6704                 event_triggers_post_call(tr->trace_marker_file, tt);
6705
6706         if (written > 0)
6707                 *fpos += written;
6708
6709         return written;
6710 }
6711
6712 /* Limit it for now to 3K (including tag) */
6713 #define RAW_DATA_MAX_SIZE (1024*3)
6714
6715 static ssize_t
6716 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6717                                         size_t cnt, loff_t *fpos)
6718 {
6719         struct trace_array *tr = filp->private_data;
6720         struct ring_buffer_event *event;
6721         struct trace_buffer *buffer;
6722         struct raw_data_entry *entry;
6723         unsigned long irq_flags;
6724         ssize_t written;
6725         int size;
6726         int len;
6727
6728 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6729
6730         if (tracing_disabled)
6731                 return -EINVAL;
6732
6733         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6734                 return -EINVAL;
6735
6736         /* The marker must at least have a tag id */
6737         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6738                 return -EINVAL;
6739
6740         if (cnt > TRACE_BUF_SIZE)
6741                 cnt = TRACE_BUF_SIZE;
6742
6743         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6744
6745         local_save_flags(irq_flags);
6746         size = sizeof(*entry) + cnt;
6747         if (cnt < FAULT_SIZE_ID)
6748                 size += FAULT_SIZE_ID - cnt;
6749
6750         buffer = tr->array_buffer.buffer;
6751         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6752                                             irq_flags, preempt_count());
6753         if (!event)
6754                 /* Ring buffer disabled, return as if not open for write */
6755                 return -EBADF;
6756
6757         entry = ring_buffer_event_data(event);
6758
6759         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6760         if (len) {
6761                 entry->id = -1;
6762                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6763                 written = -EFAULT;
6764         } else
6765                 written = cnt;
6766
6767         __buffer_unlock_commit(buffer, event);
6768
6769         if (written > 0)
6770                 *fpos += written;
6771
6772         return written;
6773 }
6774
6775 static int tracing_clock_show(struct seq_file *m, void *v)
6776 {
6777         struct trace_array *tr = m->private;
6778         int i;
6779
6780         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6781                 seq_printf(m,
6782                         "%s%s%s%s", i ? " " : "",
6783                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6784                         i == tr->clock_id ? "]" : "");
6785         seq_putc(m, '\n');
6786
6787         return 0;
6788 }
6789
6790 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6791 {
6792         int i;
6793
6794         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6795                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6796                         break;
6797         }
6798         if (i == ARRAY_SIZE(trace_clocks))
6799                 return -EINVAL;
6800
6801         mutex_lock(&trace_types_lock);
6802
6803         tr->clock_id = i;
6804
6805         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6806
6807         /*
6808          * New clock may not be consistent with the previous clock.
6809          * Reset the buffer so that it doesn't have incomparable timestamps.
6810          */
6811         tracing_reset_online_cpus(&tr->array_buffer);
6812
6813 #ifdef CONFIG_TRACER_MAX_TRACE
6814         if (tr->max_buffer.buffer)
6815                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6816         tracing_reset_online_cpus(&tr->max_buffer);
6817 #endif
6818
6819         mutex_unlock(&trace_types_lock);
6820
6821         return 0;
6822 }
6823
6824 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6825                                    size_t cnt, loff_t *fpos)
6826 {
6827         struct seq_file *m = filp->private_data;
6828         struct trace_array *tr = m->private;
6829         char buf[64];
6830         const char *clockstr;
6831         int ret;
6832
6833         if (cnt >= sizeof(buf))
6834                 return -EINVAL;
6835
6836         if (copy_from_user(buf, ubuf, cnt))
6837                 return -EFAULT;
6838
6839         buf[cnt] = 0;
6840
6841         clockstr = strstrip(buf);
6842
6843         ret = tracing_set_clock(tr, clockstr);
6844         if (ret)
6845                 return ret;
6846
6847         *fpos += cnt;
6848
6849         return cnt;
6850 }
6851
6852 static int tracing_clock_open(struct inode *inode, struct file *file)
6853 {
6854         struct trace_array *tr = inode->i_private;
6855         int ret;
6856
6857         ret = tracing_check_open_get_tr(tr);
6858         if (ret)
6859                 return ret;
6860
6861         ret = single_open(file, tracing_clock_show, inode->i_private);
6862         if (ret < 0)
6863                 trace_array_put(tr);
6864
6865         return ret;
6866 }
6867
6868 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6869 {
6870         struct trace_array *tr = m->private;
6871
6872         mutex_lock(&trace_types_lock);
6873
6874         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6875                 seq_puts(m, "delta [absolute]\n");
6876         else
6877                 seq_puts(m, "[delta] absolute\n");
6878
6879         mutex_unlock(&trace_types_lock);
6880
6881         return 0;
6882 }
6883
6884 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6885 {
6886         struct trace_array *tr = inode->i_private;
6887         int ret;
6888
6889         ret = tracing_check_open_get_tr(tr);
6890         if (ret)
6891                 return ret;
6892
6893         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6894         if (ret < 0)
6895                 trace_array_put(tr);
6896
6897         return ret;
6898 }
6899
6900 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6901 {
6902         int ret = 0;
6903
6904         mutex_lock(&trace_types_lock);
6905
6906         if (abs && tr->time_stamp_abs_ref++)
6907                 goto out;
6908
6909         if (!abs) {
6910                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6911                         ret = -EINVAL;
6912                         goto out;
6913                 }
6914
6915                 if (--tr->time_stamp_abs_ref)
6916                         goto out;
6917         }
6918
6919         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6920
6921 #ifdef CONFIG_TRACER_MAX_TRACE
6922         if (tr->max_buffer.buffer)
6923                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6924 #endif
6925  out:
6926         mutex_unlock(&trace_types_lock);
6927
6928         return ret;
6929 }
6930
6931 struct ftrace_buffer_info {
6932         struct trace_iterator   iter;
6933         void                    *spare;
6934         unsigned int            spare_cpu;
6935         unsigned int            read;
6936 };
6937
6938 #ifdef CONFIG_TRACER_SNAPSHOT
6939 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6940 {
6941         struct trace_array *tr = inode->i_private;
6942         struct trace_iterator *iter;
6943         struct seq_file *m;
6944         int ret;
6945
6946         ret = tracing_check_open_get_tr(tr);
6947         if (ret)
6948                 return ret;
6949
6950         if (file->f_mode & FMODE_READ) {
6951                 iter = __tracing_open(inode, file, true);
6952                 if (IS_ERR(iter))
6953                         ret = PTR_ERR(iter);
6954         } else {
6955                 /* Writes still need the seq_file to hold the private data */
6956                 ret = -ENOMEM;
6957                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6958                 if (!m)
6959                         goto out;
6960                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6961                 if (!iter) {
6962                         kfree(m);
6963                         goto out;
6964                 }
6965                 ret = 0;
6966
6967                 iter->tr = tr;
6968                 iter->array_buffer = &tr->max_buffer;
6969                 iter->cpu_file = tracing_get_cpu(inode);
6970                 m->private = iter;
6971                 file->private_data = m;
6972         }
6973 out:
6974         if (ret < 0)
6975                 trace_array_put(tr);
6976
6977         return ret;
6978 }
6979
6980 static ssize_t
6981 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6982                        loff_t *ppos)
6983 {
6984         struct seq_file *m = filp->private_data;
6985         struct trace_iterator *iter = m->private;
6986         struct trace_array *tr = iter->tr;
6987         unsigned long val;
6988         int ret;
6989
6990         ret = tracing_update_buffers();
6991         if (ret < 0)
6992                 return ret;
6993
6994         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6995         if (ret)
6996                 return ret;
6997
6998         mutex_lock(&trace_types_lock);
6999
7000         if (tr->current_trace->use_max_tr) {
7001                 ret = -EBUSY;
7002                 goto out;
7003         }
7004
7005         arch_spin_lock(&tr->max_lock);
7006         if (tr->cond_snapshot)
7007                 ret = -EBUSY;
7008         arch_spin_unlock(&tr->max_lock);
7009         if (ret)
7010                 goto out;
7011
7012         switch (val) {
7013         case 0:
7014                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7015                         ret = -EINVAL;
7016                         break;
7017                 }
7018                 if (tr->allocated_snapshot)
7019                         free_snapshot(tr);
7020                 break;
7021         case 1:
7022 /* Only allow per-cpu swap if the ring buffer supports it */
7023 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7024                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7025                         ret = -EINVAL;
7026                         break;
7027                 }
7028 #endif
7029                 if (tr->allocated_snapshot)
7030                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7031                                         &tr->array_buffer, iter->cpu_file);
7032                 else
7033                         ret = tracing_alloc_snapshot_instance(tr);
7034                 if (ret < 0)
7035                         break;
7036                 local_irq_disable();
7037                 /* Now, we're going to swap */
7038                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7039                         update_max_tr(tr, current, smp_processor_id(), NULL);
7040                 else
7041                         update_max_tr_single(tr, current, iter->cpu_file);
7042                 local_irq_enable();
7043                 break;
7044         default:
7045                 if (tr->allocated_snapshot) {
7046                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7047                                 tracing_reset_online_cpus(&tr->max_buffer);
7048                         else
7049                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7050                 }
7051                 break;
7052         }
7053
7054         if (ret >= 0) {
7055                 *ppos += cnt;
7056                 ret = cnt;
7057         }
7058 out:
7059         mutex_unlock(&trace_types_lock);
7060         return ret;
7061 }
7062
7063 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7064 {
7065         struct seq_file *m = file->private_data;
7066         int ret;
7067
7068         ret = tracing_release(inode, file);
7069
7070         if (file->f_mode & FMODE_READ)
7071                 return ret;
7072
7073         /* If write only, the seq_file is just a stub */
7074         if (m)
7075                 kfree(m->private);
7076         kfree(m);
7077
7078         return 0;
7079 }
7080
7081 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7082 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7083                                     size_t count, loff_t *ppos);
7084 static int tracing_buffers_release(struct inode *inode, struct file *file);
7085 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7086                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7087
7088 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7089 {
7090         struct ftrace_buffer_info *info;
7091         int ret;
7092
7093         /* The following checks for tracefs lockdown */
7094         ret = tracing_buffers_open(inode, filp);
7095         if (ret < 0)
7096                 return ret;
7097
7098         info = filp->private_data;
7099
7100         if (info->iter.trace->use_max_tr) {
7101                 tracing_buffers_release(inode, filp);
7102                 return -EBUSY;
7103         }
7104
7105         info->iter.snapshot = true;
7106         info->iter.array_buffer = &info->iter.tr->max_buffer;
7107
7108         return ret;
7109 }
7110
7111 #endif /* CONFIG_TRACER_SNAPSHOT */
7112
7113
7114 static const struct file_operations tracing_thresh_fops = {
7115         .open           = tracing_open_generic,
7116         .read           = tracing_thresh_read,
7117         .write          = tracing_thresh_write,
7118         .llseek         = generic_file_llseek,
7119 };
7120
7121 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7122 static const struct file_operations tracing_max_lat_fops = {
7123         .open           = tracing_open_generic,
7124         .read           = tracing_max_lat_read,
7125         .write          = tracing_max_lat_write,
7126         .llseek         = generic_file_llseek,
7127 };
7128 #endif
7129
7130 static const struct file_operations set_tracer_fops = {
7131         .open           = tracing_open_generic,
7132         .read           = tracing_set_trace_read,
7133         .write          = tracing_set_trace_write,
7134         .llseek         = generic_file_llseek,
7135 };
7136
7137 static const struct file_operations tracing_pipe_fops = {
7138         .open           = tracing_open_pipe,
7139         .poll           = tracing_poll_pipe,
7140         .read           = tracing_read_pipe,
7141         .splice_read    = tracing_splice_read_pipe,
7142         .release        = tracing_release_pipe,
7143         .llseek         = no_llseek,
7144 };
7145
7146 static const struct file_operations tracing_entries_fops = {
7147         .open           = tracing_open_generic_tr,
7148         .read           = tracing_entries_read,
7149         .write          = tracing_entries_write,
7150         .llseek         = generic_file_llseek,
7151         .release        = tracing_release_generic_tr,
7152 };
7153
7154 static const struct file_operations tracing_total_entries_fops = {
7155         .open           = tracing_open_generic_tr,
7156         .read           = tracing_total_entries_read,
7157         .llseek         = generic_file_llseek,
7158         .release        = tracing_release_generic_tr,
7159 };
7160
7161 static const struct file_operations tracing_free_buffer_fops = {
7162         .open           = tracing_open_generic_tr,
7163         .write          = tracing_free_buffer_write,
7164         .release        = tracing_free_buffer_release,
7165 };
7166
7167 static const struct file_operations tracing_mark_fops = {
7168         .open           = tracing_open_generic_tr,
7169         .write          = tracing_mark_write,
7170         .llseek         = generic_file_llseek,
7171         .release        = tracing_release_generic_tr,
7172 };
7173
7174 static const struct file_operations tracing_mark_raw_fops = {
7175         .open           = tracing_open_generic_tr,
7176         .write          = tracing_mark_raw_write,
7177         .llseek         = generic_file_llseek,
7178         .release        = tracing_release_generic_tr,
7179 };
7180
7181 static const struct file_operations trace_clock_fops = {
7182         .open           = tracing_clock_open,
7183         .read           = seq_read,
7184         .llseek         = seq_lseek,
7185         .release        = tracing_single_release_tr,
7186         .write          = tracing_clock_write,
7187 };
7188
7189 static const struct file_operations trace_time_stamp_mode_fops = {
7190         .open           = tracing_time_stamp_mode_open,
7191         .read           = seq_read,
7192         .llseek         = seq_lseek,
7193         .release        = tracing_single_release_tr,
7194 };
7195
7196 #ifdef CONFIG_TRACER_SNAPSHOT
7197 static const struct file_operations snapshot_fops = {
7198         .open           = tracing_snapshot_open,
7199         .read           = seq_read,
7200         .write          = tracing_snapshot_write,
7201         .llseek         = tracing_lseek,
7202         .release        = tracing_snapshot_release,
7203 };
7204
7205 static const struct file_operations snapshot_raw_fops = {
7206         .open           = snapshot_raw_open,
7207         .read           = tracing_buffers_read,
7208         .release        = tracing_buffers_release,
7209         .splice_read    = tracing_buffers_splice_read,
7210         .llseek         = no_llseek,
7211 };
7212
7213 #endif /* CONFIG_TRACER_SNAPSHOT */
7214
7215 #define TRACING_LOG_ERRS_MAX    8
7216 #define TRACING_LOG_LOC_MAX     128
7217
7218 #define CMD_PREFIX "  Command: "
7219
7220 struct err_info {
7221         const char      **errs; /* ptr to loc-specific array of err strings */
7222         u8              type;   /* index into errs -> specific err string */
7223         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7224         u64             ts;
7225 };
7226
7227 struct tracing_log_err {
7228         struct list_head        list;
7229         struct err_info         info;
7230         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7231         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7232 };
7233
7234 static DEFINE_MUTEX(tracing_err_log_lock);
7235
7236 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7237 {
7238         struct tracing_log_err *err;
7239
7240         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7241                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7242                 if (!err)
7243                         err = ERR_PTR(-ENOMEM);
7244                 tr->n_err_log_entries++;
7245
7246                 return err;
7247         }
7248
7249         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7250         list_del(&err->list);
7251
7252         return err;
7253 }
7254
7255 /**
7256  * err_pos - find the position of a string within a command for error careting
7257  * @cmd: The tracing command that caused the error
7258  * @str: The string to position the caret at within @cmd
7259  *
7260  * Finds the position of the first occurence of @str within @cmd.  The
7261  * return value can be passed to tracing_log_err() for caret placement
7262  * within @cmd.
7263  *
7264  * Returns the index within @cmd of the first occurence of @str or 0
7265  * if @str was not found.
7266  */
7267 unsigned int err_pos(char *cmd, const char *str)
7268 {
7269         char *found;
7270
7271         if (WARN_ON(!strlen(cmd)))
7272                 return 0;
7273
7274         found = strstr(cmd, str);
7275         if (found)
7276                 return found - cmd;
7277
7278         return 0;
7279 }
7280
7281 /**
7282  * tracing_log_err - write an error to the tracing error log
7283  * @tr: The associated trace array for the error (NULL for top level array)
7284  * @loc: A string describing where the error occurred
7285  * @cmd: The tracing command that caused the error
7286  * @errs: The array of loc-specific static error strings
7287  * @type: The index into errs[], which produces the specific static err string
7288  * @pos: The position the caret should be placed in the cmd
7289  *
7290  * Writes an error into tracing/error_log of the form:
7291  *
7292  * <loc>: error: <text>
7293  *   Command: <cmd>
7294  *              ^
7295  *
7296  * tracing/error_log is a small log file containing the last
7297  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7298  * unless there has been a tracing error, and the error log can be
7299  * cleared and have its memory freed by writing the empty string in
7300  * truncation mode to it i.e. echo > tracing/error_log.
7301  *
7302  * NOTE: the @errs array along with the @type param are used to
7303  * produce a static error string - this string is not copied and saved
7304  * when the error is logged - only a pointer to it is saved.  See
7305  * existing callers for examples of how static strings are typically
7306  * defined for use with tracing_log_err().
7307  */
7308 void tracing_log_err(struct trace_array *tr,
7309                      const char *loc, const char *cmd,
7310                      const char **errs, u8 type, u8 pos)
7311 {
7312         struct tracing_log_err *err;
7313
7314         if (!tr)
7315                 tr = &global_trace;
7316
7317         mutex_lock(&tracing_err_log_lock);
7318         err = get_tracing_log_err(tr);
7319         if (PTR_ERR(err) == -ENOMEM) {
7320                 mutex_unlock(&tracing_err_log_lock);
7321                 return;
7322         }
7323
7324         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7325         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7326
7327         err->info.errs = errs;
7328         err->info.type = type;
7329         err->info.pos = pos;
7330         err->info.ts = local_clock();
7331
7332         list_add_tail(&err->list, &tr->err_log);
7333         mutex_unlock(&tracing_err_log_lock);
7334 }
7335
7336 static void clear_tracing_err_log(struct trace_array *tr)
7337 {
7338         struct tracing_log_err *err, *next;
7339
7340         mutex_lock(&tracing_err_log_lock);
7341         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7342                 list_del(&err->list);
7343                 kfree(err);
7344         }
7345
7346         tr->n_err_log_entries = 0;
7347         mutex_unlock(&tracing_err_log_lock);
7348 }
7349
7350 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7351 {
7352         struct trace_array *tr = m->private;
7353
7354         mutex_lock(&tracing_err_log_lock);
7355
7356         return seq_list_start(&tr->err_log, *pos);
7357 }
7358
7359 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7360 {
7361         struct trace_array *tr = m->private;
7362
7363         return seq_list_next(v, &tr->err_log, pos);
7364 }
7365
7366 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7367 {
7368         mutex_unlock(&tracing_err_log_lock);
7369 }
7370
7371 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7372 {
7373         u8 i;
7374
7375         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7376                 seq_putc(m, ' ');
7377         for (i = 0; i < pos; i++)
7378                 seq_putc(m, ' ');
7379         seq_puts(m, "^\n");
7380 }
7381
7382 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7383 {
7384         struct tracing_log_err *err = v;
7385
7386         if (err) {
7387                 const char *err_text = err->info.errs[err->info.type];
7388                 u64 sec = err->info.ts;
7389                 u32 nsec;
7390
7391                 nsec = do_div(sec, NSEC_PER_SEC);
7392                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7393                            err->loc, err_text);
7394                 seq_printf(m, "%s", err->cmd);
7395                 tracing_err_log_show_pos(m, err->info.pos);
7396         }
7397
7398         return 0;
7399 }
7400
7401 static const struct seq_operations tracing_err_log_seq_ops = {
7402         .start  = tracing_err_log_seq_start,
7403         .next   = tracing_err_log_seq_next,
7404         .stop   = tracing_err_log_seq_stop,
7405         .show   = tracing_err_log_seq_show
7406 };
7407
7408 static int tracing_err_log_open(struct inode *inode, struct file *file)
7409 {
7410         struct trace_array *tr = inode->i_private;
7411         int ret = 0;
7412
7413         ret = tracing_check_open_get_tr(tr);
7414         if (ret)
7415                 return ret;
7416
7417         /* If this file was opened for write, then erase contents */
7418         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7419                 clear_tracing_err_log(tr);
7420
7421         if (file->f_mode & FMODE_READ) {
7422                 ret = seq_open(file, &tracing_err_log_seq_ops);
7423                 if (!ret) {
7424                         struct seq_file *m = file->private_data;
7425                         m->private = tr;
7426                 } else {
7427                         trace_array_put(tr);
7428                 }
7429         }
7430         return ret;
7431 }
7432
7433 static ssize_t tracing_err_log_write(struct file *file,
7434                                      const char __user *buffer,
7435                                      size_t count, loff_t *ppos)
7436 {
7437         return count;
7438 }
7439
7440 static int tracing_err_log_release(struct inode *inode, struct file *file)
7441 {
7442         struct trace_array *tr = inode->i_private;
7443
7444         trace_array_put(tr);
7445
7446         if (file->f_mode & FMODE_READ)
7447                 seq_release(inode, file);
7448
7449         return 0;
7450 }
7451
7452 static const struct file_operations tracing_err_log_fops = {
7453         .open           = tracing_err_log_open,
7454         .write          = tracing_err_log_write,
7455         .read           = seq_read,
7456         .llseek         = seq_lseek,
7457         .release        = tracing_err_log_release,
7458 };
7459
7460 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7461 {
7462         struct trace_array *tr = inode->i_private;
7463         struct ftrace_buffer_info *info;
7464         int ret;
7465
7466         ret = tracing_check_open_get_tr(tr);
7467         if (ret)
7468                 return ret;
7469
7470         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7471         if (!info) {
7472                 trace_array_put(tr);
7473                 return -ENOMEM;
7474         }
7475
7476         mutex_lock(&trace_types_lock);
7477
7478         info->iter.tr           = tr;
7479         info->iter.cpu_file     = tracing_get_cpu(inode);
7480         info->iter.trace        = tr->current_trace;
7481         info->iter.array_buffer = &tr->array_buffer;
7482         info->spare             = NULL;
7483         /* Force reading ring buffer for first read */
7484         info->read              = (unsigned int)-1;
7485
7486         filp->private_data = info;
7487
7488         tr->trace_ref++;
7489
7490         mutex_unlock(&trace_types_lock);
7491
7492         ret = nonseekable_open(inode, filp);
7493         if (ret < 0)
7494                 trace_array_put(tr);
7495
7496         return ret;
7497 }
7498
7499 static __poll_t
7500 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7501 {
7502         struct ftrace_buffer_info *info = filp->private_data;
7503         struct trace_iterator *iter = &info->iter;
7504
7505         return trace_poll(iter, filp, poll_table);
7506 }
7507
7508 static ssize_t
7509 tracing_buffers_read(struct file *filp, char __user *ubuf,
7510                      size_t count, loff_t *ppos)
7511 {
7512         struct ftrace_buffer_info *info = filp->private_data;
7513         struct trace_iterator *iter = &info->iter;
7514         ssize_t ret = 0;
7515         ssize_t size;
7516
7517         if (!count)
7518                 return 0;
7519
7520 #ifdef CONFIG_TRACER_MAX_TRACE
7521         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7522                 return -EBUSY;
7523 #endif
7524
7525         if (!info->spare) {
7526                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7527                                                           iter->cpu_file);
7528                 if (IS_ERR(info->spare)) {
7529                         ret = PTR_ERR(info->spare);
7530                         info->spare = NULL;
7531                 } else {
7532                         info->spare_cpu = iter->cpu_file;
7533                 }
7534         }
7535         if (!info->spare)
7536                 return ret;
7537
7538         /* Do we have previous read data to read? */
7539         if (info->read < PAGE_SIZE)
7540                 goto read;
7541
7542  again:
7543         trace_access_lock(iter->cpu_file);
7544         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7545                                     &info->spare,
7546                                     count,
7547                                     iter->cpu_file, 0);
7548         trace_access_unlock(iter->cpu_file);
7549
7550         if (ret < 0) {
7551                 if (trace_empty(iter)) {
7552                         if ((filp->f_flags & O_NONBLOCK))
7553                                 return -EAGAIN;
7554
7555                         ret = wait_on_pipe(iter, 0);
7556                         if (ret)
7557                                 return ret;
7558
7559                         goto again;
7560                 }
7561                 return 0;
7562         }
7563
7564         info->read = 0;
7565  read:
7566         size = PAGE_SIZE - info->read;
7567         if (size > count)
7568                 size = count;
7569
7570         ret = copy_to_user(ubuf, info->spare + info->read, size);
7571         if (ret == size)
7572                 return -EFAULT;
7573
7574         size -= ret;
7575
7576         *ppos += size;
7577         info->read += size;
7578
7579         return size;
7580 }
7581
7582 static int tracing_buffers_release(struct inode *inode, struct file *file)
7583 {
7584         struct ftrace_buffer_info *info = file->private_data;
7585         struct trace_iterator *iter = &info->iter;
7586
7587         mutex_lock(&trace_types_lock);
7588
7589         iter->tr->trace_ref--;
7590
7591         __trace_array_put(iter->tr);
7592
7593         if (info->spare)
7594                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7595                                            info->spare_cpu, info->spare);
7596         kvfree(info);
7597
7598         mutex_unlock(&trace_types_lock);
7599
7600         return 0;
7601 }
7602
7603 struct buffer_ref {
7604         struct trace_buffer     *buffer;
7605         void                    *page;
7606         int                     cpu;
7607         refcount_t              refcount;
7608 };
7609
7610 static void buffer_ref_release(struct buffer_ref *ref)
7611 {
7612         if (!refcount_dec_and_test(&ref->refcount))
7613                 return;
7614         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7615         kfree(ref);
7616 }
7617
7618 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7619                                     struct pipe_buffer *buf)
7620 {
7621         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7622
7623         buffer_ref_release(ref);
7624         buf->private = 0;
7625 }
7626
7627 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7628                                 struct pipe_buffer *buf)
7629 {
7630         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7631
7632         if (refcount_read(&ref->refcount) > INT_MAX/2)
7633                 return false;
7634
7635         refcount_inc(&ref->refcount);
7636         return true;
7637 }
7638
7639 /* Pipe buffer operations for a buffer. */
7640 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7641         .release                = buffer_pipe_buf_release,
7642         .get                    = buffer_pipe_buf_get,
7643 };
7644
7645 /*
7646  * Callback from splice_to_pipe(), if we need to release some pages
7647  * at the end of the spd in case we error'ed out in filling the pipe.
7648  */
7649 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7650 {
7651         struct buffer_ref *ref =
7652                 (struct buffer_ref *)spd->partial[i].private;
7653
7654         buffer_ref_release(ref);
7655         spd->partial[i].private = 0;
7656 }
7657
7658 static ssize_t
7659 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7660                             struct pipe_inode_info *pipe, size_t len,
7661                             unsigned int flags)
7662 {
7663         struct ftrace_buffer_info *info = file->private_data;
7664         struct trace_iterator *iter = &info->iter;
7665         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7666         struct page *pages_def[PIPE_DEF_BUFFERS];
7667         struct splice_pipe_desc spd = {
7668                 .pages          = pages_def,
7669                 .partial        = partial_def,
7670                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7671                 .ops            = &buffer_pipe_buf_ops,
7672                 .spd_release    = buffer_spd_release,
7673         };
7674         struct buffer_ref *ref;
7675         int entries, i;
7676         ssize_t ret = 0;
7677
7678 #ifdef CONFIG_TRACER_MAX_TRACE
7679         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7680                 return -EBUSY;
7681 #endif
7682
7683         if (*ppos & (PAGE_SIZE - 1))
7684                 return -EINVAL;
7685
7686         if (len & (PAGE_SIZE - 1)) {
7687                 if (len < PAGE_SIZE)
7688                         return -EINVAL;
7689                 len &= PAGE_MASK;
7690         }
7691
7692         if (splice_grow_spd(pipe, &spd))
7693                 return -ENOMEM;
7694
7695  again:
7696         trace_access_lock(iter->cpu_file);
7697         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7698
7699         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7700                 struct page *page;
7701                 int r;
7702
7703                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7704                 if (!ref) {
7705                         ret = -ENOMEM;
7706                         break;
7707                 }
7708
7709                 refcount_set(&ref->refcount, 1);
7710                 ref->buffer = iter->array_buffer->buffer;
7711                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7712                 if (IS_ERR(ref->page)) {
7713                         ret = PTR_ERR(ref->page);
7714                         ref->page = NULL;
7715                         kfree(ref);
7716                         break;
7717                 }
7718                 ref->cpu = iter->cpu_file;
7719
7720                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7721                                           len, iter->cpu_file, 1);
7722                 if (r < 0) {
7723                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7724                                                    ref->page);
7725                         kfree(ref);
7726                         break;
7727                 }
7728
7729                 page = virt_to_page(ref->page);
7730
7731                 spd.pages[i] = page;
7732                 spd.partial[i].len = PAGE_SIZE;
7733                 spd.partial[i].offset = 0;
7734                 spd.partial[i].private = (unsigned long)ref;
7735                 spd.nr_pages++;
7736                 *ppos += PAGE_SIZE;
7737
7738                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7739         }
7740
7741         trace_access_unlock(iter->cpu_file);
7742         spd.nr_pages = i;
7743
7744         /* did we read anything? */
7745         if (!spd.nr_pages) {
7746                 if (ret)
7747                         goto out;
7748
7749                 ret = -EAGAIN;
7750                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7751                         goto out;
7752
7753                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7754                 if (ret)
7755                         goto out;
7756
7757                 goto again;
7758         }
7759
7760         ret = splice_to_pipe(pipe, &spd);
7761 out:
7762         splice_shrink_spd(&spd);
7763
7764         return ret;
7765 }
7766
7767 static const struct file_operations tracing_buffers_fops = {
7768         .open           = tracing_buffers_open,
7769         .read           = tracing_buffers_read,
7770         .poll           = tracing_buffers_poll,
7771         .release        = tracing_buffers_release,
7772         .splice_read    = tracing_buffers_splice_read,
7773         .llseek         = no_llseek,
7774 };
7775
7776 static ssize_t
7777 tracing_stats_read(struct file *filp, char __user *ubuf,
7778                    size_t count, loff_t *ppos)
7779 {
7780         struct inode *inode = file_inode(filp);
7781         struct trace_array *tr = inode->i_private;
7782         struct array_buffer *trace_buf = &tr->array_buffer;
7783         int cpu = tracing_get_cpu(inode);
7784         struct trace_seq *s;
7785         unsigned long cnt;
7786         unsigned long long t;
7787         unsigned long usec_rem;
7788
7789         s = kmalloc(sizeof(*s), GFP_KERNEL);
7790         if (!s)
7791                 return -ENOMEM;
7792
7793         trace_seq_init(s);
7794
7795         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7796         trace_seq_printf(s, "entries: %ld\n", cnt);
7797
7798         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7799         trace_seq_printf(s, "overrun: %ld\n", cnt);
7800
7801         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7802         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7803
7804         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7805         trace_seq_printf(s, "bytes: %ld\n", cnt);
7806
7807         if (trace_clocks[tr->clock_id].in_ns) {
7808                 /* local or global for trace_clock */
7809                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7810                 usec_rem = do_div(t, USEC_PER_SEC);
7811                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7812                                                                 t, usec_rem);
7813
7814                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7815                 usec_rem = do_div(t, USEC_PER_SEC);
7816                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7817         } else {
7818                 /* counter or tsc mode for trace_clock */
7819                 trace_seq_printf(s, "oldest event ts: %llu\n",
7820                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7821
7822                 trace_seq_printf(s, "now ts: %llu\n",
7823                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7824         }
7825
7826         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7827         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7828
7829         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7830         trace_seq_printf(s, "read events: %ld\n", cnt);
7831
7832         count = simple_read_from_buffer(ubuf, count, ppos,
7833                                         s->buffer, trace_seq_used(s));
7834
7835         kfree(s);
7836
7837         return count;
7838 }
7839
7840 static const struct file_operations tracing_stats_fops = {
7841         .open           = tracing_open_generic_tr,
7842         .read           = tracing_stats_read,
7843         .llseek         = generic_file_llseek,
7844         .release        = tracing_release_generic_tr,
7845 };
7846
7847 #ifdef CONFIG_DYNAMIC_FTRACE
7848
7849 static ssize_t
7850 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7851                   size_t cnt, loff_t *ppos)
7852 {
7853         ssize_t ret;
7854         char *buf;
7855         int r;
7856
7857         /* 256 should be plenty to hold the amount needed */
7858         buf = kmalloc(256, GFP_KERNEL);
7859         if (!buf)
7860                 return -ENOMEM;
7861
7862         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7863                       ftrace_update_tot_cnt,
7864                       ftrace_number_of_pages,
7865                       ftrace_number_of_groups);
7866
7867         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7868         kfree(buf);
7869         return ret;
7870 }
7871
7872 static const struct file_operations tracing_dyn_info_fops = {
7873         .open           = tracing_open_generic,
7874         .read           = tracing_read_dyn_info,
7875         .llseek         = generic_file_llseek,
7876 };
7877 #endif /* CONFIG_DYNAMIC_FTRACE */
7878
7879 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7880 static void
7881 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7882                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7883                 void *data)
7884 {
7885         tracing_snapshot_instance(tr);
7886 }
7887
7888 static void
7889 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7890                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7891                       void *data)
7892 {
7893         struct ftrace_func_mapper *mapper = data;
7894         long *count = NULL;
7895
7896         if (mapper)
7897                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7898
7899         if (count) {
7900
7901                 if (*count <= 0)
7902                         return;
7903
7904                 (*count)--;
7905         }
7906
7907         tracing_snapshot_instance(tr);
7908 }
7909
7910 static int
7911 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7912                       struct ftrace_probe_ops *ops, void *data)
7913 {
7914         struct ftrace_func_mapper *mapper = data;
7915         long *count = NULL;
7916
7917         seq_printf(m, "%ps:", (void *)ip);
7918
7919         seq_puts(m, "snapshot");
7920
7921         if (mapper)
7922                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7923
7924         if (count)
7925                 seq_printf(m, ":count=%ld\n", *count);
7926         else
7927                 seq_puts(m, ":unlimited\n");
7928
7929         return 0;
7930 }
7931
7932 static int
7933 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7934                      unsigned long ip, void *init_data, void **data)
7935 {
7936         struct ftrace_func_mapper *mapper = *data;
7937
7938         if (!mapper) {
7939                 mapper = allocate_ftrace_func_mapper();
7940                 if (!mapper)
7941                         return -ENOMEM;
7942                 *data = mapper;
7943         }
7944
7945         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7946 }
7947
7948 static void
7949 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7950                      unsigned long ip, void *data)
7951 {
7952         struct ftrace_func_mapper *mapper = data;
7953
7954         if (!ip) {
7955                 if (!mapper)
7956                         return;
7957                 free_ftrace_func_mapper(mapper, NULL);
7958                 return;
7959         }
7960
7961         ftrace_func_mapper_remove_ip(mapper, ip);
7962 }
7963
7964 static struct ftrace_probe_ops snapshot_probe_ops = {
7965         .func                   = ftrace_snapshot,
7966         .print                  = ftrace_snapshot_print,
7967 };
7968
7969 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7970         .func                   = ftrace_count_snapshot,
7971         .print                  = ftrace_snapshot_print,
7972         .init                   = ftrace_snapshot_init,
7973         .free                   = ftrace_snapshot_free,
7974 };
7975
7976 static int
7977 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7978                                char *glob, char *cmd, char *param, int enable)
7979 {
7980         struct ftrace_probe_ops *ops;
7981         void *count = (void *)-1;
7982         char *number;
7983         int ret;
7984
7985         if (!tr)
7986                 return -ENODEV;
7987
7988         /* hash funcs only work with set_ftrace_filter */
7989         if (!enable)
7990                 return -EINVAL;
7991
7992         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7993
7994         if (glob[0] == '!')
7995                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7996
7997         if (!param)
7998                 goto out_reg;
7999
8000         number = strsep(&param, ":");
8001
8002         if (!strlen(number))
8003                 goto out_reg;
8004
8005         /*
8006          * We use the callback data field (which is a pointer)
8007          * as our counter.
8008          */
8009         ret = kstrtoul(number, 0, (unsigned long *)&count);
8010         if (ret)
8011                 return ret;
8012
8013  out_reg:
8014         ret = tracing_alloc_snapshot_instance(tr);
8015         if (ret < 0)
8016                 goto out;
8017
8018         ret = register_ftrace_function_probe(glob, tr, ops, count);
8019
8020  out:
8021         return ret < 0 ? ret : 0;
8022 }
8023
8024 static struct ftrace_func_command ftrace_snapshot_cmd = {
8025         .name                   = "snapshot",
8026         .func                   = ftrace_trace_snapshot_callback,
8027 };
8028
8029 static __init int register_snapshot_cmd(void)
8030 {
8031         return register_ftrace_command(&ftrace_snapshot_cmd);
8032 }
8033 #else
8034 static inline __init int register_snapshot_cmd(void) { return 0; }
8035 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8036
8037 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8038 {
8039         if (WARN_ON(!tr->dir))
8040                 return ERR_PTR(-ENODEV);
8041
8042         /* Top directory uses NULL as the parent */
8043         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8044                 return NULL;
8045
8046         /* All sub buffers have a descriptor */
8047         return tr->dir;
8048 }
8049
8050 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8051 {
8052         struct dentry *d_tracer;
8053
8054         if (tr->percpu_dir)
8055                 return tr->percpu_dir;
8056
8057         d_tracer = tracing_get_dentry(tr);
8058         if (IS_ERR(d_tracer))
8059                 return NULL;
8060
8061         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8062
8063         MEM_FAIL(!tr->percpu_dir,
8064                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8065
8066         return tr->percpu_dir;
8067 }
8068
8069 static struct dentry *
8070 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8071                       void *data, long cpu, const struct file_operations *fops)
8072 {
8073         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8074
8075         if (ret) /* See tracing_get_cpu() */
8076                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8077         return ret;
8078 }
8079
8080 static void
8081 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8082 {
8083         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8084         struct dentry *d_cpu;
8085         char cpu_dir[30]; /* 30 characters should be more than enough */
8086
8087         if (!d_percpu)
8088                 return;
8089
8090         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8091         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8092         if (!d_cpu) {
8093                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8094                 return;
8095         }
8096
8097         /* per cpu trace_pipe */
8098         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8099                                 tr, cpu, &tracing_pipe_fops);
8100
8101         /* per cpu trace */
8102         trace_create_cpu_file("trace", 0644, d_cpu,
8103                                 tr, cpu, &tracing_fops);
8104
8105         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8106                                 tr, cpu, &tracing_buffers_fops);
8107
8108         trace_create_cpu_file("stats", 0444, d_cpu,
8109                                 tr, cpu, &tracing_stats_fops);
8110
8111         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8112                                 tr, cpu, &tracing_entries_fops);
8113
8114 #ifdef CONFIG_TRACER_SNAPSHOT
8115         trace_create_cpu_file("snapshot", 0644, d_cpu,
8116                                 tr, cpu, &snapshot_fops);
8117
8118         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8119                                 tr, cpu, &snapshot_raw_fops);
8120 #endif
8121 }
8122
8123 #ifdef CONFIG_FTRACE_SELFTEST
8124 /* Let selftest have access to static functions in this file */
8125 #include "trace_selftest.c"
8126 #endif
8127
8128 static ssize_t
8129 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8130                         loff_t *ppos)
8131 {
8132         struct trace_option_dentry *topt = filp->private_data;
8133         char *buf;
8134
8135         if (topt->flags->val & topt->opt->bit)
8136                 buf = "1\n";
8137         else
8138                 buf = "0\n";
8139
8140         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8141 }
8142
8143 static ssize_t
8144 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8145                          loff_t *ppos)
8146 {
8147         struct trace_option_dentry *topt = filp->private_data;
8148         unsigned long val;
8149         int ret;
8150
8151         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8152         if (ret)
8153                 return ret;
8154
8155         if (val != 0 && val != 1)
8156                 return -EINVAL;
8157
8158         if (!!(topt->flags->val & topt->opt->bit) != val) {
8159                 mutex_lock(&trace_types_lock);
8160                 ret = __set_tracer_option(topt->tr, topt->flags,
8161                                           topt->opt, !val);
8162                 mutex_unlock(&trace_types_lock);
8163                 if (ret)
8164                         return ret;
8165         }
8166
8167         *ppos += cnt;
8168
8169         return cnt;
8170 }
8171
8172
8173 static const struct file_operations trace_options_fops = {
8174         .open = tracing_open_generic,
8175         .read = trace_options_read,
8176         .write = trace_options_write,
8177         .llseek = generic_file_llseek,
8178 };
8179
8180 /*
8181  * In order to pass in both the trace_array descriptor as well as the index
8182  * to the flag that the trace option file represents, the trace_array
8183  * has a character array of trace_flags_index[], which holds the index
8184  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8185  * The address of this character array is passed to the flag option file
8186  * read/write callbacks.
8187  *
8188  * In order to extract both the index and the trace_array descriptor,
8189  * get_tr_index() uses the following algorithm.
8190  *
8191  *   idx = *ptr;
8192  *
8193  * As the pointer itself contains the address of the index (remember
8194  * index[1] == 1).
8195  *
8196  * Then to get the trace_array descriptor, by subtracting that index
8197  * from the ptr, we get to the start of the index itself.
8198  *
8199  *   ptr - idx == &index[0]
8200  *
8201  * Then a simple container_of() from that pointer gets us to the
8202  * trace_array descriptor.
8203  */
8204 static void get_tr_index(void *data, struct trace_array **ptr,
8205                          unsigned int *pindex)
8206 {
8207         *pindex = *(unsigned char *)data;
8208
8209         *ptr = container_of(data - *pindex, struct trace_array,
8210                             trace_flags_index);
8211 }
8212
8213 static ssize_t
8214 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8215                         loff_t *ppos)
8216 {
8217         void *tr_index = filp->private_data;
8218         struct trace_array *tr;
8219         unsigned int index;
8220         char *buf;
8221
8222         get_tr_index(tr_index, &tr, &index);
8223
8224         if (tr->trace_flags & (1 << index))
8225                 buf = "1\n";
8226         else
8227                 buf = "0\n";
8228
8229         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8230 }
8231
8232 static ssize_t
8233 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8234                          loff_t *ppos)
8235 {
8236         void *tr_index = filp->private_data;
8237         struct trace_array *tr;
8238         unsigned int index;
8239         unsigned long val;
8240         int ret;
8241
8242         get_tr_index(tr_index, &tr, &index);
8243
8244         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8245         if (ret)
8246                 return ret;
8247
8248         if (val != 0 && val != 1)
8249                 return -EINVAL;
8250
8251         mutex_lock(&event_mutex);
8252         mutex_lock(&trace_types_lock);
8253         ret = set_tracer_flag(tr, 1 << index, val);
8254         mutex_unlock(&trace_types_lock);
8255         mutex_unlock(&event_mutex);
8256
8257         if (ret < 0)
8258                 return ret;
8259
8260         *ppos += cnt;
8261
8262         return cnt;
8263 }
8264
8265 static const struct file_operations trace_options_core_fops = {
8266         .open = tracing_open_generic,
8267         .read = trace_options_core_read,
8268         .write = trace_options_core_write,
8269         .llseek = generic_file_llseek,
8270 };
8271
8272 struct dentry *trace_create_file(const char *name,
8273                                  umode_t mode,
8274                                  struct dentry *parent,
8275                                  void *data,
8276                                  const struct file_operations *fops)
8277 {
8278         struct dentry *ret;
8279
8280         ret = tracefs_create_file(name, mode, parent, data, fops);
8281         if (!ret)
8282                 pr_warn("Could not create tracefs '%s' entry\n", name);
8283
8284         return ret;
8285 }
8286
8287
8288 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8289 {
8290         struct dentry *d_tracer;
8291
8292         if (tr->options)
8293                 return tr->options;
8294
8295         d_tracer = tracing_get_dentry(tr);
8296         if (IS_ERR(d_tracer))
8297                 return NULL;
8298
8299         tr->options = tracefs_create_dir("options", d_tracer);
8300         if (!tr->options) {
8301                 pr_warn("Could not create tracefs directory 'options'\n");
8302                 return NULL;
8303         }
8304
8305         return tr->options;
8306 }
8307
8308 static void
8309 create_trace_option_file(struct trace_array *tr,
8310                          struct trace_option_dentry *topt,
8311                          struct tracer_flags *flags,
8312                          struct tracer_opt *opt)
8313 {
8314         struct dentry *t_options;
8315
8316         t_options = trace_options_init_dentry(tr);
8317         if (!t_options)
8318                 return;
8319
8320         topt->flags = flags;
8321         topt->opt = opt;
8322         topt->tr = tr;
8323
8324         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8325                                     &trace_options_fops);
8326
8327 }
8328
8329 static void
8330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8331 {
8332         struct trace_option_dentry *topts;
8333         struct trace_options *tr_topts;
8334         struct tracer_flags *flags;
8335         struct tracer_opt *opts;
8336         int cnt;
8337         int i;
8338
8339         if (!tracer)
8340                 return;
8341
8342         flags = tracer->flags;
8343
8344         if (!flags || !flags->opts)
8345                 return;
8346
8347         /*
8348          * If this is an instance, only create flags for tracers
8349          * the instance may have.
8350          */
8351         if (!trace_ok_for_array(tracer, tr))
8352                 return;
8353
8354         for (i = 0; i < tr->nr_topts; i++) {
8355                 /* Make sure there's no duplicate flags. */
8356                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8357                         return;
8358         }
8359
8360         opts = flags->opts;
8361
8362         for (cnt = 0; opts[cnt].name; cnt++)
8363                 ;
8364
8365         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8366         if (!topts)
8367                 return;
8368
8369         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8370                             GFP_KERNEL);
8371         if (!tr_topts) {
8372                 kfree(topts);
8373                 return;
8374         }
8375
8376         tr->topts = tr_topts;
8377         tr->topts[tr->nr_topts].tracer = tracer;
8378         tr->topts[tr->nr_topts].topts = topts;
8379         tr->nr_topts++;
8380
8381         for (cnt = 0; opts[cnt].name; cnt++) {
8382                 create_trace_option_file(tr, &topts[cnt], flags,
8383                                          &opts[cnt]);
8384                 MEM_FAIL(topts[cnt].entry == NULL,
8385                           "Failed to create trace option: %s",
8386                           opts[cnt].name);
8387         }
8388 }
8389
8390 static struct dentry *
8391 create_trace_option_core_file(struct trace_array *tr,
8392                               const char *option, long index)
8393 {
8394         struct dentry *t_options;
8395
8396         t_options = trace_options_init_dentry(tr);
8397         if (!t_options)
8398                 return NULL;
8399
8400         return trace_create_file(option, 0644, t_options,
8401                                  (void *)&tr->trace_flags_index[index],
8402                                  &trace_options_core_fops);
8403 }
8404
8405 static void create_trace_options_dir(struct trace_array *tr)
8406 {
8407         struct dentry *t_options;
8408         bool top_level = tr == &global_trace;
8409         int i;
8410
8411         t_options = trace_options_init_dentry(tr);
8412         if (!t_options)
8413                 return;
8414
8415         for (i = 0; trace_options[i]; i++) {
8416                 if (top_level ||
8417                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8418                         create_trace_option_core_file(tr, trace_options[i], i);
8419         }
8420 }
8421
8422 static ssize_t
8423 rb_simple_read(struct file *filp, char __user *ubuf,
8424                size_t cnt, loff_t *ppos)
8425 {
8426         struct trace_array *tr = filp->private_data;
8427         char buf[64];
8428         int r;
8429
8430         r = tracer_tracing_is_on(tr);
8431         r = sprintf(buf, "%d\n", r);
8432
8433         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8434 }
8435
8436 static ssize_t
8437 rb_simple_write(struct file *filp, const char __user *ubuf,
8438                 size_t cnt, loff_t *ppos)
8439 {
8440         struct trace_array *tr = filp->private_data;
8441         struct trace_buffer *buffer = tr->array_buffer.buffer;
8442         unsigned long val;
8443         int ret;
8444
8445         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8446         if (ret)
8447                 return ret;
8448
8449         if (buffer) {
8450                 mutex_lock(&trace_types_lock);
8451                 if (!!val == tracer_tracing_is_on(tr)) {
8452                         val = 0; /* do nothing */
8453                 } else if (val) {
8454                         tracer_tracing_on(tr);
8455                         if (tr->current_trace->start)
8456                                 tr->current_trace->start(tr);
8457                 } else {
8458                         tracer_tracing_off(tr);
8459                         if (tr->current_trace->stop)
8460                                 tr->current_trace->stop(tr);
8461                 }
8462                 mutex_unlock(&trace_types_lock);
8463         }
8464
8465         (*ppos)++;
8466
8467         return cnt;
8468 }
8469
8470 static const struct file_operations rb_simple_fops = {
8471         .open           = tracing_open_generic_tr,
8472         .read           = rb_simple_read,
8473         .write          = rb_simple_write,
8474         .release        = tracing_release_generic_tr,
8475         .llseek         = default_llseek,
8476 };
8477
8478 static ssize_t
8479 buffer_percent_read(struct file *filp, char __user *ubuf,
8480                     size_t cnt, loff_t *ppos)
8481 {
8482         struct trace_array *tr = filp->private_data;
8483         char buf[64];
8484         int r;
8485
8486         r = tr->buffer_percent;
8487         r = sprintf(buf, "%d\n", r);
8488
8489         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8490 }
8491
8492 static ssize_t
8493 buffer_percent_write(struct file *filp, const char __user *ubuf,
8494                      size_t cnt, loff_t *ppos)
8495 {
8496         struct trace_array *tr = filp->private_data;
8497         unsigned long val;
8498         int ret;
8499
8500         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8501         if (ret)
8502                 return ret;
8503
8504         if (val > 100)
8505                 return -EINVAL;
8506
8507         if (!val)
8508                 val = 1;
8509
8510         tr->buffer_percent = val;
8511
8512         (*ppos)++;
8513
8514         return cnt;
8515 }
8516
8517 static const struct file_operations buffer_percent_fops = {
8518         .open           = tracing_open_generic_tr,
8519         .read           = buffer_percent_read,
8520         .write          = buffer_percent_write,
8521         .release        = tracing_release_generic_tr,
8522         .llseek         = default_llseek,
8523 };
8524
8525 static struct dentry *trace_instance_dir;
8526
8527 static void
8528 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8529
8530 static int
8531 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8532 {
8533         enum ring_buffer_flags rb_flags;
8534
8535         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8536
8537         buf->tr = tr;
8538
8539         buf->buffer = ring_buffer_alloc(size, rb_flags);
8540         if (!buf->buffer)
8541                 return -ENOMEM;
8542
8543         buf->data = alloc_percpu(struct trace_array_cpu);
8544         if (!buf->data) {
8545                 ring_buffer_free(buf->buffer);
8546                 buf->buffer = NULL;
8547                 return -ENOMEM;
8548         }
8549
8550         /* Allocate the first page for all buffers */
8551         set_buffer_entries(&tr->array_buffer,
8552                            ring_buffer_size(tr->array_buffer.buffer, 0));
8553
8554         return 0;
8555 }
8556
8557 static int allocate_trace_buffers(struct trace_array *tr, int size)
8558 {
8559         int ret;
8560
8561         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8562         if (ret)
8563                 return ret;
8564
8565 #ifdef CONFIG_TRACER_MAX_TRACE
8566         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8567                                     allocate_snapshot ? size : 1);
8568         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8569                 ring_buffer_free(tr->array_buffer.buffer);
8570                 tr->array_buffer.buffer = NULL;
8571                 free_percpu(tr->array_buffer.data);
8572                 tr->array_buffer.data = NULL;
8573                 return -ENOMEM;
8574         }
8575         tr->allocated_snapshot = allocate_snapshot;
8576
8577         /*
8578          * Only the top level trace array gets its snapshot allocated
8579          * from the kernel command line.
8580          */
8581         allocate_snapshot = false;
8582 #endif
8583
8584         return 0;
8585 }
8586
8587 static void free_trace_buffer(struct array_buffer *buf)
8588 {
8589         if (buf->buffer) {
8590                 ring_buffer_free(buf->buffer);
8591                 buf->buffer = NULL;
8592                 free_percpu(buf->data);
8593                 buf->data = NULL;
8594         }
8595 }
8596
8597 static void free_trace_buffers(struct trace_array *tr)
8598 {
8599         if (!tr)
8600                 return;
8601
8602         free_trace_buffer(&tr->array_buffer);
8603
8604 #ifdef CONFIG_TRACER_MAX_TRACE
8605         free_trace_buffer(&tr->max_buffer);
8606 #endif
8607 }
8608
8609 static void init_trace_flags_index(struct trace_array *tr)
8610 {
8611         int i;
8612
8613         /* Used by the trace options files */
8614         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8615                 tr->trace_flags_index[i] = i;
8616 }
8617
8618 static void __update_tracer_options(struct trace_array *tr)
8619 {
8620         struct tracer *t;
8621
8622         for (t = trace_types; t; t = t->next)
8623                 add_tracer_options(tr, t);
8624 }
8625
8626 static void update_tracer_options(struct trace_array *tr)
8627 {
8628         mutex_lock(&trace_types_lock);
8629         __update_tracer_options(tr);
8630         mutex_unlock(&trace_types_lock);
8631 }
8632
8633 /* Must have trace_types_lock held */
8634 struct trace_array *trace_array_find(const char *instance)
8635 {
8636         struct trace_array *tr, *found = NULL;
8637
8638         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8639                 if (tr->name && strcmp(tr->name, instance) == 0) {
8640                         found = tr;
8641                         break;
8642                 }
8643         }
8644
8645         return found;
8646 }
8647
8648 struct trace_array *trace_array_find_get(const char *instance)
8649 {
8650         struct trace_array *tr;
8651
8652         mutex_lock(&trace_types_lock);
8653         tr = trace_array_find(instance);
8654         if (tr)
8655                 tr->ref++;
8656         mutex_unlock(&trace_types_lock);
8657
8658         return tr;
8659 }
8660
8661 static struct trace_array *trace_array_create(const char *name)
8662 {
8663         struct trace_array *tr;
8664         int ret;
8665
8666         ret = -ENOMEM;
8667         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8668         if (!tr)
8669                 return ERR_PTR(ret);
8670
8671         tr->name = kstrdup(name, GFP_KERNEL);
8672         if (!tr->name)
8673                 goto out_free_tr;
8674
8675         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8676                 goto out_free_tr;
8677
8678         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8679
8680         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8681
8682         raw_spin_lock_init(&tr->start_lock);
8683
8684         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8685
8686         tr->current_trace = &nop_trace;
8687
8688         INIT_LIST_HEAD(&tr->systems);
8689         INIT_LIST_HEAD(&tr->events);
8690         INIT_LIST_HEAD(&tr->hist_vars);
8691         INIT_LIST_HEAD(&tr->err_log);
8692
8693         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8694                 goto out_free_tr;
8695
8696         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8697         if (!tr->dir)
8698                 goto out_free_tr;
8699
8700         ret = event_trace_add_tracer(tr->dir, tr);
8701         if (ret) {
8702                 tracefs_remove(tr->dir);
8703                 goto out_free_tr;
8704         }
8705
8706         ftrace_init_trace_array(tr);
8707
8708         init_tracer_tracefs(tr, tr->dir);
8709         init_trace_flags_index(tr);
8710         __update_tracer_options(tr);
8711
8712         list_add(&tr->list, &ftrace_trace_arrays);
8713
8714         tr->ref++;
8715
8716
8717         return tr;
8718
8719  out_free_tr:
8720         free_trace_buffers(tr);
8721         free_cpumask_var(tr->tracing_cpumask);
8722         kfree(tr->name);
8723         kfree(tr);
8724
8725         return ERR_PTR(ret);
8726 }
8727
8728 static int instance_mkdir(const char *name)
8729 {
8730         struct trace_array *tr;
8731         int ret;
8732
8733         mutex_lock(&event_mutex);
8734         mutex_lock(&trace_types_lock);
8735
8736         ret = -EEXIST;
8737         if (trace_array_find(name))
8738                 goto out_unlock;
8739
8740         tr = trace_array_create(name);
8741
8742         ret = PTR_ERR_OR_ZERO(tr);
8743
8744 out_unlock:
8745         mutex_unlock(&trace_types_lock);
8746         mutex_unlock(&event_mutex);
8747         return ret;
8748 }
8749
8750 /**
8751  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8752  * @name: The name of the trace array to be looked up/created.
8753  *
8754  * Returns pointer to trace array with given name.
8755  * NULL, if it cannot be created.
8756  *
8757  * NOTE: This function increments the reference counter associated with the
8758  * trace array returned. This makes sure it cannot be freed while in use.
8759  * Use trace_array_put() once the trace array is no longer needed.
8760  * If the trace_array is to be freed, trace_array_destroy() needs to
8761  * be called after the trace_array_put(), or simply let user space delete
8762  * it from the tracefs instances directory. But until the
8763  * trace_array_put() is called, user space can not delete it.
8764  *
8765  */
8766 struct trace_array *trace_array_get_by_name(const char *name)
8767 {
8768         struct trace_array *tr;
8769
8770         mutex_lock(&event_mutex);
8771         mutex_lock(&trace_types_lock);
8772
8773         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8774                 if (tr->name && strcmp(tr->name, name) == 0)
8775                         goto out_unlock;
8776         }
8777
8778         tr = trace_array_create(name);
8779
8780         if (IS_ERR(tr))
8781                 tr = NULL;
8782 out_unlock:
8783         if (tr)
8784                 tr->ref++;
8785
8786         mutex_unlock(&trace_types_lock);
8787         mutex_unlock(&event_mutex);
8788         return tr;
8789 }
8790 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8791
8792 static int __remove_instance(struct trace_array *tr)
8793 {
8794         int i;
8795
8796         /* Reference counter for a newly created trace array = 1. */
8797         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8798                 return -EBUSY;
8799
8800         list_del(&tr->list);
8801
8802         /* Disable all the flags that were enabled coming in */
8803         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8804                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8805                         set_tracer_flag(tr, 1 << i, 0);
8806         }
8807
8808         tracing_set_nop(tr);
8809         clear_ftrace_function_probes(tr);
8810         event_trace_del_tracer(tr);
8811         ftrace_clear_pids(tr);
8812         ftrace_destroy_function_files(tr);
8813         tracefs_remove(tr->dir);
8814         free_trace_buffers(tr);
8815
8816         for (i = 0; i < tr->nr_topts; i++) {
8817                 kfree(tr->topts[i].topts);
8818         }
8819         kfree(tr->topts);
8820
8821         free_cpumask_var(tr->tracing_cpumask);
8822         kfree(tr->name);
8823         kfree(tr);
8824         tr = NULL;
8825
8826         return 0;
8827 }
8828
8829 int trace_array_destroy(struct trace_array *this_tr)
8830 {
8831         struct trace_array *tr;
8832         int ret;
8833
8834         if (!this_tr)
8835                 return -EINVAL;
8836
8837         mutex_lock(&event_mutex);
8838         mutex_lock(&trace_types_lock);
8839
8840         ret = -ENODEV;
8841
8842         /* Making sure trace array exists before destroying it. */
8843         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8844                 if (tr == this_tr) {
8845                         ret = __remove_instance(tr);
8846                         break;
8847                 }
8848         }
8849
8850         mutex_unlock(&trace_types_lock);
8851         mutex_unlock(&event_mutex);
8852
8853         return ret;
8854 }
8855 EXPORT_SYMBOL_GPL(trace_array_destroy);
8856
8857 static int instance_rmdir(const char *name)
8858 {
8859         struct trace_array *tr;
8860         int ret;
8861
8862         mutex_lock(&event_mutex);
8863         mutex_lock(&trace_types_lock);
8864
8865         ret = -ENODEV;
8866         tr = trace_array_find(name);
8867         if (tr)
8868                 ret = __remove_instance(tr);
8869
8870         mutex_unlock(&trace_types_lock);
8871         mutex_unlock(&event_mutex);
8872
8873         return ret;
8874 }
8875
8876 static __init void create_trace_instances(struct dentry *d_tracer)
8877 {
8878         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8879                                                          instance_mkdir,
8880                                                          instance_rmdir);
8881         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8882                 return;
8883 }
8884
8885 static void
8886 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8887 {
8888         struct trace_event_file *file;
8889         int cpu;
8890
8891         trace_create_file("available_tracers", 0444, d_tracer,
8892                         tr, &show_traces_fops);
8893
8894         trace_create_file("current_tracer", 0644, d_tracer,
8895                         tr, &set_tracer_fops);
8896
8897         trace_create_file("tracing_cpumask", 0644, d_tracer,
8898                           tr, &tracing_cpumask_fops);
8899
8900         trace_create_file("trace_options", 0644, d_tracer,
8901                           tr, &tracing_iter_fops);
8902
8903         trace_create_file("trace", 0644, d_tracer,
8904                           tr, &tracing_fops);
8905
8906         trace_create_file("trace_pipe", 0444, d_tracer,
8907                           tr, &tracing_pipe_fops);
8908
8909         trace_create_file("buffer_size_kb", 0644, d_tracer,
8910                           tr, &tracing_entries_fops);
8911
8912         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8913                           tr, &tracing_total_entries_fops);
8914
8915         trace_create_file("free_buffer", 0200, d_tracer,
8916                           tr, &tracing_free_buffer_fops);
8917
8918         trace_create_file("trace_marker", 0220, d_tracer,
8919                           tr, &tracing_mark_fops);
8920
8921         file = __find_event_file(tr, "ftrace", "print");
8922         if (file && file->dir)
8923                 trace_create_file("trigger", 0644, file->dir, file,
8924                                   &event_trigger_fops);
8925         tr->trace_marker_file = file;
8926
8927         trace_create_file("trace_marker_raw", 0220, d_tracer,
8928                           tr, &tracing_mark_raw_fops);
8929
8930         trace_create_file("trace_clock", 0644, d_tracer, tr,
8931                           &trace_clock_fops);
8932
8933         trace_create_file("tracing_on", 0644, d_tracer,
8934                           tr, &rb_simple_fops);
8935
8936         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8937                           &trace_time_stamp_mode_fops);
8938
8939         tr->buffer_percent = 50;
8940
8941         trace_create_file("buffer_percent", 0444, d_tracer,
8942                         tr, &buffer_percent_fops);
8943
8944         create_trace_options_dir(tr);
8945
8946 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8947         trace_create_maxlat_file(tr, d_tracer);
8948 #endif
8949
8950         if (ftrace_create_function_files(tr, d_tracer))
8951                 MEM_FAIL(1, "Could not allocate function filter files");
8952
8953 #ifdef CONFIG_TRACER_SNAPSHOT
8954         trace_create_file("snapshot", 0644, d_tracer,
8955                           tr, &snapshot_fops);
8956 #endif
8957
8958         trace_create_file("error_log", 0644, d_tracer,
8959                           tr, &tracing_err_log_fops);
8960
8961         for_each_tracing_cpu(cpu)
8962                 tracing_init_tracefs_percpu(tr, cpu);
8963
8964         ftrace_init_tracefs(tr, d_tracer);
8965 }
8966
8967 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8968 {
8969         struct vfsmount *mnt;
8970         struct file_system_type *type;
8971
8972         /*
8973          * To maintain backward compatibility for tools that mount
8974          * debugfs to get to the tracing facility, tracefs is automatically
8975          * mounted to the debugfs/tracing directory.
8976          */
8977         type = get_fs_type("tracefs");
8978         if (!type)
8979                 return NULL;
8980         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8981         put_filesystem(type);
8982         if (IS_ERR(mnt))
8983                 return NULL;
8984         mntget(mnt);
8985
8986         return mnt;
8987 }
8988
8989 /**
8990  * tracing_init_dentry - initialize top level trace array
8991  *
8992  * This is called when creating files or directories in the tracing
8993  * directory. It is called via fs_initcall() by any of the boot up code
8994  * and expects to return the dentry of the top level tracing directory.
8995  */
8996 struct dentry *tracing_init_dentry(void)
8997 {
8998         struct trace_array *tr = &global_trace;
8999
9000         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9001                 pr_warn("Tracing disabled due to lockdown\n");
9002                 return ERR_PTR(-EPERM);
9003         }
9004
9005         /* The top level trace array uses  NULL as parent */
9006         if (tr->dir)
9007                 return NULL;
9008
9009         if (WARN_ON(!tracefs_initialized()))
9010                 return ERR_PTR(-ENODEV);
9011
9012         /*
9013          * As there may still be users that expect the tracing
9014          * files to exist in debugfs/tracing, we must automount
9015          * the tracefs file system there, so older tools still
9016          * work with the newer kerenl.
9017          */
9018         tr->dir = debugfs_create_automount("tracing", NULL,
9019                                            trace_automount, NULL);
9020
9021         return NULL;
9022 }
9023
9024 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9025 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9026
9027 static void __init trace_eval_init(void)
9028 {
9029         int len;
9030
9031         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9032         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9033 }
9034
9035 #ifdef CONFIG_MODULES
9036 static void trace_module_add_evals(struct module *mod)
9037 {
9038         if (!mod->num_trace_evals)
9039                 return;
9040
9041         /*
9042          * Modules with bad taint do not have events created, do
9043          * not bother with enums either.
9044          */
9045         if (trace_module_has_bad_taint(mod))
9046                 return;
9047
9048         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9049 }
9050
9051 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9052 static void trace_module_remove_evals(struct module *mod)
9053 {
9054         union trace_eval_map_item *map;
9055         union trace_eval_map_item **last = &trace_eval_maps;
9056
9057         if (!mod->num_trace_evals)
9058                 return;
9059
9060         mutex_lock(&trace_eval_mutex);
9061
9062         map = trace_eval_maps;
9063
9064         while (map) {
9065                 if (map->head.mod == mod)
9066                         break;
9067                 map = trace_eval_jmp_to_tail(map);
9068                 last = &map->tail.next;
9069                 map = map->tail.next;
9070         }
9071         if (!map)
9072                 goto out;
9073
9074         *last = trace_eval_jmp_to_tail(map)->tail.next;
9075         kfree(map);
9076  out:
9077         mutex_unlock(&trace_eval_mutex);
9078 }
9079 #else
9080 static inline void trace_module_remove_evals(struct module *mod) { }
9081 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9082
9083 static int trace_module_notify(struct notifier_block *self,
9084                                unsigned long val, void *data)
9085 {
9086         struct module *mod = data;
9087
9088         switch (val) {
9089         case MODULE_STATE_COMING:
9090                 trace_module_add_evals(mod);
9091                 break;
9092         case MODULE_STATE_GOING:
9093                 trace_module_remove_evals(mod);
9094                 break;
9095         }
9096
9097         return NOTIFY_OK;
9098 }
9099
9100 static struct notifier_block trace_module_nb = {
9101         .notifier_call = trace_module_notify,
9102         .priority = 0,
9103 };
9104 #endif /* CONFIG_MODULES */
9105
9106 static __init int tracer_init_tracefs(void)
9107 {
9108         struct dentry *d_tracer;
9109
9110         trace_access_lock_init();
9111
9112         d_tracer = tracing_init_dentry();
9113         if (IS_ERR(d_tracer))
9114                 return 0;
9115
9116         event_trace_init();
9117
9118         init_tracer_tracefs(&global_trace, d_tracer);
9119         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9120
9121         trace_create_file("tracing_thresh", 0644, d_tracer,
9122                         &global_trace, &tracing_thresh_fops);
9123
9124         trace_create_file("README", 0444, d_tracer,
9125                         NULL, &tracing_readme_fops);
9126
9127         trace_create_file("saved_cmdlines", 0444, d_tracer,
9128                         NULL, &tracing_saved_cmdlines_fops);
9129
9130         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9131                           NULL, &tracing_saved_cmdlines_size_fops);
9132
9133         trace_create_file("saved_tgids", 0444, d_tracer,
9134                         NULL, &tracing_saved_tgids_fops);
9135
9136         trace_eval_init();
9137
9138         trace_create_eval_file(d_tracer);
9139
9140 #ifdef CONFIG_MODULES
9141         register_module_notifier(&trace_module_nb);
9142 #endif
9143
9144 #ifdef CONFIG_DYNAMIC_FTRACE
9145         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9146                         NULL, &tracing_dyn_info_fops);
9147 #endif
9148
9149         create_trace_instances(d_tracer);
9150
9151         update_tracer_options(&global_trace);
9152
9153         return 0;
9154 }
9155
9156 static int trace_panic_handler(struct notifier_block *this,
9157                                unsigned long event, void *unused)
9158 {
9159         if (ftrace_dump_on_oops)
9160                 ftrace_dump(ftrace_dump_on_oops);
9161         return NOTIFY_OK;
9162 }
9163
9164 static struct notifier_block trace_panic_notifier = {
9165         .notifier_call  = trace_panic_handler,
9166         .next           = NULL,
9167         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9168 };
9169
9170 static int trace_die_handler(struct notifier_block *self,
9171                              unsigned long val,
9172                              void *data)
9173 {
9174         switch (val) {
9175         case DIE_OOPS:
9176                 if (ftrace_dump_on_oops)
9177                         ftrace_dump(ftrace_dump_on_oops);
9178                 break;
9179         default:
9180                 break;
9181         }
9182         return NOTIFY_OK;
9183 }
9184
9185 static struct notifier_block trace_die_notifier = {
9186         .notifier_call = trace_die_handler,
9187         .priority = 200
9188 };
9189
9190 /*
9191  * printk is set to max of 1024, we really don't need it that big.
9192  * Nothing should be printing 1000 characters anyway.
9193  */
9194 #define TRACE_MAX_PRINT         1000
9195
9196 /*
9197  * Define here KERN_TRACE so that we have one place to modify
9198  * it if we decide to change what log level the ftrace dump
9199  * should be at.
9200  */
9201 #define KERN_TRACE              KERN_EMERG
9202
9203 void
9204 trace_printk_seq(struct trace_seq *s)
9205 {
9206         /* Probably should print a warning here. */
9207         if (s->seq.len >= TRACE_MAX_PRINT)
9208                 s->seq.len = TRACE_MAX_PRINT;
9209
9210         /*
9211          * More paranoid code. Although the buffer size is set to
9212          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9213          * an extra layer of protection.
9214          */
9215         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9216                 s->seq.len = s->seq.size - 1;
9217
9218         /* should be zero ended, but we are paranoid. */
9219         s->buffer[s->seq.len] = 0;
9220
9221         printk(KERN_TRACE "%s", s->buffer);
9222
9223         trace_seq_init(s);
9224 }
9225
9226 void trace_init_global_iter(struct trace_iterator *iter)
9227 {
9228         iter->tr = &global_trace;
9229         iter->trace = iter->tr->current_trace;
9230         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9231         iter->array_buffer = &global_trace.array_buffer;
9232
9233         if (iter->trace && iter->trace->open)
9234                 iter->trace->open(iter);
9235
9236         /* Annotate start of buffers if we had overruns */
9237         if (ring_buffer_overruns(iter->array_buffer->buffer))
9238                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9239
9240         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9241         if (trace_clocks[iter->tr->clock_id].in_ns)
9242                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9243 }
9244
9245 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9246 {
9247         /* use static because iter can be a bit big for the stack */
9248         static struct trace_iterator iter;
9249         static atomic_t dump_running;
9250         struct trace_array *tr = &global_trace;
9251         unsigned int old_userobj;
9252         unsigned long flags;
9253         int cnt = 0, cpu;
9254
9255         /* Only allow one dump user at a time. */
9256         if (atomic_inc_return(&dump_running) != 1) {
9257                 atomic_dec(&dump_running);
9258                 return;
9259         }
9260
9261         /*
9262          * Always turn off tracing when we dump.
9263          * We don't need to show trace output of what happens
9264          * between multiple crashes.
9265          *
9266          * If the user does a sysrq-z, then they can re-enable
9267          * tracing with echo 1 > tracing_on.
9268          */
9269         tracing_off();
9270
9271         local_irq_save(flags);
9272         printk_nmi_direct_enter();
9273
9274         /* Simulate the iterator */
9275         trace_init_global_iter(&iter);
9276         /* Can not use kmalloc for iter.temp */
9277         iter.temp = static_temp_buf;
9278         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9279
9280         for_each_tracing_cpu(cpu) {
9281                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9282         }
9283
9284         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9285
9286         /* don't look at user memory in panic mode */
9287         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9288
9289         switch (oops_dump_mode) {
9290         case DUMP_ALL:
9291                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9292                 break;
9293         case DUMP_ORIG:
9294                 iter.cpu_file = raw_smp_processor_id();
9295                 break;
9296         case DUMP_NONE:
9297                 goto out_enable;
9298         default:
9299                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9300                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9301         }
9302
9303         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9304
9305         /* Did function tracer already get disabled? */
9306         if (ftrace_is_dead()) {
9307                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9308                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9309         }
9310
9311         /*
9312          * We need to stop all tracing on all CPUS to read the
9313          * the next buffer. This is a bit expensive, but is
9314          * not done often. We fill all what we can read,
9315          * and then release the locks again.
9316          */
9317
9318         while (!trace_empty(&iter)) {
9319
9320                 if (!cnt)
9321                         printk(KERN_TRACE "---------------------------------\n");
9322
9323                 cnt++;
9324
9325                 trace_iterator_reset(&iter);
9326                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9327
9328                 if (trace_find_next_entry_inc(&iter) != NULL) {
9329                         int ret;
9330
9331                         ret = print_trace_line(&iter);
9332                         if (ret != TRACE_TYPE_NO_CONSUME)
9333                                 trace_consume(&iter);
9334                 }
9335                 touch_nmi_watchdog();
9336
9337                 trace_printk_seq(&iter.seq);
9338         }
9339
9340         if (!cnt)
9341                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9342         else
9343                 printk(KERN_TRACE "---------------------------------\n");
9344
9345  out_enable:
9346         tr->trace_flags |= old_userobj;
9347
9348         for_each_tracing_cpu(cpu) {
9349                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9350         }
9351         atomic_dec(&dump_running);
9352         printk_nmi_direct_exit();
9353         local_irq_restore(flags);
9354 }
9355 EXPORT_SYMBOL_GPL(ftrace_dump);
9356
9357 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9358 {
9359         char **argv;
9360         int argc, ret;
9361
9362         argc = 0;
9363         ret = 0;
9364         argv = argv_split(GFP_KERNEL, buf, &argc);
9365         if (!argv)
9366                 return -ENOMEM;
9367
9368         if (argc)
9369                 ret = createfn(argc, argv);
9370
9371         argv_free(argv);
9372
9373         return ret;
9374 }
9375
9376 #define WRITE_BUFSIZE  4096
9377
9378 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9379                                 size_t count, loff_t *ppos,
9380                                 int (*createfn)(int, char **))
9381 {
9382         char *kbuf, *buf, *tmp;
9383         int ret = 0;
9384         size_t done = 0;
9385         size_t size;
9386
9387         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9388         if (!kbuf)
9389                 return -ENOMEM;
9390
9391         while (done < count) {
9392                 size = count - done;
9393
9394                 if (size >= WRITE_BUFSIZE)
9395                         size = WRITE_BUFSIZE - 1;
9396
9397                 if (copy_from_user(kbuf, buffer + done, size)) {
9398                         ret = -EFAULT;
9399                         goto out;
9400                 }
9401                 kbuf[size] = '\0';
9402                 buf = kbuf;
9403                 do {
9404                         tmp = strchr(buf, '\n');
9405                         if (tmp) {
9406                                 *tmp = '\0';
9407                                 size = tmp - buf + 1;
9408                         } else {
9409                                 size = strlen(buf);
9410                                 if (done + size < count) {
9411                                         if (buf != kbuf)
9412                                                 break;
9413                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9414                                         pr_warn("Line length is too long: Should be less than %d\n",
9415                                                 WRITE_BUFSIZE - 2);
9416                                         ret = -EINVAL;
9417                                         goto out;
9418                                 }
9419                         }
9420                         done += size;
9421
9422                         /* Remove comments */
9423                         tmp = strchr(buf, '#');
9424
9425                         if (tmp)
9426                                 *tmp = '\0';
9427
9428                         ret = trace_run_command(buf, createfn);
9429                         if (ret)
9430                                 goto out;
9431                         buf += size;
9432
9433                 } while (done < count);
9434         }
9435         ret = done;
9436
9437 out:
9438         kfree(kbuf);
9439
9440         return ret;
9441 }
9442
9443 __init static int tracer_alloc_buffers(void)
9444 {
9445         int ring_buf_size;
9446         int ret = -ENOMEM;
9447
9448
9449         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9450                 pr_warn("Tracing disabled due to lockdown\n");
9451                 return -EPERM;
9452         }
9453
9454         /*
9455          * Make sure we don't accidently add more trace options
9456          * than we have bits for.
9457          */
9458         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9459
9460         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9461                 goto out;
9462
9463         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9464                 goto out_free_buffer_mask;
9465
9466         /* Only allocate trace_printk buffers if a trace_printk exists */
9467         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9468                 /* Must be called before global_trace.buffer is allocated */
9469                 trace_printk_init_buffers();
9470
9471         /* To save memory, keep the ring buffer size to its minimum */
9472         if (ring_buffer_expanded)
9473                 ring_buf_size = trace_buf_size;
9474         else
9475                 ring_buf_size = 1;
9476
9477         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9478         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9479
9480         raw_spin_lock_init(&global_trace.start_lock);
9481
9482         /*
9483          * The prepare callbacks allocates some memory for the ring buffer. We
9484          * don't free the buffer if the if the CPU goes down. If we were to free
9485          * the buffer, then the user would lose any trace that was in the
9486          * buffer. The memory will be removed once the "instance" is removed.
9487          */
9488         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9489                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9490                                       NULL);
9491         if (ret < 0)
9492                 goto out_free_cpumask;
9493         /* Used for event triggers */
9494         ret = -ENOMEM;
9495         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9496         if (!temp_buffer)
9497                 goto out_rm_hp_state;
9498
9499         if (trace_create_savedcmd() < 0)
9500                 goto out_free_temp_buffer;
9501
9502         /* TODO: make the number of buffers hot pluggable with CPUS */
9503         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9504                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9505                 goto out_free_savedcmd;
9506         }
9507
9508         if (global_trace.buffer_disabled)
9509                 tracing_off();
9510
9511         if (trace_boot_clock) {
9512                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9513                 if (ret < 0)
9514                         pr_warn("Trace clock %s not defined, going back to default\n",
9515                                 trace_boot_clock);
9516         }
9517
9518         /*
9519          * register_tracer() might reference current_trace, so it
9520          * needs to be set before we register anything. This is
9521          * just a bootstrap of current_trace anyway.
9522          */
9523         global_trace.current_trace = &nop_trace;
9524
9525         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9526
9527         ftrace_init_global_array_ops(&global_trace);
9528
9529         init_trace_flags_index(&global_trace);
9530
9531         register_tracer(&nop_trace);
9532
9533         /* Function tracing may start here (via kernel command line) */
9534         init_function_trace();
9535
9536         /* All seems OK, enable tracing */
9537         tracing_disabled = 0;
9538
9539         atomic_notifier_chain_register(&panic_notifier_list,
9540                                        &trace_panic_notifier);
9541
9542         register_die_notifier(&trace_die_notifier);
9543
9544         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9545
9546         INIT_LIST_HEAD(&global_trace.systems);
9547         INIT_LIST_HEAD(&global_trace.events);
9548         INIT_LIST_HEAD(&global_trace.hist_vars);
9549         INIT_LIST_HEAD(&global_trace.err_log);
9550         list_add(&global_trace.list, &ftrace_trace_arrays);
9551
9552         apply_trace_boot_options();
9553
9554         register_snapshot_cmd();
9555
9556         return 0;
9557
9558 out_free_savedcmd:
9559         free_saved_cmdlines_buffer(savedcmd);
9560 out_free_temp_buffer:
9561         ring_buffer_free(temp_buffer);
9562 out_rm_hp_state:
9563         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9564 out_free_cpumask:
9565         free_cpumask_var(global_trace.tracing_cpumask);
9566 out_free_buffer_mask:
9567         free_cpumask_var(tracing_buffer_mask);
9568 out:
9569         return ret;
9570 }
9571
9572 void __init early_trace_init(void)
9573 {
9574         if (tracepoint_printk) {
9575                 tracepoint_print_iter =
9576                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9577                 if (MEM_FAIL(!tracepoint_print_iter,
9578                              "Failed to allocate trace iterator\n"))
9579                         tracepoint_printk = 0;
9580                 else
9581                         static_key_enable(&tracepoint_printk_key.key);
9582         }
9583         tracer_alloc_buffers();
9584 }
9585
9586 void __init trace_init(void)
9587 {
9588         trace_event_init();
9589 }
9590
9591 __init static int clear_boot_tracer(void)
9592 {
9593         /*
9594          * The default tracer at boot buffer is an init section.
9595          * This function is called in lateinit. If we did not
9596          * find the boot tracer, then clear it out, to prevent
9597          * later registration from accessing the buffer that is
9598          * about to be freed.
9599          */
9600         if (!default_bootup_tracer)
9601                 return 0;
9602
9603         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9604                default_bootup_tracer);
9605         default_bootup_tracer = NULL;
9606
9607         return 0;
9608 }
9609
9610 fs_initcall(tracer_init_tracefs);
9611 late_initcall_sync(clear_boot_tracer);
9612
9613 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9614 __init static int tracing_set_default_clock(void)
9615 {
9616         /* sched_clock_stable() is determined in late_initcall */
9617         if (!trace_boot_clock && !sched_clock_stable()) {
9618                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9619                         pr_warn("Can not set tracing clock due to lockdown\n");
9620                         return -EPERM;
9621                 }
9622
9623                 printk(KERN_WARNING
9624                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9625                        "If you want to keep using the local clock, then add:\n"
9626                        "  \"trace_clock=local\"\n"
9627                        "on the kernel command line\n");
9628                 tracing_set_clock(&global_trace, "global");
9629         }
9630
9631         return 0;
9632 }
9633 late_initcall_sync(tracing_set_default_clock);
9634 #endif