7fd432334ff521240942f2ce9bb7565c91504212
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are entering export into the list but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included into the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
412
413 /* trace_options that are only supported by global_trace */
414 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
415                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
416
417 /* trace_flags that are default zero for instances */
418 #define ZEROED_TRACE_FLAGS \
419         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
420
421 /*
422  * The global_trace is the descriptor that holds the top-level tracing
423  * buffers for the live tracing.
424  */
425 static struct trace_array global_trace = {
426         .trace_flags = TRACE_DEFAULT_FLAGS,
427 };
428
429 LIST_HEAD(ftrace_trace_arrays);
430
431 int trace_array_get(struct trace_array *this_tr)
432 {
433         struct trace_array *tr;
434         int ret = -ENODEV;
435
436         mutex_lock(&trace_types_lock);
437         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
438                 if (tr == this_tr) {
439                         tr->ref++;
440                         ret = 0;
441                         break;
442                 }
443         }
444         mutex_unlock(&trace_types_lock);
445
446         return ret;
447 }
448
449 static void __trace_array_put(struct trace_array *this_tr)
450 {
451         WARN_ON(!this_tr->ref);
452         this_tr->ref--;
453 }
454
455 /**
456  * trace_array_put - Decrement the reference counter for this trace array.
457  * @this_tr : pointer to the trace array
458  *
459  * NOTE: Use this when we no longer need the trace array returned by
460  * trace_array_get_by_name(). This ensures the trace array can be later
461  * destroyed.
462  *
463  */
464 void trace_array_put(struct trace_array *this_tr)
465 {
466         if (!this_tr)
467                 return;
468
469         mutex_lock(&trace_types_lock);
470         __trace_array_put(this_tr);
471         mutex_unlock(&trace_types_lock);
472 }
473 EXPORT_SYMBOL_GPL(trace_array_put);
474
475 int tracing_check_open_get_tr(struct trace_array *tr)
476 {
477         int ret;
478
479         ret = security_locked_down(LOCKDOWN_TRACEFS);
480         if (ret)
481                 return ret;
482
483         if (tracing_disabled)
484                 return -ENODEV;
485
486         if (tr && trace_array_get(tr) < 0)
487                 return -ENODEV;
488
489         return 0;
490 }
491
492 int call_filter_check_discard(struct trace_event_call *call, void *rec,
493                               struct trace_buffer *buffer,
494                               struct ring_buffer_event *event)
495 {
496         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
497             !filter_match_preds(call->filter, rec)) {
498                 __trace_event_discard_commit(buffer, event);
499                 return 1;
500         }
501
502         return 0;
503 }
504
505 void trace_free_pid_list(struct trace_pid_list *pid_list)
506 {
507         vfree(pid_list->pids);
508         kfree(pid_list);
509 }
510
511 /**
512  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
513  * @filtered_pids: The list of pids to check
514  * @search_pid: The PID to find in @filtered_pids
515  *
516  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
517  */
518 bool
519 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
520 {
521         /*
522          * If pid_max changed after filtered_pids was created, we
523          * by default ignore all pids greater than the previous pid_max.
524          */
525         if (search_pid >= filtered_pids->pid_max)
526                 return false;
527
528         return test_bit(search_pid, filtered_pids->pids);
529 }
530
531 /**
532  * trace_ignore_this_task - should a task be ignored for tracing
533  * @filtered_pids: The list of pids to check
534  * @filtered_no_pids: The list of pids not to be traced
535  * @task: The task that should be ignored if not filtered
536  *
537  * Checks if @task should be traced or not from @filtered_pids.
538  * Returns true if @task should *NOT* be traced.
539  * Returns false if @task should be traced.
540  */
541 bool
542 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
543                        struct trace_pid_list *filtered_no_pids,
544                        struct task_struct *task)
545 {
546         /*
547          * If filterd_no_pids is not empty, and the task's pid is listed
548          * in filtered_no_pids, then return true.
549          * Otherwise, if filtered_pids is empty, that means we can
550          * trace all tasks. If it has content, then only trace pids
551          * within filtered_pids.
552          */
553
554         return (filtered_pids &&
555                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
556                 (filtered_no_pids &&
557                  trace_find_filtered_pid(filtered_no_pids, task->pid));
558 }
559
560 /**
561  * trace_filter_add_remove_task - Add or remove a task from a pid_list
562  * @pid_list: The list to modify
563  * @self: The current task for fork or NULL for exit
564  * @task: The task to add or remove
565  *
566  * If adding a task, if @self is defined, the task is only added if @self
567  * is also included in @pid_list. This happens on fork and tasks should
568  * only be added when the parent is listed. If @self is NULL, then the
569  * @task pid will be removed from the list, which would happen on exit
570  * of a task.
571  */
572 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
573                                   struct task_struct *self,
574                                   struct task_struct *task)
575 {
576         if (!pid_list)
577                 return;
578
579         /* For forks, we only add if the forking task is listed */
580         if (self) {
581                 if (!trace_find_filtered_pid(pid_list, self->pid))
582                         return;
583         }
584
585         /* Sorry, but we don't support pid_max changing after setting */
586         if (task->pid >= pid_list->pid_max)
587                 return;
588
589         /* "self" is set for forks, and NULL for exits */
590         if (self)
591                 set_bit(task->pid, pid_list->pids);
592         else
593                 clear_bit(task->pid, pid_list->pids);
594 }
595
596 /**
597  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
598  * @pid_list: The pid list to show
599  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
600  * @pos: The position of the file
601  *
602  * This is used by the seq_file "next" operation to iterate the pids
603  * listed in a trace_pid_list structure.
604  *
605  * Returns the pid+1 as we want to display pid of zero, but NULL would
606  * stop the iteration.
607  */
608 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
609 {
610         unsigned long pid = (unsigned long)v;
611
612         (*pos)++;
613
614         /* pid already is +1 of the actual prevous bit */
615         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
616
617         /* Return pid + 1 to allow zero to be represented */
618         if (pid < pid_list->pid_max)
619                 return (void *)(pid + 1);
620
621         return NULL;
622 }
623
624 /**
625  * trace_pid_start - Used for seq_file to start reading pid lists
626  * @pid_list: The pid list to show
627  * @pos: The position of the file
628  *
629  * This is used by seq_file "start" operation to start the iteration
630  * of listing pids.
631  *
632  * Returns the pid+1 as we want to display pid of zero, but NULL would
633  * stop the iteration.
634  */
635 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
636 {
637         unsigned long pid;
638         loff_t l = 0;
639
640         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
641         if (pid >= pid_list->pid_max)
642                 return NULL;
643
644         /* Return pid + 1 so that zero can be the exit value */
645         for (pid++; pid && l < *pos;
646              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
647                 ;
648         return (void *)pid;
649 }
650
651 /**
652  * trace_pid_show - show the current pid in seq_file processing
653  * @m: The seq_file structure to write into
654  * @v: A void pointer of the pid (+1) value to display
655  *
656  * Can be directly used by seq_file operations to display the current
657  * pid value.
658  */
659 int trace_pid_show(struct seq_file *m, void *v)
660 {
661         unsigned long pid = (unsigned long)v - 1;
662
663         seq_printf(m, "%lu\n", pid);
664         return 0;
665 }
666
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE            127
669
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671                     struct trace_pid_list **new_pid_list,
672                     const char __user *ubuf, size_t cnt)
673 {
674         struct trace_pid_list *pid_list;
675         struct trace_parser parser;
676         unsigned long val;
677         int nr_pids = 0;
678         ssize_t read = 0;
679         ssize_t ret = 0;
680         loff_t pos;
681         pid_t pid;
682
683         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
684                 return -ENOMEM;
685
686         /*
687          * Always recreate a new array. The write is an all or nothing
688          * operation. Always create a new array when adding new pids by
689          * the user. If the operation fails, then the current list is
690          * not modified.
691          */
692         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
693         if (!pid_list) {
694                 trace_parser_put(&parser);
695                 return -ENOMEM;
696         }
697
698         pid_list->pid_max = READ_ONCE(pid_max);
699
700         /* Only truncating will shrink pid_max */
701         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
702                 pid_list->pid_max = filtered_pids->pid_max;
703
704         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
705         if (!pid_list->pids) {
706                 trace_parser_put(&parser);
707                 kfree(pid_list);
708                 return -ENOMEM;
709         }
710
711         if (filtered_pids) {
712                 /* copy the current bits to the new max */
713                 for_each_set_bit(pid, filtered_pids->pids,
714                                  filtered_pids->pid_max) {
715                         set_bit(pid, pid_list->pids);
716                         nr_pids++;
717                 }
718         }
719
720         while (cnt > 0) {
721
722                 pos = 0;
723
724                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
725                 if (ret < 0 || !trace_parser_loaded(&parser))
726                         break;
727
728                 read += ret;
729                 ubuf += ret;
730                 cnt -= ret;
731
732                 ret = -EINVAL;
733                 if (kstrtoul(parser.buffer, 0, &val))
734                         break;
735                 if (val >= pid_list->pid_max)
736                         break;
737
738                 pid = (pid_t)val;
739
740                 set_bit(pid, pid_list->pids);
741                 nr_pids++;
742
743                 trace_parser_clear(&parser);
744                 ret = 0;
745         }
746         trace_parser_put(&parser);
747
748         if (ret < 0) {
749                 trace_free_pid_list(pid_list);
750                 return ret;
751         }
752
753         if (!nr_pids) {
754                 /* Cleared the list of pids */
755                 trace_free_pid_list(pid_list);
756                 read = ret;
757                 pid_list = NULL;
758         }
759
760         *new_pid_list = pid_list;
761
762         return read;
763 }
764
765 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
766 {
767         u64 ts;
768
769         /* Early boot up does not have a buffer yet */
770         if (!buf->buffer)
771                 return trace_clock_local();
772
773         ts = ring_buffer_time_stamp(buf->buffer, cpu);
774         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
775
776         return ts;
777 }
778
779 u64 ftrace_now(int cpu)
780 {
781         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
782 }
783
784 /**
785  * tracing_is_enabled - Show if global_trace has been enabled
786  *
787  * Shows if the global trace has been enabled or not. It uses the
788  * mirror flag "buffer_disabled" to be used in fast paths such as for
789  * the irqsoff tracer. But it may be inaccurate due to races. If you
790  * need to know the accurate state, use tracing_is_on() which is a little
791  * slower, but accurate.
792  */
793 int tracing_is_enabled(void)
794 {
795         /*
796          * For quick access (irqsoff uses this in fast path), just
797          * return the mirror variable of the state of the ring buffer.
798          * It's a little racy, but we don't really care.
799          */
800         smp_rmb();
801         return !global_trace.buffer_disabled;
802 }
803
804 /*
805  * trace_buf_size is the size in bytes that is allocated
806  * for a buffer. Note, the number of bytes is always rounded
807  * to page size.
808  *
809  * This number is purposely set to a low number of 16384.
810  * If the dump on oops happens, it will be much appreciated
811  * to not have to wait for all that output. Anyway this can be
812  * boot time and run time configurable.
813  */
814 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
815
816 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
817
818 /* trace_types holds a link list of available tracers. */
819 static struct tracer            *trace_types __read_mostly;
820
821 /*
822  * trace_types_lock is used to protect the trace_types list.
823  */
824 DEFINE_MUTEX(trace_types_lock);
825
826 /*
827  * serialize the access of the ring buffer
828  *
829  * ring buffer serializes readers, but it is low level protection.
830  * The validity of the events (which returns by ring_buffer_peek() ..etc)
831  * are not protected by ring buffer.
832  *
833  * The content of events may become garbage if we allow other process consumes
834  * these events concurrently:
835  *   A) the page of the consumed events may become a normal page
836  *      (not reader page) in ring buffer, and this page will be rewrited
837  *      by events producer.
838  *   B) The page of the consumed events may become a page for splice_read,
839  *      and this page will be returned to system.
840  *
841  * These primitives allow multi process access to different cpu ring buffer
842  * concurrently.
843  *
844  * These primitives don't distinguish read-only and read-consume access.
845  * Multi read-only access are also serialized.
846  */
847
848 #ifdef CONFIG_SMP
849 static DECLARE_RWSEM(all_cpu_access_lock);
850 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
851
852 static inline void trace_access_lock(int cpu)
853 {
854         if (cpu == RING_BUFFER_ALL_CPUS) {
855                 /* gain it for accessing the whole ring buffer. */
856                 down_write(&all_cpu_access_lock);
857         } else {
858                 /* gain it for accessing a cpu ring buffer. */
859
860                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
861                 down_read(&all_cpu_access_lock);
862
863                 /* Secondly block other access to this @cpu ring buffer. */
864                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
865         }
866 }
867
868 static inline void trace_access_unlock(int cpu)
869 {
870         if (cpu == RING_BUFFER_ALL_CPUS) {
871                 up_write(&all_cpu_access_lock);
872         } else {
873                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
874                 up_read(&all_cpu_access_lock);
875         }
876 }
877
878 static inline void trace_access_lock_init(void)
879 {
880         int cpu;
881
882         for_each_possible_cpu(cpu)
883                 mutex_init(&per_cpu(cpu_access_lock, cpu));
884 }
885
886 #else
887
888 static DEFINE_MUTEX(access_lock);
889
890 static inline void trace_access_lock(int cpu)
891 {
892         (void)cpu;
893         mutex_lock(&access_lock);
894 }
895
896 static inline void trace_access_unlock(int cpu)
897 {
898         (void)cpu;
899         mutex_unlock(&access_lock);
900 }
901
902 static inline void trace_access_lock_init(void)
903 {
904 }
905
906 #endif
907
908 #ifdef CONFIG_STACKTRACE
909 static void __ftrace_trace_stack(struct trace_buffer *buffer,
910                                  unsigned int trace_ctx,
911                                  int skip, struct pt_regs *regs);
912 static inline void ftrace_trace_stack(struct trace_array *tr,
913                                       struct trace_buffer *buffer,
914                                       unsigned int trace_ctx,
915                                       int skip, struct pt_regs *regs);
916
917 #else
918 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                         unsigned int trace_ctx,
920                                         int skip, struct pt_regs *regs)
921 {
922 }
923 static inline void ftrace_trace_stack(struct trace_array *tr,
924                                       struct trace_buffer *buffer,
925                                       unsigned long trace_ctx,
926                                       int skip, struct pt_regs *regs)
927 {
928 }
929
930 #endif
931
932 static __always_inline void
933 trace_event_setup(struct ring_buffer_event *event,
934                   int type, unsigned int trace_ctx)
935 {
936         struct trace_entry *ent = ring_buffer_event_data(event);
937
938         tracing_generic_entry_update(ent, type, trace_ctx);
939 }
940
941 static __always_inline struct ring_buffer_event *
942 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
943                           int type,
944                           unsigned long len,
945                           unsigned int trace_ctx)
946 {
947         struct ring_buffer_event *event;
948
949         event = ring_buffer_lock_reserve(buffer, len);
950         if (event != NULL)
951                 trace_event_setup(event, type, trace_ctx);
952
953         return event;
954 }
955
956 void tracer_tracing_on(struct trace_array *tr)
957 {
958         if (tr->array_buffer.buffer)
959                 ring_buffer_record_on(tr->array_buffer.buffer);
960         /*
961          * This flag is looked at when buffers haven't been allocated
962          * yet, or by some tracers (like irqsoff), that just want to
963          * know if the ring buffer has been disabled, but it can handle
964          * races of where it gets disabled but we still do a record.
965          * As the check is in the fast path of the tracers, it is more
966          * important to be fast than accurate.
967          */
968         tr->buffer_disabled = 0;
969         /* Make the flag seen by readers */
970         smp_wmb();
971 }
972
973 /**
974  * tracing_on - enable tracing buffers
975  *
976  * This function enables tracing buffers that may have been
977  * disabled with tracing_off.
978  */
979 void tracing_on(void)
980 {
981         tracer_tracing_on(&global_trace);
982 }
983 EXPORT_SYMBOL_GPL(tracing_on);
984
985
986 static __always_inline void
987 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
988 {
989         __this_cpu_write(trace_taskinfo_save, true);
990
991         /* If this is the temp buffer, we need to commit fully */
992         if (this_cpu_read(trace_buffered_event) == event) {
993                 /* Length is in event->array[0] */
994                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
995                 /* Release the temp buffer */
996                 this_cpu_dec(trace_buffered_event_cnt);
997         } else
998                 ring_buffer_unlock_commit(buffer, event);
999 }
1000
1001 /**
1002  * __trace_puts - write a constant string into the trace buffer.
1003  * @ip:    The address of the caller
1004  * @str:   The constant string to write
1005  * @size:  The size of the string.
1006  */
1007 int __trace_puts(unsigned long ip, const char *str, int size)
1008 {
1009         struct ring_buffer_event *event;
1010         struct trace_buffer *buffer;
1011         struct print_entry *entry;
1012         unsigned int trace_ctx;
1013         int alloc;
1014
1015         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1016                 return 0;
1017
1018         if (unlikely(tracing_selftest_running || tracing_disabled))
1019                 return 0;
1020
1021         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1022
1023         trace_ctx = tracing_gen_ctx();
1024         buffer = global_trace.array_buffer.buffer;
1025         ring_buffer_nest_start(buffer);
1026         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1027                                             trace_ctx);
1028         if (!event) {
1029                 size = 0;
1030                 goto out;
1031         }
1032
1033         entry = ring_buffer_event_data(event);
1034         entry->ip = ip;
1035
1036         memcpy(&entry->buf, str, size);
1037
1038         /* Add a newline if necessary */
1039         if (entry->buf[size - 1] != '\n') {
1040                 entry->buf[size] = '\n';
1041                 entry->buf[size + 1] = '\0';
1042         } else
1043                 entry->buf[size] = '\0';
1044
1045         __buffer_unlock_commit(buffer, event);
1046         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1047  out:
1048         ring_buffer_nest_end(buffer);
1049         return size;
1050 }
1051 EXPORT_SYMBOL_GPL(__trace_puts);
1052
1053 /**
1054  * __trace_bputs - write the pointer to a constant string into trace buffer
1055  * @ip:    The address of the caller
1056  * @str:   The constant string to write to the buffer to
1057  */
1058 int __trace_bputs(unsigned long ip, const char *str)
1059 {
1060         struct ring_buffer_event *event;
1061         struct trace_buffer *buffer;
1062         struct bputs_entry *entry;
1063         unsigned int trace_ctx;
1064         int size = sizeof(struct bputs_entry);
1065         int ret = 0;
1066
1067         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1068                 return 0;
1069
1070         if (unlikely(tracing_selftest_running || tracing_disabled))
1071                 return 0;
1072
1073         trace_ctx = tracing_gen_ctx();
1074         buffer = global_trace.array_buffer.buffer;
1075
1076         ring_buffer_nest_start(buffer);
1077         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1078                                             trace_ctx);
1079         if (!event)
1080                 goto out;
1081
1082         entry = ring_buffer_event_data(event);
1083         entry->ip                       = ip;
1084         entry->str                      = str;
1085
1086         __buffer_unlock_commit(buffer, event);
1087         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1088
1089         ret = 1;
1090  out:
1091         ring_buffer_nest_end(buffer);
1092         return ret;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_bputs);
1095
1096 #ifdef CONFIG_TRACER_SNAPSHOT
1097 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1098                                            void *cond_data)
1099 {
1100         struct tracer *tracer = tr->current_trace;
1101         unsigned long flags;
1102
1103         if (in_nmi()) {
1104                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1105                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1106                 return;
1107         }
1108
1109         if (!tr->allocated_snapshot) {
1110                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1111                 internal_trace_puts("*** stopping trace here!   ***\n");
1112                 tracing_off();
1113                 return;
1114         }
1115
1116         /* Note, snapshot can not be used when the tracer uses it */
1117         if (tracer->use_max_tr) {
1118                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1119                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1120                 return;
1121         }
1122
1123         local_irq_save(flags);
1124         update_max_tr(tr, current, smp_processor_id(), cond_data);
1125         local_irq_restore(flags);
1126 }
1127
1128 void tracing_snapshot_instance(struct trace_array *tr)
1129 {
1130         tracing_snapshot_instance_cond(tr, NULL);
1131 }
1132
1133 /**
1134  * tracing_snapshot - take a snapshot of the current buffer.
1135  *
1136  * This causes a swap between the snapshot buffer and the current live
1137  * tracing buffer. You can use this to take snapshots of the live
1138  * trace when some condition is triggered, but continue to trace.
1139  *
1140  * Note, make sure to allocate the snapshot with either
1141  * a tracing_snapshot_alloc(), or by doing it manually
1142  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1143  *
1144  * If the snapshot buffer is not allocated, it will stop tracing.
1145  * Basically making a permanent snapshot.
1146  */
1147 void tracing_snapshot(void)
1148 {
1149         struct trace_array *tr = &global_trace;
1150
1151         tracing_snapshot_instance(tr);
1152 }
1153 EXPORT_SYMBOL_GPL(tracing_snapshot);
1154
1155 /**
1156  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1157  * @tr:         The tracing instance to snapshot
1158  * @cond_data:  The data to be tested conditionally, and possibly saved
1159  *
1160  * This is the same as tracing_snapshot() except that the snapshot is
1161  * conditional - the snapshot will only happen if the
1162  * cond_snapshot.update() implementation receiving the cond_data
1163  * returns true, which means that the trace array's cond_snapshot
1164  * update() operation used the cond_data to determine whether the
1165  * snapshot should be taken, and if it was, presumably saved it along
1166  * with the snapshot.
1167  */
1168 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1169 {
1170         tracing_snapshot_instance_cond(tr, cond_data);
1171 }
1172 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1173
1174 /**
1175  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1176  * @tr:         The tracing instance
1177  *
1178  * When the user enables a conditional snapshot using
1179  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1180  * with the snapshot.  This accessor is used to retrieve it.
1181  *
1182  * Should not be called from cond_snapshot.update(), since it takes
1183  * the tr->max_lock lock, which the code calling
1184  * cond_snapshot.update() has already done.
1185  *
1186  * Returns the cond_data associated with the trace array's snapshot.
1187  */
1188 void *tracing_cond_snapshot_data(struct trace_array *tr)
1189 {
1190         void *cond_data = NULL;
1191
1192         arch_spin_lock(&tr->max_lock);
1193
1194         if (tr->cond_snapshot)
1195                 cond_data = tr->cond_snapshot->cond_data;
1196
1197         arch_spin_unlock(&tr->max_lock);
1198
1199         return cond_data;
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1202
1203 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1204                                         struct array_buffer *size_buf, int cpu_id);
1205 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1206
1207 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1208 {
1209         int ret;
1210
1211         if (!tr->allocated_snapshot) {
1212
1213                 /* allocate spare buffer */
1214                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1215                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1216                 if (ret < 0)
1217                         return ret;
1218
1219                 tr->allocated_snapshot = true;
1220         }
1221
1222         return 0;
1223 }
1224
1225 static void free_snapshot(struct trace_array *tr)
1226 {
1227         /*
1228          * We don't free the ring buffer. instead, resize it because
1229          * The max_tr ring buffer has some state (e.g. ring->clock) and
1230          * we want preserve it.
1231          */
1232         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1233         set_buffer_entries(&tr->max_buffer, 1);
1234         tracing_reset_online_cpus(&tr->max_buffer);
1235         tr->allocated_snapshot = false;
1236 }
1237
1238 /**
1239  * tracing_alloc_snapshot - allocate snapshot buffer.
1240  *
1241  * This only allocates the snapshot buffer if it isn't already
1242  * allocated - it doesn't also take a snapshot.
1243  *
1244  * This is meant to be used in cases where the snapshot buffer needs
1245  * to be set up for events that can't sleep but need to be able to
1246  * trigger a snapshot.
1247  */
1248 int tracing_alloc_snapshot(void)
1249 {
1250         struct trace_array *tr = &global_trace;
1251         int ret;
1252
1253         ret = tracing_alloc_snapshot_instance(tr);
1254         WARN_ON(ret < 0);
1255
1256         return ret;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1259
1260 /**
1261  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1262  *
1263  * This is similar to tracing_snapshot(), but it will allocate the
1264  * snapshot buffer if it isn't already allocated. Use this only
1265  * where it is safe to sleep, as the allocation may sleep.
1266  *
1267  * This causes a swap between the snapshot buffer and the current live
1268  * tracing buffer. You can use this to take snapshots of the live
1269  * trace when some condition is triggered, but continue to trace.
1270  */
1271 void tracing_snapshot_alloc(void)
1272 {
1273         int ret;
1274
1275         ret = tracing_alloc_snapshot();
1276         if (ret < 0)
1277                 return;
1278
1279         tracing_snapshot();
1280 }
1281 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1282
1283 /**
1284  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1285  * @tr:         The tracing instance
1286  * @cond_data:  User data to associate with the snapshot
1287  * @update:     Implementation of the cond_snapshot update function
1288  *
1289  * Check whether the conditional snapshot for the given instance has
1290  * already been enabled, or if the current tracer is already using a
1291  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1292  * save the cond_data and update function inside.
1293  *
1294  * Returns 0 if successful, error otherwise.
1295  */
1296 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1297                                  cond_update_fn_t update)
1298 {
1299         struct cond_snapshot *cond_snapshot;
1300         int ret = 0;
1301
1302         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1303         if (!cond_snapshot)
1304                 return -ENOMEM;
1305
1306         cond_snapshot->cond_data = cond_data;
1307         cond_snapshot->update = update;
1308
1309         mutex_lock(&trace_types_lock);
1310
1311         ret = tracing_alloc_snapshot_instance(tr);
1312         if (ret)
1313                 goto fail_unlock;
1314
1315         if (tr->current_trace->use_max_tr) {
1316                 ret = -EBUSY;
1317                 goto fail_unlock;
1318         }
1319
1320         /*
1321          * The cond_snapshot can only change to NULL without the
1322          * trace_types_lock. We don't care if we race with it going
1323          * to NULL, but we want to make sure that it's not set to
1324          * something other than NULL when we get here, which we can
1325          * do safely with only holding the trace_types_lock and not
1326          * having to take the max_lock.
1327          */
1328         if (tr->cond_snapshot) {
1329                 ret = -EBUSY;
1330                 goto fail_unlock;
1331         }
1332
1333         arch_spin_lock(&tr->max_lock);
1334         tr->cond_snapshot = cond_snapshot;
1335         arch_spin_unlock(&tr->max_lock);
1336
1337         mutex_unlock(&trace_types_lock);
1338
1339         return ret;
1340
1341  fail_unlock:
1342         mutex_unlock(&trace_types_lock);
1343         kfree(cond_snapshot);
1344         return ret;
1345 }
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1347
1348 /**
1349  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1350  * @tr:         The tracing instance
1351  *
1352  * Check whether the conditional snapshot for the given instance is
1353  * enabled; if so, free the cond_snapshot associated with it,
1354  * otherwise return -EINVAL.
1355  *
1356  * Returns 0 if successful, error otherwise.
1357  */
1358 int tracing_snapshot_cond_disable(struct trace_array *tr)
1359 {
1360         int ret = 0;
1361
1362         arch_spin_lock(&tr->max_lock);
1363
1364         if (!tr->cond_snapshot)
1365                 ret = -EINVAL;
1366         else {
1367                 kfree(tr->cond_snapshot);
1368                 tr->cond_snapshot = NULL;
1369         }
1370
1371         arch_spin_unlock(&tr->max_lock);
1372
1373         return ret;
1374 }
1375 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1376 #else
1377 void tracing_snapshot(void)
1378 {
1379         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1380 }
1381 EXPORT_SYMBOL_GPL(tracing_snapshot);
1382 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1383 {
1384         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1385 }
1386 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1387 int tracing_alloc_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1390         return -ENODEV;
1391 }
1392 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1393 void tracing_snapshot_alloc(void)
1394 {
1395         /* Give warning */
1396         tracing_snapshot();
1397 }
1398 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1399 void *tracing_cond_snapshot_data(struct trace_array *tr)
1400 {
1401         return NULL;
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1404 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1405 {
1406         return -ENODEV;
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1409 int tracing_snapshot_cond_disable(struct trace_array *tr)
1410 {
1411         return false;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1414 #endif /* CONFIG_TRACER_SNAPSHOT */
1415
1416 void tracer_tracing_off(struct trace_array *tr)
1417 {
1418         if (tr->array_buffer.buffer)
1419                 ring_buffer_record_off(tr->array_buffer.buffer);
1420         /*
1421          * This flag is looked at when buffers haven't been allocated
1422          * yet, or by some tracers (like irqsoff), that just want to
1423          * know if the ring buffer has been disabled, but it can handle
1424          * races of where it gets disabled but we still do a record.
1425          * As the check is in the fast path of the tracers, it is more
1426          * important to be fast than accurate.
1427          */
1428         tr->buffer_disabled = 1;
1429         /* Make the flag seen by readers */
1430         smp_wmb();
1431 }
1432
1433 /**
1434  * tracing_off - turn off tracing buffers
1435  *
1436  * This function stops the tracing buffers from recording data.
1437  * It does not disable any overhead the tracers themselves may
1438  * be causing. This function simply causes all recording to
1439  * the ring buffers to fail.
1440  */
1441 void tracing_off(void)
1442 {
1443         tracer_tracing_off(&global_trace);
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_off);
1446
1447 void disable_trace_on_warning(void)
1448 {
1449         if (__disable_trace_on_warning) {
1450                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1451                         "Disabling tracing due to warning\n");
1452                 tracing_off();
1453         }
1454 }
1455
1456 /**
1457  * tracer_tracing_is_on - show real state of ring buffer enabled
1458  * @tr : the trace array to know if ring buffer is enabled
1459  *
1460  * Shows real state of the ring buffer if it is enabled or not.
1461  */
1462 bool tracer_tracing_is_on(struct trace_array *tr)
1463 {
1464         if (tr->array_buffer.buffer)
1465                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1466         return !tr->buffer_disabled;
1467 }
1468
1469 /**
1470  * tracing_is_on - show state of ring buffers enabled
1471  */
1472 int tracing_is_on(void)
1473 {
1474         return tracer_tracing_is_on(&global_trace);
1475 }
1476 EXPORT_SYMBOL_GPL(tracing_is_on);
1477
1478 static int __init set_buf_size(char *str)
1479 {
1480         unsigned long buf_size;
1481
1482         if (!str)
1483                 return 0;
1484         buf_size = memparse(str, &str);
1485         /* nr_entries can not be zero */
1486         if (buf_size == 0)
1487                 return 0;
1488         trace_buf_size = buf_size;
1489         return 1;
1490 }
1491 __setup("trace_buf_size=", set_buf_size);
1492
1493 static int __init set_tracing_thresh(char *str)
1494 {
1495         unsigned long threshold;
1496         int ret;
1497
1498         if (!str)
1499                 return 0;
1500         ret = kstrtoul(str, 0, &threshold);
1501         if (ret < 0)
1502                 return 0;
1503         tracing_thresh = threshold * 1000;
1504         return 1;
1505 }
1506 __setup("tracing_thresh=", set_tracing_thresh);
1507
1508 unsigned long nsecs_to_usecs(unsigned long nsecs)
1509 {
1510         return nsecs / 1000;
1511 }
1512
1513 /*
1514  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1515  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1516  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1517  * of strings in the order that the evals (enum) were defined.
1518  */
1519 #undef C
1520 #define C(a, b) b
1521
1522 /* These must match the bit postions in trace_iterator_flags */
1523 static const char *trace_options[] = {
1524         TRACE_FLAGS
1525         NULL
1526 };
1527
1528 static struct {
1529         u64 (*func)(void);
1530         const char *name;
1531         int in_ns;              /* is this clock in nanoseconds? */
1532 } trace_clocks[] = {
1533         { trace_clock_local,            "local",        1 },
1534         { trace_clock_global,           "global",       1 },
1535         { trace_clock_counter,          "counter",      0 },
1536         { trace_clock_jiffies,          "uptime",       0 },
1537         { trace_clock,                  "perf",         1 },
1538         { ktime_get_mono_fast_ns,       "mono",         1 },
1539         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1540         { ktime_get_boot_fast_ns,       "boot",         1 },
1541         ARCH_TRACE_CLOCKS
1542 };
1543
1544 bool trace_clock_in_ns(struct trace_array *tr)
1545 {
1546         if (trace_clocks[tr->clock_id].in_ns)
1547                 return true;
1548
1549         return false;
1550 }
1551
1552 /*
1553  * trace_parser_get_init - gets the buffer for trace parser
1554  */
1555 int trace_parser_get_init(struct trace_parser *parser, int size)
1556 {
1557         memset(parser, 0, sizeof(*parser));
1558
1559         parser->buffer = kmalloc(size, GFP_KERNEL);
1560         if (!parser->buffer)
1561                 return 1;
1562
1563         parser->size = size;
1564         return 0;
1565 }
1566
1567 /*
1568  * trace_parser_put - frees the buffer for trace parser
1569  */
1570 void trace_parser_put(struct trace_parser *parser)
1571 {
1572         kfree(parser->buffer);
1573         parser->buffer = NULL;
1574 }
1575
1576 /*
1577  * trace_get_user - reads the user input string separated by  space
1578  * (matched by isspace(ch))
1579  *
1580  * For each string found the 'struct trace_parser' is updated,
1581  * and the function returns.
1582  *
1583  * Returns number of bytes read.
1584  *
1585  * See kernel/trace/trace.h for 'struct trace_parser' details.
1586  */
1587 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1588         size_t cnt, loff_t *ppos)
1589 {
1590         char ch;
1591         size_t read = 0;
1592         ssize_t ret;
1593
1594         if (!*ppos)
1595                 trace_parser_clear(parser);
1596
1597         ret = get_user(ch, ubuf++);
1598         if (ret)
1599                 goto out;
1600
1601         read++;
1602         cnt--;
1603
1604         /*
1605          * The parser is not finished with the last write,
1606          * continue reading the user input without skipping spaces.
1607          */
1608         if (!parser->cont) {
1609                 /* skip white space */
1610                 while (cnt && isspace(ch)) {
1611                         ret = get_user(ch, ubuf++);
1612                         if (ret)
1613                                 goto out;
1614                         read++;
1615                         cnt--;
1616                 }
1617
1618                 parser->idx = 0;
1619
1620                 /* only spaces were written */
1621                 if (isspace(ch) || !ch) {
1622                         *ppos += read;
1623                         ret = read;
1624                         goto out;
1625                 }
1626         }
1627
1628         /* read the non-space input */
1629         while (cnt && !isspace(ch) && ch) {
1630                 if (parser->idx < parser->size - 1)
1631                         parser->buffer[parser->idx++] = ch;
1632                 else {
1633                         ret = -EINVAL;
1634                         goto out;
1635                 }
1636                 ret = get_user(ch, ubuf++);
1637                 if (ret)
1638                         goto out;
1639                 read++;
1640                 cnt--;
1641         }
1642
1643         /* We either got finished input or we have to wait for another call. */
1644         if (isspace(ch) || !ch) {
1645                 parser->buffer[parser->idx] = 0;
1646                 parser->cont = false;
1647         } else if (parser->idx < parser->size - 1) {
1648                 parser->cont = true;
1649                 parser->buffer[parser->idx++] = ch;
1650                 /* Make sure the parsed string always terminates with '\0'. */
1651                 parser->buffer[parser->idx] = 0;
1652         } else {
1653                 ret = -EINVAL;
1654                 goto out;
1655         }
1656
1657         *ppos += read;
1658         ret = read;
1659
1660 out:
1661         return ret;
1662 }
1663
1664 /* TODO add a seq_buf_to_buffer() */
1665 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1666 {
1667         int len;
1668
1669         if (trace_seq_used(s) <= s->seq.readpos)
1670                 return -EBUSY;
1671
1672         len = trace_seq_used(s) - s->seq.readpos;
1673         if (cnt > len)
1674                 cnt = len;
1675         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1676
1677         s->seq.readpos += cnt;
1678         return cnt;
1679 }
1680
1681 unsigned long __read_mostly     tracing_thresh;
1682 static const struct file_operations tracing_max_lat_fops;
1683
1684 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1685         defined(CONFIG_FSNOTIFY)
1686
1687 static struct workqueue_struct *fsnotify_wq;
1688
1689 static void latency_fsnotify_workfn(struct work_struct *work)
1690 {
1691         struct trace_array *tr = container_of(work, struct trace_array,
1692                                               fsnotify_work);
1693         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1694 }
1695
1696 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1697 {
1698         struct trace_array *tr = container_of(iwork, struct trace_array,
1699                                               fsnotify_irqwork);
1700         queue_work(fsnotify_wq, &tr->fsnotify_work);
1701 }
1702
1703 static void trace_create_maxlat_file(struct trace_array *tr,
1704                                      struct dentry *d_tracer)
1705 {
1706         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1707         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1708         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1709                                               d_tracer, &tr->max_latency,
1710                                               &tracing_max_lat_fops);
1711 }
1712
1713 __init static int latency_fsnotify_init(void)
1714 {
1715         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1716                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1717         if (!fsnotify_wq) {
1718                 pr_err("Unable to allocate tr_max_lat_wq\n");
1719                 return -ENOMEM;
1720         }
1721         return 0;
1722 }
1723
1724 late_initcall_sync(latency_fsnotify_init);
1725
1726 void latency_fsnotify(struct trace_array *tr)
1727 {
1728         if (!fsnotify_wq)
1729                 return;
1730         /*
1731          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1732          * possible that we are called from __schedule() or do_idle(), which
1733          * could cause a deadlock.
1734          */
1735         irq_work_queue(&tr->fsnotify_irqwork);
1736 }
1737
1738 /*
1739  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1740  *  defined(CONFIG_FSNOTIFY)
1741  */
1742 #else
1743
1744 #define trace_create_maxlat_file(tr, d_tracer)                          \
1745         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1746                           &tr->max_latency, &tracing_max_lat_fops)
1747
1748 #endif
1749
1750 #ifdef CONFIG_TRACER_MAX_TRACE
1751 /*
1752  * Copy the new maximum trace into the separate maximum-trace
1753  * structure. (this way the maximum trace is permanently saved,
1754  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1755  */
1756 static void
1757 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1758 {
1759         struct array_buffer *trace_buf = &tr->array_buffer;
1760         struct array_buffer *max_buf = &tr->max_buffer;
1761         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1762         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1763
1764         max_buf->cpu = cpu;
1765         max_buf->time_start = data->preempt_timestamp;
1766
1767         max_data->saved_latency = tr->max_latency;
1768         max_data->critical_start = data->critical_start;
1769         max_data->critical_end = data->critical_end;
1770
1771         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1772         max_data->pid = tsk->pid;
1773         /*
1774          * If tsk == current, then use current_uid(), as that does not use
1775          * RCU. The irq tracer can be called out of RCU scope.
1776          */
1777         if (tsk == current)
1778                 max_data->uid = current_uid();
1779         else
1780                 max_data->uid = task_uid(tsk);
1781
1782         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1783         max_data->policy = tsk->policy;
1784         max_data->rt_priority = tsk->rt_priority;
1785
1786         /* record this tasks comm */
1787         tracing_record_cmdline(tsk);
1788         latency_fsnotify(tr);
1789 }
1790
1791 /**
1792  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1793  * @tr: tracer
1794  * @tsk: the task with the latency
1795  * @cpu: The cpu that initiated the trace.
1796  * @cond_data: User data associated with a conditional snapshot
1797  *
1798  * Flip the buffers between the @tr and the max_tr and record information
1799  * about which task was the cause of this latency.
1800  */
1801 void
1802 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1803               void *cond_data)
1804 {
1805         if (tr->stop_count)
1806                 return;
1807
1808         WARN_ON_ONCE(!irqs_disabled());
1809
1810         if (!tr->allocated_snapshot) {
1811                 /* Only the nop tracer should hit this when disabling */
1812                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1813                 return;
1814         }
1815
1816         arch_spin_lock(&tr->max_lock);
1817
1818         /* Inherit the recordable setting from array_buffer */
1819         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1820                 ring_buffer_record_on(tr->max_buffer.buffer);
1821         else
1822                 ring_buffer_record_off(tr->max_buffer.buffer);
1823
1824 #ifdef CONFIG_TRACER_SNAPSHOT
1825         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1826                 goto out_unlock;
1827 #endif
1828         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1829
1830         __update_max_tr(tr, tsk, cpu);
1831
1832  out_unlock:
1833         arch_spin_unlock(&tr->max_lock);
1834 }
1835
1836 /**
1837  * update_max_tr_single - only copy one trace over, and reset the rest
1838  * @tr: tracer
1839  * @tsk: task with the latency
1840  * @cpu: the cpu of the buffer to copy.
1841  *
1842  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1843  */
1844 void
1845 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1846 {
1847         int ret;
1848
1849         if (tr->stop_count)
1850                 return;
1851
1852         WARN_ON_ONCE(!irqs_disabled());
1853         if (!tr->allocated_snapshot) {
1854                 /* Only the nop tracer should hit this when disabling */
1855                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1856                 return;
1857         }
1858
1859         arch_spin_lock(&tr->max_lock);
1860
1861         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1862
1863         if (ret == -EBUSY) {
1864                 /*
1865                  * We failed to swap the buffer due to a commit taking
1866                  * place on this CPU. We fail to record, but we reset
1867                  * the max trace buffer (no one writes directly to it)
1868                  * and flag that it failed.
1869                  */
1870                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1871                         "Failed to swap buffers due to commit in progress\n");
1872         }
1873
1874         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1875
1876         __update_max_tr(tr, tsk, cpu);
1877         arch_spin_unlock(&tr->max_lock);
1878 }
1879 #endif /* CONFIG_TRACER_MAX_TRACE */
1880
1881 static int wait_on_pipe(struct trace_iterator *iter, int full)
1882 {
1883         /* Iterators are static, they should be filled or empty */
1884         if (trace_buffer_iter(iter, iter->cpu_file))
1885                 return 0;
1886
1887         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1888                                 full);
1889 }
1890
1891 #ifdef CONFIG_FTRACE_STARTUP_TEST
1892 static bool selftests_can_run;
1893
1894 struct trace_selftests {
1895         struct list_head                list;
1896         struct tracer                   *type;
1897 };
1898
1899 static LIST_HEAD(postponed_selftests);
1900
1901 static int save_selftest(struct tracer *type)
1902 {
1903         struct trace_selftests *selftest;
1904
1905         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1906         if (!selftest)
1907                 return -ENOMEM;
1908
1909         selftest->type = type;
1910         list_add(&selftest->list, &postponed_selftests);
1911         return 0;
1912 }
1913
1914 static int run_tracer_selftest(struct tracer *type)
1915 {
1916         struct trace_array *tr = &global_trace;
1917         struct tracer *saved_tracer = tr->current_trace;
1918         int ret;
1919
1920         if (!type->selftest || tracing_selftest_disabled)
1921                 return 0;
1922
1923         /*
1924          * If a tracer registers early in boot up (before scheduling is
1925          * initialized and such), then do not run its selftests yet.
1926          * Instead, run it a little later in the boot process.
1927          */
1928         if (!selftests_can_run)
1929                 return save_selftest(type);
1930
1931         /*
1932          * Run a selftest on this tracer.
1933          * Here we reset the trace buffer, and set the current
1934          * tracer to be this tracer. The tracer can then run some
1935          * internal tracing to verify that everything is in order.
1936          * If we fail, we do not register this tracer.
1937          */
1938         tracing_reset_online_cpus(&tr->array_buffer);
1939
1940         tr->current_trace = type;
1941
1942 #ifdef CONFIG_TRACER_MAX_TRACE
1943         if (type->use_max_tr) {
1944                 /* If we expanded the buffers, make sure the max is expanded too */
1945                 if (ring_buffer_expanded)
1946                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1947                                            RING_BUFFER_ALL_CPUS);
1948                 tr->allocated_snapshot = true;
1949         }
1950 #endif
1951
1952         /* the test is responsible for initializing and enabling */
1953         pr_info("Testing tracer %s: ", type->name);
1954         ret = type->selftest(type, tr);
1955         /* the test is responsible for resetting too */
1956         tr->current_trace = saved_tracer;
1957         if (ret) {
1958                 printk(KERN_CONT "FAILED!\n");
1959                 /* Add the warning after printing 'FAILED' */
1960                 WARN_ON(1);
1961                 return -1;
1962         }
1963         /* Only reset on passing, to avoid touching corrupted buffers */
1964         tracing_reset_online_cpus(&tr->array_buffer);
1965
1966 #ifdef CONFIG_TRACER_MAX_TRACE
1967         if (type->use_max_tr) {
1968                 tr->allocated_snapshot = false;
1969
1970                 /* Shrink the max buffer again */
1971                 if (ring_buffer_expanded)
1972                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1973                                            RING_BUFFER_ALL_CPUS);
1974         }
1975 #endif
1976
1977         printk(KERN_CONT "PASSED\n");
1978         return 0;
1979 }
1980
1981 static __init int init_trace_selftests(void)
1982 {
1983         struct trace_selftests *p, *n;
1984         struct tracer *t, **last;
1985         int ret;
1986
1987         selftests_can_run = true;
1988
1989         mutex_lock(&trace_types_lock);
1990
1991         if (list_empty(&postponed_selftests))
1992                 goto out;
1993
1994         pr_info("Running postponed tracer tests:\n");
1995
1996         tracing_selftest_running = true;
1997         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1998                 /* This loop can take minutes when sanitizers are enabled, so
1999                  * lets make sure we allow RCU processing.
2000                  */
2001                 cond_resched();
2002                 ret = run_tracer_selftest(p->type);
2003                 /* If the test fails, then warn and remove from available_tracers */
2004                 if (ret < 0) {
2005                         WARN(1, "tracer: %s failed selftest, disabling\n",
2006                              p->type->name);
2007                         last = &trace_types;
2008                         for (t = trace_types; t; t = t->next) {
2009                                 if (t == p->type) {
2010                                         *last = t->next;
2011                                         break;
2012                                 }
2013                                 last = &t->next;
2014                         }
2015                 }
2016                 list_del(&p->list);
2017                 kfree(p);
2018         }
2019         tracing_selftest_running = false;
2020
2021  out:
2022         mutex_unlock(&trace_types_lock);
2023
2024         return 0;
2025 }
2026 core_initcall(init_trace_selftests);
2027 #else
2028 static inline int run_tracer_selftest(struct tracer *type)
2029 {
2030         return 0;
2031 }
2032 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2033
2034 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2035
2036 static void __init apply_trace_boot_options(void);
2037
2038 /**
2039  * register_tracer - register a tracer with the ftrace system.
2040  * @type: the plugin for the tracer
2041  *
2042  * Register a new plugin tracer.
2043  */
2044 int __init register_tracer(struct tracer *type)
2045 {
2046         struct tracer *t;
2047         int ret = 0;
2048
2049         if (!type->name) {
2050                 pr_info("Tracer must have a name\n");
2051                 return -1;
2052         }
2053
2054         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2055                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2056                 return -1;
2057         }
2058
2059         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2060                 pr_warn("Can not register tracer %s due to lockdown\n",
2061                            type->name);
2062                 return -EPERM;
2063         }
2064
2065         mutex_lock(&trace_types_lock);
2066
2067         tracing_selftest_running = true;
2068
2069         for (t = trace_types; t; t = t->next) {
2070                 if (strcmp(type->name, t->name) == 0) {
2071                         /* already found */
2072                         pr_info("Tracer %s already registered\n",
2073                                 type->name);
2074                         ret = -1;
2075                         goto out;
2076                 }
2077         }
2078
2079         if (!type->set_flag)
2080                 type->set_flag = &dummy_set_flag;
2081         if (!type->flags) {
2082                 /*allocate a dummy tracer_flags*/
2083                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2084                 if (!type->flags) {
2085                         ret = -ENOMEM;
2086                         goto out;
2087                 }
2088                 type->flags->val = 0;
2089                 type->flags->opts = dummy_tracer_opt;
2090         } else
2091                 if (!type->flags->opts)
2092                         type->flags->opts = dummy_tracer_opt;
2093
2094         /* store the tracer for __set_tracer_option */
2095         type->flags->trace = type;
2096
2097         ret = run_tracer_selftest(type);
2098         if (ret < 0)
2099                 goto out;
2100
2101         type->next = trace_types;
2102         trace_types = type;
2103         add_tracer_options(&global_trace, type);
2104
2105  out:
2106         tracing_selftest_running = false;
2107         mutex_unlock(&trace_types_lock);
2108
2109         if (ret || !default_bootup_tracer)
2110                 goto out_unlock;
2111
2112         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2113                 goto out_unlock;
2114
2115         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2116         /* Do we want this tracer to start on bootup? */
2117         tracing_set_tracer(&global_trace, type->name);
2118         default_bootup_tracer = NULL;
2119
2120         apply_trace_boot_options();
2121
2122         /* disable other selftests, since this will break it. */
2123         disable_tracing_selftest("running a tracer");
2124
2125  out_unlock:
2126         return ret;
2127 }
2128
2129 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2130 {
2131         struct trace_buffer *buffer = buf->buffer;
2132
2133         if (!buffer)
2134                 return;
2135
2136         ring_buffer_record_disable(buffer);
2137
2138         /* Make sure all commits have finished */
2139         synchronize_rcu();
2140         ring_buffer_reset_cpu(buffer, cpu);
2141
2142         ring_buffer_record_enable(buffer);
2143 }
2144
2145 void tracing_reset_online_cpus(struct array_buffer *buf)
2146 {
2147         struct trace_buffer *buffer = buf->buffer;
2148
2149         if (!buffer)
2150                 return;
2151
2152         ring_buffer_record_disable(buffer);
2153
2154         /* Make sure all commits have finished */
2155         synchronize_rcu();
2156
2157         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2158
2159         ring_buffer_reset_online_cpus(buffer);
2160
2161         ring_buffer_record_enable(buffer);
2162 }
2163
2164 /* Must have trace_types_lock held */
2165 void tracing_reset_all_online_cpus(void)
2166 {
2167         struct trace_array *tr;
2168
2169         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2170                 if (!tr->clear_trace)
2171                         continue;
2172                 tr->clear_trace = false;
2173                 tracing_reset_online_cpus(&tr->array_buffer);
2174 #ifdef CONFIG_TRACER_MAX_TRACE
2175                 tracing_reset_online_cpus(&tr->max_buffer);
2176 #endif
2177         }
2178 }
2179
2180 static int *tgid_map;
2181
2182 #define SAVED_CMDLINES_DEFAULT 128
2183 #define NO_CMDLINE_MAP UINT_MAX
2184 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2185 struct saved_cmdlines_buffer {
2186         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2187         unsigned *map_cmdline_to_pid;
2188         unsigned cmdline_num;
2189         int cmdline_idx;
2190         char *saved_cmdlines;
2191 };
2192 static struct saved_cmdlines_buffer *savedcmd;
2193
2194 /* temporary disable recording */
2195 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2196
2197 static inline char *get_saved_cmdlines(int idx)
2198 {
2199         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2200 }
2201
2202 static inline void set_cmdline(int idx, const char *cmdline)
2203 {
2204         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2205 }
2206
2207 static int allocate_cmdlines_buffer(unsigned int val,
2208                                     struct saved_cmdlines_buffer *s)
2209 {
2210         s->map_cmdline_to_pid = kmalloc_array(val,
2211                                               sizeof(*s->map_cmdline_to_pid),
2212                                               GFP_KERNEL);
2213         if (!s->map_cmdline_to_pid)
2214                 return -ENOMEM;
2215
2216         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2217         if (!s->saved_cmdlines) {
2218                 kfree(s->map_cmdline_to_pid);
2219                 return -ENOMEM;
2220         }
2221
2222         s->cmdline_idx = 0;
2223         s->cmdline_num = val;
2224         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2225                sizeof(s->map_pid_to_cmdline));
2226         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2227                val * sizeof(*s->map_cmdline_to_pid));
2228
2229         return 0;
2230 }
2231
2232 static int trace_create_savedcmd(void)
2233 {
2234         int ret;
2235
2236         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2237         if (!savedcmd)
2238                 return -ENOMEM;
2239
2240         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2241         if (ret < 0) {
2242                 kfree(savedcmd);
2243                 savedcmd = NULL;
2244                 return -ENOMEM;
2245         }
2246
2247         return 0;
2248 }
2249
2250 int is_tracing_stopped(void)
2251 {
2252         return global_trace.stop_count;
2253 }
2254
2255 /**
2256  * tracing_start - quick start of the tracer
2257  *
2258  * If tracing is enabled but was stopped by tracing_stop,
2259  * this will start the tracer back up.
2260  */
2261 void tracing_start(void)
2262 {
2263         struct trace_buffer *buffer;
2264         unsigned long flags;
2265
2266         if (tracing_disabled)
2267                 return;
2268
2269         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2270         if (--global_trace.stop_count) {
2271                 if (global_trace.stop_count < 0) {
2272                         /* Someone screwed up their debugging */
2273                         WARN_ON_ONCE(1);
2274                         global_trace.stop_count = 0;
2275                 }
2276                 goto out;
2277         }
2278
2279         /* Prevent the buffers from switching */
2280         arch_spin_lock(&global_trace.max_lock);
2281
2282         buffer = global_trace.array_buffer.buffer;
2283         if (buffer)
2284                 ring_buffer_record_enable(buffer);
2285
2286 #ifdef CONFIG_TRACER_MAX_TRACE
2287         buffer = global_trace.max_buffer.buffer;
2288         if (buffer)
2289                 ring_buffer_record_enable(buffer);
2290 #endif
2291
2292         arch_spin_unlock(&global_trace.max_lock);
2293
2294  out:
2295         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2296 }
2297
2298 static void tracing_start_tr(struct trace_array *tr)
2299 {
2300         struct trace_buffer *buffer;
2301         unsigned long flags;
2302
2303         if (tracing_disabled)
2304                 return;
2305
2306         /* If global, we need to also start the max tracer */
2307         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2308                 return tracing_start();
2309
2310         raw_spin_lock_irqsave(&tr->start_lock, flags);
2311
2312         if (--tr->stop_count) {
2313                 if (tr->stop_count < 0) {
2314                         /* Someone screwed up their debugging */
2315                         WARN_ON_ONCE(1);
2316                         tr->stop_count = 0;
2317                 }
2318                 goto out;
2319         }
2320
2321         buffer = tr->array_buffer.buffer;
2322         if (buffer)
2323                 ring_buffer_record_enable(buffer);
2324
2325  out:
2326         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2327 }
2328
2329 /**
2330  * tracing_stop - quick stop of the tracer
2331  *
2332  * Light weight way to stop tracing. Use in conjunction with
2333  * tracing_start.
2334  */
2335 void tracing_stop(void)
2336 {
2337         struct trace_buffer *buffer;
2338         unsigned long flags;
2339
2340         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2341         if (global_trace.stop_count++)
2342                 goto out;
2343
2344         /* Prevent the buffers from switching */
2345         arch_spin_lock(&global_trace.max_lock);
2346
2347         buffer = global_trace.array_buffer.buffer;
2348         if (buffer)
2349                 ring_buffer_record_disable(buffer);
2350
2351 #ifdef CONFIG_TRACER_MAX_TRACE
2352         buffer = global_trace.max_buffer.buffer;
2353         if (buffer)
2354                 ring_buffer_record_disable(buffer);
2355 #endif
2356
2357         arch_spin_unlock(&global_trace.max_lock);
2358
2359  out:
2360         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2361 }
2362
2363 static void tracing_stop_tr(struct trace_array *tr)
2364 {
2365         struct trace_buffer *buffer;
2366         unsigned long flags;
2367
2368         /* If global, we need to also stop the max tracer */
2369         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2370                 return tracing_stop();
2371
2372         raw_spin_lock_irqsave(&tr->start_lock, flags);
2373         if (tr->stop_count++)
2374                 goto out;
2375
2376         buffer = tr->array_buffer.buffer;
2377         if (buffer)
2378                 ring_buffer_record_disable(buffer);
2379
2380  out:
2381         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2382 }
2383
2384 static int trace_save_cmdline(struct task_struct *tsk)
2385 {
2386         unsigned pid, idx;
2387
2388         /* treat recording of idle task as a success */
2389         if (!tsk->pid)
2390                 return 1;
2391
2392         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2393                 return 0;
2394
2395         /*
2396          * It's not the end of the world if we don't get
2397          * the lock, but we also don't want to spin
2398          * nor do we want to disable interrupts,
2399          * so if we miss here, then better luck next time.
2400          */
2401         if (!arch_spin_trylock(&trace_cmdline_lock))
2402                 return 0;
2403
2404         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2405         if (idx == NO_CMDLINE_MAP) {
2406                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2407
2408                 /*
2409                  * Check whether the cmdline buffer at idx has a pid
2410                  * mapped. We are going to overwrite that entry so we
2411                  * need to clear the map_pid_to_cmdline. Otherwise we
2412                  * would read the new comm for the old pid.
2413                  */
2414                 pid = savedcmd->map_cmdline_to_pid[idx];
2415                 if (pid != NO_CMDLINE_MAP)
2416                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2417
2418                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2420
2421                 savedcmd->cmdline_idx = idx;
2422         }
2423
2424         set_cmdline(idx, tsk->comm);
2425
2426         arch_spin_unlock(&trace_cmdline_lock);
2427
2428         return 1;
2429 }
2430
2431 static void __trace_find_cmdline(int pid, char comm[])
2432 {
2433         unsigned map;
2434
2435         if (!pid) {
2436                 strcpy(comm, "<idle>");
2437                 return;
2438         }
2439
2440         if (WARN_ON_ONCE(pid < 0)) {
2441                 strcpy(comm, "<XXX>");
2442                 return;
2443         }
2444
2445         if (pid > PID_MAX_DEFAULT) {
2446                 strcpy(comm, "<...>");
2447                 return;
2448         }
2449
2450         map = savedcmd->map_pid_to_cmdline[pid];
2451         if (map != NO_CMDLINE_MAP)
2452                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2453         else
2454                 strcpy(comm, "<...>");
2455 }
2456
2457 void trace_find_cmdline(int pid, char comm[])
2458 {
2459         preempt_disable();
2460         arch_spin_lock(&trace_cmdline_lock);
2461
2462         __trace_find_cmdline(pid, comm);
2463
2464         arch_spin_unlock(&trace_cmdline_lock);
2465         preempt_enable();
2466 }
2467
2468 int trace_find_tgid(int pid)
2469 {
2470         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2471                 return 0;
2472
2473         return tgid_map[pid];
2474 }
2475
2476 static int trace_save_tgid(struct task_struct *tsk)
2477 {
2478         /* treat recording of idle task as a success */
2479         if (!tsk->pid)
2480                 return 1;
2481
2482         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2483                 return 0;
2484
2485         tgid_map[tsk->pid] = tsk->tgid;
2486         return 1;
2487 }
2488
2489 static bool tracing_record_taskinfo_skip(int flags)
2490 {
2491         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2492                 return true;
2493         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2494                 return true;
2495         if (!__this_cpu_read(trace_taskinfo_save))
2496                 return true;
2497         return false;
2498 }
2499
2500 /**
2501  * tracing_record_taskinfo - record the task info of a task
2502  *
2503  * @task:  task to record
2504  * @flags: TRACE_RECORD_CMDLINE for recording comm
2505  *         TRACE_RECORD_TGID for recording tgid
2506  */
2507 void tracing_record_taskinfo(struct task_struct *task, int flags)
2508 {
2509         bool done;
2510
2511         if (tracing_record_taskinfo_skip(flags))
2512                 return;
2513
2514         /*
2515          * Record as much task information as possible. If some fail, continue
2516          * to try to record the others.
2517          */
2518         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2519         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2520
2521         /* If recording any information failed, retry again soon. */
2522         if (!done)
2523                 return;
2524
2525         __this_cpu_write(trace_taskinfo_save, false);
2526 }
2527
2528 /**
2529  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2530  *
2531  * @prev: previous task during sched_switch
2532  * @next: next task during sched_switch
2533  * @flags: TRACE_RECORD_CMDLINE for recording comm
2534  *         TRACE_RECORD_TGID for recording tgid
2535  */
2536 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2537                                           struct task_struct *next, int flags)
2538 {
2539         bool done;
2540
2541         if (tracing_record_taskinfo_skip(flags))
2542                 return;
2543
2544         /*
2545          * Record as much task information as possible. If some fail, continue
2546          * to try to record the others.
2547          */
2548         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2549         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2550         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2551         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2552
2553         /* If recording any information failed, retry again soon. */
2554         if (!done)
2555                 return;
2556
2557         __this_cpu_write(trace_taskinfo_save, false);
2558 }
2559
2560 /* Helpers to record a specific task information */
2561 void tracing_record_cmdline(struct task_struct *task)
2562 {
2563         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2564 }
2565
2566 void tracing_record_tgid(struct task_struct *task)
2567 {
2568         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2569 }
2570
2571 /*
2572  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2573  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2574  * simplifies those functions and keeps them in sync.
2575  */
2576 enum print_line_t trace_handle_return(struct trace_seq *s)
2577 {
2578         return trace_seq_has_overflowed(s) ?
2579                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2580 }
2581 EXPORT_SYMBOL_GPL(trace_handle_return);
2582
2583 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2584 {
2585         unsigned int trace_flags = irqs_status;
2586         unsigned int pc;
2587
2588         pc = preempt_count();
2589
2590         if (pc & NMI_MASK)
2591                 trace_flags |= TRACE_FLAG_NMI;
2592         if (pc & HARDIRQ_MASK)
2593                 trace_flags |= TRACE_FLAG_HARDIRQ;
2594         if (in_serving_softirq())
2595                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2596
2597         if (tif_need_resched())
2598                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2599         if (test_preempt_need_resched())
2600                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2601         return (trace_flags << 16) | (pc & 0xff);
2602 }
2603
2604 struct ring_buffer_event *
2605 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2606                           int type,
2607                           unsigned long len,
2608                           unsigned int trace_ctx)
2609 {
2610         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2611 }
2612
2613 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2614 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2615 static int trace_buffered_event_ref;
2616
2617 /**
2618  * trace_buffered_event_enable - enable buffering events
2619  *
2620  * When events are being filtered, it is quicker to use a temporary
2621  * buffer to write the event data into if there's a likely chance
2622  * that it will not be committed. The discard of the ring buffer
2623  * is not as fast as committing, and is much slower than copying
2624  * a commit.
2625  *
2626  * When an event is to be filtered, allocate per cpu buffers to
2627  * write the event data into, and if the event is filtered and discarded
2628  * it is simply dropped, otherwise, the entire data is to be committed
2629  * in one shot.
2630  */
2631 void trace_buffered_event_enable(void)
2632 {
2633         struct ring_buffer_event *event;
2634         struct page *page;
2635         int cpu;
2636
2637         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2638
2639         if (trace_buffered_event_ref++)
2640                 return;
2641
2642         for_each_tracing_cpu(cpu) {
2643                 page = alloc_pages_node(cpu_to_node(cpu),
2644                                         GFP_KERNEL | __GFP_NORETRY, 0);
2645                 if (!page)
2646                         goto failed;
2647
2648                 event = page_address(page);
2649                 memset(event, 0, sizeof(*event));
2650
2651                 per_cpu(trace_buffered_event, cpu) = event;
2652
2653                 preempt_disable();
2654                 if (cpu == smp_processor_id() &&
2655                     __this_cpu_read(trace_buffered_event) !=
2656                     per_cpu(trace_buffered_event, cpu))
2657                         WARN_ON_ONCE(1);
2658                 preempt_enable();
2659         }
2660
2661         return;
2662  failed:
2663         trace_buffered_event_disable();
2664 }
2665
2666 static void enable_trace_buffered_event(void *data)
2667 {
2668         /* Probably not needed, but do it anyway */
2669         smp_rmb();
2670         this_cpu_dec(trace_buffered_event_cnt);
2671 }
2672
2673 static void disable_trace_buffered_event(void *data)
2674 {
2675         this_cpu_inc(trace_buffered_event_cnt);
2676 }
2677
2678 /**
2679  * trace_buffered_event_disable - disable buffering events
2680  *
2681  * When a filter is removed, it is faster to not use the buffered
2682  * events, and to commit directly into the ring buffer. Free up
2683  * the temp buffers when there are no more users. This requires
2684  * special synchronization with current events.
2685  */
2686 void trace_buffered_event_disable(void)
2687 {
2688         int cpu;
2689
2690         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2691
2692         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2693                 return;
2694
2695         if (--trace_buffered_event_ref)
2696                 return;
2697
2698         preempt_disable();
2699         /* For each CPU, set the buffer as used. */
2700         smp_call_function_many(tracing_buffer_mask,
2701                                disable_trace_buffered_event, NULL, 1);
2702         preempt_enable();
2703
2704         /* Wait for all current users to finish */
2705         synchronize_rcu();
2706
2707         for_each_tracing_cpu(cpu) {
2708                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2709                 per_cpu(trace_buffered_event, cpu) = NULL;
2710         }
2711         /*
2712          * Make sure trace_buffered_event is NULL before clearing
2713          * trace_buffered_event_cnt.
2714          */
2715         smp_wmb();
2716
2717         preempt_disable();
2718         /* Do the work on each cpu */
2719         smp_call_function_many(tracing_buffer_mask,
2720                                enable_trace_buffered_event, NULL, 1);
2721         preempt_enable();
2722 }
2723
2724 static struct trace_buffer *temp_buffer;
2725
2726 struct ring_buffer_event *
2727 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2728                           struct trace_event_file *trace_file,
2729                           int type, unsigned long len,
2730                           unsigned int trace_ctx)
2731 {
2732         struct ring_buffer_event *entry;
2733         int val;
2734
2735         *current_rb = trace_file->tr->array_buffer.buffer;
2736
2737         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2738              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2739             (entry = this_cpu_read(trace_buffered_event))) {
2740                 /* Try to use the per cpu buffer first */
2741                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2742                 if (val == 1) {
2743                         trace_event_setup(entry, type, trace_ctx);
2744                         entry->array[0] = len;
2745                         return entry;
2746                 }
2747                 this_cpu_dec(trace_buffered_event_cnt);
2748         }
2749
2750         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2751                                             trace_ctx);
2752         /*
2753          * If tracing is off, but we have triggers enabled
2754          * we still need to look at the event data. Use the temp_buffer
2755          * to store the trace event for the trigger to use. It's recursive
2756          * safe and will not be recorded anywhere.
2757          */
2758         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2759                 *current_rb = temp_buffer;
2760                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2761                                                     trace_ctx);
2762         }
2763         return entry;
2764 }
2765 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2766
2767 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2768 static DEFINE_MUTEX(tracepoint_printk_mutex);
2769
2770 static void output_printk(struct trace_event_buffer *fbuffer)
2771 {
2772         struct trace_event_call *event_call;
2773         struct trace_event_file *file;
2774         struct trace_event *event;
2775         unsigned long flags;
2776         struct trace_iterator *iter = tracepoint_print_iter;
2777
2778         /* We should never get here if iter is NULL */
2779         if (WARN_ON_ONCE(!iter))
2780                 return;
2781
2782         event_call = fbuffer->trace_file->event_call;
2783         if (!event_call || !event_call->event.funcs ||
2784             !event_call->event.funcs->trace)
2785                 return;
2786
2787         file = fbuffer->trace_file;
2788         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2789             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2790              !filter_match_preds(file->filter, fbuffer->entry)))
2791                 return;
2792
2793         event = &fbuffer->trace_file->event_call->event;
2794
2795         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2796         trace_seq_init(&iter->seq);
2797         iter->ent = fbuffer->entry;
2798         event_call->event.funcs->trace(iter, 0, event);
2799         trace_seq_putc(&iter->seq, 0);
2800         printk("%s", iter->seq.buffer);
2801
2802         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2803 }
2804
2805 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2806                              void *buffer, size_t *lenp,
2807                              loff_t *ppos)
2808 {
2809         int save_tracepoint_printk;
2810         int ret;
2811
2812         mutex_lock(&tracepoint_printk_mutex);
2813         save_tracepoint_printk = tracepoint_printk;
2814
2815         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2816
2817         /*
2818          * This will force exiting early, as tracepoint_printk
2819          * is always zero when tracepoint_printk_iter is not allocated
2820          */
2821         if (!tracepoint_print_iter)
2822                 tracepoint_printk = 0;
2823
2824         if (save_tracepoint_printk == tracepoint_printk)
2825                 goto out;
2826
2827         if (tracepoint_printk)
2828                 static_key_enable(&tracepoint_printk_key.key);
2829         else
2830                 static_key_disable(&tracepoint_printk_key.key);
2831
2832  out:
2833         mutex_unlock(&tracepoint_printk_mutex);
2834
2835         return ret;
2836 }
2837
2838 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2839 {
2840         if (static_key_false(&tracepoint_printk_key.key))
2841                 output_printk(fbuffer);
2842
2843         if (static_branch_unlikely(&trace_event_exports_enabled))
2844                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2845         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2846                                     fbuffer->event, fbuffer->entry,
2847                                     fbuffer->trace_ctx, fbuffer->regs);
2848 }
2849 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2850
2851 /*
2852  * Skip 3:
2853  *
2854  *   trace_buffer_unlock_commit_regs()
2855  *   trace_event_buffer_commit()
2856  *   trace_event_raw_event_xxx()
2857  */
2858 # define STACK_SKIP 3
2859
2860 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2861                                      struct trace_buffer *buffer,
2862                                      struct ring_buffer_event *event,
2863                                      unsigned int trace_ctx,
2864                                      struct pt_regs *regs)
2865 {
2866         __buffer_unlock_commit(buffer, event);
2867
2868         /*
2869          * If regs is not set, then skip the necessary functions.
2870          * Note, we can still get here via blktrace, wakeup tracer
2871          * and mmiotrace, but that's ok if they lose a function or
2872          * two. They are not that meaningful.
2873          */
2874         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2875         ftrace_trace_userstack(tr, buffer, trace_ctx);
2876 }
2877
2878 /*
2879  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2880  */
2881 void
2882 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2883                                    struct ring_buffer_event *event)
2884 {
2885         __buffer_unlock_commit(buffer, event);
2886 }
2887
2888 void
2889 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2890                parent_ip, unsigned int trace_ctx)
2891 {
2892         struct trace_event_call *call = &event_function;
2893         struct trace_buffer *buffer = tr->array_buffer.buffer;
2894         struct ring_buffer_event *event;
2895         struct ftrace_entry *entry;
2896
2897         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2898                                             trace_ctx);
2899         if (!event)
2900                 return;
2901         entry   = ring_buffer_event_data(event);
2902         entry->ip                       = ip;
2903         entry->parent_ip                = parent_ip;
2904
2905         if (!call_filter_check_discard(call, entry, buffer, event)) {
2906                 if (static_branch_unlikely(&trace_function_exports_enabled))
2907                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2908                 __buffer_unlock_commit(buffer, event);
2909         }
2910 }
2911
2912 #ifdef CONFIG_STACKTRACE
2913
2914 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2915 #define FTRACE_KSTACK_NESTING   4
2916
2917 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2918
2919 struct ftrace_stack {
2920         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2921 };
2922
2923
2924 struct ftrace_stacks {
2925         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2926 };
2927
2928 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2929 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2930
2931 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2932                                  unsigned int trace_ctx,
2933                                  int skip, struct pt_regs *regs)
2934 {
2935         struct trace_event_call *call = &event_kernel_stack;
2936         struct ring_buffer_event *event;
2937         unsigned int size, nr_entries;
2938         struct ftrace_stack *fstack;
2939         struct stack_entry *entry;
2940         int stackidx;
2941
2942         /*
2943          * Add one, for this function and the call to save_stack_trace()
2944          * If regs is set, then these functions will not be in the way.
2945          */
2946 #ifndef CONFIG_UNWINDER_ORC
2947         if (!regs)
2948                 skip++;
2949 #endif
2950
2951         preempt_disable_notrace();
2952
2953         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2954
2955         /* This should never happen. If it does, yell once and skip */
2956         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2957                 goto out;
2958
2959         /*
2960          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2961          * interrupt will either see the value pre increment or post
2962          * increment. If the interrupt happens pre increment it will have
2963          * restored the counter when it returns.  We just need a barrier to
2964          * keep gcc from moving things around.
2965          */
2966         barrier();
2967
2968         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2969         size = ARRAY_SIZE(fstack->calls);
2970
2971         if (regs) {
2972                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2973                                                    size, skip);
2974         } else {
2975                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2976         }
2977
2978         size = nr_entries * sizeof(unsigned long);
2979         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2980                                             sizeof(*entry) + size, trace_ctx);
2981         if (!event)
2982                 goto out;
2983         entry = ring_buffer_event_data(event);
2984
2985         memcpy(&entry->caller, fstack->calls, size);
2986         entry->size = nr_entries;
2987
2988         if (!call_filter_check_discard(call, entry, buffer, event))
2989                 __buffer_unlock_commit(buffer, event);
2990
2991  out:
2992         /* Again, don't let gcc optimize things here */
2993         barrier();
2994         __this_cpu_dec(ftrace_stack_reserve);
2995         preempt_enable_notrace();
2996
2997 }
2998
2999 static inline void ftrace_trace_stack(struct trace_array *tr,
3000                                       struct trace_buffer *buffer,
3001                                       unsigned int trace_ctx,
3002                                       int skip, struct pt_regs *regs)
3003 {
3004         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3005                 return;
3006
3007         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3008 }
3009
3010 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3011                    int skip)
3012 {
3013         struct trace_buffer *buffer = tr->array_buffer.buffer;
3014
3015         if (rcu_is_watching()) {
3016                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3017                 return;
3018         }
3019
3020         /*
3021          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3022          * but if the above rcu_is_watching() failed, then the NMI
3023          * triggered someplace critical, and rcu_irq_enter() should
3024          * not be called from NMI.
3025          */
3026         if (unlikely(in_nmi()))
3027                 return;
3028
3029         rcu_irq_enter_irqson();
3030         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3031         rcu_irq_exit_irqson();
3032 }
3033
3034 /**
3035  * trace_dump_stack - record a stack back trace in the trace buffer
3036  * @skip: Number of functions to skip (helper handlers)
3037  */
3038 void trace_dump_stack(int skip)
3039 {
3040         if (tracing_disabled || tracing_selftest_running)
3041                 return;
3042
3043 #ifndef CONFIG_UNWINDER_ORC
3044         /* Skip 1 to skip this function. */
3045         skip++;
3046 #endif
3047         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3048                              tracing_gen_ctx(), skip, NULL);
3049 }
3050 EXPORT_SYMBOL_GPL(trace_dump_stack);
3051
3052 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3053 static DEFINE_PER_CPU(int, user_stack_count);
3054
3055 static void
3056 ftrace_trace_userstack(struct trace_array *tr,
3057                        struct trace_buffer *buffer, unsigned int trace_ctx)
3058 {
3059         struct trace_event_call *call = &event_user_stack;
3060         struct ring_buffer_event *event;
3061         struct userstack_entry *entry;
3062
3063         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3064                 return;
3065
3066         /*
3067          * NMIs can not handle page faults, even with fix ups.
3068          * The save user stack can (and often does) fault.
3069          */
3070         if (unlikely(in_nmi()))
3071                 return;
3072
3073         /*
3074          * prevent recursion, since the user stack tracing may
3075          * trigger other kernel events.
3076          */
3077         preempt_disable();
3078         if (__this_cpu_read(user_stack_count))
3079                 goto out;
3080
3081         __this_cpu_inc(user_stack_count);
3082
3083         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3084                                             sizeof(*entry), trace_ctx);
3085         if (!event)
3086                 goto out_drop_count;
3087         entry   = ring_buffer_event_data(event);
3088
3089         entry->tgid             = current->tgid;
3090         memset(&entry->caller, 0, sizeof(entry->caller));
3091
3092         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3093         if (!call_filter_check_discard(call, entry, buffer, event))
3094                 __buffer_unlock_commit(buffer, event);
3095
3096  out_drop_count:
3097         __this_cpu_dec(user_stack_count);
3098  out:
3099         preempt_enable();
3100 }
3101 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3102 static void ftrace_trace_userstack(struct trace_array *tr,
3103                                    struct trace_buffer *buffer,
3104                                    unsigned int trace_ctx)
3105 {
3106 }
3107 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3108
3109 #endif /* CONFIG_STACKTRACE */
3110
3111 /* created for use with alloc_percpu */
3112 struct trace_buffer_struct {
3113         int nesting;
3114         char buffer[4][TRACE_BUF_SIZE];
3115 };
3116
3117 static struct trace_buffer_struct *trace_percpu_buffer;
3118
3119 /*
3120  * This allows for lockless recording.  If we're nested too deeply, then
3121  * this returns NULL.
3122  */
3123 static char *get_trace_buf(void)
3124 {
3125         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3126
3127         if (!buffer || buffer->nesting >= 4)
3128                 return NULL;
3129
3130         buffer->nesting++;
3131
3132         /* Interrupts must see nesting incremented before we use the buffer */
3133         barrier();
3134         return &buffer->buffer[buffer->nesting - 1][0];
3135 }
3136
3137 static void put_trace_buf(void)
3138 {
3139         /* Don't let the decrement of nesting leak before this */
3140         barrier();
3141         this_cpu_dec(trace_percpu_buffer->nesting);
3142 }
3143
3144 static int alloc_percpu_trace_buffer(void)
3145 {
3146         struct trace_buffer_struct *buffers;
3147
3148         if (trace_percpu_buffer)
3149                 return 0;
3150
3151         buffers = alloc_percpu(struct trace_buffer_struct);
3152         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3153                 return -ENOMEM;
3154
3155         trace_percpu_buffer = buffers;
3156         return 0;
3157 }
3158
3159 static int buffers_allocated;
3160
3161 void trace_printk_init_buffers(void)
3162 {
3163         if (buffers_allocated)
3164                 return;
3165
3166         if (alloc_percpu_trace_buffer())
3167                 return;
3168
3169         /* trace_printk() is for debug use only. Don't use it in production. */
3170
3171         pr_warn("\n");
3172         pr_warn("**********************************************************\n");
3173         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3174         pr_warn("**                                                      **\n");
3175         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3176         pr_warn("**                                                      **\n");
3177         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3178         pr_warn("** unsafe for production use.                           **\n");
3179         pr_warn("**                                                      **\n");
3180         pr_warn("** If you see this message and you are not debugging    **\n");
3181         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3182         pr_warn("**                                                      **\n");
3183         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3184         pr_warn("**********************************************************\n");
3185
3186         /* Expand the buffers to set size */
3187         tracing_update_buffers();
3188
3189         buffers_allocated = 1;
3190
3191         /*
3192          * trace_printk_init_buffers() can be called by modules.
3193          * If that happens, then we need to start cmdline recording
3194          * directly here. If the global_trace.buffer is already
3195          * allocated here, then this was called by module code.
3196          */
3197         if (global_trace.array_buffer.buffer)
3198                 tracing_start_cmdline_record();
3199 }
3200 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3201
3202 void trace_printk_start_comm(void)
3203 {
3204         /* Start tracing comms if trace printk is set */
3205         if (!buffers_allocated)
3206                 return;
3207         tracing_start_cmdline_record();
3208 }
3209
3210 static void trace_printk_start_stop_comm(int enabled)
3211 {
3212         if (!buffers_allocated)
3213                 return;
3214
3215         if (enabled)
3216                 tracing_start_cmdline_record();
3217         else
3218                 tracing_stop_cmdline_record();
3219 }
3220
3221 /**
3222  * trace_vbprintk - write binary msg to tracing buffer
3223  * @ip:    The address of the caller
3224  * @fmt:   The string format to write to the buffer
3225  * @args:  Arguments for @fmt
3226  */
3227 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3228 {
3229         struct trace_event_call *call = &event_bprint;
3230         struct ring_buffer_event *event;
3231         struct trace_buffer *buffer;
3232         struct trace_array *tr = &global_trace;
3233         struct bprint_entry *entry;
3234         unsigned int trace_ctx;
3235         char *tbuffer;
3236         int len = 0, size;
3237
3238         if (unlikely(tracing_selftest_running || tracing_disabled))
3239                 return 0;
3240
3241         /* Don't pollute graph traces with trace_vprintk internals */
3242         pause_graph_tracing();
3243
3244         trace_ctx = tracing_gen_ctx();
3245         preempt_disable_notrace();
3246
3247         tbuffer = get_trace_buf();
3248         if (!tbuffer) {
3249                 len = 0;
3250                 goto out_nobuffer;
3251         }
3252
3253         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3254
3255         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3256                 goto out_put;
3257
3258         size = sizeof(*entry) + sizeof(u32) * len;
3259         buffer = tr->array_buffer.buffer;
3260         ring_buffer_nest_start(buffer);
3261         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3262                                             trace_ctx);
3263         if (!event)
3264                 goto out;
3265         entry = ring_buffer_event_data(event);
3266         entry->ip                       = ip;
3267         entry->fmt                      = fmt;
3268
3269         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3270         if (!call_filter_check_discard(call, entry, buffer, event)) {
3271                 __buffer_unlock_commit(buffer, event);
3272                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3273         }
3274
3275 out:
3276         ring_buffer_nest_end(buffer);
3277 out_put:
3278         put_trace_buf();
3279
3280 out_nobuffer:
3281         preempt_enable_notrace();
3282         unpause_graph_tracing();
3283
3284         return len;
3285 }
3286 EXPORT_SYMBOL_GPL(trace_vbprintk);
3287
3288 __printf(3, 0)
3289 static int
3290 __trace_array_vprintk(struct trace_buffer *buffer,
3291                       unsigned long ip, const char *fmt, va_list args)
3292 {
3293         struct trace_event_call *call = &event_print;
3294         struct ring_buffer_event *event;
3295         int len = 0, size;
3296         struct print_entry *entry;
3297         unsigned int trace_ctx;
3298         char *tbuffer;
3299
3300         if (tracing_disabled || tracing_selftest_running)
3301                 return 0;
3302
3303         /* Don't pollute graph traces with trace_vprintk internals */
3304         pause_graph_tracing();
3305
3306         trace_ctx = tracing_gen_ctx();
3307         preempt_disable_notrace();
3308
3309
3310         tbuffer = get_trace_buf();
3311         if (!tbuffer) {
3312                 len = 0;
3313                 goto out_nobuffer;
3314         }
3315
3316         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3317
3318         size = sizeof(*entry) + len + 1;
3319         ring_buffer_nest_start(buffer);
3320         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3321                                             trace_ctx);
3322         if (!event)
3323                 goto out;
3324         entry = ring_buffer_event_data(event);
3325         entry->ip = ip;
3326
3327         memcpy(&entry->buf, tbuffer, len + 1);
3328         if (!call_filter_check_discard(call, entry, buffer, event)) {
3329                 __buffer_unlock_commit(buffer, event);
3330                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3331         }
3332
3333 out:
3334         ring_buffer_nest_end(buffer);
3335         put_trace_buf();
3336
3337 out_nobuffer:
3338         preempt_enable_notrace();
3339         unpause_graph_tracing();
3340
3341         return len;
3342 }
3343
3344 __printf(3, 0)
3345 int trace_array_vprintk(struct trace_array *tr,
3346                         unsigned long ip, const char *fmt, va_list args)
3347 {
3348         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3349 }
3350
3351 /**
3352  * trace_array_printk - Print a message to a specific instance
3353  * @tr: The instance trace_array descriptor
3354  * @ip: The instruction pointer that this is called from.
3355  * @fmt: The format to print (printf format)
3356  *
3357  * If a subsystem sets up its own instance, they have the right to
3358  * printk strings into their tracing instance buffer using this
3359  * function. Note, this function will not write into the top level
3360  * buffer (use trace_printk() for that), as writing into the top level
3361  * buffer should only have events that can be individually disabled.
3362  * trace_printk() is only used for debugging a kernel, and should not
3363  * be ever encorporated in normal use.
3364  *
3365  * trace_array_printk() can be used, as it will not add noise to the
3366  * top level tracing buffer.
3367  *
3368  * Note, trace_array_init_printk() must be called on @tr before this
3369  * can be used.
3370  */
3371 __printf(3, 0)
3372 int trace_array_printk(struct trace_array *tr,
3373                        unsigned long ip, const char *fmt, ...)
3374 {
3375         int ret;
3376         va_list ap;
3377
3378         if (!tr)
3379                 return -ENOENT;
3380
3381         /* This is only allowed for created instances */
3382         if (tr == &global_trace)
3383                 return 0;
3384
3385         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3386                 return 0;
3387
3388         va_start(ap, fmt);
3389         ret = trace_array_vprintk(tr, ip, fmt, ap);
3390         va_end(ap);
3391         return ret;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_array_printk);
3394
3395 /**
3396  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3397  * @tr: The trace array to initialize the buffers for
3398  *
3399  * As trace_array_printk() only writes into instances, they are OK to
3400  * have in the kernel (unlike trace_printk()). This needs to be called
3401  * before trace_array_printk() can be used on a trace_array.
3402  */
3403 int trace_array_init_printk(struct trace_array *tr)
3404 {
3405         if (!tr)
3406                 return -ENOENT;
3407
3408         /* This is only allowed for created instances */
3409         if (tr == &global_trace)
3410                 return -EINVAL;
3411
3412         return alloc_percpu_trace_buffer();
3413 }
3414 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3415
3416 __printf(3, 4)
3417 int trace_array_printk_buf(struct trace_buffer *buffer,
3418                            unsigned long ip, const char *fmt, ...)
3419 {
3420         int ret;
3421         va_list ap;
3422
3423         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3424                 return 0;
3425
3426         va_start(ap, fmt);
3427         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3428         va_end(ap);
3429         return ret;
3430 }
3431
3432 __printf(2, 0)
3433 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3434 {
3435         return trace_array_vprintk(&global_trace, ip, fmt, args);
3436 }
3437 EXPORT_SYMBOL_GPL(trace_vprintk);
3438
3439 static void trace_iterator_increment(struct trace_iterator *iter)
3440 {
3441         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3442
3443         iter->idx++;
3444         if (buf_iter)
3445                 ring_buffer_iter_advance(buf_iter);
3446 }
3447
3448 static struct trace_entry *
3449 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3450                 unsigned long *lost_events)
3451 {
3452         struct ring_buffer_event *event;
3453         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3454
3455         if (buf_iter) {
3456                 event = ring_buffer_iter_peek(buf_iter, ts);
3457                 if (lost_events)
3458                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3459                                 (unsigned long)-1 : 0;
3460         } else {
3461                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3462                                          lost_events);
3463         }
3464
3465         if (event) {
3466                 iter->ent_size = ring_buffer_event_length(event);
3467                 return ring_buffer_event_data(event);
3468         }
3469         iter->ent_size = 0;
3470         return NULL;
3471 }
3472
3473 static struct trace_entry *
3474 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3475                   unsigned long *missing_events, u64 *ent_ts)
3476 {
3477         struct trace_buffer *buffer = iter->array_buffer->buffer;
3478         struct trace_entry *ent, *next = NULL;
3479         unsigned long lost_events = 0, next_lost = 0;
3480         int cpu_file = iter->cpu_file;
3481         u64 next_ts = 0, ts;
3482         int next_cpu = -1;
3483         int next_size = 0;
3484         int cpu;
3485
3486         /*
3487          * If we are in a per_cpu trace file, don't bother by iterating over
3488          * all cpu and peek directly.
3489          */
3490         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3491                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3492                         return NULL;
3493                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3494                 if (ent_cpu)
3495                         *ent_cpu = cpu_file;
3496
3497                 return ent;
3498         }
3499
3500         for_each_tracing_cpu(cpu) {
3501
3502                 if (ring_buffer_empty_cpu(buffer, cpu))
3503                         continue;
3504
3505                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3506
3507                 /*
3508                  * Pick the entry with the smallest timestamp:
3509                  */
3510                 if (ent && (!next || ts < next_ts)) {
3511                         next = ent;
3512                         next_cpu = cpu;
3513                         next_ts = ts;
3514                         next_lost = lost_events;
3515                         next_size = iter->ent_size;
3516                 }
3517         }
3518
3519         iter->ent_size = next_size;
3520
3521         if (ent_cpu)
3522                 *ent_cpu = next_cpu;
3523
3524         if (ent_ts)
3525                 *ent_ts = next_ts;
3526
3527         if (missing_events)
3528                 *missing_events = next_lost;
3529
3530         return next;
3531 }
3532
3533 #define STATIC_TEMP_BUF_SIZE    128
3534 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3535
3536 /* Find the next real entry, without updating the iterator itself */
3537 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3538                                           int *ent_cpu, u64 *ent_ts)
3539 {
3540         /* __find_next_entry will reset ent_size */
3541         int ent_size = iter->ent_size;
3542         struct trace_entry *entry;
3543
3544         /*
3545          * If called from ftrace_dump(), then the iter->temp buffer
3546          * will be the static_temp_buf and not created from kmalloc.
3547          * If the entry size is greater than the buffer, we can
3548          * not save it. Just return NULL in that case. This is only
3549          * used to add markers when two consecutive events' time
3550          * stamps have a large delta. See trace_print_lat_context()
3551          */
3552         if (iter->temp == static_temp_buf &&
3553             STATIC_TEMP_BUF_SIZE < ent_size)
3554                 return NULL;
3555
3556         /*
3557          * The __find_next_entry() may call peek_next_entry(), which may
3558          * call ring_buffer_peek() that may make the contents of iter->ent
3559          * undefined. Need to copy iter->ent now.
3560          */
3561         if (iter->ent && iter->ent != iter->temp) {
3562                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3563                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3564                         void *temp;
3565                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3566                         if (!temp)
3567                                 return NULL;
3568                         kfree(iter->temp);
3569                         iter->temp = temp;
3570                         iter->temp_size = iter->ent_size;
3571                 }
3572                 memcpy(iter->temp, iter->ent, iter->ent_size);
3573                 iter->ent = iter->temp;
3574         }
3575         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3576         /* Put back the original ent_size */
3577         iter->ent_size = ent_size;
3578
3579         return entry;
3580 }
3581
3582 /* Find the next real entry, and increment the iterator to the next entry */
3583 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3584 {
3585         iter->ent = __find_next_entry(iter, &iter->cpu,
3586                                       &iter->lost_events, &iter->ts);
3587
3588         if (iter->ent)
3589                 trace_iterator_increment(iter);
3590
3591         return iter->ent ? iter : NULL;
3592 }
3593
3594 static void trace_consume(struct trace_iterator *iter)
3595 {
3596         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3597                             &iter->lost_events);
3598 }
3599
3600 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3601 {
3602         struct trace_iterator *iter = m->private;
3603         int i = (int)*pos;
3604         void *ent;
3605
3606         WARN_ON_ONCE(iter->leftover);
3607
3608         (*pos)++;
3609
3610         /* can't go backwards */
3611         if (iter->idx > i)
3612                 return NULL;
3613
3614         if (iter->idx < 0)
3615                 ent = trace_find_next_entry_inc(iter);
3616         else
3617                 ent = iter;
3618
3619         while (ent && iter->idx < i)
3620                 ent = trace_find_next_entry_inc(iter);
3621
3622         iter->pos = *pos;
3623
3624         return ent;
3625 }
3626
3627 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3628 {
3629         struct ring_buffer_iter *buf_iter;
3630         unsigned long entries = 0;
3631         u64 ts;
3632
3633         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3634
3635         buf_iter = trace_buffer_iter(iter, cpu);
3636         if (!buf_iter)
3637                 return;
3638
3639         ring_buffer_iter_reset(buf_iter);
3640
3641         /*
3642          * We could have the case with the max latency tracers
3643          * that a reset never took place on a cpu. This is evident
3644          * by the timestamp being before the start of the buffer.
3645          */
3646         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3647                 if (ts >= iter->array_buffer->time_start)
3648                         break;
3649                 entries++;
3650                 ring_buffer_iter_advance(buf_iter);
3651         }
3652
3653         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3654 }
3655
3656 /*
3657  * The current tracer is copied to avoid a global locking
3658  * all around.
3659  */
3660 static void *s_start(struct seq_file *m, loff_t *pos)
3661 {
3662         struct trace_iterator *iter = m->private;
3663         struct trace_array *tr = iter->tr;
3664         int cpu_file = iter->cpu_file;
3665         void *p = NULL;
3666         loff_t l = 0;
3667         int cpu;
3668
3669         /*
3670          * copy the tracer to avoid using a global lock all around.
3671          * iter->trace is a copy of current_trace, the pointer to the
3672          * name may be used instead of a strcmp(), as iter->trace->name
3673          * will point to the same string as current_trace->name.
3674          */
3675         mutex_lock(&trace_types_lock);
3676         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3677                 *iter->trace = *tr->current_trace;
3678         mutex_unlock(&trace_types_lock);
3679
3680 #ifdef CONFIG_TRACER_MAX_TRACE
3681         if (iter->snapshot && iter->trace->use_max_tr)
3682                 return ERR_PTR(-EBUSY);
3683 #endif
3684
3685         if (!iter->snapshot)
3686                 atomic_inc(&trace_record_taskinfo_disabled);
3687
3688         if (*pos != iter->pos) {
3689                 iter->ent = NULL;
3690                 iter->cpu = 0;
3691                 iter->idx = -1;
3692
3693                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3694                         for_each_tracing_cpu(cpu)
3695                                 tracing_iter_reset(iter, cpu);
3696                 } else
3697                         tracing_iter_reset(iter, cpu_file);
3698
3699                 iter->leftover = 0;
3700                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3701                         ;
3702
3703         } else {
3704                 /*
3705                  * If we overflowed the seq_file before, then we want
3706                  * to just reuse the trace_seq buffer again.
3707                  */
3708                 if (iter->leftover)
3709                         p = iter;
3710                 else {
3711                         l = *pos - 1;
3712                         p = s_next(m, p, &l);
3713                 }
3714         }
3715
3716         trace_event_read_lock();
3717         trace_access_lock(cpu_file);
3718         return p;
3719 }
3720
3721 static void s_stop(struct seq_file *m, void *p)
3722 {
3723         struct trace_iterator *iter = m->private;
3724
3725 #ifdef CONFIG_TRACER_MAX_TRACE
3726         if (iter->snapshot && iter->trace->use_max_tr)
3727                 return;
3728 #endif
3729
3730         if (!iter->snapshot)
3731                 atomic_dec(&trace_record_taskinfo_disabled);
3732
3733         trace_access_unlock(iter->cpu_file);
3734         trace_event_read_unlock();
3735 }
3736
3737 static void
3738 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3739                       unsigned long *entries, int cpu)
3740 {
3741         unsigned long count;
3742
3743         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3744         /*
3745          * If this buffer has skipped entries, then we hold all
3746          * entries for the trace and we need to ignore the
3747          * ones before the time stamp.
3748          */
3749         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3750                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3751                 /* total is the same as the entries */
3752                 *total = count;
3753         } else
3754                 *total = count +
3755                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3756         *entries = count;
3757 }
3758
3759 static void
3760 get_total_entries(struct array_buffer *buf,
3761                   unsigned long *total, unsigned long *entries)
3762 {
3763         unsigned long t, e;
3764         int cpu;
3765
3766         *total = 0;
3767         *entries = 0;
3768
3769         for_each_tracing_cpu(cpu) {
3770                 get_total_entries_cpu(buf, &t, &e, cpu);
3771                 *total += t;
3772                 *entries += e;
3773         }
3774 }
3775
3776 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3777 {
3778         unsigned long total, entries;
3779
3780         if (!tr)
3781                 tr = &global_trace;
3782
3783         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3784
3785         return entries;
3786 }
3787
3788 unsigned long trace_total_entries(struct trace_array *tr)
3789 {
3790         unsigned long total, entries;
3791
3792         if (!tr)
3793                 tr = &global_trace;
3794
3795         get_total_entries(&tr->array_buffer, &total, &entries);
3796
3797         return entries;
3798 }
3799
3800 static void print_lat_help_header(struct seq_file *m)
3801 {
3802         seq_puts(m, "#                    _------=> CPU#            \n"
3803                     "#                   / _-----=> irqs-off        \n"
3804                     "#                  | / _----=> need-resched    \n"
3805                     "#                  || / _---=> hardirq/softirq \n"
3806                     "#                  ||| / _--=> preempt-depth   \n"
3807                     "#                  |||| /     delay            \n"
3808                     "#  cmd     pid     ||||| time  |   caller      \n"
3809                     "#     \\   /        |||||  \\    |   /         \n");
3810 }
3811
3812 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3813 {
3814         unsigned long total;
3815         unsigned long entries;
3816
3817         get_total_entries(buf, &total, &entries);
3818         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3819                    entries, total, num_online_cpus());
3820         seq_puts(m, "#\n");
3821 }
3822
3823 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3824                                    unsigned int flags)
3825 {
3826         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3827
3828         print_event_info(buf, m);
3829
3830         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3831         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3832 }
3833
3834 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3835                                        unsigned int flags)
3836 {
3837         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3838         const char *space = "            ";
3839         int prec = tgid ? 12 : 2;
3840
3841         print_event_info(buf, m);
3842
3843         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3844         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3845         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3846         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3847         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3848         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3849         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3850 }
3851
3852 void
3853 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3854 {
3855         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3856         struct array_buffer *buf = iter->array_buffer;
3857         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3858         struct tracer *type = iter->trace;
3859         unsigned long entries;
3860         unsigned long total;
3861         const char *name = "preemption";
3862
3863         name = type->name;
3864
3865         get_total_entries(buf, &total, &entries);
3866
3867         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3868                    name, UTS_RELEASE);
3869         seq_puts(m, "# -----------------------------------"
3870                  "---------------------------------\n");
3871         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3872                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3873                    nsecs_to_usecs(data->saved_latency),
3874                    entries,
3875                    total,
3876                    buf->cpu,
3877 #if defined(CONFIG_PREEMPT_NONE)
3878                    "server",
3879 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3880                    "desktop",
3881 #elif defined(CONFIG_PREEMPT)
3882                    "preempt",
3883 #elif defined(CONFIG_PREEMPT_RT)
3884                    "preempt_rt",
3885 #else
3886                    "unknown",
3887 #endif
3888                    /* These are reserved for later use */
3889                    0, 0, 0, 0);
3890 #ifdef CONFIG_SMP
3891         seq_printf(m, " #P:%d)\n", num_online_cpus());
3892 #else
3893         seq_puts(m, ")\n");
3894 #endif
3895         seq_puts(m, "#    -----------------\n");
3896         seq_printf(m, "#    | task: %.16s-%d "
3897                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3898                    data->comm, data->pid,
3899                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3900                    data->policy, data->rt_priority);
3901         seq_puts(m, "#    -----------------\n");
3902
3903         if (data->critical_start) {
3904                 seq_puts(m, "#  => started at: ");
3905                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3906                 trace_print_seq(m, &iter->seq);
3907                 seq_puts(m, "\n#  => ended at:   ");
3908                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3909                 trace_print_seq(m, &iter->seq);
3910                 seq_puts(m, "\n#\n");
3911         }
3912
3913         seq_puts(m, "#\n");
3914 }
3915
3916 static void test_cpu_buff_start(struct trace_iterator *iter)
3917 {
3918         struct trace_seq *s = &iter->seq;
3919         struct trace_array *tr = iter->tr;
3920
3921         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3922                 return;
3923
3924         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3925                 return;
3926
3927         if (cpumask_available(iter->started) &&
3928             cpumask_test_cpu(iter->cpu, iter->started))
3929                 return;
3930
3931         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3932                 return;
3933
3934         if (cpumask_available(iter->started))
3935                 cpumask_set_cpu(iter->cpu, iter->started);
3936
3937         /* Don't print started cpu buffer for the first entry of the trace */
3938         if (iter->idx > 1)
3939                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3940                                 iter->cpu);
3941 }
3942
3943 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3944 {
3945         struct trace_array *tr = iter->tr;
3946         struct trace_seq *s = &iter->seq;
3947         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3948         struct trace_entry *entry;
3949         struct trace_event *event;
3950
3951         entry = iter->ent;
3952
3953         test_cpu_buff_start(iter);
3954
3955         event = ftrace_find_event(entry->type);
3956
3957         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3958                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3959                         trace_print_lat_context(iter);
3960                 else
3961                         trace_print_context(iter);
3962         }
3963
3964         if (trace_seq_has_overflowed(s))
3965                 return TRACE_TYPE_PARTIAL_LINE;
3966
3967         if (event)
3968                 return event->funcs->trace(iter, sym_flags, event);
3969
3970         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3971
3972         return trace_handle_return(s);
3973 }
3974
3975 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3976 {
3977         struct trace_array *tr = iter->tr;
3978         struct trace_seq *s = &iter->seq;
3979         struct trace_entry *entry;
3980         struct trace_event *event;
3981
3982         entry = iter->ent;
3983
3984         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3985                 trace_seq_printf(s, "%d %d %llu ",
3986                                  entry->pid, iter->cpu, iter->ts);
3987
3988         if (trace_seq_has_overflowed(s))
3989                 return TRACE_TYPE_PARTIAL_LINE;
3990
3991         event = ftrace_find_event(entry->type);
3992         if (event)
3993                 return event->funcs->raw(iter, 0, event);
3994
3995         trace_seq_printf(s, "%d ?\n", entry->type);
3996
3997         return trace_handle_return(s);
3998 }
3999
4000 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4001 {
4002         struct trace_array *tr = iter->tr;
4003         struct trace_seq *s = &iter->seq;
4004         unsigned char newline = '\n';
4005         struct trace_entry *entry;
4006         struct trace_event *event;
4007
4008         entry = iter->ent;
4009
4010         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4011                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4012                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4013                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4014                 if (trace_seq_has_overflowed(s))
4015                         return TRACE_TYPE_PARTIAL_LINE;
4016         }
4017
4018         event = ftrace_find_event(entry->type);
4019         if (event) {
4020                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4021                 if (ret != TRACE_TYPE_HANDLED)
4022                         return ret;
4023         }
4024
4025         SEQ_PUT_FIELD(s, newline);
4026
4027         return trace_handle_return(s);
4028 }
4029
4030 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4031 {
4032         struct trace_array *tr = iter->tr;
4033         struct trace_seq *s = &iter->seq;
4034         struct trace_entry *entry;
4035         struct trace_event *event;
4036
4037         entry = iter->ent;
4038
4039         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4040                 SEQ_PUT_FIELD(s, entry->pid);
4041                 SEQ_PUT_FIELD(s, iter->cpu);
4042                 SEQ_PUT_FIELD(s, iter->ts);
4043                 if (trace_seq_has_overflowed(s))
4044                         return TRACE_TYPE_PARTIAL_LINE;
4045         }
4046
4047         event = ftrace_find_event(entry->type);
4048         return event ? event->funcs->binary(iter, 0, event) :
4049                 TRACE_TYPE_HANDLED;
4050 }
4051
4052 int trace_empty(struct trace_iterator *iter)
4053 {
4054         struct ring_buffer_iter *buf_iter;
4055         int cpu;
4056
4057         /* If we are looking at one CPU buffer, only check that one */
4058         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4059                 cpu = iter->cpu_file;
4060                 buf_iter = trace_buffer_iter(iter, cpu);
4061                 if (buf_iter) {
4062                         if (!ring_buffer_iter_empty(buf_iter))
4063                                 return 0;
4064                 } else {
4065                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4066                                 return 0;
4067                 }
4068                 return 1;
4069         }
4070
4071         for_each_tracing_cpu(cpu) {
4072                 buf_iter = trace_buffer_iter(iter, cpu);
4073                 if (buf_iter) {
4074                         if (!ring_buffer_iter_empty(buf_iter))
4075                                 return 0;
4076                 } else {
4077                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4078                                 return 0;
4079                 }
4080         }
4081
4082         return 1;
4083 }
4084
4085 /*  Called with trace_event_read_lock() held. */
4086 enum print_line_t print_trace_line(struct trace_iterator *iter)
4087 {
4088         struct trace_array *tr = iter->tr;
4089         unsigned long trace_flags = tr->trace_flags;
4090         enum print_line_t ret;
4091
4092         if (iter->lost_events) {
4093                 if (iter->lost_events == (unsigned long)-1)
4094                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4095                                          iter->cpu);
4096                 else
4097                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4098                                          iter->cpu, iter->lost_events);
4099                 if (trace_seq_has_overflowed(&iter->seq))
4100                         return TRACE_TYPE_PARTIAL_LINE;
4101         }
4102
4103         if (iter->trace && iter->trace->print_line) {
4104                 ret = iter->trace->print_line(iter);
4105                 if (ret != TRACE_TYPE_UNHANDLED)
4106                         return ret;
4107         }
4108
4109         if (iter->ent->type == TRACE_BPUTS &&
4110                         trace_flags & TRACE_ITER_PRINTK &&
4111                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4112                 return trace_print_bputs_msg_only(iter);
4113
4114         if (iter->ent->type == TRACE_BPRINT &&
4115                         trace_flags & TRACE_ITER_PRINTK &&
4116                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4117                 return trace_print_bprintk_msg_only(iter);
4118
4119         if (iter->ent->type == TRACE_PRINT &&
4120                         trace_flags & TRACE_ITER_PRINTK &&
4121                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4122                 return trace_print_printk_msg_only(iter);
4123
4124         if (trace_flags & TRACE_ITER_BIN)
4125                 return print_bin_fmt(iter);
4126
4127         if (trace_flags & TRACE_ITER_HEX)
4128                 return print_hex_fmt(iter);
4129
4130         if (trace_flags & TRACE_ITER_RAW)
4131                 return print_raw_fmt(iter);
4132
4133         return print_trace_fmt(iter);
4134 }
4135
4136 void trace_latency_header(struct seq_file *m)
4137 {
4138         struct trace_iterator *iter = m->private;
4139         struct trace_array *tr = iter->tr;
4140
4141         /* print nothing if the buffers are empty */
4142         if (trace_empty(iter))
4143                 return;
4144
4145         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4146                 print_trace_header(m, iter);
4147
4148         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4149                 print_lat_help_header(m);
4150 }
4151
4152 void trace_default_header(struct seq_file *m)
4153 {
4154         struct trace_iterator *iter = m->private;
4155         struct trace_array *tr = iter->tr;
4156         unsigned long trace_flags = tr->trace_flags;
4157
4158         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4159                 return;
4160
4161         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4162                 /* print nothing if the buffers are empty */
4163                 if (trace_empty(iter))
4164                         return;
4165                 print_trace_header(m, iter);
4166                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4167                         print_lat_help_header(m);
4168         } else {
4169                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4170                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4171                                 print_func_help_header_irq(iter->array_buffer,
4172                                                            m, trace_flags);
4173                         else
4174                                 print_func_help_header(iter->array_buffer, m,
4175                                                        trace_flags);
4176                 }
4177         }
4178 }
4179
4180 static void test_ftrace_alive(struct seq_file *m)
4181 {
4182         if (!ftrace_is_dead())
4183                 return;
4184         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4185                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4186 }
4187
4188 #ifdef CONFIG_TRACER_MAX_TRACE
4189 static void show_snapshot_main_help(struct seq_file *m)
4190 {
4191         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4192                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4193                     "#                      Takes a snapshot of the main buffer.\n"
4194                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4195                     "#                      (Doesn't have to be '2' works with any number that\n"
4196                     "#                       is not a '0' or '1')\n");
4197 }
4198
4199 static void show_snapshot_percpu_help(struct seq_file *m)
4200 {
4201         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4202 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4203         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4204                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4205 #else
4206         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4207                     "#                     Must use main snapshot file to allocate.\n");
4208 #endif
4209         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4210                     "#                      (Doesn't have to be '2' works with any number that\n"
4211                     "#                       is not a '0' or '1')\n");
4212 }
4213
4214 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4215 {
4216         if (iter->tr->allocated_snapshot)
4217                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4218         else
4219                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4220
4221         seq_puts(m, "# Snapshot commands:\n");
4222         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4223                 show_snapshot_main_help(m);
4224         else
4225                 show_snapshot_percpu_help(m);
4226 }
4227 #else
4228 /* Should never be called */
4229 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4230 #endif
4231
4232 static int s_show(struct seq_file *m, void *v)
4233 {
4234         struct trace_iterator *iter = v;
4235         int ret;
4236
4237         if (iter->ent == NULL) {
4238                 if (iter->tr) {
4239                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4240                         seq_puts(m, "#\n");
4241                         test_ftrace_alive(m);
4242                 }
4243                 if (iter->snapshot && trace_empty(iter))
4244                         print_snapshot_help(m, iter);
4245                 else if (iter->trace && iter->trace->print_header)
4246                         iter->trace->print_header(m);
4247                 else
4248                         trace_default_header(m);
4249
4250         } else if (iter->leftover) {
4251                 /*
4252                  * If we filled the seq_file buffer earlier, we
4253                  * want to just show it now.
4254                  */
4255                 ret = trace_print_seq(m, &iter->seq);
4256
4257                 /* ret should this time be zero, but you never know */
4258                 iter->leftover = ret;
4259
4260         } else {
4261                 print_trace_line(iter);
4262                 ret = trace_print_seq(m, &iter->seq);
4263                 /*
4264                  * If we overflow the seq_file buffer, then it will
4265                  * ask us for this data again at start up.
4266                  * Use that instead.
4267                  *  ret is 0 if seq_file write succeeded.
4268                  *        -1 otherwise.
4269                  */
4270                 iter->leftover = ret;
4271         }
4272
4273         return 0;
4274 }
4275
4276 /*
4277  * Should be used after trace_array_get(), trace_types_lock
4278  * ensures that i_cdev was already initialized.
4279  */
4280 static inline int tracing_get_cpu(struct inode *inode)
4281 {
4282         if (inode->i_cdev) /* See trace_create_cpu_file() */
4283                 return (long)inode->i_cdev - 1;
4284         return RING_BUFFER_ALL_CPUS;
4285 }
4286
4287 static const struct seq_operations tracer_seq_ops = {
4288         .start          = s_start,
4289         .next           = s_next,
4290         .stop           = s_stop,
4291         .show           = s_show,
4292 };
4293
4294 static struct trace_iterator *
4295 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4296 {
4297         struct trace_array *tr = inode->i_private;
4298         struct trace_iterator *iter;
4299         int cpu;
4300
4301         if (tracing_disabled)
4302                 return ERR_PTR(-ENODEV);
4303
4304         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4305         if (!iter)
4306                 return ERR_PTR(-ENOMEM);
4307
4308         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4309                                     GFP_KERNEL);
4310         if (!iter->buffer_iter)
4311                 goto release;
4312
4313         /*
4314          * trace_find_next_entry() may need to save off iter->ent.
4315          * It will place it into the iter->temp buffer. As most
4316          * events are less than 128, allocate a buffer of that size.
4317          * If one is greater, then trace_find_next_entry() will
4318          * allocate a new buffer to adjust for the bigger iter->ent.
4319          * It's not critical if it fails to get allocated here.
4320          */
4321         iter->temp = kmalloc(128, GFP_KERNEL);
4322         if (iter->temp)
4323                 iter->temp_size = 128;
4324
4325         /*
4326          * We make a copy of the current tracer to avoid concurrent
4327          * changes on it while we are reading.
4328          */
4329         mutex_lock(&trace_types_lock);
4330         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4331         if (!iter->trace)
4332                 goto fail;
4333
4334         *iter->trace = *tr->current_trace;
4335
4336         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4337                 goto fail;
4338
4339         iter->tr = tr;
4340
4341 #ifdef CONFIG_TRACER_MAX_TRACE
4342         /* Currently only the top directory has a snapshot */
4343         if (tr->current_trace->print_max || snapshot)
4344                 iter->array_buffer = &tr->max_buffer;
4345         else
4346 #endif
4347                 iter->array_buffer = &tr->array_buffer;
4348         iter->snapshot = snapshot;
4349         iter->pos = -1;
4350         iter->cpu_file = tracing_get_cpu(inode);
4351         mutex_init(&iter->mutex);
4352
4353         /* Notify the tracer early; before we stop tracing. */
4354         if (iter->trace->open)
4355                 iter->trace->open(iter);
4356
4357         /* Annotate start of buffers if we had overruns */
4358         if (ring_buffer_overruns(iter->array_buffer->buffer))
4359                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4360
4361         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4362         if (trace_clocks[tr->clock_id].in_ns)
4363                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4364
4365         /*
4366          * If pause-on-trace is enabled, then stop the trace while
4367          * dumping, unless this is the "snapshot" file
4368          */
4369         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4370                 tracing_stop_tr(tr);
4371
4372         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4373                 for_each_tracing_cpu(cpu) {
4374                         iter->buffer_iter[cpu] =
4375                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4376                                                          cpu, GFP_KERNEL);
4377                 }
4378                 ring_buffer_read_prepare_sync();
4379                 for_each_tracing_cpu(cpu) {
4380                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4381                         tracing_iter_reset(iter, cpu);
4382                 }
4383         } else {
4384                 cpu = iter->cpu_file;
4385                 iter->buffer_iter[cpu] =
4386                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4387                                                  cpu, GFP_KERNEL);
4388                 ring_buffer_read_prepare_sync();
4389                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4390                 tracing_iter_reset(iter, cpu);
4391         }
4392
4393         mutex_unlock(&trace_types_lock);
4394
4395         return iter;
4396
4397  fail:
4398         mutex_unlock(&trace_types_lock);
4399         kfree(iter->trace);
4400         kfree(iter->temp);
4401         kfree(iter->buffer_iter);
4402 release:
4403         seq_release_private(inode, file);
4404         return ERR_PTR(-ENOMEM);
4405 }
4406
4407 int tracing_open_generic(struct inode *inode, struct file *filp)
4408 {
4409         int ret;
4410
4411         ret = tracing_check_open_get_tr(NULL);
4412         if (ret)
4413                 return ret;
4414
4415         filp->private_data = inode->i_private;
4416         return 0;
4417 }
4418
4419 bool tracing_is_disabled(void)
4420 {
4421         return (tracing_disabled) ? true: false;
4422 }
4423
4424 /*
4425  * Open and update trace_array ref count.
4426  * Must have the current trace_array passed to it.
4427  */
4428 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4429 {
4430         struct trace_array *tr = inode->i_private;
4431         int ret;
4432
4433         ret = tracing_check_open_get_tr(tr);
4434         if (ret)
4435                 return ret;
4436
4437         filp->private_data = inode->i_private;
4438
4439         return 0;
4440 }
4441
4442 static int tracing_release(struct inode *inode, struct file *file)
4443 {
4444         struct trace_array *tr = inode->i_private;
4445         struct seq_file *m = file->private_data;
4446         struct trace_iterator *iter;
4447         int cpu;
4448
4449         if (!(file->f_mode & FMODE_READ)) {
4450                 trace_array_put(tr);
4451                 return 0;
4452         }
4453
4454         /* Writes do not use seq_file */
4455         iter = m->private;
4456         mutex_lock(&trace_types_lock);
4457
4458         for_each_tracing_cpu(cpu) {
4459                 if (iter->buffer_iter[cpu])
4460                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4461         }
4462
4463         if (iter->trace && iter->trace->close)
4464                 iter->trace->close(iter);
4465
4466         if (!iter->snapshot && tr->stop_count)
4467                 /* reenable tracing if it was previously enabled */
4468                 tracing_start_tr(tr);
4469
4470         __trace_array_put(tr);
4471
4472         mutex_unlock(&trace_types_lock);
4473
4474         mutex_destroy(&iter->mutex);
4475         free_cpumask_var(iter->started);
4476         kfree(iter->temp);
4477         kfree(iter->trace);
4478         kfree(iter->buffer_iter);
4479         seq_release_private(inode, file);
4480
4481         return 0;
4482 }
4483
4484 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4485 {
4486         struct trace_array *tr = inode->i_private;
4487
4488         trace_array_put(tr);
4489         return 0;
4490 }
4491
4492 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4493 {
4494         struct trace_array *tr = inode->i_private;
4495
4496         trace_array_put(tr);
4497
4498         return single_release(inode, file);
4499 }
4500
4501 static int tracing_open(struct inode *inode, struct file *file)
4502 {
4503         struct trace_array *tr = inode->i_private;
4504         struct trace_iterator *iter;
4505         int ret;
4506
4507         ret = tracing_check_open_get_tr(tr);
4508         if (ret)
4509                 return ret;
4510
4511         /* If this file was open for write, then erase contents */
4512         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4513                 int cpu = tracing_get_cpu(inode);
4514                 struct array_buffer *trace_buf = &tr->array_buffer;
4515
4516 #ifdef CONFIG_TRACER_MAX_TRACE
4517                 if (tr->current_trace->print_max)
4518                         trace_buf = &tr->max_buffer;
4519 #endif
4520
4521                 if (cpu == RING_BUFFER_ALL_CPUS)
4522                         tracing_reset_online_cpus(trace_buf);
4523                 else
4524                         tracing_reset_cpu(trace_buf, cpu);
4525         }
4526
4527         if (file->f_mode & FMODE_READ) {
4528                 iter = __tracing_open(inode, file, false);
4529                 if (IS_ERR(iter))
4530                         ret = PTR_ERR(iter);
4531                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4532                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4533         }
4534
4535         if (ret < 0)
4536                 trace_array_put(tr);
4537
4538         return ret;
4539 }
4540
4541 /*
4542  * Some tracers are not suitable for instance buffers.
4543  * A tracer is always available for the global array (toplevel)
4544  * or if it explicitly states that it is.
4545  */
4546 static bool
4547 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4548 {
4549         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4550 }
4551
4552 /* Find the next tracer that this trace array may use */
4553 static struct tracer *
4554 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4555 {
4556         while (t && !trace_ok_for_array(t, tr))
4557                 t = t->next;
4558
4559         return t;
4560 }
4561
4562 static void *
4563 t_next(struct seq_file *m, void *v, loff_t *pos)
4564 {
4565         struct trace_array *tr = m->private;
4566         struct tracer *t = v;
4567
4568         (*pos)++;
4569
4570         if (t)
4571                 t = get_tracer_for_array(tr, t->next);
4572
4573         return t;
4574 }
4575
4576 static void *t_start(struct seq_file *m, loff_t *pos)
4577 {
4578         struct trace_array *tr = m->private;
4579         struct tracer *t;
4580         loff_t l = 0;
4581
4582         mutex_lock(&trace_types_lock);
4583
4584         t = get_tracer_for_array(tr, trace_types);
4585         for (; t && l < *pos; t = t_next(m, t, &l))
4586                         ;
4587
4588         return t;
4589 }
4590
4591 static void t_stop(struct seq_file *m, void *p)
4592 {
4593         mutex_unlock(&trace_types_lock);
4594 }
4595
4596 static int t_show(struct seq_file *m, void *v)
4597 {
4598         struct tracer *t = v;
4599
4600         if (!t)
4601                 return 0;
4602
4603         seq_puts(m, t->name);
4604         if (t->next)
4605                 seq_putc(m, ' ');
4606         else
4607                 seq_putc(m, '\n');
4608
4609         return 0;
4610 }
4611
4612 static const struct seq_operations show_traces_seq_ops = {
4613         .start          = t_start,
4614         .next           = t_next,
4615         .stop           = t_stop,
4616         .show           = t_show,
4617 };
4618
4619 static int show_traces_open(struct inode *inode, struct file *file)
4620 {
4621         struct trace_array *tr = inode->i_private;
4622         struct seq_file *m;
4623         int ret;
4624
4625         ret = tracing_check_open_get_tr(tr);
4626         if (ret)
4627                 return ret;
4628
4629         ret = seq_open(file, &show_traces_seq_ops);
4630         if (ret) {
4631                 trace_array_put(tr);
4632                 return ret;
4633         }
4634
4635         m = file->private_data;
4636         m->private = tr;
4637
4638         return 0;
4639 }
4640
4641 static int show_traces_release(struct inode *inode, struct file *file)
4642 {
4643         struct trace_array *tr = inode->i_private;
4644
4645         trace_array_put(tr);
4646         return seq_release(inode, file);
4647 }
4648
4649 static ssize_t
4650 tracing_write_stub(struct file *filp, const char __user *ubuf,
4651                    size_t count, loff_t *ppos)
4652 {
4653         return count;
4654 }
4655
4656 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4657 {
4658         int ret;
4659
4660         if (file->f_mode & FMODE_READ)
4661                 ret = seq_lseek(file, offset, whence);
4662         else
4663                 file->f_pos = ret = 0;
4664
4665         return ret;
4666 }
4667
4668 static const struct file_operations tracing_fops = {
4669         .open           = tracing_open,
4670         .read           = seq_read,
4671         .write          = tracing_write_stub,
4672         .llseek         = tracing_lseek,
4673         .release        = tracing_release,
4674 };
4675
4676 static const struct file_operations show_traces_fops = {
4677         .open           = show_traces_open,
4678         .read           = seq_read,
4679         .llseek         = seq_lseek,
4680         .release        = show_traces_release,
4681 };
4682
4683 static ssize_t
4684 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4685                      size_t count, loff_t *ppos)
4686 {
4687         struct trace_array *tr = file_inode(filp)->i_private;
4688         char *mask_str;
4689         int len;
4690
4691         len = snprintf(NULL, 0, "%*pb\n",
4692                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4693         mask_str = kmalloc(len, GFP_KERNEL);
4694         if (!mask_str)
4695                 return -ENOMEM;
4696
4697         len = snprintf(mask_str, len, "%*pb\n",
4698                        cpumask_pr_args(tr->tracing_cpumask));
4699         if (len >= count) {
4700                 count = -EINVAL;
4701                 goto out_err;
4702         }
4703         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4704
4705 out_err:
4706         kfree(mask_str);
4707
4708         return count;
4709 }
4710
4711 int tracing_set_cpumask(struct trace_array *tr,
4712                         cpumask_var_t tracing_cpumask_new)
4713 {
4714         int cpu;
4715
4716         if (!tr)
4717                 return -EINVAL;
4718
4719         local_irq_disable();
4720         arch_spin_lock(&tr->max_lock);
4721         for_each_tracing_cpu(cpu) {
4722                 /*
4723                  * Increase/decrease the disabled counter if we are
4724                  * about to flip a bit in the cpumask:
4725                  */
4726                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4727                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4728                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4729                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4730                 }
4731                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4732                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4733                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4734                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4735                 }
4736         }
4737         arch_spin_unlock(&tr->max_lock);
4738         local_irq_enable();
4739
4740         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4741
4742         return 0;
4743 }
4744
4745 static ssize_t
4746 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4747                       size_t count, loff_t *ppos)
4748 {
4749         struct trace_array *tr = file_inode(filp)->i_private;
4750         cpumask_var_t tracing_cpumask_new;
4751         int err;
4752
4753         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4754                 return -ENOMEM;
4755
4756         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4757         if (err)
4758                 goto err_free;
4759
4760         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4761         if (err)
4762                 goto err_free;
4763
4764         free_cpumask_var(tracing_cpumask_new);
4765
4766         return count;
4767
4768 err_free:
4769         free_cpumask_var(tracing_cpumask_new);
4770
4771         return err;
4772 }
4773
4774 static const struct file_operations tracing_cpumask_fops = {
4775         .open           = tracing_open_generic_tr,
4776         .read           = tracing_cpumask_read,
4777         .write          = tracing_cpumask_write,
4778         .release        = tracing_release_generic_tr,
4779         .llseek         = generic_file_llseek,
4780 };
4781
4782 static int tracing_trace_options_show(struct seq_file *m, void *v)
4783 {
4784         struct tracer_opt *trace_opts;
4785         struct trace_array *tr = m->private;
4786         u32 tracer_flags;
4787         int i;
4788
4789         mutex_lock(&trace_types_lock);
4790         tracer_flags = tr->current_trace->flags->val;
4791         trace_opts = tr->current_trace->flags->opts;
4792
4793         for (i = 0; trace_options[i]; i++) {
4794                 if (tr->trace_flags & (1 << i))
4795                         seq_printf(m, "%s\n", trace_options[i]);
4796                 else
4797                         seq_printf(m, "no%s\n", trace_options[i]);
4798         }
4799
4800         for (i = 0; trace_opts[i].name; i++) {
4801                 if (tracer_flags & trace_opts[i].bit)
4802                         seq_printf(m, "%s\n", trace_opts[i].name);
4803                 else
4804                         seq_printf(m, "no%s\n", trace_opts[i].name);
4805         }
4806         mutex_unlock(&trace_types_lock);
4807
4808         return 0;
4809 }
4810
4811 static int __set_tracer_option(struct trace_array *tr,
4812                                struct tracer_flags *tracer_flags,
4813                                struct tracer_opt *opts, int neg)
4814 {
4815         struct tracer *trace = tracer_flags->trace;
4816         int ret;
4817
4818         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4819         if (ret)
4820                 return ret;
4821
4822         if (neg)
4823                 tracer_flags->val &= ~opts->bit;
4824         else
4825                 tracer_flags->val |= opts->bit;
4826         return 0;
4827 }
4828
4829 /* Try to assign a tracer specific option */
4830 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4831 {
4832         struct tracer *trace = tr->current_trace;
4833         struct tracer_flags *tracer_flags = trace->flags;
4834         struct tracer_opt *opts = NULL;
4835         int i;
4836
4837         for (i = 0; tracer_flags->opts[i].name; i++) {
4838                 opts = &tracer_flags->opts[i];
4839
4840                 if (strcmp(cmp, opts->name) == 0)
4841                         return __set_tracer_option(tr, trace->flags, opts, neg);
4842         }
4843
4844         return -EINVAL;
4845 }
4846
4847 /* Some tracers require overwrite to stay enabled */
4848 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4849 {
4850         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4851                 return -1;
4852
4853         return 0;
4854 }
4855
4856 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4857 {
4858         if ((mask == TRACE_ITER_RECORD_TGID) ||
4859             (mask == TRACE_ITER_RECORD_CMD))
4860                 lockdep_assert_held(&event_mutex);
4861
4862         /* do nothing if flag is already set */
4863         if (!!(tr->trace_flags & mask) == !!enabled)
4864                 return 0;
4865
4866         /* Give the tracer a chance to approve the change */
4867         if (tr->current_trace->flag_changed)
4868                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4869                         return -EINVAL;
4870
4871         if (enabled)
4872                 tr->trace_flags |= mask;
4873         else
4874                 tr->trace_flags &= ~mask;
4875
4876         if (mask == TRACE_ITER_RECORD_CMD)
4877                 trace_event_enable_cmd_record(enabled);
4878
4879         if (mask == TRACE_ITER_RECORD_TGID) {
4880                 if (!tgid_map)
4881                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4882                                            sizeof(*tgid_map),
4883                                            GFP_KERNEL);
4884                 if (!tgid_map) {
4885                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4886                         return -ENOMEM;
4887                 }
4888
4889                 trace_event_enable_tgid_record(enabled);
4890         }
4891
4892         if (mask == TRACE_ITER_EVENT_FORK)
4893                 trace_event_follow_fork(tr, enabled);
4894
4895         if (mask == TRACE_ITER_FUNC_FORK)
4896                 ftrace_pid_follow_fork(tr, enabled);
4897
4898         if (mask == TRACE_ITER_OVERWRITE) {
4899                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4902 #endif
4903         }
4904
4905         if (mask == TRACE_ITER_PRINTK) {
4906                 trace_printk_start_stop_comm(enabled);
4907                 trace_printk_control(enabled);
4908         }
4909
4910         return 0;
4911 }
4912
4913 int trace_set_options(struct trace_array *tr, char *option)
4914 {
4915         char *cmp;
4916         int neg = 0;
4917         int ret;
4918         size_t orig_len = strlen(option);
4919         int len;
4920
4921         cmp = strstrip(option);
4922
4923         len = str_has_prefix(cmp, "no");
4924         if (len)
4925                 neg = 1;
4926
4927         cmp += len;
4928
4929         mutex_lock(&event_mutex);
4930         mutex_lock(&trace_types_lock);
4931
4932         ret = match_string(trace_options, -1, cmp);
4933         /* If no option could be set, test the specific tracer options */
4934         if (ret < 0)
4935                 ret = set_tracer_option(tr, cmp, neg);
4936         else
4937                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4938
4939         mutex_unlock(&trace_types_lock);
4940         mutex_unlock(&event_mutex);
4941
4942         /*
4943          * If the first trailing whitespace is replaced with '\0' by strstrip,
4944          * turn it back into a space.
4945          */
4946         if (orig_len > strlen(option))
4947                 option[strlen(option)] = ' ';
4948
4949         return ret;
4950 }
4951
4952 static void __init apply_trace_boot_options(void)
4953 {
4954         char *buf = trace_boot_options_buf;
4955         char *option;
4956
4957         while (true) {
4958                 option = strsep(&buf, ",");
4959
4960                 if (!option)
4961                         break;
4962
4963                 if (*option)
4964                         trace_set_options(&global_trace, option);
4965
4966                 /* Put back the comma to allow this to be called again */
4967                 if (buf)
4968                         *(buf - 1) = ',';
4969         }
4970 }
4971
4972 static ssize_t
4973 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4974                         size_t cnt, loff_t *ppos)
4975 {
4976         struct seq_file *m = filp->private_data;
4977         struct trace_array *tr = m->private;
4978         char buf[64];
4979         int ret;
4980
4981         if (cnt >= sizeof(buf))
4982                 return -EINVAL;
4983
4984         if (copy_from_user(buf, ubuf, cnt))
4985                 return -EFAULT;
4986
4987         buf[cnt] = 0;
4988
4989         ret = trace_set_options(tr, buf);
4990         if (ret < 0)
4991                 return ret;
4992
4993         *ppos += cnt;
4994
4995         return cnt;
4996 }
4997
4998 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4999 {
5000         struct trace_array *tr = inode->i_private;
5001         int ret;
5002
5003         ret = tracing_check_open_get_tr(tr);
5004         if (ret)
5005                 return ret;
5006
5007         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5008         if (ret < 0)
5009                 trace_array_put(tr);
5010
5011         return ret;
5012 }
5013
5014 static const struct file_operations tracing_iter_fops = {
5015         .open           = tracing_trace_options_open,
5016         .read           = seq_read,
5017         .llseek         = seq_lseek,
5018         .release        = tracing_single_release_tr,
5019         .write          = tracing_trace_options_write,
5020 };
5021
5022 static const char readme_msg[] =
5023         "tracing mini-HOWTO:\n\n"
5024         "# echo 0 > tracing_on : quick way to disable tracing\n"
5025         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5026         " Important files:\n"
5027         "  trace\t\t\t- The static contents of the buffer\n"
5028         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5029         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5030         "  current_tracer\t- function and latency tracers\n"
5031         "  available_tracers\t- list of configured tracers for current_tracer\n"
5032         "  error_log\t- error log for failed commands (that support it)\n"
5033         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5034         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5035         "  trace_clock\t\t-change the clock used to order events\n"
5036         "       local:   Per cpu clock but may not be synced across CPUs\n"
5037         "      global:   Synced across CPUs but slows tracing down.\n"
5038         "     counter:   Not a clock, but just an increment\n"
5039         "      uptime:   Jiffy counter from time of boot\n"
5040         "        perf:   Same clock that perf events use\n"
5041 #ifdef CONFIG_X86_64
5042         "     x86-tsc:   TSC cycle counter\n"
5043 #endif
5044         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5045         "       delta:   Delta difference against a buffer-wide timestamp\n"
5046         "    absolute:   Absolute (standalone) timestamp\n"
5047         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5048         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5049         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5050         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5051         "\t\t\t  Remove sub-buffer with rmdir\n"
5052         "  trace_options\t\t- Set format or modify how tracing happens\n"
5053         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5054         "\t\t\t  option name\n"
5055         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5056 #ifdef CONFIG_DYNAMIC_FTRACE
5057         "\n  available_filter_functions - list of functions that can be filtered on\n"
5058         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5059         "\t\t\t  functions\n"
5060         "\t     accepts: func_full_name or glob-matching-pattern\n"
5061         "\t     modules: Can select a group via module\n"
5062         "\t      Format: :mod:<module-name>\n"
5063         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5064         "\t    triggers: a command to perform when function is hit\n"
5065         "\t      Format: <function>:<trigger>[:count]\n"
5066         "\t     trigger: traceon, traceoff\n"
5067         "\t\t      enable_event:<system>:<event>\n"
5068         "\t\t      disable_event:<system>:<event>\n"
5069 #ifdef CONFIG_STACKTRACE
5070         "\t\t      stacktrace\n"
5071 #endif
5072 #ifdef CONFIG_TRACER_SNAPSHOT
5073         "\t\t      snapshot\n"
5074 #endif
5075         "\t\t      dump\n"
5076         "\t\t      cpudump\n"
5077         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5078         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5079         "\t     The first one will disable tracing every time do_fault is hit\n"
5080         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5081         "\t       The first time do trap is hit and it disables tracing, the\n"
5082         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5083         "\t       the counter will not decrement. It only decrements when the\n"
5084         "\t       trigger did work\n"
5085         "\t     To remove trigger without count:\n"
5086         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5087         "\t     To remove trigger with a count:\n"
5088         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5089         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5090         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5091         "\t    modules: Can select a group via module command :mod:\n"
5092         "\t    Does not accept triggers\n"
5093 #endif /* CONFIG_DYNAMIC_FTRACE */
5094 #ifdef CONFIG_FUNCTION_TRACER
5095         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5096         "\t\t    (function)\n"
5097         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5098         "\t\t    (function)\n"
5099 #endif
5100 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5101         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5102         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5103         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5104 #endif
5105 #ifdef CONFIG_TRACER_SNAPSHOT
5106         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5107         "\t\t\t  snapshot buffer. Read the contents for more\n"
5108         "\t\t\t  information\n"
5109 #endif
5110 #ifdef CONFIG_STACK_TRACER
5111         "  stack_trace\t\t- Shows the max stack trace when active\n"
5112         "  stack_max_size\t- Shows current max stack size that was traced\n"
5113         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5114         "\t\t\t  new trace)\n"
5115 #ifdef CONFIG_DYNAMIC_FTRACE
5116         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5117         "\t\t\t  traces\n"
5118 #endif
5119 #endif /* CONFIG_STACK_TRACER */
5120 #ifdef CONFIG_DYNAMIC_EVENTS
5121         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5122         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5123 #endif
5124 #ifdef CONFIG_KPROBE_EVENTS
5125         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5126         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5127 #endif
5128 #ifdef CONFIG_UPROBE_EVENTS
5129         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5130         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5131 #endif
5132 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5133         "\t  accepts: event-definitions (one definition per line)\n"
5134         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5135         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5136 #ifdef CONFIG_HIST_TRIGGERS
5137         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5138 #endif
5139         "\t           -:[<group>/]<event>\n"
5140 #ifdef CONFIG_KPROBE_EVENTS
5141         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5142   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5143 #endif
5144 #ifdef CONFIG_UPROBE_EVENTS
5145   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5146 #endif
5147         "\t     args: <name>=fetcharg[:type]\n"
5148         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5149 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5150         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5151 #else
5152         "\t           $stack<index>, $stack, $retval, $comm,\n"
5153 #endif
5154         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5155         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5156         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5157         "\t           <type>\\[<array-size>\\]\n"
5158 #ifdef CONFIG_HIST_TRIGGERS
5159         "\t    field: <stype> <name>;\n"
5160         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5161         "\t           [unsigned] char/int/long\n"
5162 #endif
5163 #endif
5164         "  events/\t\t- Directory containing all trace event subsystems:\n"
5165         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5166         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5167         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5168         "\t\t\t  events\n"
5169         "      filter\t\t- If set, only events passing filter are traced\n"
5170         "  events/<system>/<event>/\t- Directory containing control files for\n"
5171         "\t\t\t  <event>:\n"
5172         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5173         "      filter\t\t- If set, only events passing filter are traced\n"
5174         "      trigger\t\t- If set, a command to perform when event is hit\n"
5175         "\t    Format: <trigger>[:count][if <filter>]\n"
5176         "\t   trigger: traceon, traceoff\n"
5177         "\t            enable_event:<system>:<event>\n"
5178         "\t            disable_event:<system>:<event>\n"
5179 #ifdef CONFIG_HIST_TRIGGERS
5180         "\t            enable_hist:<system>:<event>\n"
5181         "\t            disable_hist:<system>:<event>\n"
5182 #endif
5183 #ifdef CONFIG_STACKTRACE
5184         "\t\t    stacktrace\n"
5185 #endif
5186 #ifdef CONFIG_TRACER_SNAPSHOT
5187         "\t\t    snapshot\n"
5188 #endif
5189 #ifdef CONFIG_HIST_TRIGGERS
5190         "\t\t    hist (see below)\n"
5191 #endif
5192         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5193         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5194         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5195         "\t                  events/block/block_unplug/trigger\n"
5196         "\t   The first disables tracing every time block_unplug is hit.\n"
5197         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5198         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5199         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5200         "\t   Like function triggers, the counter is only decremented if it\n"
5201         "\t    enabled or disabled tracing.\n"
5202         "\t   To remove a trigger without a count:\n"
5203         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5204         "\t   To remove a trigger with a count:\n"
5205         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5206         "\t   Filters can be ignored when removing a trigger.\n"
5207 #ifdef CONFIG_HIST_TRIGGERS
5208         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5209         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5210         "\t            [:values=<field1[,field2,...]>]\n"
5211         "\t            [:sort=<field1[,field2,...]>]\n"
5212         "\t            [:size=#entries]\n"
5213         "\t            [:pause][:continue][:clear]\n"
5214         "\t            [:name=histname1]\n"
5215         "\t            [:<handler>.<action>]\n"
5216         "\t            [if <filter>]\n\n"
5217         "\t    When a matching event is hit, an entry is added to a hash\n"
5218         "\t    table using the key(s) and value(s) named, and the value of a\n"
5219         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5220         "\t    correspond to fields in the event's format description.  Keys\n"
5221         "\t    can be any field, or the special string 'stacktrace'.\n"
5222         "\t    Compound keys consisting of up to two fields can be specified\n"
5223         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5224         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5225         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5226         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5227         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5228         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5229         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5230         "\t    its histogram data will be shared with other triggers of the\n"
5231         "\t    same name, and trigger hits will update this common data.\n\n"
5232         "\t    Reading the 'hist' file for the event will dump the hash\n"
5233         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5234         "\t    triggers attached to an event, there will be a table for each\n"
5235         "\t    trigger in the output.  The table displayed for a named\n"
5236         "\t    trigger will be the same as any other instance having the\n"
5237         "\t    same name.  The default format used to display a given field\n"
5238         "\t    can be modified by appending any of the following modifiers\n"
5239         "\t    to the field name, as applicable:\n\n"
5240         "\t            .hex        display a number as a hex value\n"
5241         "\t            .sym        display an address as a symbol\n"
5242         "\t            .sym-offset display an address as a symbol and offset\n"
5243         "\t            .execname   display a common_pid as a program name\n"
5244         "\t            .syscall    display a syscall id as a syscall name\n"
5245         "\t            .log2       display log2 value rather than raw number\n"
5246         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5247         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5248         "\t    trigger or to start a hist trigger but not log any events\n"
5249         "\t    until told to do so.  'continue' can be used to start or\n"
5250         "\t    restart a paused hist trigger.\n\n"
5251         "\t    The 'clear' parameter will clear the contents of a running\n"
5252         "\t    hist trigger and leave its current paused/active state\n"
5253         "\t    unchanged.\n\n"
5254         "\t    The enable_hist and disable_hist triggers can be used to\n"
5255         "\t    have one event conditionally start and stop another event's\n"
5256         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5257         "\t    the enable_event and disable_event triggers.\n\n"
5258         "\t    Hist trigger handlers and actions are executed whenever a\n"
5259         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5260         "\t        <handler>.<action>\n\n"
5261         "\t    The available handlers are:\n\n"
5262         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5263         "\t        onmax(var)               - invoke if var exceeds current max\n"
5264         "\t        onchange(var)            - invoke action if var changes\n\n"
5265         "\t    The available actions are:\n\n"
5266         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5267         "\t        save(field,...)                      - save current event fields\n"
5268 #ifdef CONFIG_TRACER_SNAPSHOT
5269         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5270 #endif
5271 #ifdef CONFIG_SYNTH_EVENTS
5272         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5273         "\t  Write into this file to define/undefine new synthetic events.\n"
5274         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5275 #endif
5276 #endif
5277 ;
5278
5279 static ssize_t
5280 tracing_readme_read(struct file *filp, char __user *ubuf,
5281                        size_t cnt, loff_t *ppos)
5282 {
5283         return simple_read_from_buffer(ubuf, cnt, ppos,
5284                                         readme_msg, strlen(readme_msg));
5285 }
5286
5287 static const struct file_operations tracing_readme_fops = {
5288         .open           = tracing_open_generic,
5289         .read           = tracing_readme_read,
5290         .llseek         = generic_file_llseek,
5291 };
5292
5293 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5294 {
5295         int *ptr = v;
5296
5297         if (*pos || m->count)
5298                 ptr++;
5299
5300         (*pos)++;
5301
5302         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5303                 if (trace_find_tgid(*ptr))
5304                         return ptr;
5305         }
5306
5307         return NULL;
5308 }
5309
5310 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5311 {
5312         void *v;
5313         loff_t l = 0;
5314
5315         if (!tgid_map)
5316                 return NULL;
5317
5318         v = &tgid_map[0];
5319         while (l <= *pos) {
5320                 v = saved_tgids_next(m, v, &l);
5321                 if (!v)
5322                         return NULL;
5323         }
5324
5325         return v;
5326 }
5327
5328 static void saved_tgids_stop(struct seq_file *m, void *v)
5329 {
5330 }
5331
5332 static int saved_tgids_show(struct seq_file *m, void *v)
5333 {
5334         int pid = (int *)v - tgid_map;
5335
5336         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5337         return 0;
5338 }
5339
5340 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5341         .start          = saved_tgids_start,
5342         .stop           = saved_tgids_stop,
5343         .next           = saved_tgids_next,
5344         .show           = saved_tgids_show,
5345 };
5346
5347 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5348 {
5349         int ret;
5350
5351         ret = tracing_check_open_get_tr(NULL);
5352         if (ret)
5353                 return ret;
5354
5355         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5356 }
5357
5358
5359 static const struct file_operations tracing_saved_tgids_fops = {
5360         .open           = tracing_saved_tgids_open,
5361         .read           = seq_read,
5362         .llseek         = seq_lseek,
5363         .release        = seq_release,
5364 };
5365
5366 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5367 {
5368         unsigned int *ptr = v;
5369
5370         if (*pos || m->count)
5371                 ptr++;
5372
5373         (*pos)++;
5374
5375         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5376              ptr++) {
5377                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5378                         continue;
5379
5380                 return ptr;
5381         }
5382
5383         return NULL;
5384 }
5385
5386 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5387 {
5388         void *v;
5389         loff_t l = 0;
5390
5391         preempt_disable();
5392         arch_spin_lock(&trace_cmdline_lock);
5393
5394         v = &savedcmd->map_cmdline_to_pid[0];
5395         while (l <= *pos) {
5396                 v = saved_cmdlines_next(m, v, &l);
5397                 if (!v)
5398                         return NULL;
5399         }
5400
5401         return v;
5402 }
5403
5404 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5405 {
5406         arch_spin_unlock(&trace_cmdline_lock);
5407         preempt_enable();
5408 }
5409
5410 static int saved_cmdlines_show(struct seq_file *m, void *v)
5411 {
5412         char buf[TASK_COMM_LEN];
5413         unsigned int *pid = v;
5414
5415         __trace_find_cmdline(*pid, buf);
5416         seq_printf(m, "%d %s\n", *pid, buf);
5417         return 0;
5418 }
5419
5420 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5421         .start          = saved_cmdlines_start,
5422         .next           = saved_cmdlines_next,
5423         .stop           = saved_cmdlines_stop,
5424         .show           = saved_cmdlines_show,
5425 };
5426
5427 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5428 {
5429         int ret;
5430
5431         ret = tracing_check_open_get_tr(NULL);
5432         if (ret)
5433                 return ret;
5434
5435         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5436 }
5437
5438 static const struct file_operations tracing_saved_cmdlines_fops = {
5439         .open           = tracing_saved_cmdlines_open,
5440         .read           = seq_read,
5441         .llseek         = seq_lseek,
5442         .release        = seq_release,
5443 };
5444
5445 static ssize_t
5446 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5447                                  size_t cnt, loff_t *ppos)
5448 {
5449         char buf[64];
5450         int r;
5451
5452         arch_spin_lock(&trace_cmdline_lock);
5453         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5454         arch_spin_unlock(&trace_cmdline_lock);
5455
5456         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5457 }
5458
5459 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5460 {
5461         kfree(s->saved_cmdlines);
5462         kfree(s->map_cmdline_to_pid);
5463         kfree(s);
5464 }
5465
5466 static int tracing_resize_saved_cmdlines(unsigned int val)
5467 {
5468         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5469
5470         s = kmalloc(sizeof(*s), GFP_KERNEL);
5471         if (!s)
5472                 return -ENOMEM;
5473
5474         if (allocate_cmdlines_buffer(val, s) < 0) {
5475                 kfree(s);
5476                 return -ENOMEM;
5477         }
5478
5479         arch_spin_lock(&trace_cmdline_lock);
5480         savedcmd_temp = savedcmd;
5481         savedcmd = s;
5482         arch_spin_unlock(&trace_cmdline_lock);
5483         free_saved_cmdlines_buffer(savedcmd_temp);
5484
5485         return 0;
5486 }
5487
5488 static ssize_t
5489 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5490                                   size_t cnt, loff_t *ppos)
5491 {
5492         unsigned long val;
5493         int ret;
5494
5495         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5496         if (ret)
5497                 return ret;
5498
5499         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5500         if (!val || val > PID_MAX_DEFAULT)
5501                 return -EINVAL;
5502
5503         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5504         if (ret < 0)
5505                 return ret;
5506
5507         *ppos += cnt;
5508
5509         return cnt;
5510 }
5511
5512 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5513         .open           = tracing_open_generic,
5514         .read           = tracing_saved_cmdlines_size_read,
5515         .write          = tracing_saved_cmdlines_size_write,
5516 };
5517
5518 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5519 static union trace_eval_map_item *
5520 update_eval_map(union trace_eval_map_item *ptr)
5521 {
5522         if (!ptr->map.eval_string) {
5523                 if (ptr->tail.next) {
5524                         ptr = ptr->tail.next;
5525                         /* Set ptr to the next real item (skip head) */
5526                         ptr++;
5527                 } else
5528                         return NULL;
5529         }
5530         return ptr;
5531 }
5532
5533 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5534 {
5535         union trace_eval_map_item *ptr = v;
5536
5537         /*
5538          * Paranoid! If ptr points to end, we don't want to increment past it.
5539          * This really should never happen.
5540          */
5541         (*pos)++;
5542         ptr = update_eval_map(ptr);
5543         if (WARN_ON_ONCE(!ptr))
5544                 return NULL;
5545
5546         ptr++;
5547         ptr = update_eval_map(ptr);
5548
5549         return ptr;
5550 }
5551
5552 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5553 {
5554         union trace_eval_map_item *v;
5555         loff_t l = 0;
5556
5557         mutex_lock(&trace_eval_mutex);
5558
5559         v = trace_eval_maps;
5560         if (v)
5561                 v++;
5562
5563         while (v && l < *pos) {
5564                 v = eval_map_next(m, v, &l);
5565         }
5566
5567         return v;
5568 }
5569
5570 static void eval_map_stop(struct seq_file *m, void *v)
5571 {
5572         mutex_unlock(&trace_eval_mutex);
5573 }
5574
5575 static int eval_map_show(struct seq_file *m, void *v)
5576 {
5577         union trace_eval_map_item *ptr = v;
5578
5579         seq_printf(m, "%s %ld (%s)\n",
5580                    ptr->map.eval_string, ptr->map.eval_value,
5581                    ptr->map.system);
5582
5583         return 0;
5584 }
5585
5586 static const struct seq_operations tracing_eval_map_seq_ops = {
5587         .start          = eval_map_start,
5588         .next           = eval_map_next,
5589         .stop           = eval_map_stop,
5590         .show           = eval_map_show,
5591 };
5592
5593 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5594 {
5595         int ret;
5596
5597         ret = tracing_check_open_get_tr(NULL);
5598         if (ret)
5599                 return ret;
5600
5601         return seq_open(filp, &tracing_eval_map_seq_ops);
5602 }
5603
5604 static const struct file_operations tracing_eval_map_fops = {
5605         .open           = tracing_eval_map_open,
5606         .read           = seq_read,
5607         .llseek         = seq_lseek,
5608         .release        = seq_release,
5609 };
5610
5611 static inline union trace_eval_map_item *
5612 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5613 {
5614         /* Return tail of array given the head */
5615         return ptr + ptr->head.length + 1;
5616 }
5617
5618 static void
5619 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5620                            int len)
5621 {
5622         struct trace_eval_map **stop;
5623         struct trace_eval_map **map;
5624         union trace_eval_map_item *map_array;
5625         union trace_eval_map_item *ptr;
5626
5627         stop = start + len;
5628
5629         /*
5630          * The trace_eval_maps contains the map plus a head and tail item,
5631          * where the head holds the module and length of array, and the
5632          * tail holds a pointer to the next list.
5633          */
5634         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5635         if (!map_array) {
5636                 pr_warn("Unable to allocate trace eval mapping\n");
5637                 return;
5638         }
5639
5640         mutex_lock(&trace_eval_mutex);
5641
5642         if (!trace_eval_maps)
5643                 trace_eval_maps = map_array;
5644         else {
5645                 ptr = trace_eval_maps;
5646                 for (;;) {
5647                         ptr = trace_eval_jmp_to_tail(ptr);
5648                         if (!ptr->tail.next)
5649                                 break;
5650                         ptr = ptr->tail.next;
5651
5652                 }
5653                 ptr->tail.next = map_array;
5654         }
5655         map_array->head.mod = mod;
5656         map_array->head.length = len;
5657         map_array++;
5658
5659         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5660                 map_array->map = **map;
5661                 map_array++;
5662         }
5663         memset(map_array, 0, sizeof(*map_array));
5664
5665         mutex_unlock(&trace_eval_mutex);
5666 }
5667
5668 static void trace_create_eval_file(struct dentry *d_tracer)
5669 {
5670         trace_create_file("eval_map", 0444, d_tracer,
5671                           NULL, &tracing_eval_map_fops);
5672 }
5673
5674 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5675 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5676 static inline void trace_insert_eval_map_file(struct module *mod,
5677                               struct trace_eval_map **start, int len) { }
5678 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5679
5680 static void trace_insert_eval_map(struct module *mod,
5681                                   struct trace_eval_map **start, int len)
5682 {
5683         struct trace_eval_map **map;
5684
5685         if (len <= 0)
5686                 return;
5687
5688         map = start;
5689
5690         trace_event_eval_update(map, len);
5691
5692         trace_insert_eval_map_file(mod, start, len);
5693 }
5694
5695 static ssize_t
5696 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5697                        size_t cnt, loff_t *ppos)
5698 {
5699         struct trace_array *tr = filp->private_data;
5700         char buf[MAX_TRACER_SIZE+2];
5701         int r;
5702
5703         mutex_lock(&trace_types_lock);
5704         r = sprintf(buf, "%s\n", tr->current_trace->name);
5705         mutex_unlock(&trace_types_lock);
5706
5707         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5708 }
5709
5710 int tracer_init(struct tracer *t, struct trace_array *tr)
5711 {
5712         tracing_reset_online_cpus(&tr->array_buffer);
5713         return t->init(tr);
5714 }
5715
5716 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5717 {
5718         int cpu;
5719
5720         for_each_tracing_cpu(cpu)
5721                 per_cpu_ptr(buf->data, cpu)->entries = val;
5722 }
5723
5724 #ifdef CONFIG_TRACER_MAX_TRACE
5725 /* resize @tr's buffer to the size of @size_tr's entries */
5726 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5727                                         struct array_buffer *size_buf, int cpu_id)
5728 {
5729         int cpu, ret = 0;
5730
5731         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5732                 for_each_tracing_cpu(cpu) {
5733                         ret = ring_buffer_resize(trace_buf->buffer,
5734                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5735                         if (ret < 0)
5736                                 break;
5737                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5738                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5739                 }
5740         } else {
5741                 ret = ring_buffer_resize(trace_buf->buffer,
5742                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5743                 if (ret == 0)
5744                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5745                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5746         }
5747
5748         return ret;
5749 }
5750 #endif /* CONFIG_TRACER_MAX_TRACE */
5751
5752 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5753                                         unsigned long size, int cpu)
5754 {
5755         int ret;
5756
5757         /*
5758          * If kernel or user changes the size of the ring buffer
5759          * we use the size that was given, and we can forget about
5760          * expanding it later.
5761          */
5762         ring_buffer_expanded = true;
5763
5764         /* May be called before buffers are initialized */
5765         if (!tr->array_buffer.buffer)
5766                 return 0;
5767
5768         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5769         if (ret < 0)
5770                 return ret;
5771
5772 #ifdef CONFIG_TRACER_MAX_TRACE
5773         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5774             !tr->current_trace->use_max_tr)
5775                 goto out;
5776
5777         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5778         if (ret < 0) {
5779                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5780                                                      &tr->array_buffer, cpu);
5781                 if (r < 0) {
5782                         /*
5783                          * AARGH! We are left with different
5784                          * size max buffer!!!!
5785                          * The max buffer is our "snapshot" buffer.
5786                          * When a tracer needs a snapshot (one of the
5787                          * latency tracers), it swaps the max buffer
5788                          * with the saved snap shot. We succeeded to
5789                          * update the size of the main buffer, but failed to
5790                          * update the size of the max buffer. But when we tried
5791                          * to reset the main buffer to the original size, we
5792                          * failed there too. This is very unlikely to
5793                          * happen, but if it does, warn and kill all
5794                          * tracing.
5795                          */
5796                         WARN_ON(1);
5797                         tracing_disabled = 1;
5798                 }
5799                 return ret;
5800         }
5801
5802         if (cpu == RING_BUFFER_ALL_CPUS)
5803                 set_buffer_entries(&tr->max_buffer, size);
5804         else
5805                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5806
5807  out:
5808 #endif /* CONFIG_TRACER_MAX_TRACE */
5809
5810         if (cpu == RING_BUFFER_ALL_CPUS)
5811                 set_buffer_entries(&tr->array_buffer, size);
5812         else
5813                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5814
5815         return ret;
5816 }
5817
5818 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5819                                   unsigned long size, int cpu_id)
5820 {
5821         int ret = size;
5822
5823         mutex_lock(&trace_types_lock);
5824
5825         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5826                 /* make sure, this cpu is enabled in the mask */
5827                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5828                         ret = -EINVAL;
5829                         goto out;
5830                 }
5831         }
5832
5833         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5834         if (ret < 0)
5835                 ret = -ENOMEM;
5836
5837 out:
5838         mutex_unlock(&trace_types_lock);
5839
5840         return ret;
5841 }
5842
5843
5844 /**
5845  * tracing_update_buffers - used by tracing facility to expand ring buffers
5846  *
5847  * To save on memory when the tracing is never used on a system with it
5848  * configured in. The ring buffers are set to a minimum size. But once
5849  * a user starts to use the tracing facility, then they need to grow
5850  * to their default size.
5851  *
5852  * This function is to be called when a tracer is about to be used.
5853  */
5854 int tracing_update_buffers(void)
5855 {
5856         int ret = 0;
5857
5858         mutex_lock(&trace_types_lock);
5859         if (!ring_buffer_expanded)
5860                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5861                                                 RING_BUFFER_ALL_CPUS);
5862         mutex_unlock(&trace_types_lock);
5863
5864         return ret;
5865 }
5866
5867 struct trace_option_dentry;
5868
5869 static void
5870 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5871
5872 /*
5873  * Used to clear out the tracer before deletion of an instance.
5874  * Must have trace_types_lock held.
5875  */
5876 static void tracing_set_nop(struct trace_array *tr)
5877 {
5878         if (tr->current_trace == &nop_trace)
5879                 return;
5880         
5881         tr->current_trace->enabled--;
5882
5883         if (tr->current_trace->reset)
5884                 tr->current_trace->reset(tr);
5885
5886         tr->current_trace = &nop_trace;
5887 }
5888
5889 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5890 {
5891         /* Only enable if the directory has been created already. */
5892         if (!tr->dir)
5893                 return;
5894
5895         create_trace_option_files(tr, t);
5896 }
5897
5898 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5899 {
5900         struct tracer *t;
5901 #ifdef CONFIG_TRACER_MAX_TRACE
5902         bool had_max_tr;
5903 #endif
5904         int ret = 0;
5905
5906         mutex_lock(&trace_types_lock);
5907
5908         if (!ring_buffer_expanded) {
5909                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5910                                                 RING_BUFFER_ALL_CPUS);
5911                 if (ret < 0)
5912                         goto out;
5913                 ret = 0;
5914         }
5915
5916         for (t = trace_types; t; t = t->next) {
5917                 if (strcmp(t->name, buf) == 0)
5918                         break;
5919         }
5920         if (!t) {
5921                 ret = -EINVAL;
5922                 goto out;
5923         }
5924         if (t == tr->current_trace)
5925                 goto out;
5926
5927 #ifdef CONFIG_TRACER_SNAPSHOT
5928         if (t->use_max_tr) {
5929                 arch_spin_lock(&tr->max_lock);
5930                 if (tr->cond_snapshot)
5931                         ret = -EBUSY;
5932                 arch_spin_unlock(&tr->max_lock);
5933                 if (ret)
5934                         goto out;
5935         }
5936 #endif
5937         /* Some tracers won't work on kernel command line */
5938         if (system_state < SYSTEM_RUNNING && t->noboot) {
5939                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5940                         t->name);
5941                 goto out;
5942         }
5943
5944         /* Some tracers are only allowed for the top level buffer */
5945         if (!trace_ok_for_array(t, tr)) {
5946                 ret = -EINVAL;
5947                 goto out;
5948         }
5949
5950         /* If trace pipe files are being read, we can't change the tracer */
5951         if (tr->trace_ref) {
5952                 ret = -EBUSY;
5953                 goto out;
5954         }
5955
5956         trace_branch_disable();
5957
5958         tr->current_trace->enabled--;
5959
5960         if (tr->current_trace->reset)
5961                 tr->current_trace->reset(tr);
5962
5963         /* Current trace needs to be nop_trace before synchronize_rcu */
5964         tr->current_trace = &nop_trace;
5965
5966 #ifdef CONFIG_TRACER_MAX_TRACE
5967         had_max_tr = tr->allocated_snapshot;
5968
5969         if (had_max_tr && !t->use_max_tr) {
5970                 /*
5971                  * We need to make sure that the update_max_tr sees that
5972                  * current_trace changed to nop_trace to keep it from
5973                  * swapping the buffers after we resize it.
5974                  * The update_max_tr is called from interrupts disabled
5975                  * so a synchronized_sched() is sufficient.
5976                  */
5977                 synchronize_rcu();
5978                 free_snapshot(tr);
5979         }
5980 #endif
5981
5982 #ifdef CONFIG_TRACER_MAX_TRACE
5983         if (t->use_max_tr && !had_max_tr) {
5984                 ret = tracing_alloc_snapshot_instance(tr);
5985                 if (ret < 0)
5986                         goto out;
5987         }
5988 #endif
5989
5990         if (t->init) {
5991                 ret = tracer_init(t, tr);
5992                 if (ret)
5993                         goto out;
5994         }
5995
5996         tr->current_trace = t;
5997         tr->current_trace->enabled++;
5998         trace_branch_enable(tr);
5999  out:
6000         mutex_unlock(&trace_types_lock);
6001
6002         return ret;
6003 }
6004
6005 static ssize_t
6006 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6007                         size_t cnt, loff_t *ppos)
6008 {
6009         struct trace_array *tr = filp->private_data;
6010         char buf[MAX_TRACER_SIZE+1];
6011         int i;
6012         size_t ret;
6013         int err;
6014
6015         ret = cnt;
6016
6017         if (cnt > MAX_TRACER_SIZE)
6018                 cnt = MAX_TRACER_SIZE;
6019
6020         if (copy_from_user(buf, ubuf, cnt))
6021                 return -EFAULT;
6022
6023         buf[cnt] = 0;
6024
6025         /* strip ending whitespace. */
6026         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6027                 buf[i] = 0;
6028
6029         err = tracing_set_tracer(tr, buf);
6030         if (err)
6031                 return err;
6032
6033         *ppos += ret;
6034
6035         return ret;
6036 }
6037
6038 static ssize_t
6039 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6040                    size_t cnt, loff_t *ppos)
6041 {
6042         char buf[64];
6043         int r;
6044
6045         r = snprintf(buf, sizeof(buf), "%ld\n",
6046                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6047         if (r > sizeof(buf))
6048                 r = sizeof(buf);
6049         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6050 }
6051
6052 static ssize_t
6053 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6054                     size_t cnt, loff_t *ppos)
6055 {
6056         unsigned long val;
6057         int ret;
6058
6059         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6060         if (ret)
6061                 return ret;
6062
6063         *ptr = val * 1000;
6064
6065         return cnt;
6066 }
6067
6068 static ssize_t
6069 tracing_thresh_read(struct file *filp, char __user *ubuf,
6070                     size_t cnt, loff_t *ppos)
6071 {
6072         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6073 }
6074
6075 static ssize_t
6076 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6077                      size_t cnt, loff_t *ppos)
6078 {
6079         struct trace_array *tr = filp->private_data;
6080         int ret;
6081
6082         mutex_lock(&trace_types_lock);
6083         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6084         if (ret < 0)
6085                 goto out;
6086
6087         if (tr->current_trace->update_thresh) {
6088                 ret = tr->current_trace->update_thresh(tr);
6089                 if (ret < 0)
6090                         goto out;
6091         }
6092
6093         ret = cnt;
6094 out:
6095         mutex_unlock(&trace_types_lock);
6096
6097         return ret;
6098 }
6099
6100 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6101
6102 static ssize_t
6103 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6104                      size_t cnt, loff_t *ppos)
6105 {
6106         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6107 }
6108
6109 static ssize_t
6110 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6111                       size_t cnt, loff_t *ppos)
6112 {
6113         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6114 }
6115
6116 #endif
6117
6118 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6119 {
6120         struct trace_array *tr = inode->i_private;
6121         struct trace_iterator *iter;
6122         int ret;
6123
6124         ret = tracing_check_open_get_tr(tr);
6125         if (ret)
6126                 return ret;
6127
6128         mutex_lock(&trace_types_lock);
6129
6130         /* create a buffer to store the information to pass to userspace */
6131         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6132         if (!iter) {
6133                 ret = -ENOMEM;
6134                 __trace_array_put(tr);
6135                 goto out;
6136         }
6137
6138         trace_seq_init(&iter->seq);
6139         iter->trace = tr->current_trace;
6140
6141         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6142                 ret = -ENOMEM;
6143                 goto fail;
6144         }
6145
6146         /* trace pipe does not show start of buffer */
6147         cpumask_setall(iter->started);
6148
6149         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6150                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6151
6152         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6153         if (trace_clocks[tr->clock_id].in_ns)
6154                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6155
6156         iter->tr = tr;
6157         iter->array_buffer = &tr->array_buffer;
6158         iter->cpu_file = tracing_get_cpu(inode);
6159         mutex_init(&iter->mutex);
6160         filp->private_data = iter;
6161
6162         if (iter->trace->pipe_open)
6163                 iter->trace->pipe_open(iter);
6164
6165         nonseekable_open(inode, filp);
6166
6167         tr->trace_ref++;
6168 out:
6169         mutex_unlock(&trace_types_lock);
6170         return ret;
6171
6172 fail:
6173         kfree(iter);
6174         __trace_array_put(tr);
6175         mutex_unlock(&trace_types_lock);
6176         return ret;
6177 }
6178
6179 static int tracing_release_pipe(struct inode *inode, struct file *file)
6180 {
6181         struct trace_iterator *iter = file->private_data;
6182         struct trace_array *tr = inode->i_private;
6183
6184         mutex_lock(&trace_types_lock);
6185
6186         tr->trace_ref--;
6187
6188         if (iter->trace->pipe_close)
6189                 iter->trace->pipe_close(iter);
6190
6191         mutex_unlock(&trace_types_lock);
6192
6193         free_cpumask_var(iter->started);
6194         mutex_destroy(&iter->mutex);
6195         kfree(iter);
6196
6197         trace_array_put(tr);
6198
6199         return 0;
6200 }
6201
6202 static __poll_t
6203 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6204 {
6205         struct trace_array *tr = iter->tr;
6206
6207         /* Iterators are static, they should be filled or empty */
6208         if (trace_buffer_iter(iter, iter->cpu_file))
6209                 return EPOLLIN | EPOLLRDNORM;
6210
6211         if (tr->trace_flags & TRACE_ITER_BLOCK)
6212                 /*
6213                  * Always select as readable when in blocking mode
6214                  */
6215                 return EPOLLIN | EPOLLRDNORM;
6216         else
6217                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6218                                              filp, poll_table);
6219 }
6220
6221 static __poll_t
6222 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6223 {
6224         struct trace_iterator *iter = filp->private_data;
6225
6226         return trace_poll(iter, filp, poll_table);
6227 }
6228
6229 /* Must be called with iter->mutex held. */
6230 static int tracing_wait_pipe(struct file *filp)
6231 {
6232         struct trace_iterator *iter = filp->private_data;
6233         int ret;
6234
6235         while (trace_empty(iter)) {
6236
6237                 if ((filp->f_flags & O_NONBLOCK)) {
6238                         return -EAGAIN;
6239                 }
6240
6241                 /*
6242                  * We block until we read something and tracing is disabled.
6243                  * We still block if tracing is disabled, but we have never
6244                  * read anything. This allows a user to cat this file, and
6245                  * then enable tracing. But after we have read something,
6246                  * we give an EOF when tracing is again disabled.
6247                  *
6248                  * iter->pos will be 0 if we haven't read anything.
6249                  */
6250                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6251                         break;
6252
6253                 mutex_unlock(&iter->mutex);
6254
6255                 ret = wait_on_pipe(iter, 0);
6256
6257                 mutex_lock(&iter->mutex);
6258
6259                 if (ret)
6260                         return ret;
6261         }
6262
6263         return 1;
6264 }
6265
6266 /*
6267  * Consumer reader.
6268  */
6269 static ssize_t
6270 tracing_read_pipe(struct file *filp, char __user *ubuf,
6271                   size_t cnt, loff_t *ppos)
6272 {
6273         struct trace_iterator *iter = filp->private_data;
6274         ssize_t sret;
6275
6276         /*
6277          * Avoid more than one consumer on a single file descriptor
6278          * This is just a matter of traces coherency, the ring buffer itself
6279          * is protected.
6280          */
6281         mutex_lock(&iter->mutex);
6282
6283         /* return any leftover data */
6284         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6285         if (sret != -EBUSY)
6286                 goto out;
6287
6288         trace_seq_init(&iter->seq);
6289
6290         if (iter->trace->read) {
6291                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6292                 if (sret)
6293                         goto out;
6294         }
6295
6296 waitagain:
6297         sret = tracing_wait_pipe(filp);
6298         if (sret <= 0)
6299                 goto out;
6300
6301         /* stop when tracing is finished */
6302         if (trace_empty(iter)) {
6303                 sret = 0;
6304                 goto out;
6305         }
6306
6307         if (cnt >= PAGE_SIZE)
6308                 cnt = PAGE_SIZE - 1;
6309
6310         /* reset all but tr, trace, and overruns */
6311         memset(&iter->seq, 0,
6312                sizeof(struct trace_iterator) -
6313                offsetof(struct trace_iterator, seq));
6314         cpumask_clear(iter->started);
6315         trace_seq_init(&iter->seq);
6316         iter->pos = -1;
6317
6318         trace_event_read_lock();
6319         trace_access_lock(iter->cpu_file);
6320         while (trace_find_next_entry_inc(iter) != NULL) {
6321                 enum print_line_t ret;
6322                 int save_len = iter->seq.seq.len;
6323
6324                 ret = print_trace_line(iter);
6325                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6326                         /* don't print partial lines */
6327                         iter->seq.seq.len = save_len;
6328                         break;
6329                 }
6330                 if (ret != TRACE_TYPE_NO_CONSUME)
6331                         trace_consume(iter);
6332
6333                 if (trace_seq_used(&iter->seq) >= cnt)
6334                         break;
6335
6336                 /*
6337                  * Setting the full flag means we reached the trace_seq buffer
6338                  * size and we should leave by partial output condition above.
6339                  * One of the trace_seq_* functions is not used properly.
6340                  */
6341                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6342                           iter->ent->type);
6343         }
6344         trace_access_unlock(iter->cpu_file);
6345         trace_event_read_unlock();
6346
6347         /* Now copy what we have to the user */
6348         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6349         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6350                 trace_seq_init(&iter->seq);
6351
6352         /*
6353          * If there was nothing to send to user, in spite of consuming trace
6354          * entries, go back to wait for more entries.
6355          */
6356         if (sret == -EBUSY)
6357                 goto waitagain;
6358
6359 out:
6360         mutex_unlock(&iter->mutex);
6361
6362         return sret;
6363 }
6364
6365 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6366                                      unsigned int idx)
6367 {
6368         __free_page(spd->pages[idx]);
6369 }
6370
6371 static size_t
6372 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6373 {
6374         size_t count;
6375         int save_len;
6376         int ret;
6377
6378         /* Seq buffer is page-sized, exactly what we need. */
6379         for (;;) {
6380                 save_len = iter->seq.seq.len;
6381                 ret = print_trace_line(iter);
6382
6383                 if (trace_seq_has_overflowed(&iter->seq)) {
6384                         iter->seq.seq.len = save_len;
6385                         break;
6386                 }
6387
6388                 /*
6389                  * This should not be hit, because it should only
6390                  * be set if the iter->seq overflowed. But check it
6391                  * anyway to be safe.
6392                  */
6393                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6394                         iter->seq.seq.len = save_len;
6395                         break;
6396                 }
6397
6398                 count = trace_seq_used(&iter->seq) - save_len;
6399                 if (rem < count) {
6400                         rem = 0;
6401                         iter->seq.seq.len = save_len;
6402                         break;
6403                 }
6404
6405                 if (ret != TRACE_TYPE_NO_CONSUME)
6406                         trace_consume(iter);
6407                 rem -= count;
6408                 if (!trace_find_next_entry_inc(iter))   {
6409                         rem = 0;
6410                         iter->ent = NULL;
6411                         break;
6412                 }
6413         }
6414
6415         return rem;
6416 }
6417
6418 static ssize_t tracing_splice_read_pipe(struct file *filp,
6419                                         loff_t *ppos,
6420                                         struct pipe_inode_info *pipe,
6421                                         size_t len,
6422                                         unsigned int flags)
6423 {
6424         struct page *pages_def[PIPE_DEF_BUFFERS];
6425         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6426         struct trace_iterator *iter = filp->private_data;
6427         struct splice_pipe_desc spd = {
6428                 .pages          = pages_def,
6429                 .partial        = partial_def,
6430                 .nr_pages       = 0, /* This gets updated below. */
6431                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6432                 .ops            = &default_pipe_buf_ops,
6433                 .spd_release    = tracing_spd_release_pipe,
6434         };
6435         ssize_t ret;
6436         size_t rem;
6437         unsigned int i;
6438
6439         if (splice_grow_spd(pipe, &spd))
6440                 return -ENOMEM;
6441
6442         mutex_lock(&iter->mutex);
6443
6444         if (iter->trace->splice_read) {
6445                 ret = iter->trace->splice_read(iter, filp,
6446                                                ppos, pipe, len, flags);
6447                 if (ret)
6448                         goto out_err;
6449         }
6450
6451         ret = tracing_wait_pipe(filp);
6452         if (ret <= 0)
6453                 goto out_err;
6454
6455         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6456                 ret = -EFAULT;
6457                 goto out_err;
6458         }
6459
6460         trace_event_read_lock();
6461         trace_access_lock(iter->cpu_file);
6462
6463         /* Fill as many pages as possible. */
6464         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6465                 spd.pages[i] = alloc_page(GFP_KERNEL);
6466                 if (!spd.pages[i])
6467                         break;
6468
6469                 rem = tracing_fill_pipe_page(rem, iter);
6470
6471                 /* Copy the data into the page, so we can start over. */
6472                 ret = trace_seq_to_buffer(&iter->seq,
6473                                           page_address(spd.pages[i]),
6474                                           trace_seq_used(&iter->seq));
6475                 if (ret < 0) {
6476                         __free_page(spd.pages[i]);
6477                         break;
6478                 }
6479                 spd.partial[i].offset = 0;
6480                 spd.partial[i].len = trace_seq_used(&iter->seq);
6481
6482                 trace_seq_init(&iter->seq);
6483         }
6484
6485         trace_access_unlock(iter->cpu_file);
6486         trace_event_read_unlock();
6487         mutex_unlock(&iter->mutex);
6488
6489         spd.nr_pages = i;
6490
6491         if (i)
6492                 ret = splice_to_pipe(pipe, &spd);
6493         else
6494                 ret = 0;
6495 out:
6496         splice_shrink_spd(&spd);
6497         return ret;
6498
6499 out_err:
6500         mutex_unlock(&iter->mutex);
6501         goto out;
6502 }
6503
6504 static ssize_t
6505 tracing_entries_read(struct file *filp, char __user *ubuf,
6506                      size_t cnt, loff_t *ppos)
6507 {
6508         struct inode *inode = file_inode(filp);
6509         struct trace_array *tr = inode->i_private;
6510         int cpu = tracing_get_cpu(inode);
6511         char buf[64];
6512         int r = 0;
6513         ssize_t ret;
6514
6515         mutex_lock(&trace_types_lock);
6516
6517         if (cpu == RING_BUFFER_ALL_CPUS) {
6518                 int cpu, buf_size_same;
6519                 unsigned long size;
6520
6521                 size = 0;
6522                 buf_size_same = 1;
6523                 /* check if all cpu sizes are same */
6524                 for_each_tracing_cpu(cpu) {
6525                         /* fill in the size from first enabled cpu */
6526                         if (size == 0)
6527                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6528                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6529                                 buf_size_same = 0;
6530                                 break;
6531                         }
6532                 }
6533
6534                 if (buf_size_same) {
6535                         if (!ring_buffer_expanded)
6536                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6537                                             size >> 10,
6538                                             trace_buf_size >> 10);
6539                         else
6540                                 r = sprintf(buf, "%lu\n", size >> 10);
6541                 } else
6542                         r = sprintf(buf, "X\n");
6543         } else
6544                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6545
6546         mutex_unlock(&trace_types_lock);
6547
6548         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6549         return ret;
6550 }
6551
6552 static ssize_t
6553 tracing_entries_write(struct file *filp, const char __user *ubuf,
6554                       size_t cnt, loff_t *ppos)
6555 {
6556         struct inode *inode = file_inode(filp);
6557         struct trace_array *tr = inode->i_private;
6558         unsigned long val;
6559         int ret;
6560
6561         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6562         if (ret)
6563                 return ret;
6564
6565         /* must have at least 1 entry */
6566         if (!val)
6567                 return -EINVAL;
6568
6569         /* value is in KB */
6570         val <<= 10;
6571         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6572         if (ret < 0)
6573                 return ret;
6574
6575         *ppos += cnt;
6576
6577         return cnt;
6578 }
6579
6580 static ssize_t
6581 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6582                                 size_t cnt, loff_t *ppos)
6583 {
6584         struct trace_array *tr = filp->private_data;
6585         char buf[64];
6586         int r, cpu;
6587         unsigned long size = 0, expanded_size = 0;
6588
6589         mutex_lock(&trace_types_lock);
6590         for_each_tracing_cpu(cpu) {
6591                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6592                 if (!ring_buffer_expanded)
6593                         expanded_size += trace_buf_size >> 10;
6594         }
6595         if (ring_buffer_expanded)
6596                 r = sprintf(buf, "%lu\n", size);
6597         else
6598                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6599         mutex_unlock(&trace_types_lock);
6600
6601         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6602 }
6603
6604 static ssize_t
6605 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6606                           size_t cnt, loff_t *ppos)
6607 {
6608         /*
6609          * There is no need to read what the user has written, this function
6610          * is just to make sure that there is no error when "echo" is used
6611          */
6612
6613         *ppos += cnt;
6614
6615         return cnt;
6616 }
6617
6618 static int
6619 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6620 {
6621         struct trace_array *tr = inode->i_private;
6622
6623         /* disable tracing ? */
6624         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6625                 tracer_tracing_off(tr);
6626         /* resize the ring buffer to 0 */
6627         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6628
6629         trace_array_put(tr);
6630
6631         return 0;
6632 }
6633
6634 static ssize_t
6635 tracing_mark_write(struct file *filp, const char __user *ubuf,
6636                                         size_t cnt, loff_t *fpos)
6637 {
6638         struct trace_array *tr = filp->private_data;
6639         struct ring_buffer_event *event;
6640         enum event_trigger_type tt = ETT_NONE;
6641         struct trace_buffer *buffer;
6642         struct print_entry *entry;
6643         ssize_t written;
6644         int size;
6645         int len;
6646
6647 /* Used in tracing_mark_raw_write() as well */
6648 #define FAULTED_STR "<faulted>"
6649 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6650
6651         if (tracing_disabled)
6652                 return -EINVAL;
6653
6654         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6655                 return -EINVAL;
6656
6657         if (cnt > TRACE_BUF_SIZE)
6658                 cnt = TRACE_BUF_SIZE;
6659
6660         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6661
6662         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6663
6664         /* If less than "<faulted>", then make sure we can still add that */
6665         if (cnt < FAULTED_SIZE)
6666                 size += FAULTED_SIZE - cnt;
6667
6668         buffer = tr->array_buffer.buffer;
6669         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6670                                             tracing_gen_ctx());
6671         if (unlikely(!event))
6672                 /* Ring buffer disabled, return as if not open for write */
6673                 return -EBADF;
6674
6675         entry = ring_buffer_event_data(event);
6676         entry->ip = _THIS_IP_;
6677
6678         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6679         if (len) {
6680                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6681                 cnt = FAULTED_SIZE;
6682                 written = -EFAULT;
6683         } else
6684                 written = cnt;
6685
6686         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6687                 /* do not add \n before testing triggers, but add \0 */
6688                 entry->buf[cnt] = '\0';
6689                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6690         }
6691
6692         if (entry->buf[cnt - 1] != '\n') {
6693                 entry->buf[cnt] = '\n';
6694                 entry->buf[cnt + 1] = '\0';
6695         } else
6696                 entry->buf[cnt] = '\0';
6697
6698         if (static_branch_unlikely(&trace_marker_exports_enabled))
6699                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6700         __buffer_unlock_commit(buffer, event);
6701
6702         if (tt)
6703                 event_triggers_post_call(tr->trace_marker_file, tt);
6704
6705         if (written > 0)
6706                 *fpos += written;
6707
6708         return written;
6709 }
6710
6711 /* Limit it for now to 3K (including tag) */
6712 #define RAW_DATA_MAX_SIZE (1024*3)
6713
6714 static ssize_t
6715 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6716                                         size_t cnt, loff_t *fpos)
6717 {
6718         struct trace_array *tr = filp->private_data;
6719         struct ring_buffer_event *event;
6720         struct trace_buffer *buffer;
6721         struct raw_data_entry *entry;
6722         ssize_t written;
6723         int size;
6724         int len;
6725
6726 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6727
6728         if (tracing_disabled)
6729                 return -EINVAL;
6730
6731         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6732                 return -EINVAL;
6733
6734         /* The marker must at least have a tag id */
6735         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6736                 return -EINVAL;
6737
6738         if (cnt > TRACE_BUF_SIZE)
6739                 cnt = TRACE_BUF_SIZE;
6740
6741         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6742
6743         size = sizeof(*entry) + cnt;
6744         if (cnt < FAULT_SIZE_ID)
6745                 size += FAULT_SIZE_ID - cnt;
6746
6747         buffer = tr->array_buffer.buffer;
6748         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6749                                             tracing_gen_ctx());
6750         if (!event)
6751                 /* Ring buffer disabled, return as if not open for write */
6752                 return -EBADF;
6753
6754         entry = ring_buffer_event_data(event);
6755
6756         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6757         if (len) {
6758                 entry->id = -1;
6759                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6760                 written = -EFAULT;
6761         } else
6762                 written = cnt;
6763
6764         __buffer_unlock_commit(buffer, event);
6765
6766         if (written > 0)
6767                 *fpos += written;
6768
6769         return written;
6770 }
6771
6772 static int tracing_clock_show(struct seq_file *m, void *v)
6773 {
6774         struct trace_array *tr = m->private;
6775         int i;
6776
6777         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6778                 seq_printf(m,
6779                         "%s%s%s%s", i ? " " : "",
6780                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6781                         i == tr->clock_id ? "]" : "");
6782         seq_putc(m, '\n');
6783
6784         return 0;
6785 }
6786
6787 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6788 {
6789         int i;
6790
6791         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6792                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6793                         break;
6794         }
6795         if (i == ARRAY_SIZE(trace_clocks))
6796                 return -EINVAL;
6797
6798         mutex_lock(&trace_types_lock);
6799
6800         tr->clock_id = i;
6801
6802         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6803
6804         /*
6805          * New clock may not be consistent with the previous clock.
6806          * Reset the buffer so that it doesn't have incomparable timestamps.
6807          */
6808         tracing_reset_online_cpus(&tr->array_buffer);
6809
6810 #ifdef CONFIG_TRACER_MAX_TRACE
6811         if (tr->max_buffer.buffer)
6812                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6813         tracing_reset_online_cpus(&tr->max_buffer);
6814 #endif
6815
6816         mutex_unlock(&trace_types_lock);
6817
6818         return 0;
6819 }
6820
6821 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6822                                    size_t cnt, loff_t *fpos)
6823 {
6824         struct seq_file *m = filp->private_data;
6825         struct trace_array *tr = m->private;
6826         char buf[64];
6827         const char *clockstr;
6828         int ret;
6829
6830         if (cnt >= sizeof(buf))
6831                 return -EINVAL;
6832
6833         if (copy_from_user(buf, ubuf, cnt))
6834                 return -EFAULT;
6835
6836         buf[cnt] = 0;
6837
6838         clockstr = strstrip(buf);
6839
6840         ret = tracing_set_clock(tr, clockstr);
6841         if (ret)
6842                 return ret;
6843
6844         *fpos += cnt;
6845
6846         return cnt;
6847 }
6848
6849 static int tracing_clock_open(struct inode *inode, struct file *file)
6850 {
6851         struct trace_array *tr = inode->i_private;
6852         int ret;
6853
6854         ret = tracing_check_open_get_tr(tr);
6855         if (ret)
6856                 return ret;
6857
6858         ret = single_open(file, tracing_clock_show, inode->i_private);
6859         if (ret < 0)
6860                 trace_array_put(tr);
6861
6862         return ret;
6863 }
6864
6865 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6866 {
6867         struct trace_array *tr = m->private;
6868
6869         mutex_lock(&trace_types_lock);
6870
6871         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6872                 seq_puts(m, "delta [absolute]\n");
6873         else
6874                 seq_puts(m, "[delta] absolute\n");
6875
6876         mutex_unlock(&trace_types_lock);
6877
6878         return 0;
6879 }
6880
6881 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6882 {
6883         struct trace_array *tr = inode->i_private;
6884         int ret;
6885
6886         ret = tracing_check_open_get_tr(tr);
6887         if (ret)
6888                 return ret;
6889
6890         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6891         if (ret < 0)
6892                 trace_array_put(tr);
6893
6894         return ret;
6895 }
6896
6897 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6898 {
6899         int ret = 0;
6900
6901         mutex_lock(&trace_types_lock);
6902
6903         if (abs && tr->time_stamp_abs_ref++)
6904                 goto out;
6905
6906         if (!abs) {
6907                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6908                         ret = -EINVAL;
6909                         goto out;
6910                 }
6911
6912                 if (--tr->time_stamp_abs_ref)
6913                         goto out;
6914         }
6915
6916         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6917
6918 #ifdef CONFIG_TRACER_MAX_TRACE
6919         if (tr->max_buffer.buffer)
6920                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6921 #endif
6922  out:
6923         mutex_unlock(&trace_types_lock);
6924
6925         return ret;
6926 }
6927
6928 struct ftrace_buffer_info {
6929         struct trace_iterator   iter;
6930         void                    *spare;
6931         unsigned int            spare_cpu;
6932         unsigned int            read;
6933 };
6934
6935 #ifdef CONFIG_TRACER_SNAPSHOT
6936 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6937 {
6938         struct trace_array *tr = inode->i_private;
6939         struct trace_iterator *iter;
6940         struct seq_file *m;
6941         int ret;
6942
6943         ret = tracing_check_open_get_tr(tr);
6944         if (ret)
6945                 return ret;
6946
6947         if (file->f_mode & FMODE_READ) {
6948                 iter = __tracing_open(inode, file, true);
6949                 if (IS_ERR(iter))
6950                         ret = PTR_ERR(iter);
6951         } else {
6952                 /* Writes still need the seq_file to hold the private data */
6953                 ret = -ENOMEM;
6954                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6955                 if (!m)
6956                         goto out;
6957                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6958                 if (!iter) {
6959                         kfree(m);
6960                         goto out;
6961                 }
6962                 ret = 0;
6963
6964                 iter->tr = tr;
6965                 iter->array_buffer = &tr->max_buffer;
6966                 iter->cpu_file = tracing_get_cpu(inode);
6967                 m->private = iter;
6968                 file->private_data = m;
6969         }
6970 out:
6971         if (ret < 0)
6972                 trace_array_put(tr);
6973
6974         return ret;
6975 }
6976
6977 static ssize_t
6978 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6979                        loff_t *ppos)
6980 {
6981         struct seq_file *m = filp->private_data;
6982         struct trace_iterator *iter = m->private;
6983         struct trace_array *tr = iter->tr;
6984         unsigned long val;
6985         int ret;
6986
6987         ret = tracing_update_buffers();
6988         if (ret < 0)
6989                 return ret;
6990
6991         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6992         if (ret)
6993                 return ret;
6994
6995         mutex_lock(&trace_types_lock);
6996
6997         if (tr->current_trace->use_max_tr) {
6998                 ret = -EBUSY;
6999                 goto out;
7000         }
7001
7002         arch_spin_lock(&tr->max_lock);
7003         if (tr->cond_snapshot)
7004                 ret = -EBUSY;
7005         arch_spin_unlock(&tr->max_lock);
7006         if (ret)
7007                 goto out;
7008
7009         switch (val) {
7010         case 0:
7011                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7012                         ret = -EINVAL;
7013                         break;
7014                 }
7015                 if (tr->allocated_snapshot)
7016                         free_snapshot(tr);
7017                 break;
7018         case 1:
7019 /* Only allow per-cpu swap if the ring buffer supports it */
7020 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7021                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7022                         ret = -EINVAL;
7023                         break;
7024                 }
7025 #endif
7026                 if (tr->allocated_snapshot)
7027                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7028                                         &tr->array_buffer, iter->cpu_file);
7029                 else
7030                         ret = tracing_alloc_snapshot_instance(tr);
7031                 if (ret < 0)
7032                         break;
7033                 local_irq_disable();
7034                 /* Now, we're going to swap */
7035                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7036                         update_max_tr(tr, current, smp_processor_id(), NULL);
7037                 else
7038                         update_max_tr_single(tr, current, iter->cpu_file);
7039                 local_irq_enable();
7040                 break;
7041         default:
7042                 if (tr->allocated_snapshot) {
7043                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7044                                 tracing_reset_online_cpus(&tr->max_buffer);
7045                         else
7046                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7047                 }
7048                 break;
7049         }
7050
7051         if (ret >= 0) {
7052                 *ppos += cnt;
7053                 ret = cnt;
7054         }
7055 out:
7056         mutex_unlock(&trace_types_lock);
7057         return ret;
7058 }
7059
7060 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7061 {
7062         struct seq_file *m = file->private_data;
7063         int ret;
7064
7065         ret = tracing_release(inode, file);
7066
7067         if (file->f_mode & FMODE_READ)
7068                 return ret;
7069
7070         /* If write only, the seq_file is just a stub */
7071         if (m)
7072                 kfree(m->private);
7073         kfree(m);
7074
7075         return 0;
7076 }
7077
7078 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7079 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7080                                     size_t count, loff_t *ppos);
7081 static int tracing_buffers_release(struct inode *inode, struct file *file);
7082 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7083                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7084
7085 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7086 {
7087         struct ftrace_buffer_info *info;
7088         int ret;
7089
7090         /* The following checks for tracefs lockdown */
7091         ret = tracing_buffers_open(inode, filp);
7092         if (ret < 0)
7093                 return ret;
7094
7095         info = filp->private_data;
7096
7097         if (info->iter.trace->use_max_tr) {
7098                 tracing_buffers_release(inode, filp);
7099                 return -EBUSY;
7100         }
7101
7102         info->iter.snapshot = true;
7103         info->iter.array_buffer = &info->iter.tr->max_buffer;
7104
7105         return ret;
7106 }
7107
7108 #endif /* CONFIG_TRACER_SNAPSHOT */
7109
7110
7111 static const struct file_operations tracing_thresh_fops = {
7112         .open           = tracing_open_generic,
7113         .read           = tracing_thresh_read,
7114         .write          = tracing_thresh_write,
7115         .llseek         = generic_file_llseek,
7116 };
7117
7118 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7119 static const struct file_operations tracing_max_lat_fops = {
7120         .open           = tracing_open_generic,
7121         .read           = tracing_max_lat_read,
7122         .write          = tracing_max_lat_write,
7123         .llseek         = generic_file_llseek,
7124 };
7125 #endif
7126
7127 static const struct file_operations set_tracer_fops = {
7128         .open           = tracing_open_generic,
7129         .read           = tracing_set_trace_read,
7130         .write          = tracing_set_trace_write,
7131         .llseek         = generic_file_llseek,
7132 };
7133
7134 static const struct file_operations tracing_pipe_fops = {
7135         .open           = tracing_open_pipe,
7136         .poll           = tracing_poll_pipe,
7137         .read           = tracing_read_pipe,
7138         .splice_read    = tracing_splice_read_pipe,
7139         .release        = tracing_release_pipe,
7140         .llseek         = no_llseek,
7141 };
7142
7143 static const struct file_operations tracing_entries_fops = {
7144         .open           = tracing_open_generic_tr,
7145         .read           = tracing_entries_read,
7146         .write          = tracing_entries_write,
7147         .llseek         = generic_file_llseek,
7148         .release        = tracing_release_generic_tr,
7149 };
7150
7151 static const struct file_operations tracing_total_entries_fops = {
7152         .open           = tracing_open_generic_tr,
7153         .read           = tracing_total_entries_read,
7154         .llseek         = generic_file_llseek,
7155         .release        = tracing_release_generic_tr,
7156 };
7157
7158 static const struct file_operations tracing_free_buffer_fops = {
7159         .open           = tracing_open_generic_tr,
7160         .write          = tracing_free_buffer_write,
7161         .release        = tracing_free_buffer_release,
7162 };
7163
7164 static const struct file_operations tracing_mark_fops = {
7165         .open           = tracing_open_generic_tr,
7166         .write          = tracing_mark_write,
7167         .llseek         = generic_file_llseek,
7168         .release        = tracing_release_generic_tr,
7169 };
7170
7171 static const struct file_operations tracing_mark_raw_fops = {
7172         .open           = tracing_open_generic_tr,
7173         .write          = tracing_mark_raw_write,
7174         .llseek         = generic_file_llseek,
7175         .release        = tracing_release_generic_tr,
7176 };
7177
7178 static const struct file_operations trace_clock_fops = {
7179         .open           = tracing_clock_open,
7180         .read           = seq_read,
7181         .llseek         = seq_lseek,
7182         .release        = tracing_single_release_tr,
7183         .write          = tracing_clock_write,
7184 };
7185
7186 static const struct file_operations trace_time_stamp_mode_fops = {
7187         .open           = tracing_time_stamp_mode_open,
7188         .read           = seq_read,
7189         .llseek         = seq_lseek,
7190         .release        = tracing_single_release_tr,
7191 };
7192
7193 #ifdef CONFIG_TRACER_SNAPSHOT
7194 static const struct file_operations snapshot_fops = {
7195         .open           = tracing_snapshot_open,
7196         .read           = seq_read,
7197         .write          = tracing_snapshot_write,
7198         .llseek         = tracing_lseek,
7199         .release        = tracing_snapshot_release,
7200 };
7201
7202 static const struct file_operations snapshot_raw_fops = {
7203         .open           = snapshot_raw_open,
7204         .read           = tracing_buffers_read,
7205         .release        = tracing_buffers_release,
7206         .splice_read    = tracing_buffers_splice_read,
7207         .llseek         = no_llseek,
7208 };
7209
7210 #endif /* CONFIG_TRACER_SNAPSHOT */
7211
7212 #define TRACING_LOG_ERRS_MAX    8
7213 #define TRACING_LOG_LOC_MAX     128
7214
7215 #define CMD_PREFIX "  Command: "
7216
7217 struct err_info {
7218         const char      **errs; /* ptr to loc-specific array of err strings */
7219         u8              type;   /* index into errs -> specific err string */
7220         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7221         u64             ts;
7222 };
7223
7224 struct tracing_log_err {
7225         struct list_head        list;
7226         struct err_info         info;
7227         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7228         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7229 };
7230
7231 static DEFINE_MUTEX(tracing_err_log_lock);
7232
7233 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7234 {
7235         struct tracing_log_err *err;
7236
7237         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7238                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7239                 if (!err)
7240                         err = ERR_PTR(-ENOMEM);
7241                 tr->n_err_log_entries++;
7242
7243                 return err;
7244         }
7245
7246         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7247         list_del(&err->list);
7248
7249         return err;
7250 }
7251
7252 /**
7253  * err_pos - find the position of a string within a command for error careting
7254  * @cmd: The tracing command that caused the error
7255  * @str: The string to position the caret at within @cmd
7256  *
7257  * Finds the position of the first occurence of @str within @cmd.  The
7258  * return value can be passed to tracing_log_err() for caret placement
7259  * within @cmd.
7260  *
7261  * Returns the index within @cmd of the first occurence of @str or 0
7262  * if @str was not found.
7263  */
7264 unsigned int err_pos(char *cmd, const char *str)
7265 {
7266         char *found;
7267
7268         if (WARN_ON(!strlen(cmd)))
7269                 return 0;
7270
7271         found = strstr(cmd, str);
7272         if (found)
7273                 return found - cmd;
7274
7275         return 0;
7276 }
7277
7278 /**
7279  * tracing_log_err - write an error to the tracing error log
7280  * @tr: The associated trace array for the error (NULL for top level array)
7281  * @loc: A string describing where the error occurred
7282  * @cmd: The tracing command that caused the error
7283  * @errs: The array of loc-specific static error strings
7284  * @type: The index into errs[], which produces the specific static err string
7285  * @pos: The position the caret should be placed in the cmd
7286  *
7287  * Writes an error into tracing/error_log of the form:
7288  *
7289  * <loc>: error: <text>
7290  *   Command: <cmd>
7291  *              ^
7292  *
7293  * tracing/error_log is a small log file containing the last
7294  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7295  * unless there has been a tracing error, and the error log can be
7296  * cleared and have its memory freed by writing the empty string in
7297  * truncation mode to it i.e. echo > tracing/error_log.
7298  *
7299  * NOTE: the @errs array along with the @type param are used to
7300  * produce a static error string - this string is not copied and saved
7301  * when the error is logged - only a pointer to it is saved.  See
7302  * existing callers for examples of how static strings are typically
7303  * defined for use with tracing_log_err().
7304  */
7305 void tracing_log_err(struct trace_array *tr,
7306                      const char *loc, const char *cmd,
7307                      const char **errs, u8 type, u8 pos)
7308 {
7309         struct tracing_log_err *err;
7310
7311         if (!tr)
7312                 tr = &global_trace;
7313
7314         mutex_lock(&tracing_err_log_lock);
7315         err = get_tracing_log_err(tr);
7316         if (PTR_ERR(err) == -ENOMEM) {
7317                 mutex_unlock(&tracing_err_log_lock);
7318                 return;
7319         }
7320
7321         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7322         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7323
7324         err->info.errs = errs;
7325         err->info.type = type;
7326         err->info.pos = pos;
7327         err->info.ts = local_clock();
7328
7329         list_add_tail(&err->list, &tr->err_log);
7330         mutex_unlock(&tracing_err_log_lock);
7331 }
7332
7333 static void clear_tracing_err_log(struct trace_array *tr)
7334 {
7335         struct tracing_log_err *err, *next;
7336
7337         mutex_lock(&tracing_err_log_lock);
7338         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7339                 list_del(&err->list);
7340                 kfree(err);
7341         }
7342
7343         tr->n_err_log_entries = 0;
7344         mutex_unlock(&tracing_err_log_lock);
7345 }
7346
7347 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7348 {
7349         struct trace_array *tr = m->private;
7350
7351         mutex_lock(&tracing_err_log_lock);
7352
7353         return seq_list_start(&tr->err_log, *pos);
7354 }
7355
7356 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7357 {
7358         struct trace_array *tr = m->private;
7359
7360         return seq_list_next(v, &tr->err_log, pos);
7361 }
7362
7363 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7364 {
7365         mutex_unlock(&tracing_err_log_lock);
7366 }
7367
7368 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7369 {
7370         u8 i;
7371
7372         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7373                 seq_putc(m, ' ');
7374         for (i = 0; i < pos; i++)
7375                 seq_putc(m, ' ');
7376         seq_puts(m, "^\n");
7377 }
7378
7379 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7380 {
7381         struct tracing_log_err *err = v;
7382
7383         if (err) {
7384                 const char *err_text = err->info.errs[err->info.type];
7385                 u64 sec = err->info.ts;
7386                 u32 nsec;
7387
7388                 nsec = do_div(sec, NSEC_PER_SEC);
7389                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7390                            err->loc, err_text);
7391                 seq_printf(m, "%s", err->cmd);
7392                 tracing_err_log_show_pos(m, err->info.pos);
7393         }
7394
7395         return 0;
7396 }
7397
7398 static const struct seq_operations tracing_err_log_seq_ops = {
7399         .start  = tracing_err_log_seq_start,
7400         .next   = tracing_err_log_seq_next,
7401         .stop   = tracing_err_log_seq_stop,
7402         .show   = tracing_err_log_seq_show
7403 };
7404
7405 static int tracing_err_log_open(struct inode *inode, struct file *file)
7406 {
7407         struct trace_array *tr = inode->i_private;
7408         int ret = 0;
7409
7410         ret = tracing_check_open_get_tr(tr);
7411         if (ret)
7412                 return ret;
7413
7414         /* If this file was opened for write, then erase contents */
7415         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7416                 clear_tracing_err_log(tr);
7417
7418         if (file->f_mode & FMODE_READ) {
7419                 ret = seq_open(file, &tracing_err_log_seq_ops);
7420                 if (!ret) {
7421                         struct seq_file *m = file->private_data;
7422                         m->private = tr;
7423                 } else {
7424                         trace_array_put(tr);
7425                 }
7426         }
7427         return ret;
7428 }
7429
7430 static ssize_t tracing_err_log_write(struct file *file,
7431                                      const char __user *buffer,
7432                                      size_t count, loff_t *ppos)
7433 {
7434         return count;
7435 }
7436
7437 static int tracing_err_log_release(struct inode *inode, struct file *file)
7438 {
7439         struct trace_array *tr = inode->i_private;
7440
7441         trace_array_put(tr);
7442
7443         if (file->f_mode & FMODE_READ)
7444                 seq_release(inode, file);
7445
7446         return 0;
7447 }
7448
7449 static const struct file_operations tracing_err_log_fops = {
7450         .open           = tracing_err_log_open,
7451         .write          = tracing_err_log_write,
7452         .read           = seq_read,
7453         .llseek         = seq_lseek,
7454         .release        = tracing_err_log_release,
7455 };
7456
7457 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7458 {
7459         struct trace_array *tr = inode->i_private;
7460         struct ftrace_buffer_info *info;
7461         int ret;
7462
7463         ret = tracing_check_open_get_tr(tr);
7464         if (ret)
7465                 return ret;
7466
7467         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7468         if (!info) {
7469                 trace_array_put(tr);
7470                 return -ENOMEM;
7471         }
7472
7473         mutex_lock(&trace_types_lock);
7474
7475         info->iter.tr           = tr;
7476         info->iter.cpu_file     = tracing_get_cpu(inode);
7477         info->iter.trace        = tr->current_trace;
7478         info->iter.array_buffer = &tr->array_buffer;
7479         info->spare             = NULL;
7480         /* Force reading ring buffer for first read */
7481         info->read              = (unsigned int)-1;
7482
7483         filp->private_data = info;
7484
7485         tr->trace_ref++;
7486
7487         mutex_unlock(&trace_types_lock);
7488
7489         ret = nonseekable_open(inode, filp);
7490         if (ret < 0)
7491                 trace_array_put(tr);
7492
7493         return ret;
7494 }
7495
7496 static __poll_t
7497 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7498 {
7499         struct ftrace_buffer_info *info = filp->private_data;
7500         struct trace_iterator *iter = &info->iter;
7501
7502         return trace_poll(iter, filp, poll_table);
7503 }
7504
7505 static ssize_t
7506 tracing_buffers_read(struct file *filp, char __user *ubuf,
7507                      size_t count, loff_t *ppos)
7508 {
7509         struct ftrace_buffer_info *info = filp->private_data;
7510         struct trace_iterator *iter = &info->iter;
7511         ssize_t ret = 0;
7512         ssize_t size;
7513
7514         if (!count)
7515                 return 0;
7516
7517 #ifdef CONFIG_TRACER_MAX_TRACE
7518         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7519                 return -EBUSY;
7520 #endif
7521
7522         if (!info->spare) {
7523                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7524                                                           iter->cpu_file);
7525                 if (IS_ERR(info->spare)) {
7526                         ret = PTR_ERR(info->spare);
7527                         info->spare = NULL;
7528                 } else {
7529                         info->spare_cpu = iter->cpu_file;
7530                 }
7531         }
7532         if (!info->spare)
7533                 return ret;
7534
7535         /* Do we have previous read data to read? */
7536         if (info->read < PAGE_SIZE)
7537                 goto read;
7538
7539  again:
7540         trace_access_lock(iter->cpu_file);
7541         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7542                                     &info->spare,
7543                                     count,
7544                                     iter->cpu_file, 0);
7545         trace_access_unlock(iter->cpu_file);
7546
7547         if (ret < 0) {
7548                 if (trace_empty(iter)) {
7549                         if ((filp->f_flags & O_NONBLOCK))
7550                                 return -EAGAIN;
7551
7552                         ret = wait_on_pipe(iter, 0);
7553                         if (ret)
7554                                 return ret;
7555
7556                         goto again;
7557                 }
7558                 return 0;
7559         }
7560
7561         info->read = 0;
7562  read:
7563         size = PAGE_SIZE - info->read;
7564         if (size > count)
7565                 size = count;
7566
7567         ret = copy_to_user(ubuf, info->spare + info->read, size);
7568         if (ret == size)
7569                 return -EFAULT;
7570
7571         size -= ret;
7572
7573         *ppos += size;
7574         info->read += size;
7575
7576         return size;
7577 }
7578
7579 static int tracing_buffers_release(struct inode *inode, struct file *file)
7580 {
7581         struct ftrace_buffer_info *info = file->private_data;
7582         struct trace_iterator *iter = &info->iter;
7583
7584         mutex_lock(&trace_types_lock);
7585
7586         iter->tr->trace_ref--;
7587
7588         __trace_array_put(iter->tr);
7589
7590         if (info->spare)
7591                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7592                                            info->spare_cpu, info->spare);
7593         kvfree(info);
7594
7595         mutex_unlock(&trace_types_lock);
7596
7597         return 0;
7598 }
7599
7600 struct buffer_ref {
7601         struct trace_buffer     *buffer;
7602         void                    *page;
7603         int                     cpu;
7604         refcount_t              refcount;
7605 };
7606
7607 static void buffer_ref_release(struct buffer_ref *ref)
7608 {
7609         if (!refcount_dec_and_test(&ref->refcount))
7610                 return;
7611         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7612         kfree(ref);
7613 }
7614
7615 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7616                                     struct pipe_buffer *buf)
7617 {
7618         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7619
7620         buffer_ref_release(ref);
7621         buf->private = 0;
7622 }
7623
7624 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7625                                 struct pipe_buffer *buf)
7626 {
7627         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7628
7629         if (refcount_read(&ref->refcount) > INT_MAX/2)
7630                 return false;
7631
7632         refcount_inc(&ref->refcount);
7633         return true;
7634 }
7635
7636 /* Pipe buffer operations for a buffer. */
7637 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7638         .release                = buffer_pipe_buf_release,
7639         .get                    = buffer_pipe_buf_get,
7640 };
7641
7642 /*
7643  * Callback from splice_to_pipe(), if we need to release some pages
7644  * at the end of the spd in case we error'ed out in filling the pipe.
7645  */
7646 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7647 {
7648         struct buffer_ref *ref =
7649                 (struct buffer_ref *)spd->partial[i].private;
7650
7651         buffer_ref_release(ref);
7652         spd->partial[i].private = 0;
7653 }
7654
7655 static ssize_t
7656 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7657                             struct pipe_inode_info *pipe, size_t len,
7658                             unsigned int flags)
7659 {
7660         struct ftrace_buffer_info *info = file->private_data;
7661         struct trace_iterator *iter = &info->iter;
7662         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7663         struct page *pages_def[PIPE_DEF_BUFFERS];
7664         struct splice_pipe_desc spd = {
7665                 .pages          = pages_def,
7666                 .partial        = partial_def,
7667                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7668                 .ops            = &buffer_pipe_buf_ops,
7669                 .spd_release    = buffer_spd_release,
7670         };
7671         struct buffer_ref *ref;
7672         int entries, i;
7673         ssize_t ret = 0;
7674
7675 #ifdef CONFIG_TRACER_MAX_TRACE
7676         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7677                 return -EBUSY;
7678 #endif
7679
7680         if (*ppos & (PAGE_SIZE - 1))
7681                 return -EINVAL;
7682
7683         if (len & (PAGE_SIZE - 1)) {
7684                 if (len < PAGE_SIZE)
7685                         return -EINVAL;
7686                 len &= PAGE_MASK;
7687         }
7688
7689         if (splice_grow_spd(pipe, &spd))
7690                 return -ENOMEM;
7691
7692  again:
7693         trace_access_lock(iter->cpu_file);
7694         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7695
7696         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7697                 struct page *page;
7698                 int r;
7699
7700                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7701                 if (!ref) {
7702                         ret = -ENOMEM;
7703                         break;
7704                 }
7705
7706                 refcount_set(&ref->refcount, 1);
7707                 ref->buffer = iter->array_buffer->buffer;
7708                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7709                 if (IS_ERR(ref->page)) {
7710                         ret = PTR_ERR(ref->page);
7711                         ref->page = NULL;
7712                         kfree(ref);
7713                         break;
7714                 }
7715                 ref->cpu = iter->cpu_file;
7716
7717                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7718                                           len, iter->cpu_file, 1);
7719                 if (r < 0) {
7720                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7721                                                    ref->page);
7722                         kfree(ref);
7723                         break;
7724                 }
7725
7726                 page = virt_to_page(ref->page);
7727
7728                 spd.pages[i] = page;
7729                 spd.partial[i].len = PAGE_SIZE;
7730                 spd.partial[i].offset = 0;
7731                 spd.partial[i].private = (unsigned long)ref;
7732                 spd.nr_pages++;
7733                 *ppos += PAGE_SIZE;
7734
7735                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7736         }
7737
7738         trace_access_unlock(iter->cpu_file);
7739         spd.nr_pages = i;
7740
7741         /* did we read anything? */
7742         if (!spd.nr_pages) {
7743                 if (ret)
7744                         goto out;
7745
7746                 ret = -EAGAIN;
7747                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7748                         goto out;
7749
7750                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7751                 if (ret)
7752                         goto out;
7753
7754                 goto again;
7755         }
7756
7757         ret = splice_to_pipe(pipe, &spd);
7758 out:
7759         splice_shrink_spd(&spd);
7760
7761         return ret;
7762 }
7763
7764 static const struct file_operations tracing_buffers_fops = {
7765         .open           = tracing_buffers_open,
7766         .read           = tracing_buffers_read,
7767         .poll           = tracing_buffers_poll,
7768         .release        = tracing_buffers_release,
7769         .splice_read    = tracing_buffers_splice_read,
7770         .llseek         = no_llseek,
7771 };
7772
7773 static ssize_t
7774 tracing_stats_read(struct file *filp, char __user *ubuf,
7775                    size_t count, loff_t *ppos)
7776 {
7777         struct inode *inode = file_inode(filp);
7778         struct trace_array *tr = inode->i_private;
7779         struct array_buffer *trace_buf = &tr->array_buffer;
7780         int cpu = tracing_get_cpu(inode);
7781         struct trace_seq *s;
7782         unsigned long cnt;
7783         unsigned long long t;
7784         unsigned long usec_rem;
7785
7786         s = kmalloc(sizeof(*s), GFP_KERNEL);
7787         if (!s)
7788                 return -ENOMEM;
7789
7790         trace_seq_init(s);
7791
7792         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7793         trace_seq_printf(s, "entries: %ld\n", cnt);
7794
7795         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7796         trace_seq_printf(s, "overrun: %ld\n", cnt);
7797
7798         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7799         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7800
7801         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7802         trace_seq_printf(s, "bytes: %ld\n", cnt);
7803
7804         if (trace_clocks[tr->clock_id].in_ns) {
7805                 /* local or global for trace_clock */
7806                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7807                 usec_rem = do_div(t, USEC_PER_SEC);
7808                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7809                                                                 t, usec_rem);
7810
7811                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7812                 usec_rem = do_div(t, USEC_PER_SEC);
7813                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7814         } else {
7815                 /* counter or tsc mode for trace_clock */
7816                 trace_seq_printf(s, "oldest event ts: %llu\n",
7817                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7818
7819                 trace_seq_printf(s, "now ts: %llu\n",
7820                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7821         }
7822
7823         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7824         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7825
7826         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7827         trace_seq_printf(s, "read events: %ld\n", cnt);
7828
7829         count = simple_read_from_buffer(ubuf, count, ppos,
7830                                         s->buffer, trace_seq_used(s));
7831
7832         kfree(s);
7833
7834         return count;
7835 }
7836
7837 static const struct file_operations tracing_stats_fops = {
7838         .open           = tracing_open_generic_tr,
7839         .read           = tracing_stats_read,
7840         .llseek         = generic_file_llseek,
7841         .release        = tracing_release_generic_tr,
7842 };
7843
7844 #ifdef CONFIG_DYNAMIC_FTRACE
7845
7846 static ssize_t
7847 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7848                   size_t cnt, loff_t *ppos)
7849 {
7850         ssize_t ret;
7851         char *buf;
7852         int r;
7853
7854         /* 256 should be plenty to hold the amount needed */
7855         buf = kmalloc(256, GFP_KERNEL);
7856         if (!buf)
7857                 return -ENOMEM;
7858
7859         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7860                       ftrace_update_tot_cnt,
7861                       ftrace_number_of_pages,
7862                       ftrace_number_of_groups);
7863
7864         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7865         kfree(buf);
7866         return ret;
7867 }
7868
7869 static const struct file_operations tracing_dyn_info_fops = {
7870         .open           = tracing_open_generic,
7871         .read           = tracing_read_dyn_info,
7872         .llseek         = generic_file_llseek,
7873 };
7874 #endif /* CONFIG_DYNAMIC_FTRACE */
7875
7876 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7877 static void
7878 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7879                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7880                 void *data)
7881 {
7882         tracing_snapshot_instance(tr);
7883 }
7884
7885 static void
7886 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7887                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7888                       void *data)
7889 {
7890         struct ftrace_func_mapper *mapper = data;
7891         long *count = NULL;
7892
7893         if (mapper)
7894                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7895
7896         if (count) {
7897
7898                 if (*count <= 0)
7899                         return;
7900
7901                 (*count)--;
7902         }
7903
7904         tracing_snapshot_instance(tr);
7905 }
7906
7907 static int
7908 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7909                       struct ftrace_probe_ops *ops, void *data)
7910 {
7911         struct ftrace_func_mapper *mapper = data;
7912         long *count = NULL;
7913
7914         seq_printf(m, "%ps:", (void *)ip);
7915
7916         seq_puts(m, "snapshot");
7917
7918         if (mapper)
7919                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7920
7921         if (count)
7922                 seq_printf(m, ":count=%ld\n", *count);
7923         else
7924                 seq_puts(m, ":unlimited\n");
7925
7926         return 0;
7927 }
7928
7929 static int
7930 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7931                      unsigned long ip, void *init_data, void **data)
7932 {
7933         struct ftrace_func_mapper *mapper = *data;
7934
7935         if (!mapper) {
7936                 mapper = allocate_ftrace_func_mapper();
7937                 if (!mapper)
7938                         return -ENOMEM;
7939                 *data = mapper;
7940         }
7941
7942         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7943 }
7944
7945 static void
7946 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7947                      unsigned long ip, void *data)
7948 {
7949         struct ftrace_func_mapper *mapper = data;
7950
7951         if (!ip) {
7952                 if (!mapper)
7953                         return;
7954                 free_ftrace_func_mapper(mapper, NULL);
7955                 return;
7956         }
7957
7958         ftrace_func_mapper_remove_ip(mapper, ip);
7959 }
7960
7961 static struct ftrace_probe_ops snapshot_probe_ops = {
7962         .func                   = ftrace_snapshot,
7963         .print                  = ftrace_snapshot_print,
7964 };
7965
7966 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7967         .func                   = ftrace_count_snapshot,
7968         .print                  = ftrace_snapshot_print,
7969         .init                   = ftrace_snapshot_init,
7970         .free                   = ftrace_snapshot_free,
7971 };
7972
7973 static int
7974 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7975                                char *glob, char *cmd, char *param, int enable)
7976 {
7977         struct ftrace_probe_ops *ops;
7978         void *count = (void *)-1;
7979         char *number;
7980         int ret;
7981
7982         if (!tr)
7983                 return -ENODEV;
7984
7985         /* hash funcs only work with set_ftrace_filter */
7986         if (!enable)
7987                 return -EINVAL;
7988
7989         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7990
7991         if (glob[0] == '!')
7992                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7993
7994         if (!param)
7995                 goto out_reg;
7996
7997         number = strsep(&param, ":");
7998
7999         if (!strlen(number))
8000                 goto out_reg;
8001
8002         /*
8003          * We use the callback data field (which is a pointer)
8004          * as our counter.
8005          */
8006         ret = kstrtoul(number, 0, (unsigned long *)&count);
8007         if (ret)
8008                 return ret;
8009
8010  out_reg:
8011         ret = tracing_alloc_snapshot_instance(tr);
8012         if (ret < 0)
8013                 goto out;
8014
8015         ret = register_ftrace_function_probe(glob, tr, ops, count);
8016
8017  out:
8018         return ret < 0 ? ret : 0;
8019 }
8020
8021 static struct ftrace_func_command ftrace_snapshot_cmd = {
8022         .name                   = "snapshot",
8023         .func                   = ftrace_trace_snapshot_callback,
8024 };
8025
8026 static __init int register_snapshot_cmd(void)
8027 {
8028         return register_ftrace_command(&ftrace_snapshot_cmd);
8029 }
8030 #else
8031 static inline __init int register_snapshot_cmd(void) { return 0; }
8032 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8033
8034 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8035 {
8036         if (WARN_ON(!tr->dir))
8037                 return ERR_PTR(-ENODEV);
8038
8039         /* Top directory uses NULL as the parent */
8040         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8041                 return NULL;
8042
8043         /* All sub buffers have a descriptor */
8044         return tr->dir;
8045 }
8046
8047 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8048 {
8049         struct dentry *d_tracer;
8050
8051         if (tr->percpu_dir)
8052                 return tr->percpu_dir;
8053
8054         d_tracer = tracing_get_dentry(tr);
8055         if (IS_ERR(d_tracer))
8056                 return NULL;
8057
8058         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8059
8060         MEM_FAIL(!tr->percpu_dir,
8061                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8062
8063         return tr->percpu_dir;
8064 }
8065
8066 static struct dentry *
8067 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8068                       void *data, long cpu, const struct file_operations *fops)
8069 {
8070         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8071
8072         if (ret) /* See tracing_get_cpu() */
8073                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8074         return ret;
8075 }
8076
8077 static void
8078 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8079 {
8080         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8081         struct dentry *d_cpu;
8082         char cpu_dir[30]; /* 30 characters should be more than enough */
8083
8084         if (!d_percpu)
8085                 return;
8086
8087         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8088         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8089         if (!d_cpu) {
8090                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8091                 return;
8092         }
8093
8094         /* per cpu trace_pipe */
8095         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8096                                 tr, cpu, &tracing_pipe_fops);
8097
8098         /* per cpu trace */
8099         trace_create_cpu_file("trace", 0644, d_cpu,
8100                                 tr, cpu, &tracing_fops);
8101
8102         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8103                                 tr, cpu, &tracing_buffers_fops);
8104
8105         trace_create_cpu_file("stats", 0444, d_cpu,
8106                                 tr, cpu, &tracing_stats_fops);
8107
8108         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8109                                 tr, cpu, &tracing_entries_fops);
8110
8111 #ifdef CONFIG_TRACER_SNAPSHOT
8112         trace_create_cpu_file("snapshot", 0644, d_cpu,
8113                                 tr, cpu, &snapshot_fops);
8114
8115         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8116                                 tr, cpu, &snapshot_raw_fops);
8117 #endif
8118 }
8119
8120 #ifdef CONFIG_FTRACE_SELFTEST
8121 /* Let selftest have access to static functions in this file */
8122 #include "trace_selftest.c"
8123 #endif
8124
8125 static ssize_t
8126 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8127                         loff_t *ppos)
8128 {
8129         struct trace_option_dentry *topt = filp->private_data;
8130         char *buf;
8131
8132         if (topt->flags->val & topt->opt->bit)
8133                 buf = "1\n";
8134         else
8135                 buf = "0\n";
8136
8137         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8138 }
8139
8140 static ssize_t
8141 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8142                          loff_t *ppos)
8143 {
8144         struct trace_option_dentry *topt = filp->private_data;
8145         unsigned long val;
8146         int ret;
8147
8148         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8149         if (ret)
8150                 return ret;
8151
8152         if (val != 0 && val != 1)
8153                 return -EINVAL;
8154
8155         if (!!(topt->flags->val & topt->opt->bit) != val) {
8156                 mutex_lock(&trace_types_lock);
8157                 ret = __set_tracer_option(topt->tr, topt->flags,
8158                                           topt->opt, !val);
8159                 mutex_unlock(&trace_types_lock);
8160                 if (ret)
8161                         return ret;
8162         }
8163
8164         *ppos += cnt;
8165
8166         return cnt;
8167 }
8168
8169
8170 static const struct file_operations trace_options_fops = {
8171         .open = tracing_open_generic,
8172         .read = trace_options_read,
8173         .write = trace_options_write,
8174         .llseek = generic_file_llseek,
8175 };
8176
8177 /*
8178  * In order to pass in both the trace_array descriptor as well as the index
8179  * to the flag that the trace option file represents, the trace_array
8180  * has a character array of trace_flags_index[], which holds the index
8181  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8182  * The address of this character array is passed to the flag option file
8183  * read/write callbacks.
8184  *
8185  * In order to extract both the index and the trace_array descriptor,
8186  * get_tr_index() uses the following algorithm.
8187  *
8188  *   idx = *ptr;
8189  *
8190  * As the pointer itself contains the address of the index (remember
8191  * index[1] == 1).
8192  *
8193  * Then to get the trace_array descriptor, by subtracting that index
8194  * from the ptr, we get to the start of the index itself.
8195  *
8196  *   ptr - idx == &index[0]
8197  *
8198  * Then a simple container_of() from that pointer gets us to the
8199  * trace_array descriptor.
8200  */
8201 static void get_tr_index(void *data, struct trace_array **ptr,
8202                          unsigned int *pindex)
8203 {
8204         *pindex = *(unsigned char *)data;
8205
8206         *ptr = container_of(data - *pindex, struct trace_array,
8207                             trace_flags_index);
8208 }
8209
8210 static ssize_t
8211 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8212                         loff_t *ppos)
8213 {
8214         void *tr_index = filp->private_data;
8215         struct trace_array *tr;
8216         unsigned int index;
8217         char *buf;
8218
8219         get_tr_index(tr_index, &tr, &index);
8220
8221         if (tr->trace_flags & (1 << index))
8222                 buf = "1\n";
8223         else
8224                 buf = "0\n";
8225
8226         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8227 }
8228
8229 static ssize_t
8230 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8231                          loff_t *ppos)
8232 {
8233         void *tr_index = filp->private_data;
8234         struct trace_array *tr;
8235         unsigned int index;
8236         unsigned long val;
8237         int ret;
8238
8239         get_tr_index(tr_index, &tr, &index);
8240
8241         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8242         if (ret)
8243                 return ret;
8244
8245         if (val != 0 && val != 1)
8246                 return -EINVAL;
8247
8248         mutex_lock(&event_mutex);
8249         mutex_lock(&trace_types_lock);
8250         ret = set_tracer_flag(tr, 1 << index, val);
8251         mutex_unlock(&trace_types_lock);
8252         mutex_unlock(&event_mutex);
8253
8254         if (ret < 0)
8255                 return ret;
8256
8257         *ppos += cnt;
8258
8259         return cnt;
8260 }
8261
8262 static const struct file_operations trace_options_core_fops = {
8263         .open = tracing_open_generic,
8264         .read = trace_options_core_read,
8265         .write = trace_options_core_write,
8266         .llseek = generic_file_llseek,
8267 };
8268
8269 struct dentry *trace_create_file(const char *name,
8270                                  umode_t mode,
8271                                  struct dentry *parent,
8272                                  void *data,
8273                                  const struct file_operations *fops)
8274 {
8275         struct dentry *ret;
8276
8277         ret = tracefs_create_file(name, mode, parent, data, fops);
8278         if (!ret)
8279                 pr_warn("Could not create tracefs '%s' entry\n", name);
8280
8281         return ret;
8282 }
8283
8284
8285 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8286 {
8287         struct dentry *d_tracer;
8288
8289         if (tr->options)
8290                 return tr->options;
8291
8292         d_tracer = tracing_get_dentry(tr);
8293         if (IS_ERR(d_tracer))
8294                 return NULL;
8295
8296         tr->options = tracefs_create_dir("options", d_tracer);
8297         if (!tr->options) {
8298                 pr_warn("Could not create tracefs directory 'options'\n");
8299                 return NULL;
8300         }
8301
8302         return tr->options;
8303 }
8304
8305 static void
8306 create_trace_option_file(struct trace_array *tr,
8307                          struct trace_option_dentry *topt,
8308                          struct tracer_flags *flags,
8309                          struct tracer_opt *opt)
8310 {
8311         struct dentry *t_options;
8312
8313         t_options = trace_options_init_dentry(tr);
8314         if (!t_options)
8315                 return;
8316
8317         topt->flags = flags;
8318         topt->opt = opt;
8319         topt->tr = tr;
8320
8321         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8322                                     &trace_options_fops);
8323
8324 }
8325
8326 static void
8327 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8328 {
8329         struct trace_option_dentry *topts;
8330         struct trace_options *tr_topts;
8331         struct tracer_flags *flags;
8332         struct tracer_opt *opts;
8333         int cnt;
8334         int i;
8335
8336         if (!tracer)
8337                 return;
8338
8339         flags = tracer->flags;
8340
8341         if (!flags || !flags->opts)
8342                 return;
8343
8344         /*
8345          * If this is an instance, only create flags for tracers
8346          * the instance may have.
8347          */
8348         if (!trace_ok_for_array(tracer, tr))
8349                 return;
8350
8351         for (i = 0; i < tr->nr_topts; i++) {
8352                 /* Make sure there's no duplicate flags. */
8353                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8354                         return;
8355         }
8356
8357         opts = flags->opts;
8358
8359         for (cnt = 0; opts[cnt].name; cnt++)
8360                 ;
8361
8362         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8363         if (!topts)
8364                 return;
8365
8366         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8367                             GFP_KERNEL);
8368         if (!tr_topts) {
8369                 kfree(topts);
8370                 return;
8371         }
8372
8373         tr->topts = tr_topts;
8374         tr->topts[tr->nr_topts].tracer = tracer;
8375         tr->topts[tr->nr_topts].topts = topts;
8376         tr->nr_topts++;
8377
8378         for (cnt = 0; opts[cnt].name; cnt++) {
8379                 create_trace_option_file(tr, &topts[cnt], flags,
8380                                          &opts[cnt]);
8381                 MEM_FAIL(topts[cnt].entry == NULL,
8382                           "Failed to create trace option: %s",
8383                           opts[cnt].name);
8384         }
8385 }
8386
8387 static struct dentry *
8388 create_trace_option_core_file(struct trace_array *tr,
8389                               const char *option, long index)
8390 {
8391         struct dentry *t_options;
8392
8393         t_options = trace_options_init_dentry(tr);
8394         if (!t_options)
8395                 return NULL;
8396
8397         return trace_create_file(option, 0644, t_options,
8398                                  (void *)&tr->trace_flags_index[index],
8399                                  &trace_options_core_fops);
8400 }
8401
8402 static void create_trace_options_dir(struct trace_array *tr)
8403 {
8404         struct dentry *t_options;
8405         bool top_level = tr == &global_trace;
8406         int i;
8407
8408         t_options = trace_options_init_dentry(tr);
8409         if (!t_options)
8410                 return;
8411
8412         for (i = 0; trace_options[i]; i++) {
8413                 if (top_level ||
8414                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8415                         create_trace_option_core_file(tr, trace_options[i], i);
8416         }
8417 }
8418
8419 static ssize_t
8420 rb_simple_read(struct file *filp, char __user *ubuf,
8421                size_t cnt, loff_t *ppos)
8422 {
8423         struct trace_array *tr = filp->private_data;
8424         char buf[64];
8425         int r;
8426
8427         r = tracer_tracing_is_on(tr);
8428         r = sprintf(buf, "%d\n", r);
8429
8430         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8431 }
8432
8433 static ssize_t
8434 rb_simple_write(struct file *filp, const char __user *ubuf,
8435                 size_t cnt, loff_t *ppos)
8436 {
8437         struct trace_array *tr = filp->private_data;
8438         struct trace_buffer *buffer = tr->array_buffer.buffer;
8439         unsigned long val;
8440         int ret;
8441
8442         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8443         if (ret)
8444                 return ret;
8445
8446         if (buffer) {
8447                 mutex_lock(&trace_types_lock);
8448                 if (!!val == tracer_tracing_is_on(tr)) {
8449                         val = 0; /* do nothing */
8450                 } else if (val) {
8451                         tracer_tracing_on(tr);
8452                         if (tr->current_trace->start)
8453                                 tr->current_trace->start(tr);
8454                 } else {
8455                         tracer_tracing_off(tr);
8456                         if (tr->current_trace->stop)
8457                                 tr->current_trace->stop(tr);
8458                 }
8459                 mutex_unlock(&trace_types_lock);
8460         }
8461
8462         (*ppos)++;
8463
8464         return cnt;
8465 }
8466
8467 static const struct file_operations rb_simple_fops = {
8468         .open           = tracing_open_generic_tr,
8469         .read           = rb_simple_read,
8470         .write          = rb_simple_write,
8471         .release        = tracing_release_generic_tr,
8472         .llseek         = default_llseek,
8473 };
8474
8475 static ssize_t
8476 buffer_percent_read(struct file *filp, char __user *ubuf,
8477                     size_t cnt, loff_t *ppos)
8478 {
8479         struct trace_array *tr = filp->private_data;
8480         char buf[64];
8481         int r;
8482
8483         r = tr->buffer_percent;
8484         r = sprintf(buf, "%d\n", r);
8485
8486         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8487 }
8488
8489 static ssize_t
8490 buffer_percent_write(struct file *filp, const char __user *ubuf,
8491                      size_t cnt, loff_t *ppos)
8492 {
8493         struct trace_array *tr = filp->private_data;
8494         unsigned long val;
8495         int ret;
8496
8497         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8498         if (ret)
8499                 return ret;
8500
8501         if (val > 100)
8502                 return -EINVAL;
8503
8504         if (!val)
8505                 val = 1;
8506
8507         tr->buffer_percent = val;
8508
8509         (*ppos)++;
8510
8511         return cnt;
8512 }
8513
8514 static const struct file_operations buffer_percent_fops = {
8515         .open           = tracing_open_generic_tr,
8516         .read           = buffer_percent_read,
8517         .write          = buffer_percent_write,
8518         .release        = tracing_release_generic_tr,
8519         .llseek         = default_llseek,
8520 };
8521
8522 static struct dentry *trace_instance_dir;
8523
8524 static void
8525 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8526
8527 static int
8528 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8529 {
8530         enum ring_buffer_flags rb_flags;
8531
8532         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8533
8534         buf->tr = tr;
8535
8536         buf->buffer = ring_buffer_alloc(size, rb_flags);
8537         if (!buf->buffer)
8538                 return -ENOMEM;
8539
8540         buf->data = alloc_percpu(struct trace_array_cpu);
8541         if (!buf->data) {
8542                 ring_buffer_free(buf->buffer);
8543                 buf->buffer = NULL;
8544                 return -ENOMEM;
8545         }
8546
8547         /* Allocate the first page for all buffers */
8548         set_buffer_entries(&tr->array_buffer,
8549                            ring_buffer_size(tr->array_buffer.buffer, 0));
8550
8551         return 0;
8552 }
8553
8554 static int allocate_trace_buffers(struct trace_array *tr, int size)
8555 {
8556         int ret;
8557
8558         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8559         if (ret)
8560                 return ret;
8561
8562 #ifdef CONFIG_TRACER_MAX_TRACE
8563         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8564                                     allocate_snapshot ? size : 1);
8565         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8566                 ring_buffer_free(tr->array_buffer.buffer);
8567                 tr->array_buffer.buffer = NULL;
8568                 free_percpu(tr->array_buffer.data);
8569                 tr->array_buffer.data = NULL;
8570                 return -ENOMEM;
8571         }
8572         tr->allocated_snapshot = allocate_snapshot;
8573
8574         /*
8575          * Only the top level trace array gets its snapshot allocated
8576          * from the kernel command line.
8577          */
8578         allocate_snapshot = false;
8579 #endif
8580
8581         return 0;
8582 }
8583
8584 static void free_trace_buffer(struct array_buffer *buf)
8585 {
8586         if (buf->buffer) {
8587                 ring_buffer_free(buf->buffer);
8588                 buf->buffer = NULL;
8589                 free_percpu(buf->data);
8590                 buf->data = NULL;
8591         }
8592 }
8593
8594 static void free_trace_buffers(struct trace_array *tr)
8595 {
8596         if (!tr)
8597                 return;
8598
8599         free_trace_buffer(&tr->array_buffer);
8600
8601 #ifdef CONFIG_TRACER_MAX_TRACE
8602         free_trace_buffer(&tr->max_buffer);
8603 #endif
8604 }
8605
8606 static void init_trace_flags_index(struct trace_array *tr)
8607 {
8608         int i;
8609
8610         /* Used by the trace options files */
8611         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8612                 tr->trace_flags_index[i] = i;
8613 }
8614
8615 static void __update_tracer_options(struct trace_array *tr)
8616 {
8617         struct tracer *t;
8618
8619         for (t = trace_types; t; t = t->next)
8620                 add_tracer_options(tr, t);
8621 }
8622
8623 static void update_tracer_options(struct trace_array *tr)
8624 {
8625         mutex_lock(&trace_types_lock);
8626         __update_tracer_options(tr);
8627         mutex_unlock(&trace_types_lock);
8628 }
8629
8630 /* Must have trace_types_lock held */
8631 struct trace_array *trace_array_find(const char *instance)
8632 {
8633         struct trace_array *tr, *found = NULL;
8634
8635         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8636                 if (tr->name && strcmp(tr->name, instance) == 0) {
8637                         found = tr;
8638                         break;
8639                 }
8640         }
8641
8642         return found;
8643 }
8644
8645 struct trace_array *trace_array_find_get(const char *instance)
8646 {
8647         struct trace_array *tr;
8648
8649         mutex_lock(&trace_types_lock);
8650         tr = trace_array_find(instance);
8651         if (tr)
8652                 tr->ref++;
8653         mutex_unlock(&trace_types_lock);
8654
8655         return tr;
8656 }
8657
8658 static int trace_array_create_dir(struct trace_array *tr)
8659 {
8660         int ret;
8661
8662         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8663         if (!tr->dir)
8664                 return -EINVAL;
8665
8666         ret = event_trace_add_tracer(tr->dir, tr);
8667         if (ret)
8668                 tracefs_remove(tr->dir);
8669
8670         init_tracer_tracefs(tr, tr->dir);
8671         __update_tracer_options(tr);
8672
8673         return ret;
8674 }
8675
8676 static struct trace_array *trace_array_create(const char *name)
8677 {
8678         struct trace_array *tr;
8679         int ret;
8680
8681         ret = -ENOMEM;
8682         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8683         if (!tr)
8684                 return ERR_PTR(ret);
8685
8686         tr->name = kstrdup(name, GFP_KERNEL);
8687         if (!tr->name)
8688                 goto out_free_tr;
8689
8690         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8691                 goto out_free_tr;
8692
8693         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8694
8695         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8696
8697         raw_spin_lock_init(&tr->start_lock);
8698
8699         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8700
8701         tr->current_trace = &nop_trace;
8702
8703         INIT_LIST_HEAD(&tr->systems);
8704         INIT_LIST_HEAD(&tr->events);
8705         INIT_LIST_HEAD(&tr->hist_vars);
8706         INIT_LIST_HEAD(&tr->err_log);
8707
8708         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8709                 goto out_free_tr;
8710
8711         if (ftrace_allocate_ftrace_ops(tr) < 0)
8712                 goto out_free_tr;
8713
8714         ftrace_init_trace_array(tr);
8715
8716         init_trace_flags_index(tr);
8717
8718         if (trace_instance_dir) {
8719                 ret = trace_array_create_dir(tr);
8720                 if (ret)
8721                         goto out_free_tr;
8722         } else
8723                 __trace_early_add_events(tr);
8724
8725         list_add(&tr->list, &ftrace_trace_arrays);
8726
8727         tr->ref++;
8728
8729         return tr;
8730
8731  out_free_tr:
8732         ftrace_free_ftrace_ops(tr);
8733         free_trace_buffers(tr);
8734         free_cpumask_var(tr->tracing_cpumask);
8735         kfree(tr->name);
8736         kfree(tr);
8737
8738         return ERR_PTR(ret);
8739 }
8740
8741 static int instance_mkdir(const char *name)
8742 {
8743         struct trace_array *tr;
8744         int ret;
8745
8746         mutex_lock(&event_mutex);
8747         mutex_lock(&trace_types_lock);
8748
8749         ret = -EEXIST;
8750         if (trace_array_find(name))
8751                 goto out_unlock;
8752
8753         tr = trace_array_create(name);
8754
8755         ret = PTR_ERR_OR_ZERO(tr);
8756
8757 out_unlock:
8758         mutex_unlock(&trace_types_lock);
8759         mutex_unlock(&event_mutex);
8760         return ret;
8761 }
8762
8763 /**
8764  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8765  * @name: The name of the trace array to be looked up/created.
8766  *
8767  * Returns pointer to trace array with given name.
8768  * NULL, if it cannot be created.
8769  *
8770  * NOTE: This function increments the reference counter associated with the
8771  * trace array returned. This makes sure it cannot be freed while in use.
8772  * Use trace_array_put() once the trace array is no longer needed.
8773  * If the trace_array is to be freed, trace_array_destroy() needs to
8774  * be called after the trace_array_put(), or simply let user space delete
8775  * it from the tracefs instances directory. But until the
8776  * trace_array_put() is called, user space can not delete it.
8777  *
8778  */
8779 struct trace_array *trace_array_get_by_name(const char *name)
8780 {
8781         struct trace_array *tr;
8782
8783         mutex_lock(&event_mutex);
8784         mutex_lock(&trace_types_lock);
8785
8786         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8787                 if (tr->name && strcmp(tr->name, name) == 0)
8788                         goto out_unlock;
8789         }
8790
8791         tr = trace_array_create(name);
8792
8793         if (IS_ERR(tr))
8794                 tr = NULL;
8795 out_unlock:
8796         if (tr)
8797                 tr->ref++;
8798
8799         mutex_unlock(&trace_types_lock);
8800         mutex_unlock(&event_mutex);
8801         return tr;
8802 }
8803 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8804
8805 static int __remove_instance(struct trace_array *tr)
8806 {
8807         int i;
8808
8809         /* Reference counter for a newly created trace array = 1. */
8810         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8811                 return -EBUSY;
8812
8813         list_del(&tr->list);
8814
8815         /* Disable all the flags that were enabled coming in */
8816         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8817                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8818                         set_tracer_flag(tr, 1 << i, 0);
8819         }
8820
8821         tracing_set_nop(tr);
8822         clear_ftrace_function_probes(tr);
8823         event_trace_del_tracer(tr);
8824         ftrace_clear_pids(tr);
8825         ftrace_destroy_function_files(tr);
8826         tracefs_remove(tr->dir);
8827         free_trace_buffers(tr);
8828
8829         for (i = 0; i < tr->nr_topts; i++) {
8830                 kfree(tr->topts[i].topts);
8831         }
8832         kfree(tr->topts);
8833
8834         free_cpumask_var(tr->tracing_cpumask);
8835         kfree(tr->name);
8836         kfree(tr);
8837
8838         return 0;
8839 }
8840
8841 int trace_array_destroy(struct trace_array *this_tr)
8842 {
8843         struct trace_array *tr;
8844         int ret;
8845
8846         if (!this_tr)
8847                 return -EINVAL;
8848
8849         mutex_lock(&event_mutex);
8850         mutex_lock(&trace_types_lock);
8851
8852         ret = -ENODEV;
8853
8854         /* Making sure trace array exists before destroying it. */
8855         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8856                 if (tr == this_tr) {
8857                         ret = __remove_instance(tr);
8858                         break;
8859                 }
8860         }
8861
8862         mutex_unlock(&trace_types_lock);
8863         mutex_unlock(&event_mutex);
8864
8865         return ret;
8866 }
8867 EXPORT_SYMBOL_GPL(trace_array_destroy);
8868
8869 static int instance_rmdir(const char *name)
8870 {
8871         struct trace_array *tr;
8872         int ret;
8873
8874         mutex_lock(&event_mutex);
8875         mutex_lock(&trace_types_lock);
8876
8877         ret = -ENODEV;
8878         tr = trace_array_find(name);
8879         if (tr)
8880                 ret = __remove_instance(tr);
8881
8882         mutex_unlock(&trace_types_lock);
8883         mutex_unlock(&event_mutex);
8884
8885         return ret;
8886 }
8887
8888 static __init void create_trace_instances(struct dentry *d_tracer)
8889 {
8890         struct trace_array *tr;
8891
8892         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8893                                                          instance_mkdir,
8894                                                          instance_rmdir);
8895         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8896                 return;
8897
8898         mutex_lock(&event_mutex);
8899         mutex_lock(&trace_types_lock);
8900
8901         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8902                 if (!tr->name)
8903                         continue;
8904                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8905                              "Failed to create instance directory\n"))
8906                         break;
8907         }
8908
8909         mutex_unlock(&trace_types_lock);
8910         mutex_unlock(&event_mutex);
8911 }
8912
8913 static void
8914 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8915 {
8916         struct trace_event_file *file;
8917         int cpu;
8918
8919         trace_create_file("available_tracers", 0444, d_tracer,
8920                         tr, &show_traces_fops);
8921
8922         trace_create_file("current_tracer", 0644, d_tracer,
8923                         tr, &set_tracer_fops);
8924
8925         trace_create_file("tracing_cpumask", 0644, d_tracer,
8926                           tr, &tracing_cpumask_fops);
8927
8928         trace_create_file("trace_options", 0644, d_tracer,
8929                           tr, &tracing_iter_fops);
8930
8931         trace_create_file("trace", 0644, d_tracer,
8932                           tr, &tracing_fops);
8933
8934         trace_create_file("trace_pipe", 0444, d_tracer,
8935                           tr, &tracing_pipe_fops);
8936
8937         trace_create_file("buffer_size_kb", 0644, d_tracer,
8938                           tr, &tracing_entries_fops);
8939
8940         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8941                           tr, &tracing_total_entries_fops);
8942
8943         trace_create_file("free_buffer", 0200, d_tracer,
8944                           tr, &tracing_free_buffer_fops);
8945
8946         trace_create_file("trace_marker", 0220, d_tracer,
8947                           tr, &tracing_mark_fops);
8948
8949         file = __find_event_file(tr, "ftrace", "print");
8950         if (file && file->dir)
8951                 trace_create_file("trigger", 0644, file->dir, file,
8952                                   &event_trigger_fops);
8953         tr->trace_marker_file = file;
8954
8955         trace_create_file("trace_marker_raw", 0220, d_tracer,
8956                           tr, &tracing_mark_raw_fops);
8957
8958         trace_create_file("trace_clock", 0644, d_tracer, tr,
8959                           &trace_clock_fops);
8960
8961         trace_create_file("tracing_on", 0644, d_tracer,
8962                           tr, &rb_simple_fops);
8963
8964         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8965                           &trace_time_stamp_mode_fops);
8966
8967         tr->buffer_percent = 50;
8968
8969         trace_create_file("buffer_percent", 0444, d_tracer,
8970                         tr, &buffer_percent_fops);
8971
8972         create_trace_options_dir(tr);
8973
8974 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8975         trace_create_maxlat_file(tr, d_tracer);
8976 #endif
8977
8978         if (ftrace_create_function_files(tr, d_tracer))
8979                 MEM_FAIL(1, "Could not allocate function filter files");
8980
8981 #ifdef CONFIG_TRACER_SNAPSHOT
8982         trace_create_file("snapshot", 0644, d_tracer,
8983                           tr, &snapshot_fops);
8984 #endif
8985
8986         trace_create_file("error_log", 0644, d_tracer,
8987                           tr, &tracing_err_log_fops);
8988
8989         for_each_tracing_cpu(cpu)
8990                 tracing_init_tracefs_percpu(tr, cpu);
8991
8992         ftrace_init_tracefs(tr, d_tracer);
8993 }
8994
8995 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8996 {
8997         struct vfsmount *mnt;
8998         struct file_system_type *type;
8999
9000         /*
9001          * To maintain backward compatibility for tools that mount
9002          * debugfs to get to the tracing facility, tracefs is automatically
9003          * mounted to the debugfs/tracing directory.
9004          */
9005         type = get_fs_type("tracefs");
9006         if (!type)
9007                 return NULL;
9008         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9009         put_filesystem(type);
9010         if (IS_ERR(mnt))
9011                 return NULL;
9012         mntget(mnt);
9013
9014         return mnt;
9015 }
9016
9017 /**
9018  * tracing_init_dentry - initialize top level trace array
9019  *
9020  * This is called when creating files or directories in the tracing
9021  * directory. It is called via fs_initcall() by any of the boot up code
9022  * and expects to return the dentry of the top level tracing directory.
9023  */
9024 int tracing_init_dentry(void)
9025 {
9026         struct trace_array *tr = &global_trace;
9027
9028         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9029                 pr_warn("Tracing disabled due to lockdown\n");
9030                 return -EPERM;
9031         }
9032
9033         /* The top level trace array uses  NULL as parent */
9034         if (tr->dir)
9035                 return 0;
9036
9037         if (WARN_ON(!tracefs_initialized()))
9038                 return -ENODEV;
9039
9040         /*
9041          * As there may still be users that expect the tracing
9042          * files to exist in debugfs/tracing, we must automount
9043          * the tracefs file system there, so older tools still
9044          * work with the newer kerenl.
9045          */
9046         tr->dir = debugfs_create_automount("tracing", NULL,
9047                                            trace_automount, NULL);
9048
9049         return 0;
9050 }
9051
9052 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9053 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9054
9055 static struct workqueue_struct *eval_map_wq __initdata;
9056 static struct work_struct eval_map_work __initdata;
9057
9058 static void __init eval_map_work_func(struct work_struct *work)
9059 {
9060         int len;
9061
9062         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9063         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9064 }
9065
9066 static int __init trace_eval_init(void)
9067 {
9068         INIT_WORK(&eval_map_work, eval_map_work_func);
9069
9070         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9071         if (!eval_map_wq) {
9072                 pr_err("Unable to allocate eval_map_wq\n");
9073                 /* Do work here */
9074                 eval_map_work_func(&eval_map_work);
9075                 return -ENOMEM;
9076         }
9077
9078         queue_work(eval_map_wq, &eval_map_work);
9079         return 0;
9080 }
9081
9082 static int __init trace_eval_sync(void)
9083 {
9084         /* Make sure the eval map updates are finished */
9085         if (eval_map_wq)
9086                 destroy_workqueue(eval_map_wq);
9087         return 0;
9088 }
9089
9090 late_initcall_sync(trace_eval_sync);
9091
9092
9093 #ifdef CONFIG_MODULES
9094 static void trace_module_add_evals(struct module *mod)
9095 {
9096         if (!mod->num_trace_evals)
9097                 return;
9098
9099         /*
9100          * Modules with bad taint do not have events created, do
9101          * not bother with enums either.
9102          */
9103         if (trace_module_has_bad_taint(mod))
9104                 return;
9105
9106         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9107 }
9108
9109 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9110 static void trace_module_remove_evals(struct module *mod)
9111 {
9112         union trace_eval_map_item *map;
9113         union trace_eval_map_item **last = &trace_eval_maps;
9114
9115         if (!mod->num_trace_evals)
9116                 return;
9117
9118         mutex_lock(&trace_eval_mutex);
9119
9120         map = trace_eval_maps;
9121
9122         while (map) {
9123                 if (map->head.mod == mod)
9124                         break;
9125                 map = trace_eval_jmp_to_tail(map);
9126                 last = &map->tail.next;
9127                 map = map->tail.next;
9128         }
9129         if (!map)
9130                 goto out;
9131
9132         *last = trace_eval_jmp_to_tail(map)->tail.next;
9133         kfree(map);
9134  out:
9135         mutex_unlock(&trace_eval_mutex);
9136 }
9137 #else
9138 static inline void trace_module_remove_evals(struct module *mod) { }
9139 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9140
9141 static int trace_module_notify(struct notifier_block *self,
9142                                unsigned long val, void *data)
9143 {
9144         struct module *mod = data;
9145
9146         switch (val) {
9147         case MODULE_STATE_COMING:
9148                 trace_module_add_evals(mod);
9149                 break;
9150         case MODULE_STATE_GOING:
9151                 trace_module_remove_evals(mod);
9152                 break;
9153         }
9154
9155         return NOTIFY_OK;
9156 }
9157
9158 static struct notifier_block trace_module_nb = {
9159         .notifier_call = trace_module_notify,
9160         .priority = 0,
9161 };
9162 #endif /* CONFIG_MODULES */
9163
9164 static __init int tracer_init_tracefs(void)
9165 {
9166         int ret;
9167
9168         trace_access_lock_init();
9169
9170         ret = tracing_init_dentry();
9171         if (ret)
9172                 return 0;
9173
9174         event_trace_init();
9175
9176         init_tracer_tracefs(&global_trace, NULL);
9177         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9178
9179         trace_create_file("tracing_thresh", 0644, NULL,
9180                         &global_trace, &tracing_thresh_fops);
9181
9182         trace_create_file("README", 0444, NULL,
9183                         NULL, &tracing_readme_fops);
9184
9185         trace_create_file("saved_cmdlines", 0444, NULL,
9186                         NULL, &tracing_saved_cmdlines_fops);
9187
9188         trace_create_file("saved_cmdlines_size", 0644, NULL,
9189                           NULL, &tracing_saved_cmdlines_size_fops);
9190
9191         trace_create_file("saved_tgids", 0444, NULL,
9192                         NULL, &tracing_saved_tgids_fops);
9193
9194         trace_eval_init();
9195
9196         trace_create_eval_file(NULL);
9197
9198 #ifdef CONFIG_MODULES
9199         register_module_notifier(&trace_module_nb);
9200 #endif
9201
9202 #ifdef CONFIG_DYNAMIC_FTRACE
9203         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9204                         NULL, &tracing_dyn_info_fops);
9205 #endif
9206
9207         create_trace_instances(NULL);
9208
9209         update_tracer_options(&global_trace);
9210
9211         return 0;
9212 }
9213
9214 static int trace_panic_handler(struct notifier_block *this,
9215                                unsigned long event, void *unused)
9216 {
9217         if (ftrace_dump_on_oops)
9218                 ftrace_dump(ftrace_dump_on_oops);
9219         return NOTIFY_OK;
9220 }
9221
9222 static struct notifier_block trace_panic_notifier = {
9223         .notifier_call  = trace_panic_handler,
9224         .next           = NULL,
9225         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9226 };
9227
9228 static int trace_die_handler(struct notifier_block *self,
9229                              unsigned long val,
9230                              void *data)
9231 {
9232         switch (val) {
9233         case DIE_OOPS:
9234                 if (ftrace_dump_on_oops)
9235                         ftrace_dump(ftrace_dump_on_oops);
9236                 break;
9237         default:
9238                 break;
9239         }
9240         return NOTIFY_OK;
9241 }
9242
9243 static struct notifier_block trace_die_notifier = {
9244         .notifier_call = trace_die_handler,
9245         .priority = 200
9246 };
9247
9248 /*
9249  * printk is set to max of 1024, we really don't need it that big.
9250  * Nothing should be printing 1000 characters anyway.
9251  */
9252 #define TRACE_MAX_PRINT         1000
9253
9254 /*
9255  * Define here KERN_TRACE so that we have one place to modify
9256  * it if we decide to change what log level the ftrace dump
9257  * should be at.
9258  */
9259 #define KERN_TRACE              KERN_EMERG
9260
9261 void
9262 trace_printk_seq(struct trace_seq *s)
9263 {
9264         /* Probably should print a warning here. */
9265         if (s->seq.len >= TRACE_MAX_PRINT)
9266                 s->seq.len = TRACE_MAX_PRINT;
9267
9268         /*
9269          * More paranoid code. Although the buffer size is set to
9270          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9271          * an extra layer of protection.
9272          */
9273         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9274                 s->seq.len = s->seq.size - 1;
9275
9276         /* should be zero ended, but we are paranoid. */
9277         s->buffer[s->seq.len] = 0;
9278
9279         printk(KERN_TRACE "%s", s->buffer);
9280
9281         trace_seq_init(s);
9282 }
9283
9284 void trace_init_global_iter(struct trace_iterator *iter)
9285 {
9286         iter->tr = &global_trace;
9287         iter->trace = iter->tr->current_trace;
9288         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9289         iter->array_buffer = &global_trace.array_buffer;
9290
9291         if (iter->trace && iter->trace->open)
9292                 iter->trace->open(iter);
9293
9294         /* Annotate start of buffers if we had overruns */
9295         if (ring_buffer_overruns(iter->array_buffer->buffer))
9296                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9297
9298         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9299         if (trace_clocks[iter->tr->clock_id].in_ns)
9300                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9301 }
9302
9303 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9304 {
9305         /* use static because iter can be a bit big for the stack */
9306         static struct trace_iterator iter;
9307         static atomic_t dump_running;
9308         struct trace_array *tr = &global_trace;
9309         unsigned int old_userobj;
9310         unsigned long flags;
9311         int cnt = 0, cpu;
9312
9313         /* Only allow one dump user at a time. */
9314         if (atomic_inc_return(&dump_running) != 1) {
9315                 atomic_dec(&dump_running);
9316                 return;
9317         }
9318
9319         /*
9320          * Always turn off tracing when we dump.
9321          * We don't need to show trace output of what happens
9322          * between multiple crashes.
9323          *
9324          * If the user does a sysrq-z, then they can re-enable
9325          * tracing with echo 1 > tracing_on.
9326          */
9327         tracing_off();
9328
9329         local_irq_save(flags);
9330         printk_nmi_direct_enter();
9331
9332         /* Simulate the iterator */
9333         trace_init_global_iter(&iter);
9334         /* Can not use kmalloc for iter.temp */
9335         iter.temp = static_temp_buf;
9336         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9337
9338         for_each_tracing_cpu(cpu) {
9339                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9340         }
9341
9342         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9343
9344         /* don't look at user memory in panic mode */
9345         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9346
9347         switch (oops_dump_mode) {
9348         case DUMP_ALL:
9349                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9350                 break;
9351         case DUMP_ORIG:
9352                 iter.cpu_file = raw_smp_processor_id();
9353                 break;
9354         case DUMP_NONE:
9355                 goto out_enable;
9356         default:
9357                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9358                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9359         }
9360
9361         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9362
9363         /* Did function tracer already get disabled? */
9364         if (ftrace_is_dead()) {
9365                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9366                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9367         }
9368
9369         /*
9370          * We need to stop all tracing on all CPUS to read
9371          * the next buffer. This is a bit expensive, but is
9372          * not done often. We fill all what we can read,
9373          * and then release the locks again.
9374          */
9375
9376         while (!trace_empty(&iter)) {
9377
9378                 if (!cnt)
9379                         printk(KERN_TRACE "---------------------------------\n");
9380
9381                 cnt++;
9382
9383                 trace_iterator_reset(&iter);
9384                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9385
9386                 if (trace_find_next_entry_inc(&iter) != NULL) {
9387                         int ret;
9388
9389                         ret = print_trace_line(&iter);
9390                         if (ret != TRACE_TYPE_NO_CONSUME)
9391                                 trace_consume(&iter);
9392                 }
9393                 touch_nmi_watchdog();
9394
9395                 trace_printk_seq(&iter.seq);
9396         }
9397
9398         if (!cnt)
9399                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9400         else
9401                 printk(KERN_TRACE "---------------------------------\n");
9402
9403  out_enable:
9404         tr->trace_flags |= old_userobj;
9405
9406         for_each_tracing_cpu(cpu) {
9407                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9408         }
9409         atomic_dec(&dump_running);
9410         printk_nmi_direct_exit();
9411         local_irq_restore(flags);
9412 }
9413 EXPORT_SYMBOL_GPL(ftrace_dump);
9414
9415 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9416 {
9417         char **argv;
9418         int argc, ret;
9419
9420         argc = 0;
9421         ret = 0;
9422         argv = argv_split(GFP_KERNEL, buf, &argc);
9423         if (!argv)
9424                 return -ENOMEM;
9425
9426         if (argc)
9427                 ret = createfn(argc, argv);
9428
9429         argv_free(argv);
9430
9431         return ret;
9432 }
9433
9434 #define WRITE_BUFSIZE  4096
9435
9436 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9437                                 size_t count, loff_t *ppos,
9438                                 int (*createfn)(int, char **))
9439 {
9440         char *kbuf, *buf, *tmp;
9441         int ret = 0;
9442         size_t done = 0;
9443         size_t size;
9444
9445         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9446         if (!kbuf)
9447                 return -ENOMEM;
9448
9449         while (done < count) {
9450                 size = count - done;
9451
9452                 if (size >= WRITE_BUFSIZE)
9453                         size = WRITE_BUFSIZE - 1;
9454
9455                 if (copy_from_user(kbuf, buffer + done, size)) {
9456                         ret = -EFAULT;
9457                         goto out;
9458                 }
9459                 kbuf[size] = '\0';
9460                 buf = kbuf;
9461                 do {
9462                         tmp = strchr(buf, '\n');
9463                         if (tmp) {
9464                                 *tmp = '\0';
9465                                 size = tmp - buf + 1;
9466                         } else {
9467                                 size = strlen(buf);
9468                                 if (done + size < count) {
9469                                         if (buf != kbuf)
9470                                                 break;
9471                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9472                                         pr_warn("Line length is too long: Should be less than %d\n",
9473                                                 WRITE_BUFSIZE - 2);
9474                                         ret = -EINVAL;
9475                                         goto out;
9476                                 }
9477                         }
9478                         done += size;
9479
9480                         /* Remove comments */
9481                         tmp = strchr(buf, '#');
9482
9483                         if (tmp)
9484                                 *tmp = '\0';
9485
9486                         ret = trace_run_command(buf, createfn);
9487                         if (ret)
9488                                 goto out;
9489                         buf += size;
9490
9491                 } while (done < count);
9492         }
9493         ret = done;
9494
9495 out:
9496         kfree(kbuf);
9497
9498         return ret;
9499 }
9500
9501 __init static int tracer_alloc_buffers(void)
9502 {
9503         int ring_buf_size;
9504         int ret = -ENOMEM;
9505
9506
9507         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9508                 pr_warn("Tracing disabled due to lockdown\n");
9509                 return -EPERM;
9510         }
9511
9512         /*
9513          * Make sure we don't accidentally add more trace options
9514          * than we have bits for.
9515          */
9516         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9517
9518         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9519                 goto out;
9520
9521         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9522                 goto out_free_buffer_mask;
9523
9524         /* Only allocate trace_printk buffers if a trace_printk exists */
9525         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9526                 /* Must be called before global_trace.buffer is allocated */
9527                 trace_printk_init_buffers();
9528
9529         /* To save memory, keep the ring buffer size to its minimum */
9530         if (ring_buffer_expanded)
9531                 ring_buf_size = trace_buf_size;
9532         else
9533                 ring_buf_size = 1;
9534
9535         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9536         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9537
9538         raw_spin_lock_init(&global_trace.start_lock);
9539
9540         /*
9541          * The prepare callbacks allocates some memory for the ring buffer. We
9542          * don't free the buffer if the CPU goes down. If we were to free
9543          * the buffer, then the user would lose any trace that was in the
9544          * buffer. The memory will be removed once the "instance" is removed.
9545          */
9546         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9547                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9548                                       NULL);
9549         if (ret < 0)
9550                 goto out_free_cpumask;
9551         /* Used for event triggers */
9552         ret = -ENOMEM;
9553         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9554         if (!temp_buffer)
9555                 goto out_rm_hp_state;
9556
9557         if (trace_create_savedcmd() < 0)
9558                 goto out_free_temp_buffer;
9559
9560         /* TODO: make the number of buffers hot pluggable with CPUS */
9561         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9562                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9563                 goto out_free_savedcmd;
9564         }
9565
9566         if (global_trace.buffer_disabled)
9567                 tracing_off();
9568
9569         if (trace_boot_clock) {
9570                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9571                 if (ret < 0)
9572                         pr_warn("Trace clock %s not defined, going back to default\n",
9573                                 trace_boot_clock);
9574         }
9575
9576         /*
9577          * register_tracer() might reference current_trace, so it
9578          * needs to be set before we register anything. This is
9579          * just a bootstrap of current_trace anyway.
9580          */
9581         global_trace.current_trace = &nop_trace;
9582
9583         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9584
9585         ftrace_init_global_array_ops(&global_trace);
9586
9587         init_trace_flags_index(&global_trace);
9588
9589         register_tracer(&nop_trace);
9590
9591         /* Function tracing may start here (via kernel command line) */
9592         init_function_trace();
9593
9594         /* All seems OK, enable tracing */
9595         tracing_disabled = 0;
9596
9597         atomic_notifier_chain_register(&panic_notifier_list,
9598                                        &trace_panic_notifier);
9599
9600         register_die_notifier(&trace_die_notifier);
9601
9602         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9603
9604         INIT_LIST_HEAD(&global_trace.systems);
9605         INIT_LIST_HEAD(&global_trace.events);
9606         INIT_LIST_HEAD(&global_trace.hist_vars);
9607         INIT_LIST_HEAD(&global_trace.err_log);
9608         list_add(&global_trace.list, &ftrace_trace_arrays);
9609
9610         apply_trace_boot_options();
9611
9612         register_snapshot_cmd();
9613
9614         return 0;
9615
9616 out_free_savedcmd:
9617         free_saved_cmdlines_buffer(savedcmd);
9618 out_free_temp_buffer:
9619         ring_buffer_free(temp_buffer);
9620 out_rm_hp_state:
9621         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9622 out_free_cpumask:
9623         free_cpumask_var(global_trace.tracing_cpumask);
9624 out_free_buffer_mask:
9625         free_cpumask_var(tracing_buffer_mask);
9626 out:
9627         return ret;
9628 }
9629
9630 void __init early_trace_init(void)
9631 {
9632         if (tracepoint_printk) {
9633                 tracepoint_print_iter =
9634                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9635                 if (MEM_FAIL(!tracepoint_print_iter,
9636                              "Failed to allocate trace iterator\n"))
9637                         tracepoint_printk = 0;
9638                 else
9639                         static_key_enable(&tracepoint_printk_key.key);
9640         }
9641         tracer_alloc_buffers();
9642 }
9643
9644 void __init trace_init(void)
9645 {
9646         trace_event_init();
9647 }
9648
9649 __init static int clear_boot_tracer(void)
9650 {
9651         /*
9652          * The default tracer at boot buffer is an init section.
9653          * This function is called in lateinit. If we did not
9654          * find the boot tracer, then clear it out, to prevent
9655          * later registration from accessing the buffer that is
9656          * about to be freed.
9657          */
9658         if (!default_bootup_tracer)
9659                 return 0;
9660
9661         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9662                default_bootup_tracer);
9663         default_bootup_tracer = NULL;
9664
9665         return 0;
9666 }
9667
9668 fs_initcall(tracer_init_tracefs);
9669 late_initcall_sync(clear_boot_tracer);
9670
9671 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9672 __init static int tracing_set_default_clock(void)
9673 {
9674         /* sched_clock_stable() is determined in late_initcall */
9675         if (!trace_boot_clock && !sched_clock_stable()) {
9676                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9677                         pr_warn("Can not set tracing clock due to lockdown\n");
9678                         return -EPERM;
9679                 }
9680
9681                 printk(KERN_WARNING
9682                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9683                        "If you want to keep using the local clock, then add:\n"
9684                        "  \"trace_clock=local\"\n"
9685                        "on the kernel command line\n");
9686                 tracing_set_clock(&global_trace, "global");
9687         }
9688
9689         return 0;
9690 }
9691 late_initcall_sync(tracing_set_default_clock);
9692 #endif