tracing: Have ftrace_dump_on_oops kernel parameter take numbers
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputing it to a
127  * serial console.
128  *
129  * It is default off, but you can enable it with either specifying
130  * "ftrace_dump_on_oops" in the kernel command line, or setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set 1 if you want to dump buffers of all CPUs
133  * Set 2 if you want to dump the buffer of the CPU that triggered oops
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187
188 static int __init set_cmdline_ftrace(char *str)
189 {
190         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191         default_bootup_tracer = bootup_tracer_buf;
192         /* We are using ftrace early, expand it */
193         ring_buffer_expanded = true;
194         return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200         if (*str++ != '=' || !*str || !strcmp("1", str)) {
201                 ftrace_dump_on_oops = DUMP_ALL;
202                 return 1;
203         }
204
205         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
206                 ftrace_dump_on_oops = DUMP_ORIG;
207                 return 1;
208         }
209
210         return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213
214 static int __init stop_trace_on_warning(char *str)
215 {
216         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217                 __disable_trace_on_warning = 1;
218         return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221
222 static int __init boot_alloc_snapshot(char *str)
223 {
224         allocate_snapshot = true;
225         /* We also need the main ring buffer expanded */
226         ring_buffer_expanded = true;
227         return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230
231
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233
234 static int __init set_trace_boot_options(char *str)
235 {
236         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237         return 0;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243
244 static int __init set_trace_boot_clock(char *str)
245 {
246         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247         trace_boot_clock = trace_boot_clock_buf;
248         return 0;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251
252 static int __init set_tracepoint_printk(char *str)
253 {
254         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
255                 tracepoint_printk = 1;
256         return 1;
257 }
258 __setup("tp_printk", set_tracepoint_printk);
259
260 static int __init set_tracepoint_printk_stop(char *str)
261 {
262         tracepoint_printk_stop_on_boot = true;
263         return 1;
264 }
265 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
266
267 unsigned long long ns2usecs(u64 nsec)
268 {
269         nsec += 500;
270         do_div(nsec, 1000);
271         return nsec;
272 }
273
274 static void
275 trace_process_export(struct trace_export *export,
276                struct ring_buffer_event *event, int flag)
277 {
278         struct trace_entry *entry;
279         unsigned int size = 0;
280
281         if (export->flags & flag) {
282                 entry = ring_buffer_event_data(event);
283                 size = ring_buffer_event_length(event);
284                 export->write(export, entry, size);
285         }
286 }
287
288 static DEFINE_MUTEX(ftrace_export_lock);
289
290 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
291
292 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
293 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
295
296 static inline void ftrace_exports_enable(struct trace_export *export)
297 {
298         if (export->flags & TRACE_EXPORT_FUNCTION)
299                 static_branch_inc(&trace_function_exports_enabled);
300
301         if (export->flags & TRACE_EXPORT_EVENT)
302                 static_branch_inc(&trace_event_exports_enabled);
303
304         if (export->flags & TRACE_EXPORT_MARKER)
305                 static_branch_inc(&trace_marker_exports_enabled);
306 }
307
308 static inline void ftrace_exports_disable(struct trace_export *export)
309 {
310         if (export->flags & TRACE_EXPORT_FUNCTION)
311                 static_branch_dec(&trace_function_exports_enabled);
312
313         if (export->flags & TRACE_EXPORT_EVENT)
314                 static_branch_dec(&trace_event_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_MARKER)
317                 static_branch_dec(&trace_marker_exports_enabled);
318 }
319
320 static void ftrace_exports(struct ring_buffer_event *event, int flag)
321 {
322         struct trace_export *export;
323
324         preempt_disable_notrace();
325
326         export = rcu_dereference_raw_check(ftrace_exports_list);
327         while (export) {
328                 trace_process_export(export, event, flag);
329                 export = rcu_dereference_raw_check(export->next);
330         }
331
332         preempt_enable_notrace();
333 }
334
335 static inline void
336 add_trace_export(struct trace_export **list, struct trace_export *export)
337 {
338         rcu_assign_pointer(export->next, *list);
339         /*
340          * We are entering export into the list but another
341          * CPU might be walking that list. We need to make sure
342          * the export->next pointer is valid before another CPU sees
343          * the export pointer included into the list.
344          */
345         rcu_assign_pointer(*list, export);
346 }
347
348 static inline int
349 rm_trace_export(struct trace_export **list, struct trace_export *export)
350 {
351         struct trace_export **p;
352
353         for (p = list; *p != NULL; p = &(*p)->next)
354                 if (*p == export)
355                         break;
356
357         if (*p != export)
358                 return -1;
359
360         rcu_assign_pointer(*p, (*p)->next);
361
362         return 0;
363 }
364
365 static inline void
366 add_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         ftrace_exports_enable(export);
369
370         add_trace_export(list, export);
371 }
372
373 static inline int
374 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
375 {
376         int ret;
377
378         ret = rm_trace_export(list, export);
379         ftrace_exports_disable(export);
380
381         return ret;
382 }
383
384 int register_ftrace_export(struct trace_export *export)
385 {
386         if (WARN_ON_ONCE(!export->write))
387                 return -1;
388
389         mutex_lock(&ftrace_export_lock);
390
391         add_ftrace_export(&ftrace_exports_list, export);
392
393         mutex_unlock(&ftrace_export_lock);
394
395         return 0;
396 }
397 EXPORT_SYMBOL_GPL(register_ftrace_export);
398
399 int unregister_ftrace_export(struct trace_export *export)
400 {
401         int ret;
402
403         mutex_lock(&ftrace_export_lock);
404
405         ret = rm_ftrace_export(&ftrace_exports_list, export);
406
407         mutex_unlock(&ftrace_export_lock);
408
409         return ret;
410 }
411 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
412
413 /* trace_flags holds trace_options default values */
414 #define TRACE_DEFAULT_FLAGS                                             \
415         (FUNCTION_DEFAULT_FLAGS |                                       \
416          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
417          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
418          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
419          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
420          TRACE_ITER_HASH_PTR)
421
422 /* trace_options that are only supported by global_trace */
423 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
424                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
425
426 /* trace_flags that are default zero for instances */
427 #define ZEROED_TRACE_FLAGS \
428         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
429
430 /*
431  * The global_trace is the descriptor that holds the top-level tracing
432  * buffers for the live tracing.
433  */
434 static struct trace_array global_trace = {
435         .trace_flags = TRACE_DEFAULT_FLAGS,
436 };
437
438 LIST_HEAD(ftrace_trace_arrays);
439
440 int trace_array_get(struct trace_array *this_tr)
441 {
442         struct trace_array *tr;
443         int ret = -ENODEV;
444
445         mutex_lock(&trace_types_lock);
446         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
447                 if (tr == this_tr) {
448                         tr->ref++;
449                         ret = 0;
450                         break;
451                 }
452         }
453         mutex_unlock(&trace_types_lock);
454
455         return ret;
456 }
457
458 static void __trace_array_put(struct trace_array *this_tr)
459 {
460         WARN_ON(!this_tr->ref);
461         this_tr->ref--;
462 }
463
464 /**
465  * trace_array_put - Decrement the reference counter for this trace array.
466  * @this_tr : pointer to the trace array
467  *
468  * NOTE: Use this when we no longer need the trace array returned by
469  * trace_array_get_by_name(). This ensures the trace array can be later
470  * destroyed.
471  *
472  */
473 void trace_array_put(struct trace_array *this_tr)
474 {
475         if (!this_tr)
476                 return;
477
478         mutex_lock(&trace_types_lock);
479         __trace_array_put(this_tr);
480         mutex_unlock(&trace_types_lock);
481 }
482 EXPORT_SYMBOL_GPL(trace_array_put);
483
484 int tracing_check_open_get_tr(struct trace_array *tr)
485 {
486         int ret;
487
488         ret = security_locked_down(LOCKDOWN_TRACEFS);
489         if (ret)
490                 return ret;
491
492         if (tracing_disabled)
493                 return -ENODEV;
494
495         if (tr && trace_array_get(tr) < 0)
496                 return -ENODEV;
497
498         return 0;
499 }
500
501 int call_filter_check_discard(struct trace_event_call *call, void *rec,
502                               struct trace_buffer *buffer,
503                               struct ring_buffer_event *event)
504 {
505         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
506             !filter_match_preds(call->filter, rec)) {
507                 __trace_event_discard_commit(buffer, event);
508                 return 1;
509         }
510
511         return 0;
512 }
513
514 void trace_free_pid_list(struct trace_pid_list *pid_list)
515 {
516         vfree(pid_list->pids);
517         kfree(pid_list);
518 }
519
520 /**
521  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
522  * @filtered_pids: The list of pids to check
523  * @search_pid: The PID to find in @filtered_pids
524  *
525  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
526  */
527 bool
528 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
529 {
530         /*
531          * If pid_max changed after filtered_pids was created, we
532          * by default ignore all pids greater than the previous pid_max.
533          */
534         if (search_pid >= filtered_pids->pid_max)
535                 return false;
536
537         return test_bit(search_pid, filtered_pids->pids);
538 }
539
540 /**
541  * trace_ignore_this_task - should a task be ignored for tracing
542  * @filtered_pids: The list of pids to check
543  * @filtered_no_pids: The list of pids not to be traced
544  * @task: The task that should be ignored if not filtered
545  *
546  * Checks if @task should be traced or not from @filtered_pids.
547  * Returns true if @task should *NOT* be traced.
548  * Returns false if @task should be traced.
549  */
550 bool
551 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
552                        struct trace_pid_list *filtered_no_pids,
553                        struct task_struct *task)
554 {
555         /*
556          * If filtered_no_pids is not empty, and the task's pid is listed
557          * in filtered_no_pids, then return true.
558          * Otherwise, if filtered_pids is empty, that means we can
559          * trace all tasks. If it has content, then only trace pids
560          * within filtered_pids.
561          */
562
563         return (filtered_pids &&
564                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
565                 (filtered_no_pids &&
566                  trace_find_filtered_pid(filtered_no_pids, task->pid));
567 }
568
569 /**
570  * trace_filter_add_remove_task - Add or remove a task from a pid_list
571  * @pid_list: The list to modify
572  * @self: The current task for fork or NULL for exit
573  * @task: The task to add or remove
574  *
575  * If adding a task, if @self is defined, the task is only added if @self
576  * is also included in @pid_list. This happens on fork and tasks should
577  * only be added when the parent is listed. If @self is NULL, then the
578  * @task pid will be removed from the list, which would happen on exit
579  * of a task.
580  */
581 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
582                                   struct task_struct *self,
583                                   struct task_struct *task)
584 {
585         if (!pid_list)
586                 return;
587
588         /* For forks, we only add if the forking task is listed */
589         if (self) {
590                 if (!trace_find_filtered_pid(pid_list, self->pid))
591                         return;
592         }
593
594         /* Sorry, but we don't support pid_max changing after setting */
595         if (task->pid >= pid_list->pid_max)
596                 return;
597
598         /* "self" is set for forks, and NULL for exits */
599         if (self)
600                 set_bit(task->pid, pid_list->pids);
601         else
602                 clear_bit(task->pid, pid_list->pids);
603 }
604
605 /**
606  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
607  * @pid_list: The pid list to show
608  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
609  * @pos: The position of the file
610  *
611  * This is used by the seq_file "next" operation to iterate the pids
612  * listed in a trace_pid_list structure.
613  *
614  * Returns the pid+1 as we want to display pid of zero, but NULL would
615  * stop the iteration.
616  */
617 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
618 {
619         unsigned long pid = (unsigned long)v;
620
621         (*pos)++;
622
623         /* pid already is +1 of the actual previous bit */
624         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
625
626         /* Return pid + 1 to allow zero to be represented */
627         if (pid < pid_list->pid_max)
628                 return (void *)(pid + 1);
629
630         return NULL;
631 }
632
633 /**
634  * trace_pid_start - Used for seq_file to start reading pid lists
635  * @pid_list: The pid list to show
636  * @pos: The position of the file
637  *
638  * This is used by seq_file "start" operation to start the iteration
639  * of listing pids.
640  *
641  * Returns the pid+1 as we want to display pid of zero, but NULL would
642  * stop the iteration.
643  */
644 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
645 {
646         unsigned long pid;
647         loff_t l = 0;
648
649         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
650         if (pid >= pid_list->pid_max)
651                 return NULL;
652
653         /* Return pid + 1 so that zero can be the exit value */
654         for (pid++; pid && l < *pos;
655              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
656                 ;
657         return (void *)pid;
658 }
659
660 /**
661  * trace_pid_show - show the current pid in seq_file processing
662  * @m: The seq_file structure to write into
663  * @v: A void pointer of the pid (+1) value to display
664  *
665  * Can be directly used by seq_file operations to display the current
666  * pid value.
667  */
668 int trace_pid_show(struct seq_file *m, void *v)
669 {
670         unsigned long pid = (unsigned long)v - 1;
671
672         seq_printf(m, "%lu\n", pid);
673         return 0;
674 }
675
676 /* 128 should be much more than enough */
677 #define PID_BUF_SIZE            127
678
679 int trace_pid_write(struct trace_pid_list *filtered_pids,
680                     struct trace_pid_list **new_pid_list,
681                     const char __user *ubuf, size_t cnt)
682 {
683         struct trace_pid_list *pid_list;
684         struct trace_parser parser;
685         unsigned long val;
686         int nr_pids = 0;
687         ssize_t read = 0;
688         ssize_t ret = 0;
689         loff_t pos;
690         pid_t pid;
691
692         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
693                 return -ENOMEM;
694
695         /*
696          * Always recreate a new array. The write is an all or nothing
697          * operation. Always create a new array when adding new pids by
698          * the user. If the operation fails, then the current list is
699          * not modified.
700          */
701         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
702         if (!pid_list) {
703                 trace_parser_put(&parser);
704                 return -ENOMEM;
705         }
706
707         pid_list->pid_max = READ_ONCE(pid_max);
708
709         /* Only truncating will shrink pid_max */
710         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
711                 pid_list->pid_max = filtered_pids->pid_max;
712
713         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
714         if (!pid_list->pids) {
715                 trace_parser_put(&parser);
716                 kfree(pid_list);
717                 return -ENOMEM;
718         }
719
720         if (filtered_pids) {
721                 /* copy the current bits to the new max */
722                 for_each_set_bit(pid, filtered_pids->pids,
723                                  filtered_pids->pid_max) {
724                         set_bit(pid, pid_list->pids);
725                         nr_pids++;
726                 }
727         }
728
729         while (cnt > 0) {
730
731                 pos = 0;
732
733                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
734                 if (ret < 0 || !trace_parser_loaded(&parser))
735                         break;
736
737                 read += ret;
738                 ubuf += ret;
739                 cnt -= ret;
740
741                 ret = -EINVAL;
742                 if (kstrtoul(parser.buffer, 0, &val))
743                         break;
744                 if (val >= pid_list->pid_max)
745                         break;
746
747                 pid = (pid_t)val;
748
749                 set_bit(pid, pid_list->pids);
750                 nr_pids++;
751
752                 trace_parser_clear(&parser);
753                 ret = 0;
754         }
755         trace_parser_put(&parser);
756
757         if (ret < 0) {
758                 trace_free_pid_list(pid_list);
759                 return ret;
760         }
761
762         if (!nr_pids) {
763                 /* Cleared the list of pids */
764                 trace_free_pid_list(pid_list);
765                 read = ret;
766                 pid_list = NULL;
767         }
768
769         *new_pid_list = pid_list;
770
771         return read;
772 }
773
774 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
775 {
776         u64 ts;
777
778         /* Early boot up does not have a buffer yet */
779         if (!buf->buffer)
780                 return trace_clock_local();
781
782         ts = ring_buffer_time_stamp(buf->buffer);
783         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
784
785         return ts;
786 }
787
788 u64 ftrace_now(int cpu)
789 {
790         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
791 }
792
793 /**
794  * tracing_is_enabled - Show if global_trace has been enabled
795  *
796  * Shows if the global trace has been enabled or not. It uses the
797  * mirror flag "buffer_disabled" to be used in fast paths such as for
798  * the irqsoff tracer. But it may be inaccurate due to races. If you
799  * need to know the accurate state, use tracing_is_on() which is a little
800  * slower, but accurate.
801  */
802 int tracing_is_enabled(void)
803 {
804         /*
805          * For quick access (irqsoff uses this in fast path), just
806          * return the mirror variable of the state of the ring buffer.
807          * It's a little racy, but we don't really care.
808          */
809         smp_rmb();
810         return !global_trace.buffer_disabled;
811 }
812
813 /*
814  * trace_buf_size is the size in bytes that is allocated
815  * for a buffer. Note, the number of bytes is always rounded
816  * to page size.
817  *
818  * This number is purposely set to a low number of 16384.
819  * If the dump on oops happens, it will be much appreciated
820  * to not have to wait for all that output. Anyway this can be
821  * boot time and run time configurable.
822  */
823 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
824
825 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
826
827 /* trace_types holds a link list of available tracers. */
828 static struct tracer            *trace_types __read_mostly;
829
830 /*
831  * trace_types_lock is used to protect the trace_types list.
832  */
833 DEFINE_MUTEX(trace_types_lock);
834
835 /*
836  * serialize the access of the ring buffer
837  *
838  * ring buffer serializes readers, but it is low level protection.
839  * The validity of the events (which returns by ring_buffer_peek() ..etc)
840  * are not protected by ring buffer.
841  *
842  * The content of events may become garbage if we allow other process consumes
843  * these events concurrently:
844  *   A) the page of the consumed events may become a normal page
845  *      (not reader page) in ring buffer, and this page will be rewritten
846  *      by events producer.
847  *   B) The page of the consumed events may become a page for splice_read,
848  *      and this page will be returned to system.
849  *
850  * These primitives allow multi process access to different cpu ring buffer
851  * concurrently.
852  *
853  * These primitives don't distinguish read-only and read-consume access.
854  * Multi read-only access are also serialized.
855  */
856
857 #ifdef CONFIG_SMP
858 static DECLARE_RWSEM(all_cpu_access_lock);
859 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
860
861 static inline void trace_access_lock(int cpu)
862 {
863         if (cpu == RING_BUFFER_ALL_CPUS) {
864                 /* gain it for accessing the whole ring buffer. */
865                 down_write(&all_cpu_access_lock);
866         } else {
867                 /* gain it for accessing a cpu ring buffer. */
868
869                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
870                 down_read(&all_cpu_access_lock);
871
872                 /* Secondly block other access to this @cpu ring buffer. */
873                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
874         }
875 }
876
877 static inline void trace_access_unlock(int cpu)
878 {
879         if (cpu == RING_BUFFER_ALL_CPUS) {
880                 up_write(&all_cpu_access_lock);
881         } else {
882                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
883                 up_read(&all_cpu_access_lock);
884         }
885 }
886
887 static inline void trace_access_lock_init(void)
888 {
889         int cpu;
890
891         for_each_possible_cpu(cpu)
892                 mutex_init(&per_cpu(cpu_access_lock, cpu));
893 }
894
895 #else
896
897 static DEFINE_MUTEX(access_lock);
898
899 static inline void trace_access_lock(int cpu)
900 {
901         (void)cpu;
902         mutex_lock(&access_lock);
903 }
904
905 static inline void trace_access_unlock(int cpu)
906 {
907         (void)cpu;
908         mutex_unlock(&access_lock);
909 }
910
911 static inline void trace_access_lock_init(void)
912 {
913 }
914
915 #endif
916
917 #ifdef CONFIG_STACKTRACE
918 static void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                  unsigned int trace_ctx,
920                                  int skip, struct pt_regs *regs);
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922                                       struct trace_buffer *buffer,
923                                       unsigned int trace_ctx,
924                                       int skip, struct pt_regs *regs);
925
926 #else
927 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
928                                         unsigned int trace_ctx,
929                                         int skip, struct pt_regs *regs)
930 {
931 }
932 static inline void ftrace_trace_stack(struct trace_array *tr,
933                                       struct trace_buffer *buffer,
934                                       unsigned long trace_ctx,
935                                       int skip, struct pt_regs *regs)
936 {
937 }
938
939 #endif
940
941 static __always_inline void
942 trace_event_setup(struct ring_buffer_event *event,
943                   int type, unsigned int trace_ctx)
944 {
945         struct trace_entry *ent = ring_buffer_event_data(event);
946
947         tracing_generic_entry_update(ent, type, trace_ctx);
948 }
949
950 static __always_inline struct ring_buffer_event *
951 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
952                           int type,
953                           unsigned long len,
954                           unsigned int trace_ctx)
955 {
956         struct ring_buffer_event *event;
957
958         event = ring_buffer_lock_reserve(buffer, len);
959         if (event != NULL)
960                 trace_event_setup(event, type, trace_ctx);
961
962         return event;
963 }
964
965 void tracer_tracing_on(struct trace_array *tr)
966 {
967         if (tr->array_buffer.buffer)
968                 ring_buffer_record_on(tr->array_buffer.buffer);
969         /*
970          * This flag is looked at when buffers haven't been allocated
971          * yet, or by some tracers (like irqsoff), that just want to
972          * know if the ring buffer has been disabled, but it can handle
973          * races of where it gets disabled but we still do a record.
974          * As the check is in the fast path of the tracers, it is more
975          * important to be fast than accurate.
976          */
977         tr->buffer_disabled = 0;
978         /* Make the flag seen by readers */
979         smp_wmb();
980 }
981
982 /**
983  * tracing_on - enable tracing buffers
984  *
985  * This function enables tracing buffers that may have been
986  * disabled with tracing_off.
987  */
988 void tracing_on(void)
989 {
990         tracer_tracing_on(&global_trace);
991 }
992 EXPORT_SYMBOL_GPL(tracing_on);
993
994
995 static __always_inline void
996 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
997 {
998         __this_cpu_write(trace_taskinfo_save, true);
999
1000         /* If this is the temp buffer, we need to commit fully */
1001         if (this_cpu_read(trace_buffered_event) == event) {
1002                 /* Length is in event->array[0] */
1003                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1004                 /* Release the temp buffer */
1005                 this_cpu_dec(trace_buffered_event_cnt);
1006         } else
1007                 ring_buffer_unlock_commit(buffer, event);
1008 }
1009
1010 /**
1011  * __trace_puts - write a constant string into the trace buffer.
1012  * @ip:    The address of the caller
1013  * @str:   The constant string to write
1014  * @size:  The size of the string.
1015  */
1016 int __trace_puts(unsigned long ip, const char *str, int size)
1017 {
1018         struct ring_buffer_event *event;
1019         struct trace_buffer *buffer;
1020         struct print_entry *entry;
1021         unsigned int trace_ctx;
1022         int alloc;
1023
1024         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1025                 return 0;
1026
1027         if (unlikely(tracing_selftest_running || tracing_disabled))
1028                 return 0;
1029
1030         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1031
1032         trace_ctx = tracing_gen_ctx();
1033         buffer = global_trace.array_buffer.buffer;
1034         ring_buffer_nest_start(buffer);
1035         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1036                                             trace_ctx);
1037         if (!event) {
1038                 size = 0;
1039                 goto out;
1040         }
1041
1042         entry = ring_buffer_event_data(event);
1043         entry->ip = ip;
1044
1045         memcpy(&entry->buf, str, size);
1046
1047         /* Add a newline if necessary */
1048         if (entry->buf[size - 1] != '\n') {
1049                 entry->buf[size] = '\n';
1050                 entry->buf[size + 1] = '\0';
1051         } else
1052                 entry->buf[size] = '\0';
1053
1054         __buffer_unlock_commit(buffer, event);
1055         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1056  out:
1057         ring_buffer_nest_end(buffer);
1058         return size;
1059 }
1060 EXPORT_SYMBOL_GPL(__trace_puts);
1061
1062 /**
1063  * __trace_bputs - write the pointer to a constant string into trace buffer
1064  * @ip:    The address of the caller
1065  * @str:   The constant string to write to the buffer to
1066  */
1067 int __trace_bputs(unsigned long ip, const char *str)
1068 {
1069         struct ring_buffer_event *event;
1070         struct trace_buffer *buffer;
1071         struct bputs_entry *entry;
1072         unsigned int trace_ctx;
1073         int size = sizeof(struct bputs_entry);
1074         int ret = 0;
1075
1076         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1077                 return 0;
1078
1079         if (unlikely(tracing_selftest_running || tracing_disabled))
1080                 return 0;
1081
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = global_trace.array_buffer.buffer;
1084
1085         ring_buffer_nest_start(buffer);
1086         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1087                                             trace_ctx);
1088         if (!event)
1089                 goto out;
1090
1091         entry = ring_buffer_event_data(event);
1092         entry->ip                       = ip;
1093         entry->str                      = str;
1094
1095         __buffer_unlock_commit(buffer, event);
1096         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1097
1098         ret = 1;
1099  out:
1100         ring_buffer_nest_end(buffer);
1101         return ret;
1102 }
1103 EXPORT_SYMBOL_GPL(__trace_bputs);
1104
1105 #ifdef CONFIG_TRACER_SNAPSHOT
1106 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107                                            void *cond_data)
1108 {
1109         struct tracer *tracer = tr->current_trace;
1110         unsigned long flags;
1111
1112         if (in_nmi()) {
1113                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1114                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1115                 return;
1116         }
1117
1118         if (!tr->allocated_snapshot) {
1119                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1120                 internal_trace_puts("*** stopping trace here!   ***\n");
1121                 tracing_off();
1122                 return;
1123         }
1124
1125         /* Note, snapshot can not be used when the tracer uses it */
1126         if (tracer->use_max_tr) {
1127                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1128                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1129                 return;
1130         }
1131
1132         local_irq_save(flags);
1133         update_max_tr(tr, current, smp_processor_id(), cond_data);
1134         local_irq_restore(flags);
1135 }
1136
1137 void tracing_snapshot_instance(struct trace_array *tr)
1138 {
1139         tracing_snapshot_instance_cond(tr, NULL);
1140 }
1141
1142 /**
1143  * tracing_snapshot - take a snapshot of the current buffer.
1144  *
1145  * This causes a swap between the snapshot buffer and the current live
1146  * tracing buffer. You can use this to take snapshots of the live
1147  * trace when some condition is triggered, but continue to trace.
1148  *
1149  * Note, make sure to allocate the snapshot with either
1150  * a tracing_snapshot_alloc(), or by doing it manually
1151  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1152  *
1153  * If the snapshot buffer is not allocated, it will stop tracing.
1154  * Basically making a permanent snapshot.
1155  */
1156 void tracing_snapshot(void)
1157 {
1158         struct trace_array *tr = &global_trace;
1159
1160         tracing_snapshot_instance(tr);
1161 }
1162 EXPORT_SYMBOL_GPL(tracing_snapshot);
1163
1164 /**
1165  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166  * @tr:         The tracing instance to snapshot
1167  * @cond_data:  The data to be tested conditionally, and possibly saved
1168  *
1169  * This is the same as tracing_snapshot() except that the snapshot is
1170  * conditional - the snapshot will only happen if the
1171  * cond_snapshot.update() implementation receiving the cond_data
1172  * returns true, which means that the trace array's cond_snapshot
1173  * update() operation used the cond_data to determine whether the
1174  * snapshot should be taken, and if it was, presumably saved it along
1175  * with the snapshot.
1176  */
1177 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178 {
1179         tracing_snapshot_instance_cond(tr, cond_data);
1180 }
1181 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182
1183 /**
1184  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185  * @tr:         The tracing instance
1186  *
1187  * When the user enables a conditional snapshot using
1188  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189  * with the snapshot.  This accessor is used to retrieve it.
1190  *
1191  * Should not be called from cond_snapshot.update(), since it takes
1192  * the tr->max_lock lock, which the code calling
1193  * cond_snapshot.update() has already done.
1194  *
1195  * Returns the cond_data associated with the trace array's snapshot.
1196  */
1197 void *tracing_cond_snapshot_data(struct trace_array *tr)
1198 {
1199         void *cond_data = NULL;
1200
1201         arch_spin_lock(&tr->max_lock);
1202
1203         if (tr->cond_snapshot)
1204                 cond_data = tr->cond_snapshot->cond_data;
1205
1206         arch_spin_unlock(&tr->max_lock);
1207
1208         return cond_data;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1211
1212 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1213                                         struct array_buffer *size_buf, int cpu_id);
1214 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1215
1216 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1217 {
1218         int ret;
1219
1220         if (!tr->allocated_snapshot) {
1221
1222                 /* allocate spare buffer */
1223                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1224                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1225                 if (ret < 0)
1226                         return ret;
1227
1228                 tr->allocated_snapshot = true;
1229         }
1230
1231         return 0;
1232 }
1233
1234 static void free_snapshot(struct trace_array *tr)
1235 {
1236         /*
1237          * We don't free the ring buffer. instead, resize it because
1238          * The max_tr ring buffer has some state (e.g. ring->clock) and
1239          * we want preserve it.
1240          */
1241         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1242         set_buffer_entries(&tr->max_buffer, 1);
1243         tracing_reset_online_cpus(&tr->max_buffer);
1244         tr->allocated_snapshot = false;
1245 }
1246
1247 /**
1248  * tracing_alloc_snapshot - allocate snapshot buffer.
1249  *
1250  * This only allocates the snapshot buffer if it isn't already
1251  * allocated - it doesn't also take a snapshot.
1252  *
1253  * This is meant to be used in cases where the snapshot buffer needs
1254  * to be set up for events that can't sleep but need to be able to
1255  * trigger a snapshot.
1256  */
1257 int tracing_alloc_snapshot(void)
1258 {
1259         struct trace_array *tr = &global_trace;
1260         int ret;
1261
1262         ret = tracing_alloc_snapshot_instance(tr);
1263         WARN_ON(ret < 0);
1264
1265         return ret;
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1268
1269 /**
1270  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1271  *
1272  * This is similar to tracing_snapshot(), but it will allocate the
1273  * snapshot buffer if it isn't already allocated. Use this only
1274  * where it is safe to sleep, as the allocation may sleep.
1275  *
1276  * This causes a swap between the snapshot buffer and the current live
1277  * tracing buffer. You can use this to take snapshots of the live
1278  * trace when some condition is triggered, but continue to trace.
1279  */
1280 void tracing_snapshot_alloc(void)
1281 {
1282         int ret;
1283
1284         ret = tracing_alloc_snapshot();
1285         if (ret < 0)
1286                 return;
1287
1288         tracing_snapshot();
1289 }
1290 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1291
1292 /**
1293  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1294  * @tr:         The tracing instance
1295  * @cond_data:  User data to associate with the snapshot
1296  * @update:     Implementation of the cond_snapshot update function
1297  *
1298  * Check whether the conditional snapshot for the given instance has
1299  * already been enabled, or if the current tracer is already using a
1300  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1301  * save the cond_data and update function inside.
1302  *
1303  * Returns 0 if successful, error otherwise.
1304  */
1305 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1306                                  cond_update_fn_t update)
1307 {
1308         struct cond_snapshot *cond_snapshot;
1309         int ret = 0;
1310
1311         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1312         if (!cond_snapshot)
1313                 return -ENOMEM;
1314
1315         cond_snapshot->cond_data = cond_data;
1316         cond_snapshot->update = update;
1317
1318         mutex_lock(&trace_types_lock);
1319
1320         ret = tracing_alloc_snapshot_instance(tr);
1321         if (ret)
1322                 goto fail_unlock;
1323
1324         if (tr->current_trace->use_max_tr) {
1325                 ret = -EBUSY;
1326                 goto fail_unlock;
1327         }
1328
1329         /*
1330          * The cond_snapshot can only change to NULL without the
1331          * trace_types_lock. We don't care if we race with it going
1332          * to NULL, but we want to make sure that it's not set to
1333          * something other than NULL when we get here, which we can
1334          * do safely with only holding the trace_types_lock and not
1335          * having to take the max_lock.
1336          */
1337         if (tr->cond_snapshot) {
1338                 ret = -EBUSY;
1339                 goto fail_unlock;
1340         }
1341
1342         arch_spin_lock(&tr->max_lock);
1343         tr->cond_snapshot = cond_snapshot;
1344         arch_spin_unlock(&tr->max_lock);
1345
1346         mutex_unlock(&trace_types_lock);
1347
1348         return ret;
1349
1350  fail_unlock:
1351         mutex_unlock(&trace_types_lock);
1352         kfree(cond_snapshot);
1353         return ret;
1354 }
1355 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1356
1357 /**
1358  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1359  * @tr:         The tracing instance
1360  *
1361  * Check whether the conditional snapshot for the given instance is
1362  * enabled; if so, free the cond_snapshot associated with it,
1363  * otherwise return -EINVAL.
1364  *
1365  * Returns 0 if successful, error otherwise.
1366  */
1367 int tracing_snapshot_cond_disable(struct trace_array *tr)
1368 {
1369         int ret = 0;
1370
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381
1382         return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1385 #else
1386 void tracing_snapshot(void)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot);
1391 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1396 int tracing_alloc_snapshot(void)
1397 {
1398         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1399         return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1402 void tracing_snapshot_alloc(void)
1403 {
1404         /* Give warning */
1405         tracing_snapshot();
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1408 void *tracing_cond_snapshot_data(struct trace_array *tr)
1409 {
1410         return NULL;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1413 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1414 {
1415         return -ENODEV;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420         return false;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1423 #endif /* CONFIG_TRACER_SNAPSHOT */
1424
1425 void tracer_tracing_off(struct trace_array *tr)
1426 {
1427         if (tr->array_buffer.buffer)
1428                 ring_buffer_record_off(tr->array_buffer.buffer);
1429         /*
1430          * This flag is looked at when buffers haven't been allocated
1431          * yet, or by some tracers (like irqsoff), that just want to
1432          * know if the ring buffer has been disabled, but it can handle
1433          * races of where it gets disabled but we still do a record.
1434          * As the check is in the fast path of the tracers, it is more
1435          * important to be fast than accurate.
1436          */
1437         tr->buffer_disabled = 1;
1438         /* Make the flag seen by readers */
1439         smp_wmb();
1440 }
1441
1442 /**
1443  * tracing_off - turn off tracing buffers
1444  *
1445  * This function stops the tracing buffers from recording data.
1446  * It does not disable any overhead the tracers themselves may
1447  * be causing. This function simply causes all recording to
1448  * the ring buffers to fail.
1449  */
1450 void tracing_off(void)
1451 {
1452         tracer_tracing_off(&global_trace);
1453 }
1454 EXPORT_SYMBOL_GPL(tracing_off);
1455
1456 void disable_trace_on_warning(void)
1457 {
1458         if (__disable_trace_on_warning) {
1459                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1460                         "Disabling tracing due to warning\n");
1461                 tracing_off();
1462         }
1463 }
1464
1465 /**
1466  * tracer_tracing_is_on - show real state of ring buffer enabled
1467  * @tr : the trace array to know if ring buffer is enabled
1468  *
1469  * Shows real state of the ring buffer if it is enabled or not.
1470  */
1471 bool tracer_tracing_is_on(struct trace_array *tr)
1472 {
1473         if (tr->array_buffer.buffer)
1474                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1475         return !tr->buffer_disabled;
1476 }
1477
1478 /**
1479  * tracing_is_on - show state of ring buffers enabled
1480  */
1481 int tracing_is_on(void)
1482 {
1483         return tracer_tracing_is_on(&global_trace);
1484 }
1485 EXPORT_SYMBOL_GPL(tracing_is_on);
1486
1487 static int __init set_buf_size(char *str)
1488 {
1489         unsigned long buf_size;
1490
1491         if (!str)
1492                 return 0;
1493         buf_size = memparse(str, &str);
1494         /* nr_entries can not be zero */
1495         if (buf_size == 0)
1496                 return 0;
1497         trace_buf_size = buf_size;
1498         return 1;
1499 }
1500 __setup("trace_buf_size=", set_buf_size);
1501
1502 static int __init set_tracing_thresh(char *str)
1503 {
1504         unsigned long threshold;
1505         int ret;
1506
1507         if (!str)
1508                 return 0;
1509         ret = kstrtoul(str, 0, &threshold);
1510         if (ret < 0)
1511                 return 0;
1512         tracing_thresh = threshold * 1000;
1513         return 1;
1514 }
1515 __setup("tracing_thresh=", set_tracing_thresh);
1516
1517 unsigned long nsecs_to_usecs(unsigned long nsecs)
1518 {
1519         return nsecs / 1000;
1520 }
1521
1522 /*
1523  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1524  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1525  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1526  * of strings in the order that the evals (enum) were defined.
1527  */
1528 #undef C
1529 #define C(a, b) b
1530
1531 /* These must match the bit positions in trace_iterator_flags */
1532 static const char *trace_options[] = {
1533         TRACE_FLAGS
1534         NULL
1535 };
1536
1537 static struct {
1538         u64 (*func)(void);
1539         const char *name;
1540         int in_ns;              /* is this clock in nanoseconds? */
1541 } trace_clocks[] = {
1542         { trace_clock_local,            "local",        1 },
1543         { trace_clock_global,           "global",       1 },
1544         { trace_clock_counter,          "counter",      0 },
1545         { trace_clock_jiffies,          "uptime",       0 },
1546         { trace_clock,                  "perf",         1 },
1547         { ktime_get_mono_fast_ns,       "mono",         1 },
1548         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1549         { ktime_get_boot_fast_ns,       "boot",         1 },
1550         ARCH_TRACE_CLOCKS
1551 };
1552
1553 bool trace_clock_in_ns(struct trace_array *tr)
1554 {
1555         if (trace_clocks[tr->clock_id].in_ns)
1556                 return true;
1557
1558         return false;
1559 }
1560
1561 /*
1562  * trace_parser_get_init - gets the buffer for trace parser
1563  */
1564 int trace_parser_get_init(struct trace_parser *parser, int size)
1565 {
1566         memset(parser, 0, sizeof(*parser));
1567
1568         parser->buffer = kmalloc(size, GFP_KERNEL);
1569         if (!parser->buffer)
1570                 return 1;
1571
1572         parser->size = size;
1573         return 0;
1574 }
1575
1576 /*
1577  * trace_parser_put - frees the buffer for trace parser
1578  */
1579 void trace_parser_put(struct trace_parser *parser)
1580 {
1581         kfree(parser->buffer);
1582         parser->buffer = NULL;
1583 }
1584
1585 /*
1586  * trace_get_user - reads the user input string separated by  space
1587  * (matched by isspace(ch))
1588  *
1589  * For each string found the 'struct trace_parser' is updated,
1590  * and the function returns.
1591  *
1592  * Returns number of bytes read.
1593  *
1594  * See kernel/trace/trace.h for 'struct trace_parser' details.
1595  */
1596 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1597         size_t cnt, loff_t *ppos)
1598 {
1599         char ch;
1600         size_t read = 0;
1601         ssize_t ret;
1602
1603         if (!*ppos)
1604                 trace_parser_clear(parser);
1605
1606         ret = get_user(ch, ubuf++);
1607         if (ret)
1608                 goto out;
1609
1610         read++;
1611         cnt--;
1612
1613         /*
1614          * The parser is not finished with the last write,
1615          * continue reading the user input without skipping spaces.
1616          */
1617         if (!parser->cont) {
1618                 /* skip white space */
1619                 while (cnt && isspace(ch)) {
1620                         ret = get_user(ch, ubuf++);
1621                         if (ret)
1622                                 goto out;
1623                         read++;
1624                         cnt--;
1625                 }
1626
1627                 parser->idx = 0;
1628
1629                 /* only spaces were written */
1630                 if (isspace(ch) || !ch) {
1631                         *ppos += read;
1632                         ret = read;
1633                         goto out;
1634                 }
1635         }
1636
1637         /* read the non-space input */
1638         while (cnt && !isspace(ch) && ch) {
1639                 if (parser->idx < parser->size - 1)
1640                         parser->buffer[parser->idx++] = ch;
1641                 else {
1642                         ret = -EINVAL;
1643                         goto out;
1644                 }
1645                 ret = get_user(ch, ubuf++);
1646                 if (ret)
1647                         goto out;
1648                 read++;
1649                 cnt--;
1650         }
1651
1652         /* We either got finished input or we have to wait for another call. */
1653         if (isspace(ch) || !ch) {
1654                 parser->buffer[parser->idx] = 0;
1655                 parser->cont = false;
1656         } else if (parser->idx < parser->size - 1) {
1657                 parser->cont = true;
1658                 parser->buffer[parser->idx++] = ch;
1659                 /* Make sure the parsed string always terminates with '\0'. */
1660                 parser->buffer[parser->idx] = 0;
1661         } else {
1662                 ret = -EINVAL;
1663                 goto out;
1664         }
1665
1666         *ppos += read;
1667         ret = read;
1668
1669 out:
1670         return ret;
1671 }
1672
1673 /* TODO add a seq_buf_to_buffer() */
1674 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1675 {
1676         int len;
1677
1678         if (trace_seq_used(s) <= s->seq.readpos)
1679                 return -EBUSY;
1680
1681         len = trace_seq_used(s) - s->seq.readpos;
1682         if (cnt > len)
1683                 cnt = len;
1684         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1685
1686         s->seq.readpos += cnt;
1687         return cnt;
1688 }
1689
1690 unsigned long __read_mostly     tracing_thresh;
1691 static const struct file_operations tracing_max_lat_fops;
1692
1693 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1694         defined(CONFIG_FSNOTIFY)
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752
1753 #define trace_create_maxlat_file(tr, d_tracer)                          \
1754         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                           &tr->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this tasks comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static, they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902
1903 struct trace_selftests {
1904         struct list_head                list;
1905         struct tracer                   *type;
1906 };
1907
1908 static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled, so
2014                  * lets make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045         return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051 static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /*allocate a dummy tracer_flags*/
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* disable other selftests, since this will break it. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
2195 static int *tgid_map;
2196
2197 #define SAVED_CMDLINES_DEFAULT 128
2198 #define NO_CMDLINE_MAP UINT_MAX
2199 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2200 struct saved_cmdlines_buffer {
2201         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2202         unsigned *map_cmdline_to_pid;
2203         unsigned cmdline_num;
2204         int cmdline_idx;
2205         char *saved_cmdlines;
2206 };
2207 static struct saved_cmdlines_buffer *savedcmd;
2208
2209 /* temporary disable recording */
2210 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2211
2212 static inline char *get_saved_cmdlines(int idx)
2213 {
2214         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2215 }
2216
2217 static inline void set_cmdline(int idx, const char *cmdline)
2218 {
2219         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2220 }
2221
2222 static int allocate_cmdlines_buffer(unsigned int val,
2223                                     struct saved_cmdlines_buffer *s)
2224 {
2225         s->map_cmdline_to_pid = kmalloc_array(val,
2226                                               sizeof(*s->map_cmdline_to_pid),
2227                                               GFP_KERNEL);
2228         if (!s->map_cmdline_to_pid)
2229                 return -ENOMEM;
2230
2231         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2232         if (!s->saved_cmdlines) {
2233                 kfree(s->map_cmdline_to_pid);
2234                 return -ENOMEM;
2235         }
2236
2237         s->cmdline_idx = 0;
2238         s->cmdline_num = val;
2239         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2240                sizeof(s->map_pid_to_cmdline));
2241         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2242                val * sizeof(*s->map_cmdline_to_pid));
2243
2244         return 0;
2245 }
2246
2247 static int trace_create_savedcmd(void)
2248 {
2249         int ret;
2250
2251         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2252         if (!savedcmd)
2253                 return -ENOMEM;
2254
2255         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2256         if (ret < 0) {
2257                 kfree(savedcmd);
2258                 savedcmd = NULL;
2259                 return -ENOMEM;
2260         }
2261
2262         return 0;
2263 }
2264
2265 int is_tracing_stopped(void)
2266 {
2267         return global_trace.stop_count;
2268 }
2269
2270 /**
2271  * tracing_start - quick start of the tracer
2272  *
2273  * If tracing is enabled but was stopped by tracing_stop,
2274  * this will start the tracer back up.
2275  */
2276 void tracing_start(void)
2277 {
2278         struct trace_buffer *buffer;
2279         unsigned long flags;
2280
2281         if (tracing_disabled)
2282                 return;
2283
2284         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2285         if (--global_trace.stop_count) {
2286                 if (global_trace.stop_count < 0) {
2287                         /* Someone screwed up their debugging */
2288                         WARN_ON_ONCE(1);
2289                         global_trace.stop_count = 0;
2290                 }
2291                 goto out;
2292         }
2293
2294         /* Prevent the buffers from switching */
2295         arch_spin_lock(&global_trace.max_lock);
2296
2297         buffer = global_trace.array_buffer.buffer;
2298         if (buffer)
2299                 ring_buffer_record_enable(buffer);
2300
2301 #ifdef CONFIG_TRACER_MAX_TRACE
2302         buffer = global_trace.max_buffer.buffer;
2303         if (buffer)
2304                 ring_buffer_record_enable(buffer);
2305 #endif
2306
2307         arch_spin_unlock(&global_trace.max_lock);
2308
2309  out:
2310         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2311 }
2312
2313 static void tracing_start_tr(struct trace_array *tr)
2314 {
2315         struct trace_buffer *buffer;
2316         unsigned long flags;
2317
2318         if (tracing_disabled)
2319                 return;
2320
2321         /* If global, we need to also start the max tracer */
2322         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2323                 return tracing_start();
2324
2325         raw_spin_lock_irqsave(&tr->start_lock, flags);
2326
2327         if (--tr->stop_count) {
2328                 if (tr->stop_count < 0) {
2329                         /* Someone screwed up their debugging */
2330                         WARN_ON_ONCE(1);
2331                         tr->stop_count = 0;
2332                 }
2333                 goto out;
2334         }
2335
2336         buffer = tr->array_buffer.buffer;
2337         if (buffer)
2338                 ring_buffer_record_enable(buffer);
2339
2340  out:
2341         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2342 }
2343
2344 /**
2345  * tracing_stop - quick stop of the tracer
2346  *
2347  * Light weight way to stop tracing. Use in conjunction with
2348  * tracing_start.
2349  */
2350 void tracing_stop(void)
2351 {
2352         struct trace_buffer *buffer;
2353         unsigned long flags;
2354
2355         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2356         if (global_trace.stop_count++)
2357                 goto out;
2358
2359         /* Prevent the buffers from switching */
2360         arch_spin_lock(&global_trace.max_lock);
2361
2362         buffer = global_trace.array_buffer.buffer;
2363         if (buffer)
2364                 ring_buffer_record_disable(buffer);
2365
2366 #ifdef CONFIG_TRACER_MAX_TRACE
2367         buffer = global_trace.max_buffer.buffer;
2368         if (buffer)
2369                 ring_buffer_record_disable(buffer);
2370 #endif
2371
2372         arch_spin_unlock(&global_trace.max_lock);
2373
2374  out:
2375         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2376 }
2377
2378 static void tracing_stop_tr(struct trace_array *tr)
2379 {
2380         struct trace_buffer *buffer;
2381         unsigned long flags;
2382
2383         /* If global, we need to also stop the max tracer */
2384         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2385                 return tracing_stop();
2386
2387         raw_spin_lock_irqsave(&tr->start_lock, flags);
2388         if (tr->stop_count++)
2389                 goto out;
2390
2391         buffer = tr->array_buffer.buffer;
2392         if (buffer)
2393                 ring_buffer_record_disable(buffer);
2394
2395  out:
2396         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2397 }
2398
2399 static int trace_save_cmdline(struct task_struct *tsk)
2400 {
2401         unsigned tpid, idx;
2402
2403         /* treat recording of idle task as a success */
2404         if (!tsk->pid)
2405                 return 1;
2406
2407         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2408
2409         /*
2410          * It's not the end of the world if we don't get
2411          * the lock, but we also don't want to spin
2412          * nor do we want to disable interrupts,
2413          * so if we miss here, then better luck next time.
2414          */
2415         if (!arch_spin_trylock(&trace_cmdline_lock))
2416                 return 0;
2417
2418         idx = savedcmd->map_pid_to_cmdline[tpid];
2419         if (idx == NO_CMDLINE_MAP) {
2420                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2421
2422                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2423                 savedcmd->cmdline_idx = idx;
2424         }
2425
2426         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2427         set_cmdline(idx, tsk->comm);
2428
2429         arch_spin_unlock(&trace_cmdline_lock);
2430
2431         return 1;
2432 }
2433
2434 static void __trace_find_cmdline(int pid, char comm[])
2435 {
2436         unsigned map;
2437         int tpid;
2438
2439         if (!pid) {
2440                 strcpy(comm, "<idle>");
2441                 return;
2442         }
2443
2444         if (WARN_ON_ONCE(pid < 0)) {
2445                 strcpy(comm, "<XXX>");
2446                 return;
2447         }
2448
2449         tpid = pid & (PID_MAX_DEFAULT - 1);
2450         map = savedcmd->map_pid_to_cmdline[tpid];
2451         if (map != NO_CMDLINE_MAP) {
2452                 tpid = savedcmd->map_cmdline_to_pid[map];
2453                 if (tpid == pid) {
2454                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2455                         return;
2456                 }
2457         }
2458         strcpy(comm, "<...>");
2459 }
2460
2461 void trace_find_cmdline(int pid, char comm[])
2462 {
2463         preempt_disable();
2464         arch_spin_lock(&trace_cmdline_lock);
2465
2466         __trace_find_cmdline(pid, comm);
2467
2468         arch_spin_unlock(&trace_cmdline_lock);
2469         preempt_enable();
2470 }
2471
2472 int trace_find_tgid(int pid)
2473 {
2474         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2475                 return 0;
2476
2477         return tgid_map[pid];
2478 }
2479
2480 static int trace_save_tgid(struct task_struct *tsk)
2481 {
2482         /* treat recording of idle task as a success */
2483         if (!tsk->pid)
2484                 return 1;
2485
2486         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2487                 return 0;
2488
2489         tgid_map[tsk->pid] = tsk->tgid;
2490         return 1;
2491 }
2492
2493 static bool tracing_record_taskinfo_skip(int flags)
2494 {
2495         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2496                 return true;
2497         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2498                 return true;
2499         if (!__this_cpu_read(trace_taskinfo_save))
2500                 return true;
2501         return false;
2502 }
2503
2504 /**
2505  * tracing_record_taskinfo - record the task info of a task
2506  *
2507  * @task:  task to record
2508  * @flags: TRACE_RECORD_CMDLINE for recording comm
2509  *         TRACE_RECORD_TGID for recording tgid
2510  */
2511 void tracing_record_taskinfo(struct task_struct *task, int flags)
2512 {
2513         bool done;
2514
2515         if (tracing_record_taskinfo_skip(flags))
2516                 return;
2517
2518         /*
2519          * Record as much task information as possible. If some fail, continue
2520          * to try to record the others.
2521          */
2522         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2523         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2524
2525         /* If recording any information failed, retry again soon. */
2526         if (!done)
2527                 return;
2528
2529         __this_cpu_write(trace_taskinfo_save, false);
2530 }
2531
2532 /**
2533  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2534  *
2535  * @prev: previous task during sched_switch
2536  * @next: next task during sched_switch
2537  * @flags: TRACE_RECORD_CMDLINE for recording comm
2538  *         TRACE_RECORD_TGID for recording tgid
2539  */
2540 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2541                                           struct task_struct *next, int flags)
2542 {
2543         bool done;
2544
2545         if (tracing_record_taskinfo_skip(flags))
2546                 return;
2547
2548         /*
2549          * Record as much task information as possible. If some fail, continue
2550          * to try to record the others.
2551          */
2552         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2553         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2555         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2556
2557         /* If recording any information failed, retry again soon. */
2558         if (!done)
2559                 return;
2560
2561         __this_cpu_write(trace_taskinfo_save, false);
2562 }
2563
2564 /* Helpers to record a specific task information */
2565 void tracing_record_cmdline(struct task_struct *task)
2566 {
2567         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2568 }
2569
2570 void tracing_record_tgid(struct task_struct *task)
2571 {
2572         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2573 }
2574
2575 /*
2576  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2577  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2578  * simplifies those functions and keeps them in sync.
2579  */
2580 enum print_line_t trace_handle_return(struct trace_seq *s)
2581 {
2582         return trace_seq_has_overflowed(s) ?
2583                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2584 }
2585 EXPORT_SYMBOL_GPL(trace_handle_return);
2586
2587 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2588 {
2589         unsigned int trace_flags = irqs_status;
2590         unsigned int pc;
2591
2592         pc = preempt_count();
2593
2594         if (pc & NMI_MASK)
2595                 trace_flags |= TRACE_FLAG_NMI;
2596         if (pc & HARDIRQ_MASK)
2597                 trace_flags |= TRACE_FLAG_HARDIRQ;
2598         if (in_serving_softirq())
2599                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2600
2601         if (tif_need_resched())
2602                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2603         if (test_preempt_need_resched())
2604                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2605         return (trace_flags << 16) | (pc & 0xff);
2606 }
2607
2608 struct ring_buffer_event *
2609 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2610                           int type,
2611                           unsigned long len,
2612                           unsigned int trace_ctx)
2613 {
2614         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2615 }
2616
2617 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2618 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2619 static int trace_buffered_event_ref;
2620
2621 /**
2622  * trace_buffered_event_enable - enable buffering events
2623  *
2624  * When events are being filtered, it is quicker to use a temporary
2625  * buffer to write the event data into if there's a likely chance
2626  * that it will not be committed. The discard of the ring buffer
2627  * is not as fast as committing, and is much slower than copying
2628  * a commit.
2629  *
2630  * When an event is to be filtered, allocate per cpu buffers to
2631  * write the event data into, and if the event is filtered and discarded
2632  * it is simply dropped, otherwise, the entire data is to be committed
2633  * in one shot.
2634  */
2635 void trace_buffered_event_enable(void)
2636 {
2637         struct ring_buffer_event *event;
2638         struct page *page;
2639         int cpu;
2640
2641         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2642
2643         if (trace_buffered_event_ref++)
2644                 return;
2645
2646         for_each_tracing_cpu(cpu) {
2647                 page = alloc_pages_node(cpu_to_node(cpu),
2648                                         GFP_KERNEL | __GFP_NORETRY, 0);
2649                 if (!page)
2650                         goto failed;
2651
2652                 event = page_address(page);
2653                 memset(event, 0, sizeof(*event));
2654
2655                 per_cpu(trace_buffered_event, cpu) = event;
2656
2657                 preempt_disable();
2658                 if (cpu == smp_processor_id() &&
2659                     __this_cpu_read(trace_buffered_event) !=
2660                     per_cpu(trace_buffered_event, cpu))
2661                         WARN_ON_ONCE(1);
2662                 preempt_enable();
2663         }
2664
2665         return;
2666  failed:
2667         trace_buffered_event_disable();
2668 }
2669
2670 static void enable_trace_buffered_event(void *data)
2671 {
2672         /* Probably not needed, but do it anyway */
2673         smp_rmb();
2674         this_cpu_dec(trace_buffered_event_cnt);
2675 }
2676
2677 static void disable_trace_buffered_event(void *data)
2678 {
2679         this_cpu_inc(trace_buffered_event_cnt);
2680 }
2681
2682 /**
2683  * trace_buffered_event_disable - disable buffering events
2684  *
2685  * When a filter is removed, it is faster to not use the buffered
2686  * events, and to commit directly into the ring buffer. Free up
2687  * the temp buffers when there are no more users. This requires
2688  * special synchronization with current events.
2689  */
2690 void trace_buffered_event_disable(void)
2691 {
2692         int cpu;
2693
2694         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2695
2696         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2697                 return;
2698
2699         if (--trace_buffered_event_ref)
2700                 return;
2701
2702         preempt_disable();
2703         /* For each CPU, set the buffer as used. */
2704         smp_call_function_many(tracing_buffer_mask,
2705                                disable_trace_buffered_event, NULL, 1);
2706         preempt_enable();
2707
2708         /* Wait for all current users to finish */
2709         synchronize_rcu();
2710
2711         for_each_tracing_cpu(cpu) {
2712                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2713                 per_cpu(trace_buffered_event, cpu) = NULL;
2714         }
2715         /*
2716          * Make sure trace_buffered_event is NULL before clearing
2717          * trace_buffered_event_cnt.
2718          */
2719         smp_wmb();
2720
2721         preempt_disable();
2722         /* Do the work on each cpu */
2723         smp_call_function_many(tracing_buffer_mask,
2724                                enable_trace_buffered_event, NULL, 1);
2725         preempt_enable();
2726 }
2727
2728 static struct trace_buffer *temp_buffer;
2729
2730 struct ring_buffer_event *
2731 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2732                           struct trace_event_file *trace_file,
2733                           int type, unsigned long len,
2734                           unsigned int trace_ctx)
2735 {
2736         struct ring_buffer_event *entry;
2737         struct trace_array *tr = trace_file->tr;
2738         int val;
2739
2740         *current_rb = tr->array_buffer.buffer;
2741
2742         if (!tr->no_filter_buffering_ref &&
2743             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2744             (entry = this_cpu_read(trace_buffered_event))) {
2745                 /*
2746                  * Filtering is on, so try to use the per cpu buffer first.
2747                  * This buffer will simulate a ring_buffer_event,
2748                  * where the type_len is zero and the array[0] will
2749                  * hold the full length.
2750                  * (see include/linux/ring-buffer.h for details on
2751                  *  how the ring_buffer_event is structured).
2752                  *
2753                  * Using a temp buffer during filtering and copying it
2754                  * on a matched filter is quicker than writing directly
2755                  * into the ring buffer and then discarding it when
2756                  * it doesn't match. That is because the discard
2757                  * requires several atomic operations to get right.
2758                  * Copying on match and doing nothing on a failed match
2759                  * is still quicker than no copy on match, but having
2760                  * to discard out of the ring buffer on a failed match.
2761                  */
2762                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2763
2764                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2765
2766                 /*
2767                  * Preemption is disabled, but interrupts and NMIs
2768                  * can still come in now. If that happens after
2769                  * the above increment, then it will have to go
2770                  * back to the old method of allocating the event
2771                  * on the ring buffer, and if the filter fails, it
2772                  * will have to call ring_buffer_discard_commit()
2773                  * to remove it.
2774                  *
2775                  * Need to also check the unlikely case that the
2776                  * length is bigger than the temp buffer size.
2777                  * If that happens, then the reserve is pretty much
2778                  * guaranteed to fail, as the ring buffer currently
2779                  * only allows events less than a page. But that may
2780                  * change in the future, so let the ring buffer reserve
2781                  * handle the failure in that case.
2782                  */
2783                 if (val == 1 && likely(len <= max_len)) {
2784                         trace_event_setup(entry, type, trace_ctx);
2785                         entry->array[0] = len;
2786                         return entry;
2787                 }
2788                 this_cpu_dec(trace_buffered_event_cnt);
2789         }
2790
2791         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2792                                             trace_ctx);
2793         /*
2794          * If tracing is off, but we have triggers enabled
2795          * we still need to look at the event data. Use the temp_buffer
2796          * to store the trace event for the trigger to use. It's recursive
2797          * safe and will not be recorded anywhere.
2798          */
2799         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2800                 *current_rb = temp_buffer;
2801                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2802                                                     trace_ctx);
2803         }
2804         return entry;
2805 }
2806 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2807
2808 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2809 static DEFINE_MUTEX(tracepoint_printk_mutex);
2810
2811 static void output_printk(struct trace_event_buffer *fbuffer)
2812 {
2813         struct trace_event_call *event_call;
2814         struct trace_event_file *file;
2815         struct trace_event *event;
2816         unsigned long flags;
2817         struct trace_iterator *iter = tracepoint_print_iter;
2818
2819         /* We should never get here if iter is NULL */
2820         if (WARN_ON_ONCE(!iter))
2821                 return;
2822
2823         event_call = fbuffer->trace_file->event_call;
2824         if (!event_call || !event_call->event.funcs ||
2825             !event_call->event.funcs->trace)
2826                 return;
2827
2828         file = fbuffer->trace_file;
2829         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2830             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2831              !filter_match_preds(file->filter, fbuffer->entry)))
2832                 return;
2833
2834         event = &fbuffer->trace_file->event_call->event;
2835
2836         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2837         trace_seq_init(&iter->seq);
2838         iter->ent = fbuffer->entry;
2839         event_call->event.funcs->trace(iter, 0, event);
2840         trace_seq_putc(&iter->seq, 0);
2841         printk("%s", iter->seq.buffer);
2842
2843         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2844 }
2845
2846 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2847                              void *buffer, size_t *lenp,
2848                              loff_t *ppos)
2849 {
2850         int save_tracepoint_printk;
2851         int ret;
2852
2853         mutex_lock(&tracepoint_printk_mutex);
2854         save_tracepoint_printk = tracepoint_printk;
2855
2856         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2857
2858         /*
2859          * This will force exiting early, as tracepoint_printk
2860          * is always zero when tracepoint_printk_iter is not allocated
2861          */
2862         if (!tracepoint_print_iter)
2863                 tracepoint_printk = 0;
2864
2865         if (save_tracepoint_printk == tracepoint_printk)
2866                 goto out;
2867
2868         if (tracepoint_printk)
2869                 static_key_enable(&tracepoint_printk_key.key);
2870         else
2871                 static_key_disable(&tracepoint_printk_key.key);
2872
2873  out:
2874         mutex_unlock(&tracepoint_printk_mutex);
2875
2876         return ret;
2877 }
2878
2879 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2880 {
2881         if (static_key_false(&tracepoint_printk_key.key))
2882                 output_printk(fbuffer);
2883
2884         if (static_branch_unlikely(&trace_event_exports_enabled))
2885                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2886         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2887                                     fbuffer->event, fbuffer->entry,
2888                                     fbuffer->trace_ctx, fbuffer->regs);
2889 }
2890 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2891
2892 /*
2893  * Skip 3:
2894  *
2895  *   trace_buffer_unlock_commit_regs()
2896  *   trace_event_buffer_commit()
2897  *   trace_event_raw_event_xxx()
2898  */
2899 # define STACK_SKIP 3
2900
2901 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2902                                      struct trace_buffer *buffer,
2903                                      struct ring_buffer_event *event,
2904                                      unsigned int trace_ctx,
2905                                      struct pt_regs *regs)
2906 {
2907         __buffer_unlock_commit(buffer, event);
2908
2909         /*
2910          * If regs is not set, then skip the necessary functions.
2911          * Note, we can still get here via blktrace, wakeup tracer
2912          * and mmiotrace, but that's ok if they lose a function or
2913          * two. They are not that meaningful.
2914          */
2915         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2916         ftrace_trace_userstack(tr, buffer, trace_ctx);
2917 }
2918
2919 /*
2920  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2921  */
2922 void
2923 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2924                                    struct ring_buffer_event *event)
2925 {
2926         __buffer_unlock_commit(buffer, event);
2927 }
2928
2929 void
2930 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2931                parent_ip, unsigned int trace_ctx)
2932 {
2933         struct trace_event_call *call = &event_function;
2934         struct trace_buffer *buffer = tr->array_buffer.buffer;
2935         struct ring_buffer_event *event;
2936         struct ftrace_entry *entry;
2937
2938         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2939                                             trace_ctx);
2940         if (!event)
2941                 return;
2942         entry   = ring_buffer_event_data(event);
2943         entry->ip                       = ip;
2944         entry->parent_ip                = parent_ip;
2945
2946         if (!call_filter_check_discard(call, entry, buffer, event)) {
2947                 if (static_branch_unlikely(&trace_function_exports_enabled))
2948                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2949                 __buffer_unlock_commit(buffer, event);
2950         }
2951 }
2952
2953 #ifdef CONFIG_STACKTRACE
2954
2955 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2956 #define FTRACE_KSTACK_NESTING   4
2957
2958 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2959
2960 struct ftrace_stack {
2961         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2962 };
2963
2964
2965 struct ftrace_stacks {
2966         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2967 };
2968
2969 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2970 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2971
2972 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2973                                  unsigned int trace_ctx,
2974                                  int skip, struct pt_regs *regs)
2975 {
2976         struct trace_event_call *call = &event_kernel_stack;
2977         struct ring_buffer_event *event;
2978         unsigned int size, nr_entries;
2979         struct ftrace_stack *fstack;
2980         struct stack_entry *entry;
2981         int stackidx;
2982
2983         /*
2984          * Add one, for this function and the call to save_stack_trace()
2985          * If regs is set, then these functions will not be in the way.
2986          */
2987 #ifndef CONFIG_UNWINDER_ORC
2988         if (!regs)
2989                 skip++;
2990 #endif
2991
2992         preempt_disable_notrace();
2993
2994         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2995
2996         /* This should never happen. If it does, yell once and skip */
2997         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2998                 goto out;
2999
3000         /*
3001          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3002          * interrupt will either see the value pre increment or post
3003          * increment. If the interrupt happens pre increment it will have
3004          * restored the counter when it returns.  We just need a barrier to
3005          * keep gcc from moving things around.
3006          */
3007         barrier();
3008
3009         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3010         size = ARRAY_SIZE(fstack->calls);
3011
3012         if (regs) {
3013                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3014                                                    size, skip);
3015         } else {
3016                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3017         }
3018
3019         size = nr_entries * sizeof(unsigned long);
3020         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3021                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3022                                     trace_ctx);
3023         if (!event)
3024                 goto out;
3025         entry = ring_buffer_event_data(event);
3026
3027         memcpy(&entry->caller, fstack->calls, size);
3028         entry->size = nr_entries;
3029
3030         if (!call_filter_check_discard(call, entry, buffer, event))
3031                 __buffer_unlock_commit(buffer, event);
3032
3033  out:
3034         /* Again, don't let gcc optimize things here */
3035         barrier();
3036         __this_cpu_dec(ftrace_stack_reserve);
3037         preempt_enable_notrace();
3038
3039 }
3040
3041 static inline void ftrace_trace_stack(struct trace_array *tr,
3042                                       struct trace_buffer *buffer,
3043                                       unsigned int trace_ctx,
3044                                       int skip, struct pt_regs *regs)
3045 {
3046         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3047                 return;
3048
3049         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3050 }
3051
3052 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3053                    int skip)
3054 {
3055         struct trace_buffer *buffer = tr->array_buffer.buffer;
3056
3057         if (rcu_is_watching()) {
3058                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3059                 return;
3060         }
3061
3062         /*
3063          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3064          * but if the above rcu_is_watching() failed, then the NMI
3065          * triggered someplace critical, and rcu_irq_enter() should
3066          * not be called from NMI.
3067          */
3068         if (unlikely(in_nmi()))
3069                 return;
3070
3071         rcu_irq_enter_irqson();
3072         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3073         rcu_irq_exit_irqson();
3074 }
3075
3076 /**
3077  * trace_dump_stack - record a stack back trace in the trace buffer
3078  * @skip: Number of functions to skip (helper handlers)
3079  */
3080 void trace_dump_stack(int skip)
3081 {
3082         if (tracing_disabled || tracing_selftest_running)
3083                 return;
3084
3085 #ifndef CONFIG_UNWINDER_ORC
3086         /* Skip 1 to skip this function. */
3087         skip++;
3088 #endif
3089         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3090                              tracing_gen_ctx(), skip, NULL);
3091 }
3092 EXPORT_SYMBOL_GPL(trace_dump_stack);
3093
3094 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3095 static DEFINE_PER_CPU(int, user_stack_count);
3096
3097 static void
3098 ftrace_trace_userstack(struct trace_array *tr,
3099                        struct trace_buffer *buffer, unsigned int trace_ctx)
3100 {
3101         struct trace_event_call *call = &event_user_stack;
3102         struct ring_buffer_event *event;
3103         struct userstack_entry *entry;
3104
3105         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3106                 return;
3107
3108         /*
3109          * NMIs can not handle page faults, even with fix ups.
3110          * The save user stack can (and often does) fault.
3111          */
3112         if (unlikely(in_nmi()))
3113                 return;
3114
3115         /*
3116          * prevent recursion, since the user stack tracing may
3117          * trigger other kernel events.
3118          */
3119         preempt_disable();
3120         if (__this_cpu_read(user_stack_count))
3121                 goto out;
3122
3123         __this_cpu_inc(user_stack_count);
3124
3125         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3126                                             sizeof(*entry), trace_ctx);
3127         if (!event)
3128                 goto out_drop_count;
3129         entry   = ring_buffer_event_data(event);
3130
3131         entry->tgid             = current->tgid;
3132         memset(&entry->caller, 0, sizeof(entry->caller));
3133
3134         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3135         if (!call_filter_check_discard(call, entry, buffer, event))
3136                 __buffer_unlock_commit(buffer, event);
3137
3138  out_drop_count:
3139         __this_cpu_dec(user_stack_count);
3140  out:
3141         preempt_enable();
3142 }
3143 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3144 static void ftrace_trace_userstack(struct trace_array *tr,
3145                                    struct trace_buffer *buffer,
3146                                    unsigned int trace_ctx)
3147 {
3148 }
3149 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3150
3151 #endif /* CONFIG_STACKTRACE */
3152
3153 static inline void
3154 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3155                           unsigned long long delta)
3156 {
3157         entry->bottom_delta_ts = delta & U32_MAX;
3158         entry->top_delta_ts = (delta >> 32);
3159 }
3160
3161 void trace_last_func_repeats(struct trace_array *tr,
3162                              struct trace_func_repeats *last_info,
3163                              unsigned int trace_ctx)
3164 {
3165         struct trace_buffer *buffer = tr->array_buffer.buffer;
3166         struct func_repeats_entry *entry;
3167         struct ring_buffer_event *event;
3168         u64 delta;
3169
3170         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3171                                             sizeof(*entry), trace_ctx);
3172         if (!event)
3173                 return;
3174
3175         delta = ring_buffer_event_time_stamp(buffer, event) -
3176                 last_info->ts_last_call;
3177
3178         entry = ring_buffer_event_data(event);
3179         entry->ip = last_info->ip;
3180         entry->parent_ip = last_info->parent_ip;
3181         entry->count = last_info->count;
3182         func_repeats_set_delta_ts(entry, delta);
3183
3184         __buffer_unlock_commit(buffer, event);
3185 }
3186
3187 /* created for use with alloc_percpu */
3188 struct trace_buffer_struct {
3189         int nesting;
3190         char buffer[4][TRACE_BUF_SIZE];
3191 };
3192
3193 static struct trace_buffer_struct *trace_percpu_buffer;
3194
3195 /*
3196  * This allows for lockless recording.  If we're nested too deeply, then
3197  * this returns NULL.
3198  */
3199 static char *get_trace_buf(void)
3200 {
3201         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3202
3203         if (!buffer || buffer->nesting >= 4)
3204                 return NULL;
3205
3206         buffer->nesting++;
3207
3208         /* Interrupts must see nesting incremented before we use the buffer */
3209         barrier();
3210         return &buffer->buffer[buffer->nesting - 1][0];
3211 }
3212
3213 static void put_trace_buf(void)
3214 {
3215         /* Don't let the decrement of nesting leak before this */
3216         barrier();
3217         this_cpu_dec(trace_percpu_buffer->nesting);
3218 }
3219
3220 static int alloc_percpu_trace_buffer(void)
3221 {
3222         struct trace_buffer_struct *buffers;
3223
3224         if (trace_percpu_buffer)
3225                 return 0;
3226
3227         buffers = alloc_percpu(struct trace_buffer_struct);
3228         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3229                 return -ENOMEM;
3230
3231         trace_percpu_buffer = buffers;
3232         return 0;
3233 }
3234
3235 static int buffers_allocated;
3236
3237 void trace_printk_init_buffers(void)
3238 {
3239         if (buffers_allocated)
3240                 return;
3241
3242         if (alloc_percpu_trace_buffer())
3243                 return;
3244
3245         /* trace_printk() is for debug use only. Don't use it in production. */
3246
3247         pr_warn("\n");
3248         pr_warn("**********************************************************\n");
3249         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3250         pr_warn("**                                                      **\n");
3251         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3252         pr_warn("**                                                      **\n");
3253         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3254         pr_warn("** unsafe for production use.                           **\n");
3255         pr_warn("**                                                      **\n");
3256         pr_warn("** If you see this message and you are not debugging    **\n");
3257         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3258         pr_warn("**                                                      **\n");
3259         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3260         pr_warn("**********************************************************\n");
3261
3262         /* Expand the buffers to set size */
3263         tracing_update_buffers();
3264
3265         buffers_allocated = 1;
3266
3267         /*
3268          * trace_printk_init_buffers() can be called by modules.
3269          * If that happens, then we need to start cmdline recording
3270          * directly here. If the global_trace.buffer is already
3271          * allocated here, then this was called by module code.
3272          */
3273         if (global_trace.array_buffer.buffer)
3274                 tracing_start_cmdline_record();
3275 }
3276 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3277
3278 void trace_printk_start_comm(void)
3279 {
3280         /* Start tracing comms if trace printk is set */
3281         if (!buffers_allocated)
3282                 return;
3283         tracing_start_cmdline_record();
3284 }
3285
3286 static void trace_printk_start_stop_comm(int enabled)
3287 {
3288         if (!buffers_allocated)
3289                 return;
3290
3291         if (enabled)
3292                 tracing_start_cmdline_record();
3293         else
3294                 tracing_stop_cmdline_record();
3295 }
3296
3297 /**
3298  * trace_vbprintk - write binary msg to tracing buffer
3299  * @ip:    The address of the caller
3300  * @fmt:   The string format to write to the buffer
3301  * @args:  Arguments for @fmt
3302  */
3303 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3304 {
3305         struct trace_event_call *call = &event_bprint;
3306         struct ring_buffer_event *event;
3307         struct trace_buffer *buffer;
3308         struct trace_array *tr = &global_trace;
3309         struct bprint_entry *entry;
3310         unsigned int trace_ctx;
3311         char *tbuffer;
3312         int len = 0, size;
3313
3314         if (unlikely(tracing_selftest_running || tracing_disabled))
3315                 return 0;
3316
3317         /* Don't pollute graph traces with trace_vprintk internals */
3318         pause_graph_tracing();
3319
3320         trace_ctx = tracing_gen_ctx();
3321         preempt_disable_notrace();
3322
3323         tbuffer = get_trace_buf();
3324         if (!tbuffer) {
3325                 len = 0;
3326                 goto out_nobuffer;
3327         }
3328
3329         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3330
3331         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3332                 goto out_put;
3333
3334         size = sizeof(*entry) + sizeof(u32) * len;
3335         buffer = tr->array_buffer.buffer;
3336         ring_buffer_nest_start(buffer);
3337         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3338                                             trace_ctx);
3339         if (!event)
3340                 goto out;
3341         entry = ring_buffer_event_data(event);
3342         entry->ip                       = ip;
3343         entry->fmt                      = fmt;
3344
3345         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3346         if (!call_filter_check_discard(call, entry, buffer, event)) {
3347                 __buffer_unlock_commit(buffer, event);
3348                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3349         }
3350
3351 out:
3352         ring_buffer_nest_end(buffer);
3353 out_put:
3354         put_trace_buf();
3355
3356 out_nobuffer:
3357         preempt_enable_notrace();
3358         unpause_graph_tracing();
3359
3360         return len;
3361 }
3362 EXPORT_SYMBOL_GPL(trace_vbprintk);
3363
3364 __printf(3, 0)
3365 static int
3366 __trace_array_vprintk(struct trace_buffer *buffer,
3367                       unsigned long ip, const char *fmt, va_list args)
3368 {
3369         struct trace_event_call *call = &event_print;
3370         struct ring_buffer_event *event;
3371         int len = 0, size;
3372         struct print_entry *entry;
3373         unsigned int trace_ctx;
3374         char *tbuffer;
3375
3376         if (tracing_disabled || tracing_selftest_running)
3377                 return 0;
3378
3379         /* Don't pollute graph traces with trace_vprintk internals */
3380         pause_graph_tracing();
3381
3382         trace_ctx = tracing_gen_ctx();
3383         preempt_disable_notrace();
3384
3385
3386         tbuffer = get_trace_buf();
3387         if (!tbuffer) {
3388                 len = 0;
3389                 goto out_nobuffer;
3390         }
3391
3392         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3393
3394         size = sizeof(*entry) + len + 1;
3395         ring_buffer_nest_start(buffer);
3396         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3397                                             trace_ctx);
3398         if (!event)
3399                 goto out;
3400         entry = ring_buffer_event_data(event);
3401         entry->ip = ip;
3402
3403         memcpy(&entry->buf, tbuffer, len + 1);
3404         if (!call_filter_check_discard(call, entry, buffer, event)) {
3405                 __buffer_unlock_commit(buffer, event);
3406                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3407         }
3408
3409 out:
3410         ring_buffer_nest_end(buffer);
3411         put_trace_buf();
3412
3413 out_nobuffer:
3414         preempt_enable_notrace();
3415         unpause_graph_tracing();
3416
3417         return len;
3418 }
3419
3420 __printf(3, 0)
3421 int trace_array_vprintk(struct trace_array *tr,
3422                         unsigned long ip, const char *fmt, va_list args)
3423 {
3424         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3425 }
3426
3427 /**
3428  * trace_array_printk - Print a message to a specific instance
3429  * @tr: The instance trace_array descriptor
3430  * @ip: The instruction pointer that this is called from.
3431  * @fmt: The format to print (printf format)
3432  *
3433  * If a subsystem sets up its own instance, they have the right to
3434  * printk strings into their tracing instance buffer using this
3435  * function. Note, this function will not write into the top level
3436  * buffer (use trace_printk() for that), as writing into the top level
3437  * buffer should only have events that can be individually disabled.
3438  * trace_printk() is only used for debugging a kernel, and should not
3439  * be ever incorporated in normal use.
3440  *
3441  * trace_array_printk() can be used, as it will not add noise to the
3442  * top level tracing buffer.
3443  *
3444  * Note, trace_array_init_printk() must be called on @tr before this
3445  * can be used.
3446  */
3447 __printf(3, 0)
3448 int trace_array_printk(struct trace_array *tr,
3449                        unsigned long ip, const char *fmt, ...)
3450 {
3451         int ret;
3452         va_list ap;
3453
3454         if (!tr)
3455                 return -ENOENT;
3456
3457         /* This is only allowed for created instances */
3458         if (tr == &global_trace)
3459                 return 0;
3460
3461         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3462                 return 0;
3463
3464         va_start(ap, fmt);
3465         ret = trace_array_vprintk(tr, ip, fmt, ap);
3466         va_end(ap);
3467         return ret;
3468 }
3469 EXPORT_SYMBOL_GPL(trace_array_printk);
3470
3471 /**
3472  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3473  * @tr: The trace array to initialize the buffers for
3474  *
3475  * As trace_array_printk() only writes into instances, they are OK to
3476  * have in the kernel (unlike trace_printk()). This needs to be called
3477  * before trace_array_printk() can be used on a trace_array.
3478  */
3479 int trace_array_init_printk(struct trace_array *tr)
3480 {
3481         if (!tr)
3482                 return -ENOENT;
3483
3484         /* This is only allowed for created instances */
3485         if (tr == &global_trace)
3486                 return -EINVAL;
3487
3488         return alloc_percpu_trace_buffer();
3489 }
3490 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3491
3492 __printf(3, 4)
3493 int trace_array_printk_buf(struct trace_buffer *buffer,
3494                            unsigned long ip, const char *fmt, ...)
3495 {
3496         int ret;
3497         va_list ap;
3498
3499         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3500                 return 0;
3501
3502         va_start(ap, fmt);
3503         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3504         va_end(ap);
3505         return ret;
3506 }
3507
3508 __printf(2, 0)
3509 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3510 {
3511         return trace_array_vprintk(&global_trace, ip, fmt, args);
3512 }
3513 EXPORT_SYMBOL_GPL(trace_vprintk);
3514
3515 static void trace_iterator_increment(struct trace_iterator *iter)
3516 {
3517         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3518
3519         iter->idx++;
3520         if (buf_iter)
3521                 ring_buffer_iter_advance(buf_iter);
3522 }
3523
3524 static struct trace_entry *
3525 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3526                 unsigned long *lost_events)
3527 {
3528         struct ring_buffer_event *event;
3529         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3530
3531         if (buf_iter) {
3532                 event = ring_buffer_iter_peek(buf_iter, ts);
3533                 if (lost_events)
3534                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3535                                 (unsigned long)-1 : 0;
3536         } else {
3537                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3538                                          lost_events);
3539         }
3540
3541         if (event) {
3542                 iter->ent_size = ring_buffer_event_length(event);
3543                 return ring_buffer_event_data(event);
3544         }
3545         iter->ent_size = 0;
3546         return NULL;
3547 }
3548
3549 static struct trace_entry *
3550 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3551                   unsigned long *missing_events, u64 *ent_ts)
3552 {
3553         struct trace_buffer *buffer = iter->array_buffer->buffer;
3554         struct trace_entry *ent, *next = NULL;
3555         unsigned long lost_events = 0, next_lost = 0;
3556         int cpu_file = iter->cpu_file;
3557         u64 next_ts = 0, ts;
3558         int next_cpu = -1;
3559         int next_size = 0;
3560         int cpu;
3561
3562         /*
3563          * If we are in a per_cpu trace file, don't bother by iterating over
3564          * all cpu and peek directly.
3565          */
3566         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3567                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3568                         return NULL;
3569                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3570                 if (ent_cpu)
3571                         *ent_cpu = cpu_file;
3572
3573                 return ent;
3574         }
3575
3576         for_each_tracing_cpu(cpu) {
3577
3578                 if (ring_buffer_empty_cpu(buffer, cpu))
3579                         continue;
3580
3581                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3582
3583                 /*
3584                  * Pick the entry with the smallest timestamp:
3585                  */
3586                 if (ent && (!next || ts < next_ts)) {
3587                         next = ent;
3588                         next_cpu = cpu;
3589                         next_ts = ts;
3590                         next_lost = lost_events;
3591                         next_size = iter->ent_size;
3592                 }
3593         }
3594
3595         iter->ent_size = next_size;
3596
3597         if (ent_cpu)
3598                 *ent_cpu = next_cpu;
3599
3600         if (ent_ts)
3601                 *ent_ts = next_ts;
3602
3603         if (missing_events)
3604                 *missing_events = next_lost;
3605
3606         return next;
3607 }
3608
3609 #define STATIC_FMT_BUF_SIZE     128
3610 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3611
3612 static char *trace_iter_expand_format(struct trace_iterator *iter)
3613 {
3614         char *tmp;
3615
3616         /*
3617          * iter->tr is NULL when used with tp_printk, which makes
3618          * this get called where it is not safe to call krealloc().
3619          */
3620         if (!iter->tr || iter->fmt == static_fmt_buf)
3621                 return NULL;
3622
3623         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3624                        GFP_KERNEL);
3625         if (tmp) {
3626                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3627                 iter->fmt = tmp;
3628         }
3629
3630         return tmp;
3631 }
3632
3633 /* Returns true if the string is safe to dereference from an event */
3634 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3635 {
3636         unsigned long addr = (unsigned long)str;
3637         struct trace_event *trace_event;
3638         struct trace_event_call *event;
3639
3640         /* OK if part of the event data */
3641         if ((addr >= (unsigned long)iter->ent) &&
3642             (addr < (unsigned long)iter->ent + iter->ent_size))
3643                 return true;
3644
3645         /* OK if part of the temp seq buffer */
3646         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3647             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3648                 return true;
3649
3650         /* Core rodata can not be freed */
3651         if (is_kernel_rodata(addr))
3652                 return true;
3653
3654         if (trace_is_tracepoint_string(str))
3655                 return true;
3656
3657         /*
3658          * Now this could be a module event, referencing core module
3659          * data, which is OK.
3660          */
3661         if (!iter->ent)
3662                 return false;
3663
3664         trace_event = ftrace_find_event(iter->ent->type);
3665         if (!trace_event)
3666                 return false;
3667
3668         event = container_of(trace_event, struct trace_event_call, event);
3669         if (!event->mod)
3670                 return false;
3671
3672         /* Would rather have rodata, but this will suffice */
3673         if (within_module_core(addr, event->mod))
3674                 return true;
3675
3676         return false;
3677 }
3678
3679 static const char *show_buffer(struct trace_seq *s)
3680 {
3681         struct seq_buf *seq = &s->seq;
3682
3683         seq_buf_terminate(seq);
3684
3685         return seq->buffer;
3686 }
3687
3688 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3689
3690 static int test_can_verify_check(const char *fmt, ...)
3691 {
3692         char buf[16];
3693         va_list ap;
3694         int ret;
3695
3696         /*
3697          * The verifier is dependent on vsnprintf() modifies the va_list
3698          * passed to it, where it is sent as a reference. Some architectures
3699          * (like x86_32) passes it by value, which means that vsnprintf()
3700          * does not modify the va_list passed to it, and the verifier
3701          * would then need to be able to understand all the values that
3702          * vsnprintf can use. If it is passed by value, then the verifier
3703          * is disabled.
3704          */
3705         va_start(ap, fmt);
3706         vsnprintf(buf, 16, "%d", ap);
3707         ret = va_arg(ap, int);
3708         va_end(ap);
3709
3710         return ret;
3711 }
3712
3713 static void test_can_verify(void)
3714 {
3715         if (!test_can_verify_check("%d %d", 0, 1)) {
3716                 pr_info("trace event string verifier disabled\n");
3717                 static_branch_inc(&trace_no_verify);
3718         }
3719 }
3720
3721 /**
3722  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3723  * @iter: The iterator that holds the seq buffer and the event being printed
3724  * @fmt: The format used to print the event
3725  * @ap: The va_list holding the data to print from @fmt.
3726  *
3727  * This writes the data into the @iter->seq buffer using the data from
3728  * @fmt and @ap. If the format has a %s, then the source of the string
3729  * is examined to make sure it is safe to print, otherwise it will
3730  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3731  * pointer.
3732  */
3733 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3734                          va_list ap)
3735 {
3736         const char *p = fmt;
3737         const char *str;
3738         int i, j;
3739
3740         if (WARN_ON_ONCE(!fmt))
3741                 return;
3742
3743         if (static_branch_unlikely(&trace_no_verify))
3744                 goto print;
3745
3746         /* Don't bother checking when doing a ftrace_dump() */
3747         if (iter->fmt == static_fmt_buf)
3748                 goto print;
3749
3750         while (*p) {
3751                 bool star = false;
3752                 int len = 0;
3753
3754                 j = 0;
3755
3756                 /* We only care about %s and variants */
3757                 for (i = 0; p[i]; i++) {
3758                         if (i + 1 >= iter->fmt_size) {
3759                                 /*
3760                                  * If we can't expand the copy buffer,
3761                                  * just print it.
3762                                  */
3763                                 if (!trace_iter_expand_format(iter))
3764                                         goto print;
3765                         }
3766
3767                         if (p[i] == '\\' && p[i+1]) {
3768                                 i++;
3769                                 continue;
3770                         }
3771                         if (p[i] == '%') {
3772                                 /* Need to test cases like %08.*s */
3773                                 for (j = 1; p[i+j]; j++) {
3774                                         if (isdigit(p[i+j]) ||
3775                                             p[i+j] == '.')
3776                                                 continue;
3777                                         if (p[i+j] == '*') {
3778                                                 star = true;
3779                                                 continue;
3780                                         }
3781                                         break;
3782                                 }
3783                                 if (p[i+j] == 's')
3784                                         break;
3785                                 star = false;
3786                         }
3787                         j = 0;
3788                 }
3789                 /* If no %s found then just print normally */
3790                 if (!p[i])
3791                         break;
3792
3793                 /* Copy up to the %s, and print that */
3794                 strncpy(iter->fmt, p, i);
3795                 iter->fmt[i] = '\0';
3796                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3797
3798                 if (star)
3799                         len = va_arg(ap, int);
3800
3801                 /* The ap now points to the string data of the %s */
3802                 str = va_arg(ap, const char *);
3803
3804                 /*
3805                  * If you hit this warning, it is likely that the
3806                  * trace event in question used %s on a string that
3807                  * was saved at the time of the event, but may not be
3808                  * around when the trace is read. Use __string(),
3809                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3810                  * instead. See samples/trace_events/trace-events-sample.h
3811                  * for reference.
3812                  */
3813                 if (WARN_ONCE(!trace_safe_str(iter, str),
3814                               "fmt: '%s' current_buffer: '%s'",
3815                               fmt, show_buffer(&iter->seq))) {
3816                         int ret;
3817
3818                         /* Try to safely read the string */
3819                         if (star) {
3820                                 if (len + 1 > iter->fmt_size)
3821                                         len = iter->fmt_size - 1;
3822                                 if (len < 0)
3823                                         len = 0;
3824                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3825                                 iter->fmt[len] = 0;
3826                                 star = false;
3827                         } else {
3828                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3829                                                                   iter->fmt_size);
3830                         }
3831                         if (ret < 0)
3832                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3833                         else
3834                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3835                                                  str, iter->fmt);
3836                         str = "[UNSAFE-MEMORY]";
3837                         strcpy(iter->fmt, "%s");
3838                 } else {
3839                         strncpy(iter->fmt, p + i, j + 1);
3840                         iter->fmt[j+1] = '\0';
3841                 }
3842                 if (star)
3843                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3844                 else
3845                         trace_seq_printf(&iter->seq, iter->fmt, str);
3846
3847                 p += i + j + 1;
3848         }
3849  print:
3850         if (*p)
3851                 trace_seq_vprintf(&iter->seq, p, ap);
3852 }
3853
3854 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3855 {
3856         const char *p, *new_fmt;
3857         char *q;
3858
3859         if (WARN_ON_ONCE(!fmt))
3860                 return fmt;
3861
3862         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3863                 return fmt;
3864
3865         p = fmt;
3866         new_fmt = q = iter->fmt;
3867         while (*p) {
3868                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3869                         if (!trace_iter_expand_format(iter))
3870                                 return fmt;
3871
3872                         q += iter->fmt - new_fmt;
3873                         new_fmt = iter->fmt;
3874                 }
3875
3876                 *q++ = *p++;
3877
3878                 /* Replace %p with %px */
3879                 if (p[-1] == '%') {
3880                         if (p[0] == '%') {
3881                                 *q++ = *p++;
3882                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3883                                 *q++ = *p++;
3884                                 *q++ = 'x';
3885                         }
3886                 }
3887         }
3888         *q = '\0';
3889
3890         return new_fmt;
3891 }
3892
3893 #define STATIC_TEMP_BUF_SIZE    128
3894 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3895
3896 /* Find the next real entry, without updating the iterator itself */
3897 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3898                                           int *ent_cpu, u64 *ent_ts)
3899 {
3900         /* __find_next_entry will reset ent_size */
3901         int ent_size = iter->ent_size;
3902         struct trace_entry *entry;
3903
3904         /*
3905          * If called from ftrace_dump(), then the iter->temp buffer
3906          * will be the static_temp_buf and not created from kmalloc.
3907          * If the entry size is greater than the buffer, we can
3908          * not save it. Just return NULL in that case. This is only
3909          * used to add markers when two consecutive events' time
3910          * stamps have a large delta. See trace_print_lat_context()
3911          */
3912         if (iter->temp == static_temp_buf &&
3913             STATIC_TEMP_BUF_SIZE < ent_size)
3914                 return NULL;
3915
3916         /*
3917          * The __find_next_entry() may call peek_next_entry(), which may
3918          * call ring_buffer_peek() that may make the contents of iter->ent
3919          * undefined. Need to copy iter->ent now.
3920          */
3921         if (iter->ent && iter->ent != iter->temp) {
3922                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3923                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3924                         void *temp;
3925                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3926                         if (!temp)
3927                                 return NULL;
3928                         kfree(iter->temp);
3929                         iter->temp = temp;
3930                         iter->temp_size = iter->ent_size;
3931                 }
3932                 memcpy(iter->temp, iter->ent, iter->ent_size);
3933                 iter->ent = iter->temp;
3934         }
3935         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3936         /* Put back the original ent_size */
3937         iter->ent_size = ent_size;
3938
3939         return entry;
3940 }
3941
3942 /* Find the next real entry, and increment the iterator to the next entry */
3943 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3944 {
3945         iter->ent = __find_next_entry(iter, &iter->cpu,
3946                                       &iter->lost_events, &iter->ts);
3947
3948         if (iter->ent)
3949                 trace_iterator_increment(iter);
3950
3951         return iter->ent ? iter : NULL;
3952 }
3953
3954 static void trace_consume(struct trace_iterator *iter)
3955 {
3956         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3957                             &iter->lost_events);
3958 }
3959
3960 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3961 {
3962         struct trace_iterator *iter = m->private;
3963         int i = (int)*pos;
3964         void *ent;
3965
3966         WARN_ON_ONCE(iter->leftover);
3967
3968         (*pos)++;
3969
3970         /* can't go backwards */
3971         if (iter->idx > i)
3972                 return NULL;
3973
3974         if (iter->idx < 0)
3975                 ent = trace_find_next_entry_inc(iter);
3976         else
3977                 ent = iter;
3978
3979         while (ent && iter->idx < i)
3980                 ent = trace_find_next_entry_inc(iter);
3981
3982         iter->pos = *pos;
3983
3984         return ent;
3985 }
3986
3987 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3988 {
3989         struct ring_buffer_iter *buf_iter;
3990         unsigned long entries = 0;
3991         u64 ts;
3992
3993         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3994
3995         buf_iter = trace_buffer_iter(iter, cpu);
3996         if (!buf_iter)
3997                 return;
3998
3999         ring_buffer_iter_reset(buf_iter);
4000
4001         /*
4002          * We could have the case with the max latency tracers
4003          * that a reset never took place on a cpu. This is evident
4004          * by the timestamp being before the start of the buffer.
4005          */
4006         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4007                 if (ts >= iter->array_buffer->time_start)
4008                         break;
4009                 entries++;
4010                 ring_buffer_iter_advance(buf_iter);
4011         }
4012
4013         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4014 }
4015
4016 /*
4017  * The current tracer is copied to avoid a global locking
4018  * all around.
4019  */
4020 static void *s_start(struct seq_file *m, loff_t *pos)
4021 {
4022         struct trace_iterator *iter = m->private;
4023         struct trace_array *tr = iter->tr;
4024         int cpu_file = iter->cpu_file;
4025         void *p = NULL;
4026         loff_t l = 0;
4027         int cpu;
4028
4029         /*
4030          * copy the tracer to avoid using a global lock all around.
4031          * iter->trace is a copy of current_trace, the pointer to the
4032          * name may be used instead of a strcmp(), as iter->trace->name
4033          * will point to the same string as current_trace->name.
4034          */
4035         mutex_lock(&trace_types_lock);
4036         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4037                 *iter->trace = *tr->current_trace;
4038         mutex_unlock(&trace_types_lock);
4039
4040 #ifdef CONFIG_TRACER_MAX_TRACE
4041         if (iter->snapshot && iter->trace->use_max_tr)
4042                 return ERR_PTR(-EBUSY);
4043 #endif
4044
4045         if (!iter->snapshot)
4046                 atomic_inc(&trace_record_taskinfo_disabled);
4047
4048         if (*pos != iter->pos) {
4049                 iter->ent = NULL;
4050                 iter->cpu = 0;
4051                 iter->idx = -1;
4052
4053                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4054                         for_each_tracing_cpu(cpu)
4055                                 tracing_iter_reset(iter, cpu);
4056                 } else
4057                         tracing_iter_reset(iter, cpu_file);
4058
4059                 iter->leftover = 0;
4060                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4061                         ;
4062
4063         } else {
4064                 /*
4065                  * If we overflowed the seq_file before, then we want
4066                  * to just reuse the trace_seq buffer again.
4067                  */
4068                 if (iter->leftover)
4069                         p = iter;
4070                 else {
4071                         l = *pos - 1;
4072                         p = s_next(m, p, &l);
4073                 }
4074         }
4075
4076         trace_event_read_lock();
4077         trace_access_lock(cpu_file);
4078         return p;
4079 }
4080
4081 static void s_stop(struct seq_file *m, void *p)
4082 {
4083         struct trace_iterator *iter = m->private;
4084
4085 #ifdef CONFIG_TRACER_MAX_TRACE
4086         if (iter->snapshot && iter->trace->use_max_tr)
4087                 return;
4088 #endif
4089
4090         if (!iter->snapshot)
4091                 atomic_dec(&trace_record_taskinfo_disabled);
4092
4093         trace_access_unlock(iter->cpu_file);
4094         trace_event_read_unlock();
4095 }
4096
4097 static void
4098 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4099                       unsigned long *entries, int cpu)
4100 {
4101         unsigned long count;
4102
4103         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4104         /*
4105          * If this buffer has skipped entries, then we hold all
4106          * entries for the trace and we need to ignore the
4107          * ones before the time stamp.
4108          */
4109         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4110                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4111                 /* total is the same as the entries */
4112                 *total = count;
4113         } else
4114                 *total = count +
4115                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4116         *entries = count;
4117 }
4118
4119 static void
4120 get_total_entries(struct array_buffer *buf,
4121                   unsigned long *total, unsigned long *entries)
4122 {
4123         unsigned long t, e;
4124         int cpu;
4125
4126         *total = 0;
4127         *entries = 0;
4128
4129         for_each_tracing_cpu(cpu) {
4130                 get_total_entries_cpu(buf, &t, &e, cpu);
4131                 *total += t;
4132                 *entries += e;
4133         }
4134 }
4135
4136 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4137 {
4138         unsigned long total, entries;
4139
4140         if (!tr)
4141                 tr = &global_trace;
4142
4143         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4144
4145         return entries;
4146 }
4147
4148 unsigned long trace_total_entries(struct trace_array *tr)
4149 {
4150         unsigned long total, entries;
4151
4152         if (!tr)
4153                 tr = &global_trace;
4154
4155         get_total_entries(&tr->array_buffer, &total, &entries);
4156
4157         return entries;
4158 }
4159
4160 static void print_lat_help_header(struct seq_file *m)
4161 {
4162         seq_puts(m, "#                    _------=> CPU#            \n"
4163                     "#                   / _-----=> irqs-off        \n"
4164                     "#                  | / _----=> need-resched    \n"
4165                     "#                  || / _---=> hardirq/softirq \n"
4166                     "#                  ||| / _--=> preempt-depth   \n"
4167                     "#                  |||| /     delay            \n"
4168                     "#  cmd     pid     ||||| time  |   caller      \n"
4169                     "#     \\   /        |||||  \\    |   /         \n");
4170 }
4171
4172 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4173 {
4174         unsigned long total;
4175         unsigned long entries;
4176
4177         get_total_entries(buf, &total, &entries);
4178         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4179                    entries, total, num_online_cpus());
4180         seq_puts(m, "#\n");
4181 }
4182
4183 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4184                                    unsigned int flags)
4185 {
4186         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4187
4188         print_event_info(buf, m);
4189
4190         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4191         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4192 }
4193
4194 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4195                                        unsigned int flags)
4196 {
4197         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4198         const char *space = "            ";
4199         int prec = tgid ? 12 : 2;
4200
4201         print_event_info(buf, m);
4202
4203         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4204         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4205         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4206         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4207         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4208         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4209         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4210 }
4211
4212 void
4213 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4214 {
4215         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4216         struct array_buffer *buf = iter->array_buffer;
4217         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4218         struct tracer *type = iter->trace;
4219         unsigned long entries;
4220         unsigned long total;
4221         const char *name = "preemption";
4222
4223         name = type->name;
4224
4225         get_total_entries(buf, &total, &entries);
4226
4227         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4228                    name, UTS_RELEASE);
4229         seq_puts(m, "# -----------------------------------"
4230                  "---------------------------------\n");
4231         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4232                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4233                    nsecs_to_usecs(data->saved_latency),
4234                    entries,
4235                    total,
4236                    buf->cpu,
4237 #if defined(CONFIG_PREEMPT_NONE)
4238                    "server",
4239 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4240                    "desktop",
4241 #elif defined(CONFIG_PREEMPT)
4242                    "preempt",
4243 #elif defined(CONFIG_PREEMPT_RT)
4244                    "preempt_rt",
4245 #else
4246                    "unknown",
4247 #endif
4248                    /* These are reserved for later use */
4249                    0, 0, 0, 0);
4250 #ifdef CONFIG_SMP
4251         seq_printf(m, " #P:%d)\n", num_online_cpus());
4252 #else
4253         seq_puts(m, ")\n");
4254 #endif
4255         seq_puts(m, "#    -----------------\n");
4256         seq_printf(m, "#    | task: %.16s-%d "
4257                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4258                    data->comm, data->pid,
4259                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4260                    data->policy, data->rt_priority);
4261         seq_puts(m, "#    -----------------\n");
4262
4263         if (data->critical_start) {
4264                 seq_puts(m, "#  => started at: ");
4265                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4266                 trace_print_seq(m, &iter->seq);
4267                 seq_puts(m, "\n#  => ended at:   ");
4268                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4269                 trace_print_seq(m, &iter->seq);
4270                 seq_puts(m, "\n#\n");
4271         }
4272
4273         seq_puts(m, "#\n");
4274 }
4275
4276 static void test_cpu_buff_start(struct trace_iterator *iter)
4277 {
4278         struct trace_seq *s = &iter->seq;
4279         struct trace_array *tr = iter->tr;
4280
4281         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4282                 return;
4283
4284         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4285                 return;
4286
4287         if (cpumask_available(iter->started) &&
4288             cpumask_test_cpu(iter->cpu, iter->started))
4289                 return;
4290
4291         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4292                 return;
4293
4294         if (cpumask_available(iter->started))
4295                 cpumask_set_cpu(iter->cpu, iter->started);
4296
4297         /* Don't print started cpu buffer for the first entry of the trace */
4298         if (iter->idx > 1)
4299                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4300                                 iter->cpu);
4301 }
4302
4303 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4304 {
4305         struct trace_array *tr = iter->tr;
4306         struct trace_seq *s = &iter->seq;
4307         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4308         struct trace_entry *entry;
4309         struct trace_event *event;
4310
4311         entry = iter->ent;
4312
4313         test_cpu_buff_start(iter);
4314
4315         event = ftrace_find_event(entry->type);
4316
4317         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4318                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4319                         trace_print_lat_context(iter);
4320                 else
4321                         trace_print_context(iter);
4322         }
4323
4324         if (trace_seq_has_overflowed(s))
4325                 return TRACE_TYPE_PARTIAL_LINE;
4326
4327         if (event)
4328                 return event->funcs->trace(iter, sym_flags, event);
4329
4330         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4331
4332         return trace_handle_return(s);
4333 }
4334
4335 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4336 {
4337         struct trace_array *tr = iter->tr;
4338         struct trace_seq *s = &iter->seq;
4339         struct trace_entry *entry;
4340         struct trace_event *event;
4341
4342         entry = iter->ent;
4343
4344         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4345                 trace_seq_printf(s, "%d %d %llu ",
4346                                  entry->pid, iter->cpu, iter->ts);
4347
4348         if (trace_seq_has_overflowed(s))
4349                 return TRACE_TYPE_PARTIAL_LINE;
4350
4351         event = ftrace_find_event(entry->type);
4352         if (event)
4353                 return event->funcs->raw(iter, 0, event);
4354
4355         trace_seq_printf(s, "%d ?\n", entry->type);
4356
4357         return trace_handle_return(s);
4358 }
4359
4360 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4361 {
4362         struct trace_array *tr = iter->tr;
4363         struct trace_seq *s = &iter->seq;
4364         unsigned char newline = '\n';
4365         struct trace_entry *entry;
4366         struct trace_event *event;
4367
4368         entry = iter->ent;
4369
4370         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4371                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4372                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4373                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4374                 if (trace_seq_has_overflowed(s))
4375                         return TRACE_TYPE_PARTIAL_LINE;
4376         }
4377
4378         event = ftrace_find_event(entry->type);
4379         if (event) {
4380                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4381                 if (ret != TRACE_TYPE_HANDLED)
4382                         return ret;
4383         }
4384
4385         SEQ_PUT_FIELD(s, newline);
4386
4387         return trace_handle_return(s);
4388 }
4389
4390 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4391 {
4392         struct trace_array *tr = iter->tr;
4393         struct trace_seq *s = &iter->seq;
4394         struct trace_entry *entry;
4395         struct trace_event *event;
4396
4397         entry = iter->ent;
4398
4399         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4400                 SEQ_PUT_FIELD(s, entry->pid);
4401                 SEQ_PUT_FIELD(s, iter->cpu);
4402                 SEQ_PUT_FIELD(s, iter->ts);
4403                 if (trace_seq_has_overflowed(s))
4404                         return TRACE_TYPE_PARTIAL_LINE;
4405         }
4406
4407         event = ftrace_find_event(entry->type);
4408         return event ? event->funcs->binary(iter, 0, event) :
4409                 TRACE_TYPE_HANDLED;
4410 }
4411
4412 int trace_empty(struct trace_iterator *iter)
4413 {
4414         struct ring_buffer_iter *buf_iter;
4415         int cpu;
4416
4417         /* If we are looking at one CPU buffer, only check that one */
4418         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4419                 cpu = iter->cpu_file;
4420                 buf_iter = trace_buffer_iter(iter, cpu);
4421                 if (buf_iter) {
4422                         if (!ring_buffer_iter_empty(buf_iter))
4423                                 return 0;
4424                 } else {
4425                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4426                                 return 0;
4427                 }
4428                 return 1;
4429         }
4430
4431         for_each_tracing_cpu(cpu) {
4432                 buf_iter = trace_buffer_iter(iter, cpu);
4433                 if (buf_iter) {
4434                         if (!ring_buffer_iter_empty(buf_iter))
4435                                 return 0;
4436                 } else {
4437                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4438                                 return 0;
4439                 }
4440         }
4441
4442         return 1;
4443 }
4444
4445 /*  Called with trace_event_read_lock() held. */
4446 enum print_line_t print_trace_line(struct trace_iterator *iter)
4447 {
4448         struct trace_array *tr = iter->tr;
4449         unsigned long trace_flags = tr->trace_flags;
4450         enum print_line_t ret;
4451
4452         if (iter->lost_events) {
4453                 if (iter->lost_events == (unsigned long)-1)
4454                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4455                                          iter->cpu);
4456                 else
4457                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4458                                          iter->cpu, iter->lost_events);
4459                 if (trace_seq_has_overflowed(&iter->seq))
4460                         return TRACE_TYPE_PARTIAL_LINE;
4461         }
4462
4463         if (iter->trace && iter->trace->print_line) {
4464                 ret = iter->trace->print_line(iter);
4465                 if (ret != TRACE_TYPE_UNHANDLED)
4466                         return ret;
4467         }
4468
4469         if (iter->ent->type == TRACE_BPUTS &&
4470                         trace_flags & TRACE_ITER_PRINTK &&
4471                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4472                 return trace_print_bputs_msg_only(iter);
4473
4474         if (iter->ent->type == TRACE_BPRINT &&
4475                         trace_flags & TRACE_ITER_PRINTK &&
4476                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4477                 return trace_print_bprintk_msg_only(iter);
4478
4479         if (iter->ent->type == TRACE_PRINT &&
4480                         trace_flags & TRACE_ITER_PRINTK &&
4481                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4482                 return trace_print_printk_msg_only(iter);
4483
4484         if (trace_flags & TRACE_ITER_BIN)
4485                 return print_bin_fmt(iter);
4486
4487         if (trace_flags & TRACE_ITER_HEX)
4488                 return print_hex_fmt(iter);
4489
4490         if (trace_flags & TRACE_ITER_RAW)
4491                 return print_raw_fmt(iter);
4492
4493         return print_trace_fmt(iter);
4494 }
4495
4496 void trace_latency_header(struct seq_file *m)
4497 {
4498         struct trace_iterator *iter = m->private;
4499         struct trace_array *tr = iter->tr;
4500
4501         /* print nothing if the buffers are empty */
4502         if (trace_empty(iter))
4503                 return;
4504
4505         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4506                 print_trace_header(m, iter);
4507
4508         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4509                 print_lat_help_header(m);
4510 }
4511
4512 void trace_default_header(struct seq_file *m)
4513 {
4514         struct trace_iterator *iter = m->private;
4515         struct trace_array *tr = iter->tr;
4516         unsigned long trace_flags = tr->trace_flags;
4517
4518         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4519                 return;
4520
4521         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4522                 /* print nothing if the buffers are empty */
4523                 if (trace_empty(iter))
4524                         return;
4525                 print_trace_header(m, iter);
4526                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4527                         print_lat_help_header(m);
4528         } else {
4529                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4530                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4531                                 print_func_help_header_irq(iter->array_buffer,
4532                                                            m, trace_flags);
4533                         else
4534                                 print_func_help_header(iter->array_buffer, m,
4535                                                        trace_flags);
4536                 }
4537         }
4538 }
4539
4540 static void test_ftrace_alive(struct seq_file *m)
4541 {
4542         if (!ftrace_is_dead())
4543                 return;
4544         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4545                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4546 }
4547
4548 #ifdef CONFIG_TRACER_MAX_TRACE
4549 static void show_snapshot_main_help(struct seq_file *m)
4550 {
4551         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4552                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4553                     "#                      Takes a snapshot of the main buffer.\n"
4554                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4555                     "#                      (Doesn't have to be '2' works with any number that\n"
4556                     "#                       is not a '0' or '1')\n");
4557 }
4558
4559 static void show_snapshot_percpu_help(struct seq_file *m)
4560 {
4561         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4562 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4563         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4564                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4565 #else
4566         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4567                     "#                     Must use main snapshot file to allocate.\n");
4568 #endif
4569         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4570                     "#                      (Doesn't have to be '2' works with any number that\n"
4571                     "#                       is not a '0' or '1')\n");
4572 }
4573
4574 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4575 {
4576         if (iter->tr->allocated_snapshot)
4577                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4578         else
4579                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4580
4581         seq_puts(m, "# Snapshot commands:\n");
4582         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4583                 show_snapshot_main_help(m);
4584         else
4585                 show_snapshot_percpu_help(m);
4586 }
4587 #else
4588 /* Should never be called */
4589 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4590 #endif
4591
4592 static int s_show(struct seq_file *m, void *v)
4593 {
4594         struct trace_iterator *iter = v;
4595         int ret;
4596
4597         if (iter->ent == NULL) {
4598                 if (iter->tr) {
4599                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4600                         seq_puts(m, "#\n");
4601                         test_ftrace_alive(m);
4602                 }
4603                 if (iter->snapshot && trace_empty(iter))
4604                         print_snapshot_help(m, iter);
4605                 else if (iter->trace && iter->trace->print_header)
4606                         iter->trace->print_header(m);
4607                 else
4608                         trace_default_header(m);
4609
4610         } else if (iter->leftover) {
4611                 /*
4612                  * If we filled the seq_file buffer earlier, we
4613                  * want to just show it now.
4614                  */
4615                 ret = trace_print_seq(m, &iter->seq);
4616
4617                 /* ret should this time be zero, but you never know */
4618                 iter->leftover = ret;
4619
4620         } else {
4621                 print_trace_line(iter);
4622                 ret = trace_print_seq(m, &iter->seq);
4623                 /*
4624                  * If we overflow the seq_file buffer, then it will
4625                  * ask us for this data again at start up.
4626                  * Use that instead.
4627                  *  ret is 0 if seq_file write succeeded.
4628                  *        -1 otherwise.
4629                  */
4630                 iter->leftover = ret;
4631         }
4632
4633         return 0;
4634 }
4635
4636 /*
4637  * Should be used after trace_array_get(), trace_types_lock
4638  * ensures that i_cdev was already initialized.
4639  */
4640 static inline int tracing_get_cpu(struct inode *inode)
4641 {
4642         if (inode->i_cdev) /* See trace_create_cpu_file() */
4643                 return (long)inode->i_cdev - 1;
4644         return RING_BUFFER_ALL_CPUS;
4645 }
4646
4647 static const struct seq_operations tracer_seq_ops = {
4648         .start          = s_start,
4649         .next           = s_next,
4650         .stop           = s_stop,
4651         .show           = s_show,
4652 };
4653
4654 static struct trace_iterator *
4655 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4656 {
4657         struct trace_array *tr = inode->i_private;
4658         struct trace_iterator *iter;
4659         int cpu;
4660
4661         if (tracing_disabled)
4662                 return ERR_PTR(-ENODEV);
4663
4664         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4665         if (!iter)
4666                 return ERR_PTR(-ENOMEM);
4667
4668         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4669                                     GFP_KERNEL);
4670         if (!iter->buffer_iter)
4671                 goto release;
4672
4673         /*
4674          * trace_find_next_entry() may need to save off iter->ent.
4675          * It will place it into the iter->temp buffer. As most
4676          * events are less than 128, allocate a buffer of that size.
4677          * If one is greater, then trace_find_next_entry() will
4678          * allocate a new buffer to adjust for the bigger iter->ent.
4679          * It's not critical if it fails to get allocated here.
4680          */
4681         iter->temp = kmalloc(128, GFP_KERNEL);
4682         if (iter->temp)
4683                 iter->temp_size = 128;
4684
4685         /*
4686          * trace_event_printf() may need to modify given format
4687          * string to replace %p with %px so that it shows real address
4688          * instead of hash value. However, that is only for the event
4689          * tracing, other tracer may not need. Defer the allocation
4690          * until it is needed.
4691          */
4692         iter->fmt = NULL;
4693         iter->fmt_size = 0;
4694
4695         /*
4696          * We make a copy of the current tracer to avoid concurrent
4697          * changes on it while we are reading.
4698          */
4699         mutex_lock(&trace_types_lock);
4700         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4701         if (!iter->trace)
4702                 goto fail;
4703
4704         *iter->trace = *tr->current_trace;
4705
4706         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4707                 goto fail;
4708
4709         iter->tr = tr;
4710
4711 #ifdef CONFIG_TRACER_MAX_TRACE
4712         /* Currently only the top directory has a snapshot */
4713         if (tr->current_trace->print_max || snapshot)
4714                 iter->array_buffer = &tr->max_buffer;
4715         else
4716 #endif
4717                 iter->array_buffer = &tr->array_buffer;
4718         iter->snapshot = snapshot;
4719         iter->pos = -1;
4720         iter->cpu_file = tracing_get_cpu(inode);
4721         mutex_init(&iter->mutex);
4722
4723         /* Notify the tracer early; before we stop tracing. */
4724         if (iter->trace->open)
4725                 iter->trace->open(iter);
4726
4727         /* Annotate start of buffers if we had overruns */
4728         if (ring_buffer_overruns(iter->array_buffer->buffer))
4729                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4730
4731         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4732         if (trace_clocks[tr->clock_id].in_ns)
4733                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4734
4735         /*
4736          * If pause-on-trace is enabled, then stop the trace while
4737          * dumping, unless this is the "snapshot" file
4738          */
4739         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4740                 tracing_stop_tr(tr);
4741
4742         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4743                 for_each_tracing_cpu(cpu) {
4744                         iter->buffer_iter[cpu] =
4745                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4746                                                          cpu, GFP_KERNEL);
4747                 }
4748                 ring_buffer_read_prepare_sync();
4749                 for_each_tracing_cpu(cpu) {
4750                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4751                         tracing_iter_reset(iter, cpu);
4752                 }
4753         } else {
4754                 cpu = iter->cpu_file;
4755                 iter->buffer_iter[cpu] =
4756                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4757                                                  cpu, GFP_KERNEL);
4758                 ring_buffer_read_prepare_sync();
4759                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4760                 tracing_iter_reset(iter, cpu);
4761         }
4762
4763         mutex_unlock(&trace_types_lock);
4764
4765         return iter;
4766
4767  fail:
4768         mutex_unlock(&trace_types_lock);
4769         kfree(iter->trace);
4770         kfree(iter->temp);
4771         kfree(iter->buffer_iter);
4772 release:
4773         seq_release_private(inode, file);
4774         return ERR_PTR(-ENOMEM);
4775 }
4776
4777 int tracing_open_generic(struct inode *inode, struct file *filp)
4778 {
4779         int ret;
4780
4781         ret = tracing_check_open_get_tr(NULL);
4782         if (ret)
4783                 return ret;
4784
4785         filp->private_data = inode->i_private;
4786         return 0;
4787 }
4788
4789 bool tracing_is_disabled(void)
4790 {
4791         return (tracing_disabled) ? true: false;
4792 }
4793
4794 /*
4795  * Open and update trace_array ref count.
4796  * Must have the current trace_array passed to it.
4797  */
4798 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4799 {
4800         struct trace_array *tr = inode->i_private;
4801         int ret;
4802
4803         ret = tracing_check_open_get_tr(tr);
4804         if (ret)
4805                 return ret;
4806
4807         filp->private_data = inode->i_private;
4808
4809         return 0;
4810 }
4811
4812 static int tracing_release(struct inode *inode, struct file *file)
4813 {
4814         struct trace_array *tr = inode->i_private;
4815         struct seq_file *m = file->private_data;
4816         struct trace_iterator *iter;
4817         int cpu;
4818
4819         if (!(file->f_mode & FMODE_READ)) {
4820                 trace_array_put(tr);
4821                 return 0;
4822         }
4823
4824         /* Writes do not use seq_file */
4825         iter = m->private;
4826         mutex_lock(&trace_types_lock);
4827
4828         for_each_tracing_cpu(cpu) {
4829                 if (iter->buffer_iter[cpu])
4830                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4831         }
4832
4833         if (iter->trace && iter->trace->close)
4834                 iter->trace->close(iter);
4835
4836         if (!iter->snapshot && tr->stop_count)
4837                 /* reenable tracing if it was previously enabled */
4838                 tracing_start_tr(tr);
4839
4840         __trace_array_put(tr);
4841
4842         mutex_unlock(&trace_types_lock);
4843
4844         mutex_destroy(&iter->mutex);
4845         free_cpumask_var(iter->started);
4846         kfree(iter->fmt);
4847         kfree(iter->temp);
4848         kfree(iter->trace);
4849         kfree(iter->buffer_iter);
4850         seq_release_private(inode, file);
4851
4852         return 0;
4853 }
4854
4855 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4856 {
4857         struct trace_array *tr = inode->i_private;
4858
4859         trace_array_put(tr);
4860         return 0;
4861 }
4862
4863 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866
4867         trace_array_put(tr);
4868
4869         return single_release(inode, file);
4870 }
4871
4872 static int tracing_open(struct inode *inode, struct file *file)
4873 {
4874         struct trace_array *tr = inode->i_private;
4875         struct trace_iterator *iter;
4876         int ret;
4877
4878         ret = tracing_check_open_get_tr(tr);
4879         if (ret)
4880                 return ret;
4881
4882         /* If this file was open for write, then erase contents */
4883         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4884                 int cpu = tracing_get_cpu(inode);
4885                 struct array_buffer *trace_buf = &tr->array_buffer;
4886
4887 #ifdef CONFIG_TRACER_MAX_TRACE
4888                 if (tr->current_trace->print_max)
4889                         trace_buf = &tr->max_buffer;
4890 #endif
4891
4892                 if (cpu == RING_BUFFER_ALL_CPUS)
4893                         tracing_reset_online_cpus(trace_buf);
4894                 else
4895                         tracing_reset_cpu(trace_buf, cpu);
4896         }
4897
4898         if (file->f_mode & FMODE_READ) {
4899                 iter = __tracing_open(inode, file, false);
4900                 if (IS_ERR(iter))
4901                         ret = PTR_ERR(iter);
4902                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4903                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4904         }
4905
4906         if (ret < 0)
4907                 trace_array_put(tr);
4908
4909         return ret;
4910 }
4911
4912 /*
4913  * Some tracers are not suitable for instance buffers.
4914  * A tracer is always available for the global array (toplevel)
4915  * or if it explicitly states that it is.
4916  */
4917 static bool
4918 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4919 {
4920         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4921 }
4922
4923 /* Find the next tracer that this trace array may use */
4924 static struct tracer *
4925 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4926 {
4927         while (t && !trace_ok_for_array(t, tr))
4928                 t = t->next;
4929
4930         return t;
4931 }
4932
4933 static void *
4934 t_next(struct seq_file *m, void *v, loff_t *pos)
4935 {
4936         struct trace_array *tr = m->private;
4937         struct tracer *t = v;
4938
4939         (*pos)++;
4940
4941         if (t)
4942                 t = get_tracer_for_array(tr, t->next);
4943
4944         return t;
4945 }
4946
4947 static void *t_start(struct seq_file *m, loff_t *pos)
4948 {
4949         struct trace_array *tr = m->private;
4950         struct tracer *t;
4951         loff_t l = 0;
4952
4953         mutex_lock(&trace_types_lock);
4954
4955         t = get_tracer_for_array(tr, trace_types);
4956         for (; t && l < *pos; t = t_next(m, t, &l))
4957                         ;
4958
4959         return t;
4960 }
4961
4962 static void t_stop(struct seq_file *m, void *p)
4963 {
4964         mutex_unlock(&trace_types_lock);
4965 }
4966
4967 static int t_show(struct seq_file *m, void *v)
4968 {
4969         struct tracer *t = v;
4970
4971         if (!t)
4972                 return 0;
4973
4974         seq_puts(m, t->name);
4975         if (t->next)
4976                 seq_putc(m, ' ');
4977         else
4978                 seq_putc(m, '\n');
4979
4980         return 0;
4981 }
4982
4983 static const struct seq_operations show_traces_seq_ops = {
4984         .start          = t_start,
4985         .next           = t_next,
4986         .stop           = t_stop,
4987         .show           = t_show,
4988 };
4989
4990 static int show_traces_open(struct inode *inode, struct file *file)
4991 {
4992         struct trace_array *tr = inode->i_private;
4993         struct seq_file *m;
4994         int ret;
4995
4996         ret = tracing_check_open_get_tr(tr);
4997         if (ret)
4998                 return ret;
4999
5000         ret = seq_open(file, &show_traces_seq_ops);
5001         if (ret) {
5002                 trace_array_put(tr);
5003                 return ret;
5004         }
5005
5006         m = file->private_data;
5007         m->private = tr;
5008
5009         return 0;
5010 }
5011
5012 static int show_traces_release(struct inode *inode, struct file *file)
5013 {
5014         struct trace_array *tr = inode->i_private;
5015
5016         trace_array_put(tr);
5017         return seq_release(inode, file);
5018 }
5019
5020 static ssize_t
5021 tracing_write_stub(struct file *filp, const char __user *ubuf,
5022                    size_t count, loff_t *ppos)
5023 {
5024         return count;
5025 }
5026
5027 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5028 {
5029         int ret;
5030
5031         if (file->f_mode & FMODE_READ)
5032                 ret = seq_lseek(file, offset, whence);
5033         else
5034                 file->f_pos = ret = 0;
5035
5036         return ret;
5037 }
5038
5039 static const struct file_operations tracing_fops = {
5040         .open           = tracing_open,
5041         .read           = seq_read,
5042         .write          = tracing_write_stub,
5043         .llseek         = tracing_lseek,
5044         .release        = tracing_release,
5045 };
5046
5047 static const struct file_operations show_traces_fops = {
5048         .open           = show_traces_open,
5049         .read           = seq_read,
5050         .llseek         = seq_lseek,
5051         .release        = show_traces_release,
5052 };
5053
5054 static ssize_t
5055 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5056                      size_t count, loff_t *ppos)
5057 {
5058         struct trace_array *tr = file_inode(filp)->i_private;
5059         char *mask_str;
5060         int len;
5061
5062         len = snprintf(NULL, 0, "%*pb\n",
5063                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5064         mask_str = kmalloc(len, GFP_KERNEL);
5065         if (!mask_str)
5066                 return -ENOMEM;
5067
5068         len = snprintf(mask_str, len, "%*pb\n",
5069                        cpumask_pr_args(tr->tracing_cpumask));
5070         if (len >= count) {
5071                 count = -EINVAL;
5072                 goto out_err;
5073         }
5074         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5075
5076 out_err:
5077         kfree(mask_str);
5078
5079         return count;
5080 }
5081
5082 int tracing_set_cpumask(struct trace_array *tr,
5083                         cpumask_var_t tracing_cpumask_new)
5084 {
5085         int cpu;
5086
5087         if (!tr)
5088                 return -EINVAL;
5089
5090         local_irq_disable();
5091         arch_spin_lock(&tr->max_lock);
5092         for_each_tracing_cpu(cpu) {
5093                 /*
5094                  * Increase/decrease the disabled counter if we are
5095                  * about to flip a bit in the cpumask:
5096                  */
5097                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5098                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5099                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5100                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5101                 }
5102                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5103                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5104                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5105                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5106                 }
5107         }
5108         arch_spin_unlock(&tr->max_lock);
5109         local_irq_enable();
5110
5111         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5112
5113         return 0;
5114 }
5115
5116 static ssize_t
5117 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5118                       size_t count, loff_t *ppos)
5119 {
5120         struct trace_array *tr = file_inode(filp)->i_private;
5121         cpumask_var_t tracing_cpumask_new;
5122         int err;
5123
5124         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5125                 return -ENOMEM;
5126
5127         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5128         if (err)
5129                 goto err_free;
5130
5131         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5132         if (err)
5133                 goto err_free;
5134
5135         free_cpumask_var(tracing_cpumask_new);
5136
5137         return count;
5138
5139 err_free:
5140         free_cpumask_var(tracing_cpumask_new);
5141
5142         return err;
5143 }
5144
5145 static const struct file_operations tracing_cpumask_fops = {
5146         .open           = tracing_open_generic_tr,
5147         .read           = tracing_cpumask_read,
5148         .write          = tracing_cpumask_write,
5149         .release        = tracing_release_generic_tr,
5150         .llseek         = generic_file_llseek,
5151 };
5152
5153 static int tracing_trace_options_show(struct seq_file *m, void *v)
5154 {
5155         struct tracer_opt *trace_opts;
5156         struct trace_array *tr = m->private;
5157         u32 tracer_flags;
5158         int i;
5159
5160         mutex_lock(&trace_types_lock);
5161         tracer_flags = tr->current_trace->flags->val;
5162         trace_opts = tr->current_trace->flags->opts;
5163
5164         for (i = 0; trace_options[i]; i++) {
5165                 if (tr->trace_flags & (1 << i))
5166                         seq_printf(m, "%s\n", trace_options[i]);
5167                 else
5168                         seq_printf(m, "no%s\n", trace_options[i]);
5169         }
5170
5171         for (i = 0; trace_opts[i].name; i++) {
5172                 if (tracer_flags & trace_opts[i].bit)
5173                         seq_printf(m, "%s\n", trace_opts[i].name);
5174                 else
5175                         seq_printf(m, "no%s\n", trace_opts[i].name);
5176         }
5177         mutex_unlock(&trace_types_lock);
5178
5179         return 0;
5180 }
5181
5182 static int __set_tracer_option(struct trace_array *tr,
5183                                struct tracer_flags *tracer_flags,
5184                                struct tracer_opt *opts, int neg)
5185 {
5186         struct tracer *trace = tracer_flags->trace;
5187         int ret;
5188
5189         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5190         if (ret)
5191                 return ret;
5192
5193         if (neg)
5194                 tracer_flags->val &= ~opts->bit;
5195         else
5196                 tracer_flags->val |= opts->bit;
5197         return 0;
5198 }
5199
5200 /* Try to assign a tracer specific option */
5201 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5202 {
5203         struct tracer *trace = tr->current_trace;
5204         struct tracer_flags *tracer_flags = trace->flags;
5205         struct tracer_opt *opts = NULL;
5206         int i;
5207
5208         for (i = 0; tracer_flags->opts[i].name; i++) {
5209                 opts = &tracer_flags->opts[i];
5210
5211                 if (strcmp(cmp, opts->name) == 0)
5212                         return __set_tracer_option(tr, trace->flags, opts, neg);
5213         }
5214
5215         return -EINVAL;
5216 }
5217
5218 /* Some tracers require overwrite to stay enabled */
5219 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5220 {
5221         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5222                 return -1;
5223
5224         return 0;
5225 }
5226
5227 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5228 {
5229         if ((mask == TRACE_ITER_RECORD_TGID) ||
5230             (mask == TRACE_ITER_RECORD_CMD))
5231                 lockdep_assert_held(&event_mutex);
5232
5233         /* do nothing if flag is already set */
5234         if (!!(tr->trace_flags & mask) == !!enabled)
5235                 return 0;
5236
5237         /* Give the tracer a chance to approve the change */
5238         if (tr->current_trace->flag_changed)
5239                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5240                         return -EINVAL;
5241
5242         if (enabled)
5243                 tr->trace_flags |= mask;
5244         else
5245                 tr->trace_flags &= ~mask;
5246
5247         if (mask == TRACE_ITER_RECORD_CMD)
5248                 trace_event_enable_cmd_record(enabled);
5249
5250         if (mask == TRACE_ITER_RECORD_TGID) {
5251                 if (!tgid_map)
5252                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5253                                            sizeof(*tgid_map),
5254                                            GFP_KERNEL);
5255                 if (!tgid_map) {
5256                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5257                         return -ENOMEM;
5258                 }
5259
5260                 trace_event_enable_tgid_record(enabled);
5261         }
5262
5263         if (mask == TRACE_ITER_EVENT_FORK)
5264                 trace_event_follow_fork(tr, enabled);
5265
5266         if (mask == TRACE_ITER_FUNC_FORK)
5267                 ftrace_pid_follow_fork(tr, enabled);
5268
5269         if (mask == TRACE_ITER_OVERWRITE) {
5270                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5271 #ifdef CONFIG_TRACER_MAX_TRACE
5272                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5273 #endif
5274         }
5275
5276         if (mask == TRACE_ITER_PRINTK) {
5277                 trace_printk_start_stop_comm(enabled);
5278                 trace_printk_control(enabled);
5279         }
5280
5281         return 0;
5282 }
5283
5284 int trace_set_options(struct trace_array *tr, char *option)
5285 {
5286         char *cmp;
5287         int neg = 0;
5288         int ret;
5289         size_t orig_len = strlen(option);
5290         int len;
5291
5292         cmp = strstrip(option);
5293
5294         len = str_has_prefix(cmp, "no");
5295         if (len)
5296                 neg = 1;
5297
5298         cmp += len;
5299
5300         mutex_lock(&event_mutex);
5301         mutex_lock(&trace_types_lock);
5302
5303         ret = match_string(trace_options, -1, cmp);
5304         /* If no option could be set, test the specific tracer options */
5305         if (ret < 0)
5306                 ret = set_tracer_option(tr, cmp, neg);
5307         else
5308                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5309
5310         mutex_unlock(&trace_types_lock);
5311         mutex_unlock(&event_mutex);
5312
5313         /*
5314          * If the first trailing whitespace is replaced with '\0' by strstrip,
5315          * turn it back into a space.
5316          */
5317         if (orig_len > strlen(option))
5318                 option[strlen(option)] = ' ';
5319
5320         return ret;
5321 }
5322
5323 static void __init apply_trace_boot_options(void)
5324 {
5325         char *buf = trace_boot_options_buf;
5326         char *option;
5327
5328         while (true) {
5329                 option = strsep(&buf, ",");
5330
5331                 if (!option)
5332                         break;
5333
5334                 if (*option)
5335                         trace_set_options(&global_trace, option);
5336
5337                 /* Put back the comma to allow this to be called again */
5338                 if (buf)
5339                         *(buf - 1) = ',';
5340         }
5341 }
5342
5343 static ssize_t
5344 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5345                         size_t cnt, loff_t *ppos)
5346 {
5347         struct seq_file *m = filp->private_data;
5348         struct trace_array *tr = m->private;
5349         char buf[64];
5350         int ret;
5351
5352         if (cnt >= sizeof(buf))
5353                 return -EINVAL;
5354
5355         if (copy_from_user(buf, ubuf, cnt))
5356                 return -EFAULT;
5357
5358         buf[cnt] = 0;
5359
5360         ret = trace_set_options(tr, buf);
5361         if (ret < 0)
5362                 return ret;
5363
5364         *ppos += cnt;
5365
5366         return cnt;
5367 }
5368
5369 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5370 {
5371         struct trace_array *tr = inode->i_private;
5372         int ret;
5373
5374         ret = tracing_check_open_get_tr(tr);
5375         if (ret)
5376                 return ret;
5377
5378         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5379         if (ret < 0)
5380                 trace_array_put(tr);
5381
5382         return ret;
5383 }
5384
5385 static const struct file_operations tracing_iter_fops = {
5386         .open           = tracing_trace_options_open,
5387         .read           = seq_read,
5388         .llseek         = seq_lseek,
5389         .release        = tracing_single_release_tr,
5390         .write          = tracing_trace_options_write,
5391 };
5392
5393 static const char readme_msg[] =
5394         "tracing mini-HOWTO:\n\n"
5395         "# echo 0 > tracing_on : quick way to disable tracing\n"
5396         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5397         " Important files:\n"
5398         "  trace\t\t\t- The static contents of the buffer\n"
5399         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5400         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5401         "  current_tracer\t- function and latency tracers\n"
5402         "  available_tracers\t- list of configured tracers for current_tracer\n"
5403         "  error_log\t- error log for failed commands (that support it)\n"
5404         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5405         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5406         "  trace_clock\t\t-change the clock used to order events\n"
5407         "       local:   Per cpu clock but may not be synced across CPUs\n"
5408         "      global:   Synced across CPUs but slows tracing down.\n"
5409         "     counter:   Not a clock, but just an increment\n"
5410         "      uptime:   Jiffy counter from time of boot\n"
5411         "        perf:   Same clock that perf events use\n"
5412 #ifdef CONFIG_X86_64
5413         "     x86-tsc:   TSC cycle counter\n"
5414 #endif
5415         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5416         "       delta:   Delta difference against a buffer-wide timestamp\n"
5417         "    absolute:   Absolute (standalone) timestamp\n"
5418         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5419         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5420         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5421         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5422         "\t\t\t  Remove sub-buffer with rmdir\n"
5423         "  trace_options\t\t- Set format or modify how tracing happens\n"
5424         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5425         "\t\t\t  option name\n"
5426         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5427 #ifdef CONFIG_DYNAMIC_FTRACE
5428         "\n  available_filter_functions - list of functions that can be filtered on\n"
5429         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5430         "\t\t\t  functions\n"
5431         "\t     accepts: func_full_name or glob-matching-pattern\n"
5432         "\t     modules: Can select a group via module\n"
5433         "\t      Format: :mod:<module-name>\n"
5434         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5435         "\t    triggers: a command to perform when function is hit\n"
5436         "\t      Format: <function>:<trigger>[:count]\n"
5437         "\t     trigger: traceon, traceoff\n"
5438         "\t\t      enable_event:<system>:<event>\n"
5439         "\t\t      disable_event:<system>:<event>\n"
5440 #ifdef CONFIG_STACKTRACE
5441         "\t\t      stacktrace\n"
5442 #endif
5443 #ifdef CONFIG_TRACER_SNAPSHOT
5444         "\t\t      snapshot\n"
5445 #endif
5446         "\t\t      dump\n"
5447         "\t\t      cpudump\n"
5448         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5449         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5450         "\t     The first one will disable tracing every time do_fault is hit\n"
5451         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5452         "\t       The first time do trap is hit and it disables tracing, the\n"
5453         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5454         "\t       the counter will not decrement. It only decrements when the\n"
5455         "\t       trigger did work\n"
5456         "\t     To remove trigger without count:\n"
5457         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5458         "\t     To remove trigger with a count:\n"
5459         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5460         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5461         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5462         "\t    modules: Can select a group via module command :mod:\n"
5463         "\t    Does not accept triggers\n"
5464 #endif /* CONFIG_DYNAMIC_FTRACE */
5465 #ifdef CONFIG_FUNCTION_TRACER
5466         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5467         "\t\t    (function)\n"
5468         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5469         "\t\t    (function)\n"
5470 #endif
5471 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5472         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5473         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5474         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5475 #endif
5476 #ifdef CONFIG_TRACER_SNAPSHOT
5477         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5478         "\t\t\t  snapshot buffer. Read the contents for more\n"
5479         "\t\t\t  information\n"
5480 #endif
5481 #ifdef CONFIG_STACK_TRACER
5482         "  stack_trace\t\t- Shows the max stack trace when active\n"
5483         "  stack_max_size\t- Shows current max stack size that was traced\n"
5484         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5485         "\t\t\t  new trace)\n"
5486 #ifdef CONFIG_DYNAMIC_FTRACE
5487         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5488         "\t\t\t  traces\n"
5489 #endif
5490 #endif /* CONFIG_STACK_TRACER */
5491 #ifdef CONFIG_DYNAMIC_EVENTS
5492         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5493         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5494 #endif
5495 #ifdef CONFIG_KPROBE_EVENTS
5496         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5497         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5498 #endif
5499 #ifdef CONFIG_UPROBE_EVENTS
5500         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5501         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5502 #endif
5503 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5504         "\t  accepts: event-definitions (one definition per line)\n"
5505         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5506         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5507 #ifdef CONFIG_HIST_TRIGGERS
5508         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5509 #endif
5510         "\t           -:[<group>/]<event>\n"
5511 #ifdef CONFIG_KPROBE_EVENTS
5512         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5513   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5514 #endif
5515 #ifdef CONFIG_UPROBE_EVENTS
5516   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5517 #endif
5518         "\t     args: <name>=fetcharg[:type]\n"
5519         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5520 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5521         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5522 #else
5523         "\t           $stack<index>, $stack, $retval, $comm,\n"
5524 #endif
5525         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5526         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5527         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5528         "\t           <type>\\[<array-size>\\]\n"
5529 #ifdef CONFIG_HIST_TRIGGERS
5530         "\t    field: <stype> <name>;\n"
5531         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5532         "\t           [unsigned] char/int/long\n"
5533 #endif
5534 #endif
5535         "  events/\t\t- Directory containing all trace event subsystems:\n"
5536         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5537         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5538         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5539         "\t\t\t  events\n"
5540         "      filter\t\t- If set, only events passing filter are traced\n"
5541         "  events/<system>/<event>/\t- Directory containing control files for\n"
5542         "\t\t\t  <event>:\n"
5543         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5544         "      filter\t\t- If set, only events passing filter are traced\n"
5545         "      trigger\t\t- If set, a command to perform when event is hit\n"
5546         "\t    Format: <trigger>[:count][if <filter>]\n"
5547         "\t   trigger: traceon, traceoff\n"
5548         "\t            enable_event:<system>:<event>\n"
5549         "\t            disable_event:<system>:<event>\n"
5550 #ifdef CONFIG_HIST_TRIGGERS
5551         "\t            enable_hist:<system>:<event>\n"
5552         "\t            disable_hist:<system>:<event>\n"
5553 #endif
5554 #ifdef CONFIG_STACKTRACE
5555         "\t\t    stacktrace\n"
5556 #endif
5557 #ifdef CONFIG_TRACER_SNAPSHOT
5558         "\t\t    snapshot\n"
5559 #endif
5560 #ifdef CONFIG_HIST_TRIGGERS
5561         "\t\t    hist (see below)\n"
5562 #endif
5563         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5564         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5565         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5566         "\t                  events/block/block_unplug/trigger\n"
5567         "\t   The first disables tracing every time block_unplug is hit.\n"
5568         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5569         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5570         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5571         "\t   Like function triggers, the counter is only decremented if it\n"
5572         "\t    enabled or disabled tracing.\n"
5573         "\t   To remove a trigger without a count:\n"
5574         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5575         "\t   To remove a trigger with a count:\n"
5576         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5577         "\t   Filters can be ignored when removing a trigger.\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5580         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5581         "\t            [:values=<field1[,field2,...]>]\n"
5582         "\t            [:sort=<field1[,field2,...]>]\n"
5583         "\t            [:size=#entries]\n"
5584         "\t            [:pause][:continue][:clear]\n"
5585         "\t            [:name=histname1]\n"
5586         "\t            [:<handler>.<action>]\n"
5587         "\t            [if <filter>]\n\n"
5588         "\t    When a matching event is hit, an entry is added to a hash\n"
5589         "\t    table using the key(s) and value(s) named, and the value of a\n"
5590         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5591         "\t    correspond to fields in the event's format description.  Keys\n"
5592         "\t    can be any field, or the special string 'stacktrace'.\n"
5593         "\t    Compound keys consisting of up to two fields can be specified\n"
5594         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5595         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5596         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5597         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5598         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5599         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5600         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5601         "\t    its histogram data will be shared with other triggers of the\n"
5602         "\t    same name, and trigger hits will update this common data.\n\n"
5603         "\t    Reading the 'hist' file for the event will dump the hash\n"
5604         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5605         "\t    triggers attached to an event, there will be a table for each\n"
5606         "\t    trigger in the output.  The table displayed for a named\n"
5607         "\t    trigger will be the same as any other instance having the\n"
5608         "\t    same name.  The default format used to display a given field\n"
5609         "\t    can be modified by appending any of the following modifiers\n"
5610         "\t    to the field name, as applicable:\n\n"
5611         "\t            .hex        display a number as a hex value\n"
5612         "\t            .sym        display an address as a symbol\n"
5613         "\t            .sym-offset display an address as a symbol and offset\n"
5614         "\t            .execname   display a common_pid as a program name\n"
5615         "\t            .syscall    display a syscall id as a syscall name\n"
5616         "\t            .log2       display log2 value rather than raw number\n"
5617         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5618         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5619         "\t    trigger or to start a hist trigger but not log any events\n"
5620         "\t    until told to do so.  'continue' can be used to start or\n"
5621         "\t    restart a paused hist trigger.\n\n"
5622         "\t    The 'clear' parameter will clear the contents of a running\n"
5623         "\t    hist trigger and leave its current paused/active state\n"
5624         "\t    unchanged.\n\n"
5625         "\t    The enable_hist and disable_hist triggers can be used to\n"
5626         "\t    have one event conditionally start and stop another event's\n"
5627         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5628         "\t    the enable_event and disable_event triggers.\n\n"
5629         "\t    Hist trigger handlers and actions are executed whenever a\n"
5630         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5631         "\t        <handler>.<action>\n\n"
5632         "\t    The available handlers are:\n\n"
5633         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5634         "\t        onmax(var)               - invoke if var exceeds current max\n"
5635         "\t        onchange(var)            - invoke action if var changes\n\n"
5636         "\t    The available actions are:\n\n"
5637         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5638         "\t        save(field,...)                      - save current event fields\n"
5639 #ifdef CONFIG_TRACER_SNAPSHOT
5640         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5641 #endif
5642 #ifdef CONFIG_SYNTH_EVENTS
5643         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5644         "\t  Write into this file to define/undefine new synthetic events.\n"
5645         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5646 #endif
5647 #endif
5648 ;
5649
5650 static ssize_t
5651 tracing_readme_read(struct file *filp, char __user *ubuf,
5652                        size_t cnt, loff_t *ppos)
5653 {
5654         return simple_read_from_buffer(ubuf, cnt, ppos,
5655                                         readme_msg, strlen(readme_msg));
5656 }
5657
5658 static const struct file_operations tracing_readme_fops = {
5659         .open           = tracing_open_generic,
5660         .read           = tracing_readme_read,
5661         .llseek         = generic_file_llseek,
5662 };
5663
5664 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5665 {
5666         int *ptr = v;
5667
5668         if (*pos || m->count)
5669                 ptr++;
5670
5671         (*pos)++;
5672
5673         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5674                 if (trace_find_tgid(*ptr))
5675                         return ptr;
5676         }
5677
5678         return NULL;
5679 }
5680
5681 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5682 {
5683         void *v;
5684         loff_t l = 0;
5685
5686         if (!tgid_map)
5687                 return NULL;
5688
5689         v = &tgid_map[0];
5690         while (l <= *pos) {
5691                 v = saved_tgids_next(m, v, &l);
5692                 if (!v)
5693                         return NULL;
5694         }
5695
5696         return v;
5697 }
5698
5699 static void saved_tgids_stop(struct seq_file *m, void *v)
5700 {
5701 }
5702
5703 static int saved_tgids_show(struct seq_file *m, void *v)
5704 {
5705         int pid = (int *)v - tgid_map;
5706
5707         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5708         return 0;
5709 }
5710
5711 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5712         .start          = saved_tgids_start,
5713         .stop           = saved_tgids_stop,
5714         .next           = saved_tgids_next,
5715         .show           = saved_tgids_show,
5716 };
5717
5718 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5719 {
5720         int ret;
5721
5722         ret = tracing_check_open_get_tr(NULL);
5723         if (ret)
5724                 return ret;
5725
5726         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5727 }
5728
5729
5730 static const struct file_operations tracing_saved_tgids_fops = {
5731         .open           = tracing_saved_tgids_open,
5732         .read           = seq_read,
5733         .llseek         = seq_lseek,
5734         .release        = seq_release,
5735 };
5736
5737 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5738 {
5739         unsigned int *ptr = v;
5740
5741         if (*pos || m->count)
5742                 ptr++;
5743
5744         (*pos)++;
5745
5746         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5747              ptr++) {
5748                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5749                         continue;
5750
5751                 return ptr;
5752         }
5753
5754         return NULL;
5755 }
5756
5757 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5758 {
5759         void *v;
5760         loff_t l = 0;
5761
5762         preempt_disable();
5763         arch_spin_lock(&trace_cmdline_lock);
5764
5765         v = &savedcmd->map_cmdline_to_pid[0];
5766         while (l <= *pos) {
5767                 v = saved_cmdlines_next(m, v, &l);
5768                 if (!v)
5769                         return NULL;
5770         }
5771
5772         return v;
5773 }
5774
5775 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5776 {
5777         arch_spin_unlock(&trace_cmdline_lock);
5778         preempt_enable();
5779 }
5780
5781 static int saved_cmdlines_show(struct seq_file *m, void *v)
5782 {
5783         char buf[TASK_COMM_LEN];
5784         unsigned int *pid = v;
5785
5786         __trace_find_cmdline(*pid, buf);
5787         seq_printf(m, "%d %s\n", *pid, buf);
5788         return 0;
5789 }
5790
5791 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5792         .start          = saved_cmdlines_start,
5793         .next           = saved_cmdlines_next,
5794         .stop           = saved_cmdlines_stop,
5795         .show           = saved_cmdlines_show,
5796 };
5797
5798 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5799 {
5800         int ret;
5801
5802         ret = tracing_check_open_get_tr(NULL);
5803         if (ret)
5804                 return ret;
5805
5806         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5807 }
5808
5809 static const struct file_operations tracing_saved_cmdlines_fops = {
5810         .open           = tracing_saved_cmdlines_open,
5811         .read           = seq_read,
5812         .llseek         = seq_lseek,
5813         .release        = seq_release,
5814 };
5815
5816 static ssize_t
5817 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5818                                  size_t cnt, loff_t *ppos)
5819 {
5820         char buf[64];
5821         int r;
5822
5823         arch_spin_lock(&trace_cmdline_lock);
5824         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5825         arch_spin_unlock(&trace_cmdline_lock);
5826
5827         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5828 }
5829
5830 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5831 {
5832         kfree(s->saved_cmdlines);
5833         kfree(s->map_cmdline_to_pid);
5834         kfree(s);
5835 }
5836
5837 static int tracing_resize_saved_cmdlines(unsigned int val)
5838 {
5839         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5840
5841         s = kmalloc(sizeof(*s), GFP_KERNEL);
5842         if (!s)
5843                 return -ENOMEM;
5844
5845         if (allocate_cmdlines_buffer(val, s) < 0) {
5846                 kfree(s);
5847                 return -ENOMEM;
5848         }
5849
5850         arch_spin_lock(&trace_cmdline_lock);
5851         savedcmd_temp = savedcmd;
5852         savedcmd = s;
5853         arch_spin_unlock(&trace_cmdline_lock);
5854         free_saved_cmdlines_buffer(savedcmd_temp);
5855
5856         return 0;
5857 }
5858
5859 static ssize_t
5860 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5861                                   size_t cnt, loff_t *ppos)
5862 {
5863         unsigned long val;
5864         int ret;
5865
5866         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5867         if (ret)
5868                 return ret;
5869
5870         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5871         if (!val || val > PID_MAX_DEFAULT)
5872                 return -EINVAL;
5873
5874         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5875         if (ret < 0)
5876                 return ret;
5877
5878         *ppos += cnt;
5879
5880         return cnt;
5881 }
5882
5883 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5884         .open           = tracing_open_generic,
5885         .read           = tracing_saved_cmdlines_size_read,
5886         .write          = tracing_saved_cmdlines_size_write,
5887 };
5888
5889 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5890 static union trace_eval_map_item *
5891 update_eval_map(union trace_eval_map_item *ptr)
5892 {
5893         if (!ptr->map.eval_string) {
5894                 if (ptr->tail.next) {
5895                         ptr = ptr->tail.next;
5896                         /* Set ptr to the next real item (skip head) */
5897                         ptr++;
5898                 } else
5899                         return NULL;
5900         }
5901         return ptr;
5902 }
5903
5904 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5905 {
5906         union trace_eval_map_item *ptr = v;
5907
5908         /*
5909          * Paranoid! If ptr points to end, we don't want to increment past it.
5910          * This really should never happen.
5911          */
5912         (*pos)++;
5913         ptr = update_eval_map(ptr);
5914         if (WARN_ON_ONCE(!ptr))
5915                 return NULL;
5916
5917         ptr++;
5918         ptr = update_eval_map(ptr);
5919
5920         return ptr;
5921 }
5922
5923 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5924 {
5925         union trace_eval_map_item *v;
5926         loff_t l = 0;
5927
5928         mutex_lock(&trace_eval_mutex);
5929
5930         v = trace_eval_maps;
5931         if (v)
5932                 v++;
5933
5934         while (v && l < *pos) {
5935                 v = eval_map_next(m, v, &l);
5936         }
5937
5938         return v;
5939 }
5940
5941 static void eval_map_stop(struct seq_file *m, void *v)
5942 {
5943         mutex_unlock(&trace_eval_mutex);
5944 }
5945
5946 static int eval_map_show(struct seq_file *m, void *v)
5947 {
5948         union trace_eval_map_item *ptr = v;
5949
5950         seq_printf(m, "%s %ld (%s)\n",
5951                    ptr->map.eval_string, ptr->map.eval_value,
5952                    ptr->map.system);
5953
5954         return 0;
5955 }
5956
5957 static const struct seq_operations tracing_eval_map_seq_ops = {
5958         .start          = eval_map_start,
5959         .next           = eval_map_next,
5960         .stop           = eval_map_stop,
5961         .show           = eval_map_show,
5962 };
5963
5964 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5965 {
5966         int ret;
5967
5968         ret = tracing_check_open_get_tr(NULL);
5969         if (ret)
5970                 return ret;
5971
5972         return seq_open(filp, &tracing_eval_map_seq_ops);
5973 }
5974
5975 static const struct file_operations tracing_eval_map_fops = {
5976         .open           = tracing_eval_map_open,
5977         .read           = seq_read,
5978         .llseek         = seq_lseek,
5979         .release        = seq_release,
5980 };
5981
5982 static inline union trace_eval_map_item *
5983 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5984 {
5985         /* Return tail of array given the head */
5986         return ptr + ptr->head.length + 1;
5987 }
5988
5989 static void
5990 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5991                            int len)
5992 {
5993         struct trace_eval_map **stop;
5994         struct trace_eval_map **map;
5995         union trace_eval_map_item *map_array;
5996         union trace_eval_map_item *ptr;
5997
5998         stop = start + len;
5999
6000         /*
6001          * The trace_eval_maps contains the map plus a head and tail item,
6002          * where the head holds the module and length of array, and the
6003          * tail holds a pointer to the next list.
6004          */
6005         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6006         if (!map_array) {
6007                 pr_warn("Unable to allocate trace eval mapping\n");
6008                 return;
6009         }
6010
6011         mutex_lock(&trace_eval_mutex);
6012
6013         if (!trace_eval_maps)
6014                 trace_eval_maps = map_array;
6015         else {
6016                 ptr = trace_eval_maps;
6017                 for (;;) {
6018                         ptr = trace_eval_jmp_to_tail(ptr);
6019                         if (!ptr->tail.next)
6020                                 break;
6021                         ptr = ptr->tail.next;
6022
6023                 }
6024                 ptr->tail.next = map_array;
6025         }
6026         map_array->head.mod = mod;
6027         map_array->head.length = len;
6028         map_array++;
6029
6030         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6031                 map_array->map = **map;
6032                 map_array++;
6033         }
6034         memset(map_array, 0, sizeof(*map_array));
6035
6036         mutex_unlock(&trace_eval_mutex);
6037 }
6038
6039 static void trace_create_eval_file(struct dentry *d_tracer)
6040 {
6041         trace_create_file("eval_map", 0444, d_tracer,
6042                           NULL, &tracing_eval_map_fops);
6043 }
6044
6045 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6046 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6047 static inline void trace_insert_eval_map_file(struct module *mod,
6048                               struct trace_eval_map **start, int len) { }
6049 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6050
6051 static void trace_insert_eval_map(struct module *mod,
6052                                   struct trace_eval_map **start, int len)
6053 {
6054         struct trace_eval_map **map;
6055
6056         if (len <= 0)
6057                 return;
6058
6059         map = start;
6060
6061         trace_event_eval_update(map, len);
6062
6063         trace_insert_eval_map_file(mod, start, len);
6064 }
6065
6066 static ssize_t
6067 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6068                        size_t cnt, loff_t *ppos)
6069 {
6070         struct trace_array *tr = filp->private_data;
6071         char buf[MAX_TRACER_SIZE+2];
6072         int r;
6073
6074         mutex_lock(&trace_types_lock);
6075         r = sprintf(buf, "%s\n", tr->current_trace->name);
6076         mutex_unlock(&trace_types_lock);
6077
6078         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6079 }
6080
6081 int tracer_init(struct tracer *t, struct trace_array *tr)
6082 {
6083         tracing_reset_online_cpus(&tr->array_buffer);
6084         return t->init(tr);
6085 }
6086
6087 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6088 {
6089         int cpu;
6090
6091         for_each_tracing_cpu(cpu)
6092                 per_cpu_ptr(buf->data, cpu)->entries = val;
6093 }
6094
6095 #ifdef CONFIG_TRACER_MAX_TRACE
6096 /* resize @tr's buffer to the size of @size_tr's entries */
6097 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6098                                         struct array_buffer *size_buf, int cpu_id)
6099 {
6100         int cpu, ret = 0;
6101
6102         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6103                 for_each_tracing_cpu(cpu) {
6104                         ret = ring_buffer_resize(trace_buf->buffer,
6105                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6106                         if (ret < 0)
6107                                 break;
6108                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6109                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6110                 }
6111         } else {
6112                 ret = ring_buffer_resize(trace_buf->buffer,
6113                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6114                 if (ret == 0)
6115                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6116                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6117         }
6118
6119         return ret;
6120 }
6121 #endif /* CONFIG_TRACER_MAX_TRACE */
6122
6123 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6124                                         unsigned long size, int cpu)
6125 {
6126         int ret;
6127
6128         /*
6129          * If kernel or user changes the size of the ring buffer
6130          * we use the size that was given, and we can forget about
6131          * expanding it later.
6132          */
6133         ring_buffer_expanded = true;
6134
6135         /* May be called before buffers are initialized */
6136         if (!tr->array_buffer.buffer)
6137                 return 0;
6138
6139         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6140         if (ret < 0)
6141                 return ret;
6142
6143 #ifdef CONFIG_TRACER_MAX_TRACE
6144         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6145             !tr->current_trace->use_max_tr)
6146                 goto out;
6147
6148         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6149         if (ret < 0) {
6150                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6151                                                      &tr->array_buffer, cpu);
6152                 if (r < 0) {
6153                         /*
6154                          * AARGH! We are left with different
6155                          * size max buffer!!!!
6156                          * The max buffer is our "snapshot" buffer.
6157                          * When a tracer needs a snapshot (one of the
6158                          * latency tracers), it swaps the max buffer
6159                          * with the saved snap shot. We succeeded to
6160                          * update the size of the main buffer, but failed to
6161                          * update the size of the max buffer. But when we tried
6162                          * to reset the main buffer to the original size, we
6163                          * failed there too. This is very unlikely to
6164                          * happen, but if it does, warn and kill all
6165                          * tracing.
6166                          */
6167                         WARN_ON(1);
6168                         tracing_disabled = 1;
6169                 }
6170                 return ret;
6171         }
6172
6173         if (cpu == RING_BUFFER_ALL_CPUS)
6174                 set_buffer_entries(&tr->max_buffer, size);
6175         else
6176                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6177
6178  out:
6179 #endif /* CONFIG_TRACER_MAX_TRACE */
6180
6181         if (cpu == RING_BUFFER_ALL_CPUS)
6182                 set_buffer_entries(&tr->array_buffer, size);
6183         else
6184                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6185
6186         return ret;
6187 }
6188
6189 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6190                                   unsigned long size, int cpu_id)
6191 {
6192         int ret;
6193
6194         mutex_lock(&trace_types_lock);
6195
6196         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6197                 /* make sure, this cpu is enabled in the mask */
6198                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6199                         ret = -EINVAL;
6200                         goto out;
6201                 }
6202         }
6203
6204         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6205         if (ret < 0)
6206                 ret = -ENOMEM;
6207
6208 out:
6209         mutex_unlock(&trace_types_lock);
6210
6211         return ret;
6212 }
6213
6214
6215 /**
6216  * tracing_update_buffers - used by tracing facility to expand ring buffers
6217  *
6218  * To save on memory when the tracing is never used on a system with it
6219  * configured in. The ring buffers are set to a minimum size. But once
6220  * a user starts to use the tracing facility, then they need to grow
6221  * to their default size.
6222  *
6223  * This function is to be called when a tracer is about to be used.
6224  */
6225 int tracing_update_buffers(void)
6226 {
6227         int ret = 0;
6228
6229         mutex_lock(&trace_types_lock);
6230         if (!ring_buffer_expanded)
6231                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6232                                                 RING_BUFFER_ALL_CPUS);
6233         mutex_unlock(&trace_types_lock);
6234
6235         return ret;
6236 }
6237
6238 struct trace_option_dentry;
6239
6240 static void
6241 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6242
6243 /*
6244  * Used to clear out the tracer before deletion of an instance.
6245  * Must have trace_types_lock held.
6246  */
6247 static void tracing_set_nop(struct trace_array *tr)
6248 {
6249         if (tr->current_trace == &nop_trace)
6250                 return;
6251         
6252         tr->current_trace->enabled--;
6253
6254         if (tr->current_trace->reset)
6255                 tr->current_trace->reset(tr);
6256
6257         tr->current_trace = &nop_trace;
6258 }
6259
6260 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6261 {
6262         /* Only enable if the directory has been created already. */
6263         if (!tr->dir)
6264                 return;
6265
6266         create_trace_option_files(tr, t);
6267 }
6268
6269 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6270 {
6271         struct tracer *t;
6272 #ifdef CONFIG_TRACER_MAX_TRACE
6273         bool had_max_tr;
6274 #endif
6275         int ret = 0;
6276
6277         mutex_lock(&trace_types_lock);
6278
6279         if (!ring_buffer_expanded) {
6280                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6281                                                 RING_BUFFER_ALL_CPUS);
6282                 if (ret < 0)
6283                         goto out;
6284                 ret = 0;
6285         }
6286
6287         for (t = trace_types; t; t = t->next) {
6288                 if (strcmp(t->name, buf) == 0)
6289                         break;
6290         }
6291         if (!t) {
6292                 ret = -EINVAL;
6293                 goto out;
6294         }
6295         if (t == tr->current_trace)
6296                 goto out;
6297
6298 #ifdef CONFIG_TRACER_SNAPSHOT
6299         if (t->use_max_tr) {
6300                 arch_spin_lock(&tr->max_lock);
6301                 if (tr->cond_snapshot)
6302                         ret = -EBUSY;
6303                 arch_spin_unlock(&tr->max_lock);
6304                 if (ret)
6305                         goto out;
6306         }
6307 #endif
6308         /* Some tracers won't work on kernel command line */
6309         if (system_state < SYSTEM_RUNNING && t->noboot) {
6310                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6311                         t->name);
6312                 goto out;
6313         }
6314
6315         /* Some tracers are only allowed for the top level buffer */
6316         if (!trace_ok_for_array(t, tr)) {
6317                 ret = -EINVAL;
6318                 goto out;
6319         }
6320
6321         /* If trace pipe files are being read, we can't change the tracer */
6322         if (tr->trace_ref) {
6323                 ret = -EBUSY;
6324                 goto out;
6325         }
6326
6327         trace_branch_disable();
6328
6329         tr->current_trace->enabled--;
6330
6331         if (tr->current_trace->reset)
6332                 tr->current_trace->reset(tr);
6333
6334         /* Current trace needs to be nop_trace before synchronize_rcu */
6335         tr->current_trace = &nop_trace;
6336
6337 #ifdef CONFIG_TRACER_MAX_TRACE
6338         had_max_tr = tr->allocated_snapshot;
6339
6340         if (had_max_tr && !t->use_max_tr) {
6341                 /*
6342                  * We need to make sure that the update_max_tr sees that
6343                  * current_trace changed to nop_trace to keep it from
6344                  * swapping the buffers after we resize it.
6345                  * The update_max_tr is called from interrupts disabled
6346                  * so a synchronized_sched() is sufficient.
6347                  */
6348                 synchronize_rcu();
6349                 free_snapshot(tr);
6350         }
6351 #endif
6352
6353 #ifdef CONFIG_TRACER_MAX_TRACE
6354         if (t->use_max_tr && !had_max_tr) {
6355                 ret = tracing_alloc_snapshot_instance(tr);
6356                 if (ret < 0)
6357                         goto out;
6358         }
6359 #endif
6360
6361         if (t->init) {
6362                 ret = tracer_init(t, tr);
6363                 if (ret)
6364                         goto out;
6365         }
6366
6367         tr->current_trace = t;
6368         tr->current_trace->enabled++;
6369         trace_branch_enable(tr);
6370  out:
6371         mutex_unlock(&trace_types_lock);
6372
6373         return ret;
6374 }
6375
6376 static ssize_t
6377 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6378                         size_t cnt, loff_t *ppos)
6379 {
6380         struct trace_array *tr = filp->private_data;
6381         char buf[MAX_TRACER_SIZE+1];
6382         int i;
6383         size_t ret;
6384         int err;
6385
6386         ret = cnt;
6387
6388         if (cnt > MAX_TRACER_SIZE)
6389                 cnt = MAX_TRACER_SIZE;
6390
6391         if (copy_from_user(buf, ubuf, cnt))
6392                 return -EFAULT;
6393
6394         buf[cnt] = 0;
6395
6396         /* strip ending whitespace. */
6397         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6398                 buf[i] = 0;
6399
6400         err = tracing_set_tracer(tr, buf);
6401         if (err)
6402                 return err;
6403
6404         *ppos += ret;
6405
6406         return ret;
6407 }
6408
6409 static ssize_t
6410 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6411                    size_t cnt, loff_t *ppos)
6412 {
6413         char buf[64];
6414         int r;
6415
6416         r = snprintf(buf, sizeof(buf), "%ld\n",
6417                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6418         if (r > sizeof(buf))
6419                 r = sizeof(buf);
6420         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6421 }
6422
6423 static ssize_t
6424 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6425                     size_t cnt, loff_t *ppos)
6426 {
6427         unsigned long val;
6428         int ret;
6429
6430         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6431         if (ret)
6432                 return ret;
6433
6434         *ptr = val * 1000;
6435
6436         return cnt;
6437 }
6438
6439 static ssize_t
6440 tracing_thresh_read(struct file *filp, char __user *ubuf,
6441                     size_t cnt, loff_t *ppos)
6442 {
6443         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6444 }
6445
6446 static ssize_t
6447 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6448                      size_t cnt, loff_t *ppos)
6449 {
6450         struct trace_array *tr = filp->private_data;
6451         int ret;
6452
6453         mutex_lock(&trace_types_lock);
6454         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6455         if (ret < 0)
6456                 goto out;
6457
6458         if (tr->current_trace->update_thresh) {
6459                 ret = tr->current_trace->update_thresh(tr);
6460                 if (ret < 0)
6461                         goto out;
6462         }
6463
6464         ret = cnt;
6465 out:
6466         mutex_unlock(&trace_types_lock);
6467
6468         return ret;
6469 }
6470
6471 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6472
6473 static ssize_t
6474 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6475                      size_t cnt, loff_t *ppos)
6476 {
6477         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6478 }
6479
6480 static ssize_t
6481 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6482                       size_t cnt, loff_t *ppos)
6483 {
6484         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6485 }
6486
6487 #endif
6488
6489 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6490 {
6491         struct trace_array *tr = inode->i_private;
6492         struct trace_iterator *iter;
6493         int ret;
6494
6495         ret = tracing_check_open_get_tr(tr);
6496         if (ret)
6497                 return ret;
6498
6499         mutex_lock(&trace_types_lock);
6500
6501         /* create a buffer to store the information to pass to userspace */
6502         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6503         if (!iter) {
6504                 ret = -ENOMEM;
6505                 __trace_array_put(tr);
6506                 goto out;
6507         }
6508
6509         trace_seq_init(&iter->seq);
6510         iter->trace = tr->current_trace;
6511
6512         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6513                 ret = -ENOMEM;
6514                 goto fail;
6515         }
6516
6517         /* trace pipe does not show start of buffer */
6518         cpumask_setall(iter->started);
6519
6520         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6521                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6522
6523         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6524         if (trace_clocks[tr->clock_id].in_ns)
6525                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6526
6527         iter->tr = tr;
6528         iter->array_buffer = &tr->array_buffer;
6529         iter->cpu_file = tracing_get_cpu(inode);
6530         mutex_init(&iter->mutex);
6531         filp->private_data = iter;
6532
6533         if (iter->trace->pipe_open)
6534                 iter->trace->pipe_open(iter);
6535
6536         nonseekable_open(inode, filp);
6537
6538         tr->trace_ref++;
6539 out:
6540         mutex_unlock(&trace_types_lock);
6541         return ret;
6542
6543 fail:
6544         kfree(iter);
6545         __trace_array_put(tr);
6546         mutex_unlock(&trace_types_lock);
6547         return ret;
6548 }
6549
6550 static int tracing_release_pipe(struct inode *inode, struct file *file)
6551 {
6552         struct trace_iterator *iter = file->private_data;
6553         struct trace_array *tr = inode->i_private;
6554
6555         mutex_lock(&trace_types_lock);
6556
6557         tr->trace_ref--;
6558
6559         if (iter->trace->pipe_close)
6560                 iter->trace->pipe_close(iter);
6561
6562         mutex_unlock(&trace_types_lock);
6563
6564         free_cpumask_var(iter->started);
6565         mutex_destroy(&iter->mutex);
6566         kfree(iter);
6567
6568         trace_array_put(tr);
6569
6570         return 0;
6571 }
6572
6573 static __poll_t
6574 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6575 {
6576         struct trace_array *tr = iter->tr;
6577
6578         /* Iterators are static, they should be filled or empty */
6579         if (trace_buffer_iter(iter, iter->cpu_file))
6580                 return EPOLLIN | EPOLLRDNORM;
6581
6582         if (tr->trace_flags & TRACE_ITER_BLOCK)
6583                 /*
6584                  * Always select as readable when in blocking mode
6585                  */
6586                 return EPOLLIN | EPOLLRDNORM;
6587         else
6588                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6589                                              filp, poll_table);
6590 }
6591
6592 static __poll_t
6593 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6594 {
6595         struct trace_iterator *iter = filp->private_data;
6596
6597         return trace_poll(iter, filp, poll_table);
6598 }
6599
6600 /* Must be called with iter->mutex held. */
6601 static int tracing_wait_pipe(struct file *filp)
6602 {
6603         struct trace_iterator *iter = filp->private_data;
6604         int ret;
6605
6606         while (trace_empty(iter)) {
6607
6608                 if ((filp->f_flags & O_NONBLOCK)) {
6609                         return -EAGAIN;
6610                 }
6611
6612                 /*
6613                  * We block until we read something and tracing is disabled.
6614                  * We still block if tracing is disabled, but we have never
6615                  * read anything. This allows a user to cat this file, and
6616                  * then enable tracing. But after we have read something,
6617                  * we give an EOF when tracing is again disabled.
6618                  *
6619                  * iter->pos will be 0 if we haven't read anything.
6620                  */
6621                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6622                         break;
6623
6624                 mutex_unlock(&iter->mutex);
6625
6626                 ret = wait_on_pipe(iter, 0);
6627
6628                 mutex_lock(&iter->mutex);
6629
6630                 if (ret)
6631                         return ret;
6632         }
6633
6634         return 1;
6635 }
6636
6637 /*
6638  * Consumer reader.
6639  */
6640 static ssize_t
6641 tracing_read_pipe(struct file *filp, char __user *ubuf,
6642                   size_t cnt, loff_t *ppos)
6643 {
6644         struct trace_iterator *iter = filp->private_data;
6645         ssize_t sret;
6646
6647         /*
6648          * Avoid more than one consumer on a single file descriptor
6649          * This is just a matter of traces coherency, the ring buffer itself
6650          * is protected.
6651          */
6652         mutex_lock(&iter->mutex);
6653
6654         /* return any leftover data */
6655         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6656         if (sret != -EBUSY)
6657                 goto out;
6658
6659         trace_seq_init(&iter->seq);
6660
6661         if (iter->trace->read) {
6662                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6663                 if (sret)
6664                         goto out;
6665         }
6666
6667 waitagain:
6668         sret = tracing_wait_pipe(filp);
6669         if (sret <= 0)
6670                 goto out;
6671
6672         /* stop when tracing is finished */
6673         if (trace_empty(iter)) {
6674                 sret = 0;
6675                 goto out;
6676         }
6677
6678         if (cnt >= PAGE_SIZE)
6679                 cnt = PAGE_SIZE - 1;
6680
6681         /* reset all but tr, trace, and overruns */
6682         memset(&iter->seq, 0,
6683                sizeof(struct trace_iterator) -
6684                offsetof(struct trace_iterator, seq));
6685         cpumask_clear(iter->started);
6686         trace_seq_init(&iter->seq);
6687         iter->pos = -1;
6688
6689         trace_event_read_lock();
6690         trace_access_lock(iter->cpu_file);
6691         while (trace_find_next_entry_inc(iter) != NULL) {
6692                 enum print_line_t ret;
6693                 int save_len = iter->seq.seq.len;
6694
6695                 ret = print_trace_line(iter);
6696                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6697                         /* don't print partial lines */
6698                         iter->seq.seq.len = save_len;
6699                         break;
6700                 }
6701                 if (ret != TRACE_TYPE_NO_CONSUME)
6702                         trace_consume(iter);
6703
6704                 if (trace_seq_used(&iter->seq) >= cnt)
6705                         break;
6706
6707                 /*
6708                  * Setting the full flag means we reached the trace_seq buffer
6709                  * size and we should leave by partial output condition above.
6710                  * One of the trace_seq_* functions is not used properly.
6711                  */
6712                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6713                           iter->ent->type);
6714         }
6715         trace_access_unlock(iter->cpu_file);
6716         trace_event_read_unlock();
6717
6718         /* Now copy what we have to the user */
6719         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6720         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6721                 trace_seq_init(&iter->seq);
6722
6723         /*
6724          * If there was nothing to send to user, in spite of consuming trace
6725          * entries, go back to wait for more entries.
6726          */
6727         if (sret == -EBUSY)
6728                 goto waitagain;
6729
6730 out:
6731         mutex_unlock(&iter->mutex);
6732
6733         return sret;
6734 }
6735
6736 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6737                                      unsigned int idx)
6738 {
6739         __free_page(spd->pages[idx]);
6740 }
6741
6742 static size_t
6743 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6744 {
6745         size_t count;
6746         int save_len;
6747         int ret;
6748
6749         /* Seq buffer is page-sized, exactly what we need. */
6750         for (;;) {
6751                 save_len = iter->seq.seq.len;
6752                 ret = print_trace_line(iter);
6753
6754                 if (trace_seq_has_overflowed(&iter->seq)) {
6755                         iter->seq.seq.len = save_len;
6756                         break;
6757                 }
6758
6759                 /*
6760                  * This should not be hit, because it should only
6761                  * be set if the iter->seq overflowed. But check it
6762                  * anyway to be safe.
6763                  */
6764                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6765                         iter->seq.seq.len = save_len;
6766                         break;
6767                 }
6768
6769                 count = trace_seq_used(&iter->seq) - save_len;
6770                 if (rem < count) {
6771                         rem = 0;
6772                         iter->seq.seq.len = save_len;
6773                         break;
6774                 }
6775
6776                 if (ret != TRACE_TYPE_NO_CONSUME)
6777                         trace_consume(iter);
6778                 rem -= count;
6779                 if (!trace_find_next_entry_inc(iter))   {
6780                         rem = 0;
6781                         iter->ent = NULL;
6782                         break;
6783                 }
6784         }
6785
6786         return rem;
6787 }
6788
6789 static ssize_t tracing_splice_read_pipe(struct file *filp,
6790                                         loff_t *ppos,
6791                                         struct pipe_inode_info *pipe,
6792                                         size_t len,
6793                                         unsigned int flags)
6794 {
6795         struct page *pages_def[PIPE_DEF_BUFFERS];
6796         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6797         struct trace_iterator *iter = filp->private_data;
6798         struct splice_pipe_desc spd = {
6799                 .pages          = pages_def,
6800                 .partial        = partial_def,
6801                 .nr_pages       = 0, /* This gets updated below. */
6802                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6803                 .ops            = &default_pipe_buf_ops,
6804                 .spd_release    = tracing_spd_release_pipe,
6805         };
6806         ssize_t ret;
6807         size_t rem;
6808         unsigned int i;
6809
6810         if (splice_grow_spd(pipe, &spd))
6811                 return -ENOMEM;
6812
6813         mutex_lock(&iter->mutex);
6814
6815         if (iter->trace->splice_read) {
6816                 ret = iter->trace->splice_read(iter, filp,
6817                                                ppos, pipe, len, flags);
6818                 if (ret)
6819                         goto out_err;
6820         }
6821
6822         ret = tracing_wait_pipe(filp);
6823         if (ret <= 0)
6824                 goto out_err;
6825
6826         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6827                 ret = -EFAULT;
6828                 goto out_err;
6829         }
6830
6831         trace_event_read_lock();
6832         trace_access_lock(iter->cpu_file);
6833
6834         /* Fill as many pages as possible. */
6835         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6836                 spd.pages[i] = alloc_page(GFP_KERNEL);
6837                 if (!spd.pages[i])
6838                         break;
6839
6840                 rem = tracing_fill_pipe_page(rem, iter);
6841
6842                 /* Copy the data into the page, so we can start over. */
6843                 ret = trace_seq_to_buffer(&iter->seq,
6844                                           page_address(spd.pages[i]),
6845                                           trace_seq_used(&iter->seq));
6846                 if (ret < 0) {
6847                         __free_page(spd.pages[i]);
6848                         break;
6849                 }
6850                 spd.partial[i].offset = 0;
6851                 spd.partial[i].len = trace_seq_used(&iter->seq);
6852
6853                 trace_seq_init(&iter->seq);
6854         }
6855
6856         trace_access_unlock(iter->cpu_file);
6857         trace_event_read_unlock();
6858         mutex_unlock(&iter->mutex);
6859
6860         spd.nr_pages = i;
6861
6862         if (i)
6863                 ret = splice_to_pipe(pipe, &spd);
6864         else
6865                 ret = 0;
6866 out:
6867         splice_shrink_spd(&spd);
6868         return ret;
6869
6870 out_err:
6871         mutex_unlock(&iter->mutex);
6872         goto out;
6873 }
6874
6875 static ssize_t
6876 tracing_entries_read(struct file *filp, char __user *ubuf,
6877                      size_t cnt, loff_t *ppos)
6878 {
6879         struct inode *inode = file_inode(filp);
6880         struct trace_array *tr = inode->i_private;
6881         int cpu = tracing_get_cpu(inode);
6882         char buf[64];
6883         int r = 0;
6884         ssize_t ret;
6885
6886         mutex_lock(&trace_types_lock);
6887
6888         if (cpu == RING_BUFFER_ALL_CPUS) {
6889                 int cpu, buf_size_same;
6890                 unsigned long size;
6891
6892                 size = 0;
6893                 buf_size_same = 1;
6894                 /* check if all cpu sizes are same */
6895                 for_each_tracing_cpu(cpu) {
6896                         /* fill in the size from first enabled cpu */
6897                         if (size == 0)
6898                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6899                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6900                                 buf_size_same = 0;
6901                                 break;
6902                         }
6903                 }
6904
6905                 if (buf_size_same) {
6906                         if (!ring_buffer_expanded)
6907                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6908                                             size >> 10,
6909                                             trace_buf_size >> 10);
6910                         else
6911                                 r = sprintf(buf, "%lu\n", size >> 10);
6912                 } else
6913                         r = sprintf(buf, "X\n");
6914         } else
6915                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6916
6917         mutex_unlock(&trace_types_lock);
6918
6919         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6920         return ret;
6921 }
6922
6923 static ssize_t
6924 tracing_entries_write(struct file *filp, const char __user *ubuf,
6925                       size_t cnt, loff_t *ppos)
6926 {
6927         struct inode *inode = file_inode(filp);
6928         struct trace_array *tr = inode->i_private;
6929         unsigned long val;
6930         int ret;
6931
6932         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6933         if (ret)
6934                 return ret;
6935
6936         /* must have at least 1 entry */
6937         if (!val)
6938                 return -EINVAL;
6939
6940         /* value is in KB */
6941         val <<= 10;
6942         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6943         if (ret < 0)
6944                 return ret;
6945
6946         *ppos += cnt;
6947
6948         return cnt;
6949 }
6950
6951 static ssize_t
6952 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6953                                 size_t cnt, loff_t *ppos)
6954 {
6955         struct trace_array *tr = filp->private_data;
6956         char buf[64];
6957         int r, cpu;
6958         unsigned long size = 0, expanded_size = 0;
6959
6960         mutex_lock(&trace_types_lock);
6961         for_each_tracing_cpu(cpu) {
6962                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6963                 if (!ring_buffer_expanded)
6964                         expanded_size += trace_buf_size >> 10;
6965         }
6966         if (ring_buffer_expanded)
6967                 r = sprintf(buf, "%lu\n", size);
6968         else
6969                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6970         mutex_unlock(&trace_types_lock);
6971
6972         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6973 }
6974
6975 static ssize_t
6976 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6977                           size_t cnt, loff_t *ppos)
6978 {
6979         /*
6980          * There is no need to read what the user has written, this function
6981          * is just to make sure that there is no error when "echo" is used
6982          */
6983
6984         *ppos += cnt;
6985
6986         return cnt;
6987 }
6988
6989 static int
6990 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6991 {
6992         struct trace_array *tr = inode->i_private;
6993
6994         /* disable tracing ? */
6995         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6996                 tracer_tracing_off(tr);
6997         /* resize the ring buffer to 0 */
6998         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6999
7000         trace_array_put(tr);
7001
7002         return 0;
7003 }
7004
7005 static ssize_t
7006 tracing_mark_write(struct file *filp, const char __user *ubuf,
7007                                         size_t cnt, loff_t *fpos)
7008 {
7009         struct trace_array *tr = filp->private_data;
7010         struct ring_buffer_event *event;
7011         enum event_trigger_type tt = ETT_NONE;
7012         struct trace_buffer *buffer;
7013         struct print_entry *entry;
7014         ssize_t written;
7015         int size;
7016         int len;
7017
7018 /* Used in tracing_mark_raw_write() as well */
7019 #define FAULTED_STR "<faulted>"
7020 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7021
7022         if (tracing_disabled)
7023                 return -EINVAL;
7024
7025         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7026                 return -EINVAL;
7027
7028         if (cnt > TRACE_BUF_SIZE)
7029                 cnt = TRACE_BUF_SIZE;
7030
7031         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7032
7033         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7034
7035         /* If less than "<faulted>", then make sure we can still add that */
7036         if (cnt < FAULTED_SIZE)
7037                 size += FAULTED_SIZE - cnt;
7038
7039         buffer = tr->array_buffer.buffer;
7040         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7041                                             tracing_gen_ctx());
7042         if (unlikely(!event))
7043                 /* Ring buffer disabled, return as if not open for write */
7044                 return -EBADF;
7045
7046         entry = ring_buffer_event_data(event);
7047         entry->ip = _THIS_IP_;
7048
7049         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7050         if (len) {
7051                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7052                 cnt = FAULTED_SIZE;
7053                 written = -EFAULT;
7054         } else
7055                 written = cnt;
7056
7057         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7058                 /* do not add \n before testing triggers, but add \0 */
7059                 entry->buf[cnt] = '\0';
7060                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7061         }
7062
7063         if (entry->buf[cnt - 1] != '\n') {
7064                 entry->buf[cnt] = '\n';
7065                 entry->buf[cnt + 1] = '\0';
7066         } else
7067                 entry->buf[cnt] = '\0';
7068
7069         if (static_branch_unlikely(&trace_marker_exports_enabled))
7070                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7071         __buffer_unlock_commit(buffer, event);
7072
7073         if (tt)
7074                 event_triggers_post_call(tr->trace_marker_file, tt);
7075
7076         if (written > 0)
7077                 *fpos += written;
7078
7079         return written;
7080 }
7081
7082 /* Limit it for now to 3K (including tag) */
7083 #define RAW_DATA_MAX_SIZE (1024*3)
7084
7085 static ssize_t
7086 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7087                                         size_t cnt, loff_t *fpos)
7088 {
7089         struct trace_array *tr = filp->private_data;
7090         struct ring_buffer_event *event;
7091         struct trace_buffer *buffer;
7092         struct raw_data_entry *entry;
7093         ssize_t written;
7094         int size;
7095         int len;
7096
7097 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7098
7099         if (tracing_disabled)
7100                 return -EINVAL;
7101
7102         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7103                 return -EINVAL;
7104
7105         /* The marker must at least have a tag id */
7106         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7107                 return -EINVAL;
7108
7109         if (cnt > TRACE_BUF_SIZE)
7110                 cnt = TRACE_BUF_SIZE;
7111
7112         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7113
7114         size = sizeof(*entry) + cnt;
7115         if (cnt < FAULT_SIZE_ID)
7116                 size += FAULT_SIZE_ID - cnt;
7117
7118         buffer = tr->array_buffer.buffer;
7119         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7120                                             tracing_gen_ctx());
7121         if (!event)
7122                 /* Ring buffer disabled, return as if not open for write */
7123                 return -EBADF;
7124
7125         entry = ring_buffer_event_data(event);
7126
7127         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7128         if (len) {
7129                 entry->id = -1;
7130                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7131                 written = -EFAULT;
7132         } else
7133                 written = cnt;
7134
7135         __buffer_unlock_commit(buffer, event);
7136
7137         if (written > 0)
7138                 *fpos += written;
7139
7140         return written;
7141 }
7142
7143 static int tracing_clock_show(struct seq_file *m, void *v)
7144 {
7145         struct trace_array *tr = m->private;
7146         int i;
7147
7148         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7149                 seq_printf(m,
7150                         "%s%s%s%s", i ? " " : "",
7151                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7152                         i == tr->clock_id ? "]" : "");
7153         seq_putc(m, '\n');
7154
7155         return 0;
7156 }
7157
7158 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7159 {
7160         int i;
7161
7162         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7163                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7164                         break;
7165         }
7166         if (i == ARRAY_SIZE(trace_clocks))
7167                 return -EINVAL;
7168
7169         mutex_lock(&trace_types_lock);
7170
7171         tr->clock_id = i;
7172
7173         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7174
7175         /*
7176          * New clock may not be consistent with the previous clock.
7177          * Reset the buffer so that it doesn't have incomparable timestamps.
7178          */
7179         tracing_reset_online_cpus(&tr->array_buffer);
7180
7181 #ifdef CONFIG_TRACER_MAX_TRACE
7182         if (tr->max_buffer.buffer)
7183                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7184         tracing_reset_online_cpus(&tr->max_buffer);
7185 #endif
7186
7187         mutex_unlock(&trace_types_lock);
7188
7189         return 0;
7190 }
7191
7192 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7193                                    size_t cnt, loff_t *fpos)
7194 {
7195         struct seq_file *m = filp->private_data;
7196         struct trace_array *tr = m->private;
7197         char buf[64];
7198         const char *clockstr;
7199         int ret;
7200
7201         if (cnt >= sizeof(buf))
7202                 return -EINVAL;
7203
7204         if (copy_from_user(buf, ubuf, cnt))
7205                 return -EFAULT;
7206
7207         buf[cnt] = 0;
7208
7209         clockstr = strstrip(buf);
7210
7211         ret = tracing_set_clock(tr, clockstr);
7212         if (ret)
7213                 return ret;
7214
7215         *fpos += cnt;
7216
7217         return cnt;
7218 }
7219
7220 static int tracing_clock_open(struct inode *inode, struct file *file)
7221 {
7222         struct trace_array *tr = inode->i_private;
7223         int ret;
7224
7225         ret = tracing_check_open_get_tr(tr);
7226         if (ret)
7227                 return ret;
7228
7229         ret = single_open(file, tracing_clock_show, inode->i_private);
7230         if (ret < 0)
7231                 trace_array_put(tr);
7232
7233         return ret;
7234 }
7235
7236 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7237 {
7238         struct trace_array *tr = m->private;
7239
7240         mutex_lock(&trace_types_lock);
7241
7242         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7243                 seq_puts(m, "delta [absolute]\n");
7244         else
7245                 seq_puts(m, "[delta] absolute\n");
7246
7247         mutex_unlock(&trace_types_lock);
7248
7249         return 0;
7250 }
7251
7252 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7253 {
7254         struct trace_array *tr = inode->i_private;
7255         int ret;
7256
7257         ret = tracing_check_open_get_tr(tr);
7258         if (ret)
7259                 return ret;
7260
7261         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7262         if (ret < 0)
7263                 trace_array_put(tr);
7264
7265         return ret;
7266 }
7267
7268 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7269 {
7270         if (rbe == this_cpu_read(trace_buffered_event))
7271                 return ring_buffer_time_stamp(buffer);
7272
7273         return ring_buffer_event_time_stamp(buffer, rbe);
7274 }
7275
7276 /*
7277  * Set or disable using the per CPU trace_buffer_event when possible.
7278  */
7279 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7280 {
7281         int ret = 0;
7282
7283         mutex_lock(&trace_types_lock);
7284
7285         if (set && tr->no_filter_buffering_ref++)
7286                 goto out;
7287
7288         if (!set) {
7289                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7290                         ret = -EINVAL;
7291                         goto out;
7292                 }
7293
7294                 --tr->no_filter_buffering_ref;
7295         }
7296  out:
7297         mutex_unlock(&trace_types_lock);
7298
7299         return ret;
7300 }
7301
7302 struct ftrace_buffer_info {
7303         struct trace_iterator   iter;
7304         void                    *spare;
7305         unsigned int            spare_cpu;
7306         unsigned int            read;
7307 };
7308
7309 #ifdef CONFIG_TRACER_SNAPSHOT
7310 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7311 {
7312         struct trace_array *tr = inode->i_private;
7313         struct trace_iterator *iter;
7314         struct seq_file *m;
7315         int ret;
7316
7317         ret = tracing_check_open_get_tr(tr);
7318         if (ret)
7319                 return ret;
7320
7321         if (file->f_mode & FMODE_READ) {
7322                 iter = __tracing_open(inode, file, true);
7323                 if (IS_ERR(iter))
7324                         ret = PTR_ERR(iter);
7325         } else {
7326                 /* Writes still need the seq_file to hold the private data */
7327                 ret = -ENOMEM;
7328                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7329                 if (!m)
7330                         goto out;
7331                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7332                 if (!iter) {
7333                         kfree(m);
7334                         goto out;
7335                 }
7336                 ret = 0;
7337
7338                 iter->tr = tr;
7339                 iter->array_buffer = &tr->max_buffer;
7340                 iter->cpu_file = tracing_get_cpu(inode);
7341                 m->private = iter;
7342                 file->private_data = m;
7343         }
7344 out:
7345         if (ret < 0)
7346                 trace_array_put(tr);
7347
7348         return ret;
7349 }
7350
7351 static ssize_t
7352 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7353                        loff_t *ppos)
7354 {
7355         struct seq_file *m = filp->private_data;
7356         struct trace_iterator *iter = m->private;
7357         struct trace_array *tr = iter->tr;
7358         unsigned long val;
7359         int ret;
7360
7361         ret = tracing_update_buffers();
7362         if (ret < 0)
7363                 return ret;
7364
7365         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7366         if (ret)
7367                 return ret;
7368
7369         mutex_lock(&trace_types_lock);
7370
7371         if (tr->current_trace->use_max_tr) {
7372                 ret = -EBUSY;
7373                 goto out;
7374         }
7375
7376         arch_spin_lock(&tr->max_lock);
7377         if (tr->cond_snapshot)
7378                 ret = -EBUSY;
7379         arch_spin_unlock(&tr->max_lock);
7380         if (ret)
7381                 goto out;
7382
7383         switch (val) {
7384         case 0:
7385                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7386                         ret = -EINVAL;
7387                         break;
7388                 }
7389                 if (tr->allocated_snapshot)
7390                         free_snapshot(tr);
7391                 break;
7392         case 1:
7393 /* Only allow per-cpu swap if the ring buffer supports it */
7394 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7395                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7396                         ret = -EINVAL;
7397                         break;
7398                 }
7399 #endif
7400                 if (tr->allocated_snapshot)
7401                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7402                                         &tr->array_buffer, iter->cpu_file);
7403                 else
7404                         ret = tracing_alloc_snapshot_instance(tr);
7405                 if (ret < 0)
7406                         break;
7407                 local_irq_disable();
7408                 /* Now, we're going to swap */
7409                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7410                         update_max_tr(tr, current, smp_processor_id(), NULL);
7411                 else
7412                         update_max_tr_single(tr, current, iter->cpu_file);
7413                 local_irq_enable();
7414                 break;
7415         default:
7416                 if (tr->allocated_snapshot) {
7417                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7418                                 tracing_reset_online_cpus(&tr->max_buffer);
7419                         else
7420                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7421                 }
7422                 break;
7423         }
7424
7425         if (ret >= 0) {
7426                 *ppos += cnt;
7427                 ret = cnt;
7428         }
7429 out:
7430         mutex_unlock(&trace_types_lock);
7431         return ret;
7432 }
7433
7434 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7435 {
7436         struct seq_file *m = file->private_data;
7437         int ret;
7438
7439         ret = tracing_release(inode, file);
7440
7441         if (file->f_mode & FMODE_READ)
7442                 return ret;
7443
7444         /* If write only, the seq_file is just a stub */
7445         if (m)
7446                 kfree(m->private);
7447         kfree(m);
7448
7449         return 0;
7450 }
7451
7452 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7453 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7454                                     size_t count, loff_t *ppos);
7455 static int tracing_buffers_release(struct inode *inode, struct file *file);
7456 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7457                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7458
7459 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7460 {
7461         struct ftrace_buffer_info *info;
7462         int ret;
7463
7464         /* The following checks for tracefs lockdown */
7465         ret = tracing_buffers_open(inode, filp);
7466         if (ret < 0)
7467                 return ret;
7468
7469         info = filp->private_data;
7470
7471         if (info->iter.trace->use_max_tr) {
7472                 tracing_buffers_release(inode, filp);
7473                 return -EBUSY;
7474         }
7475
7476         info->iter.snapshot = true;
7477         info->iter.array_buffer = &info->iter.tr->max_buffer;
7478
7479         return ret;
7480 }
7481
7482 #endif /* CONFIG_TRACER_SNAPSHOT */
7483
7484
7485 static const struct file_operations tracing_thresh_fops = {
7486         .open           = tracing_open_generic,
7487         .read           = tracing_thresh_read,
7488         .write          = tracing_thresh_write,
7489         .llseek         = generic_file_llseek,
7490 };
7491
7492 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7493 static const struct file_operations tracing_max_lat_fops = {
7494         .open           = tracing_open_generic,
7495         .read           = tracing_max_lat_read,
7496         .write          = tracing_max_lat_write,
7497         .llseek         = generic_file_llseek,
7498 };
7499 #endif
7500
7501 static const struct file_operations set_tracer_fops = {
7502         .open           = tracing_open_generic,
7503         .read           = tracing_set_trace_read,
7504         .write          = tracing_set_trace_write,
7505         .llseek         = generic_file_llseek,
7506 };
7507
7508 static const struct file_operations tracing_pipe_fops = {
7509         .open           = tracing_open_pipe,
7510         .poll           = tracing_poll_pipe,
7511         .read           = tracing_read_pipe,
7512         .splice_read    = tracing_splice_read_pipe,
7513         .release        = tracing_release_pipe,
7514         .llseek         = no_llseek,
7515 };
7516
7517 static const struct file_operations tracing_entries_fops = {
7518         .open           = tracing_open_generic_tr,
7519         .read           = tracing_entries_read,
7520         .write          = tracing_entries_write,
7521         .llseek         = generic_file_llseek,
7522         .release        = tracing_release_generic_tr,
7523 };
7524
7525 static const struct file_operations tracing_total_entries_fops = {
7526         .open           = tracing_open_generic_tr,
7527         .read           = tracing_total_entries_read,
7528         .llseek         = generic_file_llseek,
7529         .release        = tracing_release_generic_tr,
7530 };
7531
7532 static const struct file_operations tracing_free_buffer_fops = {
7533         .open           = tracing_open_generic_tr,
7534         .write          = tracing_free_buffer_write,
7535         .release        = tracing_free_buffer_release,
7536 };
7537
7538 static const struct file_operations tracing_mark_fops = {
7539         .open           = tracing_open_generic_tr,
7540         .write          = tracing_mark_write,
7541         .llseek         = generic_file_llseek,
7542         .release        = tracing_release_generic_tr,
7543 };
7544
7545 static const struct file_operations tracing_mark_raw_fops = {
7546         .open           = tracing_open_generic_tr,
7547         .write          = tracing_mark_raw_write,
7548         .llseek         = generic_file_llseek,
7549         .release        = tracing_release_generic_tr,
7550 };
7551
7552 static const struct file_operations trace_clock_fops = {
7553         .open           = tracing_clock_open,
7554         .read           = seq_read,
7555         .llseek         = seq_lseek,
7556         .release        = tracing_single_release_tr,
7557         .write          = tracing_clock_write,
7558 };
7559
7560 static const struct file_operations trace_time_stamp_mode_fops = {
7561         .open           = tracing_time_stamp_mode_open,
7562         .read           = seq_read,
7563         .llseek         = seq_lseek,
7564         .release        = tracing_single_release_tr,
7565 };
7566
7567 #ifdef CONFIG_TRACER_SNAPSHOT
7568 static const struct file_operations snapshot_fops = {
7569         .open           = tracing_snapshot_open,
7570         .read           = seq_read,
7571         .write          = tracing_snapshot_write,
7572         .llseek         = tracing_lseek,
7573         .release        = tracing_snapshot_release,
7574 };
7575
7576 static const struct file_operations snapshot_raw_fops = {
7577         .open           = snapshot_raw_open,
7578         .read           = tracing_buffers_read,
7579         .release        = tracing_buffers_release,
7580         .splice_read    = tracing_buffers_splice_read,
7581         .llseek         = no_llseek,
7582 };
7583
7584 #endif /* CONFIG_TRACER_SNAPSHOT */
7585
7586 #define TRACING_LOG_ERRS_MAX    8
7587 #define TRACING_LOG_LOC_MAX     128
7588
7589 #define CMD_PREFIX "  Command: "
7590
7591 struct err_info {
7592         const char      **errs; /* ptr to loc-specific array of err strings */
7593         u8              type;   /* index into errs -> specific err string */
7594         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7595         u64             ts;
7596 };
7597
7598 struct tracing_log_err {
7599         struct list_head        list;
7600         struct err_info         info;
7601         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7602         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7603 };
7604
7605 static DEFINE_MUTEX(tracing_err_log_lock);
7606
7607 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7608 {
7609         struct tracing_log_err *err;
7610
7611         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7612                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7613                 if (!err)
7614                         err = ERR_PTR(-ENOMEM);
7615                 tr->n_err_log_entries++;
7616
7617                 return err;
7618         }
7619
7620         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7621         list_del(&err->list);
7622
7623         return err;
7624 }
7625
7626 /**
7627  * err_pos - find the position of a string within a command for error careting
7628  * @cmd: The tracing command that caused the error
7629  * @str: The string to position the caret at within @cmd
7630  *
7631  * Finds the position of the first occurrence of @str within @cmd.  The
7632  * return value can be passed to tracing_log_err() for caret placement
7633  * within @cmd.
7634  *
7635  * Returns the index within @cmd of the first occurrence of @str or 0
7636  * if @str was not found.
7637  */
7638 unsigned int err_pos(char *cmd, const char *str)
7639 {
7640         char *found;
7641
7642         if (WARN_ON(!strlen(cmd)))
7643                 return 0;
7644
7645         found = strstr(cmd, str);
7646         if (found)
7647                 return found - cmd;
7648
7649         return 0;
7650 }
7651
7652 /**
7653  * tracing_log_err - write an error to the tracing error log
7654  * @tr: The associated trace array for the error (NULL for top level array)
7655  * @loc: A string describing where the error occurred
7656  * @cmd: The tracing command that caused the error
7657  * @errs: The array of loc-specific static error strings
7658  * @type: The index into errs[], which produces the specific static err string
7659  * @pos: The position the caret should be placed in the cmd
7660  *
7661  * Writes an error into tracing/error_log of the form:
7662  *
7663  * <loc>: error: <text>
7664  *   Command: <cmd>
7665  *              ^
7666  *
7667  * tracing/error_log is a small log file containing the last
7668  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7669  * unless there has been a tracing error, and the error log can be
7670  * cleared and have its memory freed by writing the empty string in
7671  * truncation mode to it i.e. echo > tracing/error_log.
7672  *
7673  * NOTE: the @errs array along with the @type param are used to
7674  * produce a static error string - this string is not copied and saved
7675  * when the error is logged - only a pointer to it is saved.  See
7676  * existing callers for examples of how static strings are typically
7677  * defined for use with tracing_log_err().
7678  */
7679 void tracing_log_err(struct trace_array *tr,
7680                      const char *loc, const char *cmd,
7681                      const char **errs, u8 type, u8 pos)
7682 {
7683         struct tracing_log_err *err;
7684
7685         if (!tr)
7686                 tr = &global_trace;
7687
7688         mutex_lock(&tracing_err_log_lock);
7689         err = get_tracing_log_err(tr);
7690         if (PTR_ERR(err) == -ENOMEM) {
7691                 mutex_unlock(&tracing_err_log_lock);
7692                 return;
7693         }
7694
7695         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7696         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7697
7698         err->info.errs = errs;
7699         err->info.type = type;
7700         err->info.pos = pos;
7701         err->info.ts = local_clock();
7702
7703         list_add_tail(&err->list, &tr->err_log);
7704         mutex_unlock(&tracing_err_log_lock);
7705 }
7706
7707 static void clear_tracing_err_log(struct trace_array *tr)
7708 {
7709         struct tracing_log_err *err, *next;
7710
7711         mutex_lock(&tracing_err_log_lock);
7712         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7713                 list_del(&err->list);
7714                 kfree(err);
7715         }
7716
7717         tr->n_err_log_entries = 0;
7718         mutex_unlock(&tracing_err_log_lock);
7719 }
7720
7721 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7722 {
7723         struct trace_array *tr = m->private;
7724
7725         mutex_lock(&tracing_err_log_lock);
7726
7727         return seq_list_start(&tr->err_log, *pos);
7728 }
7729
7730 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7731 {
7732         struct trace_array *tr = m->private;
7733
7734         return seq_list_next(v, &tr->err_log, pos);
7735 }
7736
7737 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7738 {
7739         mutex_unlock(&tracing_err_log_lock);
7740 }
7741
7742 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7743 {
7744         u8 i;
7745
7746         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7747                 seq_putc(m, ' ');
7748         for (i = 0; i < pos; i++)
7749                 seq_putc(m, ' ');
7750         seq_puts(m, "^\n");
7751 }
7752
7753 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7754 {
7755         struct tracing_log_err *err = v;
7756
7757         if (err) {
7758                 const char *err_text = err->info.errs[err->info.type];
7759                 u64 sec = err->info.ts;
7760                 u32 nsec;
7761
7762                 nsec = do_div(sec, NSEC_PER_SEC);
7763                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7764                            err->loc, err_text);
7765                 seq_printf(m, "%s", err->cmd);
7766                 tracing_err_log_show_pos(m, err->info.pos);
7767         }
7768
7769         return 0;
7770 }
7771
7772 static const struct seq_operations tracing_err_log_seq_ops = {
7773         .start  = tracing_err_log_seq_start,
7774         .next   = tracing_err_log_seq_next,
7775         .stop   = tracing_err_log_seq_stop,
7776         .show   = tracing_err_log_seq_show
7777 };
7778
7779 static int tracing_err_log_open(struct inode *inode, struct file *file)
7780 {
7781         struct trace_array *tr = inode->i_private;
7782         int ret = 0;
7783
7784         ret = tracing_check_open_get_tr(tr);
7785         if (ret)
7786                 return ret;
7787
7788         /* If this file was opened for write, then erase contents */
7789         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7790                 clear_tracing_err_log(tr);
7791
7792         if (file->f_mode & FMODE_READ) {
7793                 ret = seq_open(file, &tracing_err_log_seq_ops);
7794                 if (!ret) {
7795                         struct seq_file *m = file->private_data;
7796                         m->private = tr;
7797                 } else {
7798                         trace_array_put(tr);
7799                 }
7800         }
7801         return ret;
7802 }
7803
7804 static ssize_t tracing_err_log_write(struct file *file,
7805                                      const char __user *buffer,
7806                                      size_t count, loff_t *ppos)
7807 {
7808         return count;
7809 }
7810
7811 static int tracing_err_log_release(struct inode *inode, struct file *file)
7812 {
7813         struct trace_array *tr = inode->i_private;
7814
7815         trace_array_put(tr);
7816
7817         if (file->f_mode & FMODE_READ)
7818                 seq_release(inode, file);
7819
7820         return 0;
7821 }
7822
7823 static const struct file_operations tracing_err_log_fops = {
7824         .open           = tracing_err_log_open,
7825         .write          = tracing_err_log_write,
7826         .read           = seq_read,
7827         .llseek         = seq_lseek,
7828         .release        = tracing_err_log_release,
7829 };
7830
7831 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7832 {
7833         struct trace_array *tr = inode->i_private;
7834         struct ftrace_buffer_info *info;
7835         int ret;
7836
7837         ret = tracing_check_open_get_tr(tr);
7838         if (ret)
7839                 return ret;
7840
7841         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7842         if (!info) {
7843                 trace_array_put(tr);
7844                 return -ENOMEM;
7845         }
7846
7847         mutex_lock(&trace_types_lock);
7848
7849         info->iter.tr           = tr;
7850         info->iter.cpu_file     = tracing_get_cpu(inode);
7851         info->iter.trace        = tr->current_trace;
7852         info->iter.array_buffer = &tr->array_buffer;
7853         info->spare             = NULL;
7854         /* Force reading ring buffer for first read */
7855         info->read              = (unsigned int)-1;
7856
7857         filp->private_data = info;
7858
7859         tr->trace_ref++;
7860
7861         mutex_unlock(&trace_types_lock);
7862
7863         ret = nonseekable_open(inode, filp);
7864         if (ret < 0)
7865                 trace_array_put(tr);
7866
7867         return ret;
7868 }
7869
7870 static __poll_t
7871 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7872 {
7873         struct ftrace_buffer_info *info = filp->private_data;
7874         struct trace_iterator *iter = &info->iter;
7875
7876         return trace_poll(iter, filp, poll_table);
7877 }
7878
7879 static ssize_t
7880 tracing_buffers_read(struct file *filp, char __user *ubuf,
7881                      size_t count, loff_t *ppos)
7882 {
7883         struct ftrace_buffer_info *info = filp->private_data;
7884         struct trace_iterator *iter = &info->iter;
7885         ssize_t ret = 0;
7886         ssize_t size;
7887
7888         if (!count)
7889                 return 0;
7890
7891 #ifdef CONFIG_TRACER_MAX_TRACE
7892         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7893                 return -EBUSY;
7894 #endif
7895
7896         if (!info->spare) {
7897                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7898                                                           iter->cpu_file);
7899                 if (IS_ERR(info->spare)) {
7900                         ret = PTR_ERR(info->spare);
7901                         info->spare = NULL;
7902                 } else {
7903                         info->spare_cpu = iter->cpu_file;
7904                 }
7905         }
7906         if (!info->spare)
7907                 return ret;
7908
7909         /* Do we have previous read data to read? */
7910         if (info->read < PAGE_SIZE)
7911                 goto read;
7912
7913  again:
7914         trace_access_lock(iter->cpu_file);
7915         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7916                                     &info->spare,
7917                                     count,
7918                                     iter->cpu_file, 0);
7919         trace_access_unlock(iter->cpu_file);
7920
7921         if (ret < 0) {
7922                 if (trace_empty(iter)) {
7923                         if ((filp->f_flags & O_NONBLOCK))
7924                                 return -EAGAIN;
7925
7926                         ret = wait_on_pipe(iter, 0);
7927                         if (ret)
7928                                 return ret;
7929
7930                         goto again;
7931                 }
7932                 return 0;
7933         }
7934
7935         info->read = 0;
7936  read:
7937         size = PAGE_SIZE - info->read;
7938         if (size > count)
7939                 size = count;
7940
7941         ret = copy_to_user(ubuf, info->spare + info->read, size);
7942         if (ret == size)
7943                 return -EFAULT;
7944
7945         size -= ret;
7946
7947         *ppos += size;
7948         info->read += size;
7949
7950         return size;
7951 }
7952
7953 static int tracing_buffers_release(struct inode *inode, struct file *file)
7954 {
7955         struct ftrace_buffer_info *info = file->private_data;
7956         struct trace_iterator *iter = &info->iter;
7957
7958         mutex_lock(&trace_types_lock);
7959
7960         iter->tr->trace_ref--;
7961
7962         __trace_array_put(iter->tr);
7963
7964         if (info->spare)
7965                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7966                                            info->spare_cpu, info->spare);
7967         kvfree(info);
7968
7969         mutex_unlock(&trace_types_lock);
7970
7971         return 0;
7972 }
7973
7974 struct buffer_ref {
7975         struct trace_buffer     *buffer;
7976         void                    *page;
7977         int                     cpu;
7978         refcount_t              refcount;
7979 };
7980
7981 static void buffer_ref_release(struct buffer_ref *ref)
7982 {
7983         if (!refcount_dec_and_test(&ref->refcount))
7984                 return;
7985         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7986         kfree(ref);
7987 }
7988
7989 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7990                                     struct pipe_buffer *buf)
7991 {
7992         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7993
7994         buffer_ref_release(ref);
7995         buf->private = 0;
7996 }
7997
7998 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7999                                 struct pipe_buffer *buf)
8000 {
8001         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8002
8003         if (refcount_read(&ref->refcount) > INT_MAX/2)
8004                 return false;
8005
8006         refcount_inc(&ref->refcount);
8007         return true;
8008 }
8009
8010 /* Pipe buffer operations for a buffer. */
8011 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8012         .release                = buffer_pipe_buf_release,
8013         .get                    = buffer_pipe_buf_get,
8014 };
8015
8016 /*
8017  * Callback from splice_to_pipe(), if we need to release some pages
8018  * at the end of the spd in case we error'ed out in filling the pipe.
8019  */
8020 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8021 {
8022         struct buffer_ref *ref =
8023                 (struct buffer_ref *)spd->partial[i].private;
8024
8025         buffer_ref_release(ref);
8026         spd->partial[i].private = 0;
8027 }
8028
8029 static ssize_t
8030 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8031                             struct pipe_inode_info *pipe, size_t len,
8032                             unsigned int flags)
8033 {
8034         struct ftrace_buffer_info *info = file->private_data;
8035         struct trace_iterator *iter = &info->iter;
8036         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8037         struct page *pages_def[PIPE_DEF_BUFFERS];
8038         struct splice_pipe_desc spd = {
8039                 .pages          = pages_def,
8040                 .partial        = partial_def,
8041                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8042                 .ops            = &buffer_pipe_buf_ops,
8043                 .spd_release    = buffer_spd_release,
8044         };
8045         struct buffer_ref *ref;
8046         int entries, i;
8047         ssize_t ret = 0;
8048
8049 #ifdef CONFIG_TRACER_MAX_TRACE
8050         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8051                 return -EBUSY;
8052 #endif
8053
8054         if (*ppos & (PAGE_SIZE - 1))
8055                 return -EINVAL;
8056
8057         if (len & (PAGE_SIZE - 1)) {
8058                 if (len < PAGE_SIZE)
8059                         return -EINVAL;
8060                 len &= PAGE_MASK;
8061         }
8062
8063         if (splice_grow_spd(pipe, &spd))
8064                 return -ENOMEM;
8065
8066  again:
8067         trace_access_lock(iter->cpu_file);
8068         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8069
8070         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8071                 struct page *page;
8072                 int r;
8073
8074                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8075                 if (!ref) {
8076                         ret = -ENOMEM;
8077                         break;
8078                 }
8079
8080                 refcount_set(&ref->refcount, 1);
8081                 ref->buffer = iter->array_buffer->buffer;
8082                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8083                 if (IS_ERR(ref->page)) {
8084                         ret = PTR_ERR(ref->page);
8085                         ref->page = NULL;
8086                         kfree(ref);
8087                         break;
8088                 }
8089                 ref->cpu = iter->cpu_file;
8090
8091                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8092                                           len, iter->cpu_file, 1);
8093                 if (r < 0) {
8094                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8095                                                    ref->page);
8096                         kfree(ref);
8097                         break;
8098                 }
8099
8100                 page = virt_to_page(ref->page);
8101
8102                 spd.pages[i] = page;
8103                 spd.partial[i].len = PAGE_SIZE;
8104                 spd.partial[i].offset = 0;
8105                 spd.partial[i].private = (unsigned long)ref;
8106                 spd.nr_pages++;
8107                 *ppos += PAGE_SIZE;
8108
8109                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8110         }
8111
8112         trace_access_unlock(iter->cpu_file);
8113         spd.nr_pages = i;
8114
8115         /* did we read anything? */
8116         if (!spd.nr_pages) {
8117                 if (ret)
8118                         goto out;
8119
8120                 ret = -EAGAIN;
8121                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8122                         goto out;
8123
8124                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8125                 if (ret)
8126                         goto out;
8127
8128                 goto again;
8129         }
8130
8131         ret = splice_to_pipe(pipe, &spd);
8132 out:
8133         splice_shrink_spd(&spd);
8134
8135         return ret;
8136 }
8137
8138 static const struct file_operations tracing_buffers_fops = {
8139         .open           = tracing_buffers_open,
8140         .read           = tracing_buffers_read,
8141         .poll           = tracing_buffers_poll,
8142         .release        = tracing_buffers_release,
8143         .splice_read    = tracing_buffers_splice_read,
8144         .llseek         = no_llseek,
8145 };
8146
8147 static ssize_t
8148 tracing_stats_read(struct file *filp, char __user *ubuf,
8149                    size_t count, loff_t *ppos)
8150 {
8151         struct inode *inode = file_inode(filp);
8152         struct trace_array *tr = inode->i_private;
8153         struct array_buffer *trace_buf = &tr->array_buffer;
8154         int cpu = tracing_get_cpu(inode);
8155         struct trace_seq *s;
8156         unsigned long cnt;
8157         unsigned long long t;
8158         unsigned long usec_rem;
8159
8160         s = kmalloc(sizeof(*s), GFP_KERNEL);
8161         if (!s)
8162                 return -ENOMEM;
8163
8164         trace_seq_init(s);
8165
8166         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8167         trace_seq_printf(s, "entries: %ld\n", cnt);
8168
8169         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8170         trace_seq_printf(s, "overrun: %ld\n", cnt);
8171
8172         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8173         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8174
8175         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8176         trace_seq_printf(s, "bytes: %ld\n", cnt);
8177
8178         if (trace_clocks[tr->clock_id].in_ns) {
8179                 /* local or global for trace_clock */
8180                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8181                 usec_rem = do_div(t, USEC_PER_SEC);
8182                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8183                                                                 t, usec_rem);
8184
8185                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8186                 usec_rem = do_div(t, USEC_PER_SEC);
8187                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8188         } else {
8189                 /* counter or tsc mode for trace_clock */
8190                 trace_seq_printf(s, "oldest event ts: %llu\n",
8191                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8192
8193                 trace_seq_printf(s, "now ts: %llu\n",
8194                                 ring_buffer_time_stamp(trace_buf->buffer));
8195         }
8196
8197         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8198         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8199
8200         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8201         trace_seq_printf(s, "read events: %ld\n", cnt);
8202
8203         count = simple_read_from_buffer(ubuf, count, ppos,
8204                                         s->buffer, trace_seq_used(s));
8205
8206         kfree(s);
8207
8208         return count;
8209 }
8210
8211 static const struct file_operations tracing_stats_fops = {
8212         .open           = tracing_open_generic_tr,
8213         .read           = tracing_stats_read,
8214         .llseek         = generic_file_llseek,
8215         .release        = tracing_release_generic_tr,
8216 };
8217
8218 #ifdef CONFIG_DYNAMIC_FTRACE
8219
8220 static ssize_t
8221 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8222                   size_t cnt, loff_t *ppos)
8223 {
8224         ssize_t ret;
8225         char *buf;
8226         int r;
8227
8228         /* 256 should be plenty to hold the amount needed */
8229         buf = kmalloc(256, GFP_KERNEL);
8230         if (!buf)
8231                 return -ENOMEM;
8232
8233         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8234                       ftrace_update_tot_cnt,
8235                       ftrace_number_of_pages,
8236                       ftrace_number_of_groups);
8237
8238         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8239         kfree(buf);
8240         return ret;
8241 }
8242
8243 static const struct file_operations tracing_dyn_info_fops = {
8244         .open           = tracing_open_generic,
8245         .read           = tracing_read_dyn_info,
8246         .llseek         = generic_file_llseek,
8247 };
8248 #endif /* CONFIG_DYNAMIC_FTRACE */
8249
8250 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8251 static void
8252 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8253                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8254                 void *data)
8255 {
8256         tracing_snapshot_instance(tr);
8257 }
8258
8259 static void
8260 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8261                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8262                       void *data)
8263 {
8264         struct ftrace_func_mapper *mapper = data;
8265         long *count = NULL;
8266
8267         if (mapper)
8268                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8269
8270         if (count) {
8271
8272                 if (*count <= 0)
8273                         return;
8274
8275                 (*count)--;
8276         }
8277
8278         tracing_snapshot_instance(tr);
8279 }
8280
8281 static int
8282 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8283                       struct ftrace_probe_ops *ops, void *data)
8284 {
8285         struct ftrace_func_mapper *mapper = data;
8286         long *count = NULL;
8287
8288         seq_printf(m, "%ps:", (void *)ip);
8289
8290         seq_puts(m, "snapshot");
8291
8292         if (mapper)
8293                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8294
8295         if (count)
8296                 seq_printf(m, ":count=%ld\n", *count);
8297         else
8298                 seq_puts(m, ":unlimited\n");
8299
8300         return 0;
8301 }
8302
8303 static int
8304 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8305                      unsigned long ip, void *init_data, void **data)
8306 {
8307         struct ftrace_func_mapper *mapper = *data;
8308
8309         if (!mapper) {
8310                 mapper = allocate_ftrace_func_mapper();
8311                 if (!mapper)
8312                         return -ENOMEM;
8313                 *data = mapper;
8314         }
8315
8316         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8317 }
8318
8319 static void
8320 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8321                      unsigned long ip, void *data)
8322 {
8323         struct ftrace_func_mapper *mapper = data;
8324
8325         if (!ip) {
8326                 if (!mapper)
8327                         return;
8328                 free_ftrace_func_mapper(mapper, NULL);
8329                 return;
8330         }
8331
8332         ftrace_func_mapper_remove_ip(mapper, ip);
8333 }
8334
8335 static struct ftrace_probe_ops snapshot_probe_ops = {
8336         .func                   = ftrace_snapshot,
8337         .print                  = ftrace_snapshot_print,
8338 };
8339
8340 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8341         .func                   = ftrace_count_snapshot,
8342         .print                  = ftrace_snapshot_print,
8343         .init                   = ftrace_snapshot_init,
8344         .free                   = ftrace_snapshot_free,
8345 };
8346
8347 static int
8348 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8349                                char *glob, char *cmd, char *param, int enable)
8350 {
8351         struct ftrace_probe_ops *ops;
8352         void *count = (void *)-1;
8353         char *number;
8354         int ret;
8355
8356         if (!tr)
8357                 return -ENODEV;
8358
8359         /* hash funcs only work with set_ftrace_filter */
8360         if (!enable)
8361                 return -EINVAL;
8362
8363         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8364
8365         if (glob[0] == '!')
8366                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8367
8368         if (!param)
8369                 goto out_reg;
8370
8371         number = strsep(&param, ":");
8372
8373         if (!strlen(number))
8374                 goto out_reg;
8375
8376         /*
8377          * We use the callback data field (which is a pointer)
8378          * as our counter.
8379          */
8380         ret = kstrtoul(number, 0, (unsigned long *)&count);
8381         if (ret)
8382                 return ret;
8383
8384  out_reg:
8385         ret = tracing_alloc_snapshot_instance(tr);
8386         if (ret < 0)
8387                 goto out;
8388
8389         ret = register_ftrace_function_probe(glob, tr, ops, count);
8390
8391  out:
8392         return ret < 0 ? ret : 0;
8393 }
8394
8395 static struct ftrace_func_command ftrace_snapshot_cmd = {
8396         .name                   = "snapshot",
8397         .func                   = ftrace_trace_snapshot_callback,
8398 };
8399
8400 static __init int register_snapshot_cmd(void)
8401 {
8402         return register_ftrace_command(&ftrace_snapshot_cmd);
8403 }
8404 #else
8405 static inline __init int register_snapshot_cmd(void) { return 0; }
8406 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8407
8408 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8409 {
8410         if (WARN_ON(!tr->dir))
8411                 return ERR_PTR(-ENODEV);
8412
8413         /* Top directory uses NULL as the parent */
8414         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8415                 return NULL;
8416
8417         /* All sub buffers have a descriptor */
8418         return tr->dir;
8419 }
8420
8421 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8422 {
8423         struct dentry *d_tracer;
8424
8425         if (tr->percpu_dir)
8426                 return tr->percpu_dir;
8427
8428         d_tracer = tracing_get_dentry(tr);
8429         if (IS_ERR(d_tracer))
8430                 return NULL;
8431
8432         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8433
8434         MEM_FAIL(!tr->percpu_dir,
8435                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8436
8437         return tr->percpu_dir;
8438 }
8439
8440 static struct dentry *
8441 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8442                       void *data, long cpu, const struct file_operations *fops)
8443 {
8444         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8445
8446         if (ret) /* See tracing_get_cpu() */
8447                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8448         return ret;
8449 }
8450
8451 static void
8452 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8453 {
8454         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8455         struct dentry *d_cpu;
8456         char cpu_dir[30]; /* 30 characters should be more than enough */
8457
8458         if (!d_percpu)
8459                 return;
8460
8461         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8462         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8463         if (!d_cpu) {
8464                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8465                 return;
8466         }
8467
8468         /* per cpu trace_pipe */
8469         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8470                                 tr, cpu, &tracing_pipe_fops);
8471
8472         /* per cpu trace */
8473         trace_create_cpu_file("trace", 0644, d_cpu,
8474                                 tr, cpu, &tracing_fops);
8475
8476         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8477                                 tr, cpu, &tracing_buffers_fops);
8478
8479         trace_create_cpu_file("stats", 0444, d_cpu,
8480                                 tr, cpu, &tracing_stats_fops);
8481
8482         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8483                                 tr, cpu, &tracing_entries_fops);
8484
8485 #ifdef CONFIG_TRACER_SNAPSHOT
8486         trace_create_cpu_file("snapshot", 0644, d_cpu,
8487                                 tr, cpu, &snapshot_fops);
8488
8489         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8490                                 tr, cpu, &snapshot_raw_fops);
8491 #endif
8492 }
8493
8494 #ifdef CONFIG_FTRACE_SELFTEST
8495 /* Let selftest have access to static functions in this file */
8496 #include "trace_selftest.c"
8497 #endif
8498
8499 static ssize_t
8500 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8501                         loff_t *ppos)
8502 {
8503         struct trace_option_dentry *topt = filp->private_data;
8504         char *buf;
8505
8506         if (topt->flags->val & topt->opt->bit)
8507                 buf = "1\n";
8508         else
8509                 buf = "0\n";
8510
8511         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8512 }
8513
8514 static ssize_t
8515 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8516                          loff_t *ppos)
8517 {
8518         struct trace_option_dentry *topt = filp->private_data;
8519         unsigned long val;
8520         int ret;
8521
8522         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8523         if (ret)
8524                 return ret;
8525
8526         if (val != 0 && val != 1)
8527                 return -EINVAL;
8528
8529         if (!!(topt->flags->val & topt->opt->bit) != val) {
8530                 mutex_lock(&trace_types_lock);
8531                 ret = __set_tracer_option(topt->tr, topt->flags,
8532                                           topt->opt, !val);
8533                 mutex_unlock(&trace_types_lock);
8534                 if (ret)
8535                         return ret;
8536         }
8537
8538         *ppos += cnt;
8539
8540         return cnt;
8541 }
8542
8543
8544 static const struct file_operations trace_options_fops = {
8545         .open = tracing_open_generic,
8546         .read = trace_options_read,
8547         .write = trace_options_write,
8548         .llseek = generic_file_llseek,
8549 };
8550
8551 /*
8552  * In order to pass in both the trace_array descriptor as well as the index
8553  * to the flag that the trace option file represents, the trace_array
8554  * has a character array of trace_flags_index[], which holds the index
8555  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8556  * The address of this character array is passed to the flag option file
8557  * read/write callbacks.
8558  *
8559  * In order to extract both the index and the trace_array descriptor,
8560  * get_tr_index() uses the following algorithm.
8561  *
8562  *   idx = *ptr;
8563  *
8564  * As the pointer itself contains the address of the index (remember
8565  * index[1] == 1).
8566  *
8567  * Then to get the trace_array descriptor, by subtracting that index
8568  * from the ptr, we get to the start of the index itself.
8569  *
8570  *   ptr - idx == &index[0]
8571  *
8572  * Then a simple container_of() from that pointer gets us to the
8573  * trace_array descriptor.
8574  */
8575 static void get_tr_index(void *data, struct trace_array **ptr,
8576                          unsigned int *pindex)
8577 {
8578         *pindex = *(unsigned char *)data;
8579
8580         *ptr = container_of(data - *pindex, struct trace_array,
8581                             trace_flags_index);
8582 }
8583
8584 static ssize_t
8585 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8586                         loff_t *ppos)
8587 {
8588         void *tr_index = filp->private_data;
8589         struct trace_array *tr;
8590         unsigned int index;
8591         char *buf;
8592
8593         get_tr_index(tr_index, &tr, &index);
8594
8595         if (tr->trace_flags & (1 << index))
8596                 buf = "1\n";
8597         else
8598                 buf = "0\n";
8599
8600         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8601 }
8602
8603 static ssize_t
8604 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8605                          loff_t *ppos)
8606 {
8607         void *tr_index = filp->private_data;
8608         struct trace_array *tr;
8609         unsigned int index;
8610         unsigned long val;
8611         int ret;
8612
8613         get_tr_index(tr_index, &tr, &index);
8614
8615         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8616         if (ret)
8617                 return ret;
8618
8619         if (val != 0 && val != 1)
8620                 return -EINVAL;
8621
8622         mutex_lock(&event_mutex);
8623         mutex_lock(&trace_types_lock);
8624         ret = set_tracer_flag(tr, 1 << index, val);
8625         mutex_unlock(&trace_types_lock);
8626         mutex_unlock(&event_mutex);
8627
8628         if (ret < 0)
8629                 return ret;
8630
8631         *ppos += cnt;
8632
8633         return cnt;
8634 }
8635
8636 static const struct file_operations trace_options_core_fops = {
8637         .open = tracing_open_generic,
8638         .read = trace_options_core_read,
8639         .write = trace_options_core_write,
8640         .llseek = generic_file_llseek,
8641 };
8642
8643 struct dentry *trace_create_file(const char *name,
8644                                  umode_t mode,
8645                                  struct dentry *parent,
8646                                  void *data,
8647                                  const struct file_operations *fops)
8648 {
8649         struct dentry *ret;
8650
8651         ret = tracefs_create_file(name, mode, parent, data, fops);
8652         if (!ret)
8653                 pr_warn("Could not create tracefs '%s' entry\n", name);
8654
8655         return ret;
8656 }
8657
8658
8659 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8660 {
8661         struct dentry *d_tracer;
8662
8663         if (tr->options)
8664                 return tr->options;
8665
8666         d_tracer = tracing_get_dentry(tr);
8667         if (IS_ERR(d_tracer))
8668                 return NULL;
8669
8670         tr->options = tracefs_create_dir("options", d_tracer);
8671         if (!tr->options) {
8672                 pr_warn("Could not create tracefs directory 'options'\n");
8673                 return NULL;
8674         }
8675
8676         return tr->options;
8677 }
8678
8679 static void
8680 create_trace_option_file(struct trace_array *tr,
8681                          struct trace_option_dentry *topt,
8682                          struct tracer_flags *flags,
8683                          struct tracer_opt *opt)
8684 {
8685         struct dentry *t_options;
8686
8687         t_options = trace_options_init_dentry(tr);
8688         if (!t_options)
8689                 return;
8690
8691         topt->flags = flags;
8692         topt->opt = opt;
8693         topt->tr = tr;
8694
8695         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8696                                     &trace_options_fops);
8697
8698 }
8699
8700 static void
8701 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8702 {
8703         struct trace_option_dentry *topts;
8704         struct trace_options *tr_topts;
8705         struct tracer_flags *flags;
8706         struct tracer_opt *opts;
8707         int cnt;
8708         int i;
8709
8710         if (!tracer)
8711                 return;
8712
8713         flags = tracer->flags;
8714
8715         if (!flags || !flags->opts)
8716                 return;
8717
8718         /*
8719          * If this is an instance, only create flags for tracers
8720          * the instance may have.
8721          */
8722         if (!trace_ok_for_array(tracer, tr))
8723                 return;
8724
8725         for (i = 0; i < tr->nr_topts; i++) {
8726                 /* Make sure there's no duplicate flags. */
8727                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8728                         return;
8729         }
8730
8731         opts = flags->opts;
8732
8733         for (cnt = 0; opts[cnt].name; cnt++)
8734                 ;
8735
8736         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8737         if (!topts)
8738                 return;
8739
8740         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8741                             GFP_KERNEL);
8742         if (!tr_topts) {
8743                 kfree(topts);
8744                 return;
8745         }
8746
8747         tr->topts = tr_topts;
8748         tr->topts[tr->nr_topts].tracer = tracer;
8749         tr->topts[tr->nr_topts].topts = topts;
8750         tr->nr_topts++;
8751
8752         for (cnt = 0; opts[cnt].name; cnt++) {
8753                 create_trace_option_file(tr, &topts[cnt], flags,
8754                                          &opts[cnt]);
8755                 MEM_FAIL(topts[cnt].entry == NULL,
8756                           "Failed to create trace option: %s",
8757                           opts[cnt].name);
8758         }
8759 }
8760
8761 static struct dentry *
8762 create_trace_option_core_file(struct trace_array *tr,
8763                               const char *option, long index)
8764 {
8765         struct dentry *t_options;
8766
8767         t_options = trace_options_init_dentry(tr);
8768         if (!t_options)
8769                 return NULL;
8770
8771         return trace_create_file(option, 0644, t_options,
8772                                  (void *)&tr->trace_flags_index[index],
8773                                  &trace_options_core_fops);
8774 }
8775
8776 static void create_trace_options_dir(struct trace_array *tr)
8777 {
8778         struct dentry *t_options;
8779         bool top_level = tr == &global_trace;
8780         int i;
8781
8782         t_options = trace_options_init_dentry(tr);
8783         if (!t_options)
8784                 return;
8785
8786         for (i = 0; trace_options[i]; i++) {
8787                 if (top_level ||
8788                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8789                         create_trace_option_core_file(tr, trace_options[i], i);
8790         }
8791 }
8792
8793 static ssize_t
8794 rb_simple_read(struct file *filp, char __user *ubuf,
8795                size_t cnt, loff_t *ppos)
8796 {
8797         struct trace_array *tr = filp->private_data;
8798         char buf[64];
8799         int r;
8800
8801         r = tracer_tracing_is_on(tr);
8802         r = sprintf(buf, "%d\n", r);
8803
8804         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8805 }
8806
8807 static ssize_t
8808 rb_simple_write(struct file *filp, const char __user *ubuf,
8809                 size_t cnt, loff_t *ppos)
8810 {
8811         struct trace_array *tr = filp->private_data;
8812         struct trace_buffer *buffer = tr->array_buffer.buffer;
8813         unsigned long val;
8814         int ret;
8815
8816         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8817         if (ret)
8818                 return ret;
8819
8820         if (buffer) {
8821                 mutex_lock(&trace_types_lock);
8822                 if (!!val == tracer_tracing_is_on(tr)) {
8823                         val = 0; /* do nothing */
8824                 } else if (val) {
8825                         tracer_tracing_on(tr);
8826                         if (tr->current_trace->start)
8827                                 tr->current_trace->start(tr);
8828                 } else {
8829                         tracer_tracing_off(tr);
8830                         if (tr->current_trace->stop)
8831                                 tr->current_trace->stop(tr);
8832                 }
8833                 mutex_unlock(&trace_types_lock);
8834         }
8835
8836         (*ppos)++;
8837
8838         return cnt;
8839 }
8840
8841 static const struct file_operations rb_simple_fops = {
8842         .open           = tracing_open_generic_tr,
8843         .read           = rb_simple_read,
8844         .write          = rb_simple_write,
8845         .release        = tracing_release_generic_tr,
8846         .llseek         = default_llseek,
8847 };
8848
8849 static ssize_t
8850 buffer_percent_read(struct file *filp, char __user *ubuf,
8851                     size_t cnt, loff_t *ppos)
8852 {
8853         struct trace_array *tr = filp->private_data;
8854         char buf[64];
8855         int r;
8856
8857         r = tr->buffer_percent;
8858         r = sprintf(buf, "%d\n", r);
8859
8860         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8861 }
8862
8863 static ssize_t
8864 buffer_percent_write(struct file *filp, const char __user *ubuf,
8865                      size_t cnt, loff_t *ppos)
8866 {
8867         struct trace_array *tr = filp->private_data;
8868         unsigned long val;
8869         int ret;
8870
8871         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8872         if (ret)
8873                 return ret;
8874
8875         if (val > 100)
8876                 return -EINVAL;
8877
8878         if (!val)
8879                 val = 1;
8880
8881         tr->buffer_percent = val;
8882
8883         (*ppos)++;
8884
8885         return cnt;
8886 }
8887
8888 static const struct file_operations buffer_percent_fops = {
8889         .open           = tracing_open_generic_tr,
8890         .read           = buffer_percent_read,
8891         .write          = buffer_percent_write,
8892         .release        = tracing_release_generic_tr,
8893         .llseek         = default_llseek,
8894 };
8895
8896 static struct dentry *trace_instance_dir;
8897
8898 static void
8899 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8900
8901 static int
8902 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8903 {
8904         enum ring_buffer_flags rb_flags;
8905
8906         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8907
8908         buf->tr = tr;
8909
8910         buf->buffer = ring_buffer_alloc(size, rb_flags);
8911         if (!buf->buffer)
8912                 return -ENOMEM;
8913
8914         buf->data = alloc_percpu(struct trace_array_cpu);
8915         if (!buf->data) {
8916                 ring_buffer_free(buf->buffer);
8917                 buf->buffer = NULL;
8918                 return -ENOMEM;
8919         }
8920
8921         /* Allocate the first page for all buffers */
8922         set_buffer_entries(&tr->array_buffer,
8923                            ring_buffer_size(tr->array_buffer.buffer, 0));
8924
8925         return 0;
8926 }
8927
8928 static int allocate_trace_buffers(struct trace_array *tr, int size)
8929 {
8930         int ret;
8931
8932         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8933         if (ret)
8934                 return ret;
8935
8936 #ifdef CONFIG_TRACER_MAX_TRACE
8937         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8938                                     allocate_snapshot ? size : 1);
8939         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8940                 ring_buffer_free(tr->array_buffer.buffer);
8941                 tr->array_buffer.buffer = NULL;
8942                 free_percpu(tr->array_buffer.data);
8943                 tr->array_buffer.data = NULL;
8944                 return -ENOMEM;
8945         }
8946         tr->allocated_snapshot = allocate_snapshot;
8947
8948         /*
8949          * Only the top level trace array gets its snapshot allocated
8950          * from the kernel command line.
8951          */
8952         allocate_snapshot = false;
8953 #endif
8954
8955         return 0;
8956 }
8957
8958 static void free_trace_buffer(struct array_buffer *buf)
8959 {
8960         if (buf->buffer) {
8961                 ring_buffer_free(buf->buffer);
8962                 buf->buffer = NULL;
8963                 free_percpu(buf->data);
8964                 buf->data = NULL;
8965         }
8966 }
8967
8968 static void free_trace_buffers(struct trace_array *tr)
8969 {
8970         if (!tr)
8971                 return;
8972
8973         free_trace_buffer(&tr->array_buffer);
8974
8975 #ifdef CONFIG_TRACER_MAX_TRACE
8976         free_trace_buffer(&tr->max_buffer);
8977 #endif
8978 }
8979
8980 static void init_trace_flags_index(struct trace_array *tr)
8981 {
8982         int i;
8983
8984         /* Used by the trace options files */
8985         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8986                 tr->trace_flags_index[i] = i;
8987 }
8988
8989 static void __update_tracer_options(struct trace_array *tr)
8990 {
8991         struct tracer *t;
8992
8993         for (t = trace_types; t; t = t->next)
8994                 add_tracer_options(tr, t);
8995 }
8996
8997 static void update_tracer_options(struct trace_array *tr)
8998 {
8999         mutex_lock(&trace_types_lock);
9000         __update_tracer_options(tr);
9001         mutex_unlock(&trace_types_lock);
9002 }
9003
9004 /* Must have trace_types_lock held */
9005 struct trace_array *trace_array_find(const char *instance)
9006 {
9007         struct trace_array *tr, *found = NULL;
9008
9009         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9010                 if (tr->name && strcmp(tr->name, instance) == 0) {
9011                         found = tr;
9012                         break;
9013                 }
9014         }
9015
9016         return found;
9017 }
9018
9019 struct trace_array *trace_array_find_get(const char *instance)
9020 {
9021         struct trace_array *tr;
9022
9023         mutex_lock(&trace_types_lock);
9024         tr = trace_array_find(instance);
9025         if (tr)
9026                 tr->ref++;
9027         mutex_unlock(&trace_types_lock);
9028
9029         return tr;
9030 }
9031
9032 static int trace_array_create_dir(struct trace_array *tr)
9033 {
9034         int ret;
9035
9036         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9037         if (!tr->dir)
9038                 return -EINVAL;
9039
9040         ret = event_trace_add_tracer(tr->dir, tr);
9041         if (ret)
9042                 tracefs_remove(tr->dir);
9043
9044         init_tracer_tracefs(tr, tr->dir);
9045         __update_tracer_options(tr);
9046
9047         return ret;
9048 }
9049
9050 static struct trace_array *trace_array_create(const char *name)
9051 {
9052         struct trace_array *tr;
9053         int ret;
9054
9055         ret = -ENOMEM;
9056         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9057         if (!tr)
9058                 return ERR_PTR(ret);
9059
9060         tr->name = kstrdup(name, GFP_KERNEL);
9061         if (!tr->name)
9062                 goto out_free_tr;
9063
9064         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9065                 goto out_free_tr;
9066
9067         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9068
9069         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9070
9071         raw_spin_lock_init(&tr->start_lock);
9072
9073         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9074
9075         tr->current_trace = &nop_trace;
9076
9077         INIT_LIST_HEAD(&tr->systems);
9078         INIT_LIST_HEAD(&tr->events);
9079         INIT_LIST_HEAD(&tr->hist_vars);
9080         INIT_LIST_HEAD(&tr->err_log);
9081
9082         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9083                 goto out_free_tr;
9084
9085         if (ftrace_allocate_ftrace_ops(tr) < 0)
9086                 goto out_free_tr;
9087
9088         ftrace_init_trace_array(tr);
9089
9090         init_trace_flags_index(tr);
9091
9092         if (trace_instance_dir) {
9093                 ret = trace_array_create_dir(tr);
9094                 if (ret)
9095                         goto out_free_tr;
9096         } else
9097                 __trace_early_add_events(tr);
9098
9099         list_add(&tr->list, &ftrace_trace_arrays);
9100
9101         tr->ref++;
9102
9103         return tr;
9104
9105  out_free_tr:
9106         ftrace_free_ftrace_ops(tr);
9107         free_trace_buffers(tr);
9108         free_cpumask_var(tr->tracing_cpumask);
9109         kfree(tr->name);
9110         kfree(tr);
9111
9112         return ERR_PTR(ret);
9113 }
9114
9115 static int instance_mkdir(const char *name)
9116 {
9117         struct trace_array *tr;
9118         int ret;
9119
9120         mutex_lock(&event_mutex);
9121         mutex_lock(&trace_types_lock);
9122
9123         ret = -EEXIST;
9124         if (trace_array_find(name))
9125                 goto out_unlock;
9126
9127         tr = trace_array_create(name);
9128
9129         ret = PTR_ERR_OR_ZERO(tr);
9130
9131 out_unlock:
9132         mutex_unlock(&trace_types_lock);
9133         mutex_unlock(&event_mutex);
9134         return ret;
9135 }
9136
9137 /**
9138  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9139  * @name: The name of the trace array to be looked up/created.
9140  *
9141  * Returns pointer to trace array with given name.
9142  * NULL, if it cannot be created.
9143  *
9144  * NOTE: This function increments the reference counter associated with the
9145  * trace array returned. This makes sure it cannot be freed while in use.
9146  * Use trace_array_put() once the trace array is no longer needed.
9147  * If the trace_array is to be freed, trace_array_destroy() needs to
9148  * be called after the trace_array_put(), or simply let user space delete
9149  * it from the tracefs instances directory. But until the
9150  * trace_array_put() is called, user space can not delete it.
9151  *
9152  */
9153 struct trace_array *trace_array_get_by_name(const char *name)
9154 {
9155         struct trace_array *tr;
9156
9157         mutex_lock(&event_mutex);
9158         mutex_lock(&trace_types_lock);
9159
9160         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9161                 if (tr->name && strcmp(tr->name, name) == 0)
9162                         goto out_unlock;
9163         }
9164
9165         tr = trace_array_create(name);
9166
9167         if (IS_ERR(tr))
9168                 tr = NULL;
9169 out_unlock:
9170         if (tr)
9171                 tr->ref++;
9172
9173         mutex_unlock(&trace_types_lock);
9174         mutex_unlock(&event_mutex);
9175         return tr;
9176 }
9177 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9178
9179 static int __remove_instance(struct trace_array *tr)
9180 {
9181         int i;
9182
9183         /* Reference counter for a newly created trace array = 1. */
9184         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9185                 return -EBUSY;
9186
9187         list_del(&tr->list);
9188
9189         /* Disable all the flags that were enabled coming in */
9190         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9191                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9192                         set_tracer_flag(tr, 1 << i, 0);
9193         }
9194
9195         tracing_set_nop(tr);
9196         clear_ftrace_function_probes(tr);
9197         event_trace_del_tracer(tr);
9198         ftrace_clear_pids(tr);
9199         ftrace_destroy_function_files(tr);
9200         tracefs_remove(tr->dir);
9201         free_percpu(tr->last_func_repeats);
9202         free_trace_buffers(tr);
9203
9204         for (i = 0; i < tr->nr_topts; i++) {
9205                 kfree(tr->topts[i].topts);
9206         }
9207         kfree(tr->topts);
9208
9209         free_cpumask_var(tr->tracing_cpumask);
9210         kfree(tr->name);
9211         kfree(tr);
9212
9213         return 0;
9214 }
9215
9216 int trace_array_destroy(struct trace_array *this_tr)
9217 {
9218         struct trace_array *tr;
9219         int ret;
9220
9221         if (!this_tr)
9222                 return -EINVAL;
9223
9224         mutex_lock(&event_mutex);
9225         mutex_lock(&trace_types_lock);
9226
9227         ret = -ENODEV;
9228
9229         /* Making sure trace array exists before destroying it. */
9230         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9231                 if (tr == this_tr) {
9232                         ret = __remove_instance(tr);
9233                         break;
9234                 }
9235         }
9236
9237         mutex_unlock(&trace_types_lock);
9238         mutex_unlock(&event_mutex);
9239
9240         return ret;
9241 }
9242 EXPORT_SYMBOL_GPL(trace_array_destroy);
9243
9244 static int instance_rmdir(const char *name)
9245 {
9246         struct trace_array *tr;
9247         int ret;
9248
9249         mutex_lock(&event_mutex);
9250         mutex_lock(&trace_types_lock);
9251
9252         ret = -ENODEV;
9253         tr = trace_array_find(name);
9254         if (tr)
9255                 ret = __remove_instance(tr);
9256
9257         mutex_unlock(&trace_types_lock);
9258         mutex_unlock(&event_mutex);
9259
9260         return ret;
9261 }
9262
9263 static __init void create_trace_instances(struct dentry *d_tracer)
9264 {
9265         struct trace_array *tr;
9266
9267         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9268                                                          instance_mkdir,
9269                                                          instance_rmdir);
9270         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9271                 return;
9272
9273         mutex_lock(&event_mutex);
9274         mutex_lock(&trace_types_lock);
9275
9276         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9277                 if (!tr->name)
9278                         continue;
9279                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9280                              "Failed to create instance directory\n"))
9281                         break;
9282         }
9283
9284         mutex_unlock(&trace_types_lock);
9285         mutex_unlock(&event_mutex);
9286 }
9287
9288 static void
9289 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9290 {
9291         struct trace_event_file *file;
9292         int cpu;
9293
9294         trace_create_file("available_tracers", 0444, d_tracer,
9295                         tr, &show_traces_fops);
9296
9297         trace_create_file("current_tracer", 0644, d_tracer,
9298                         tr, &set_tracer_fops);
9299
9300         trace_create_file("tracing_cpumask", 0644, d_tracer,
9301                           tr, &tracing_cpumask_fops);
9302
9303         trace_create_file("trace_options", 0644, d_tracer,
9304                           tr, &tracing_iter_fops);
9305
9306         trace_create_file("trace", 0644, d_tracer,
9307                           tr, &tracing_fops);
9308
9309         trace_create_file("trace_pipe", 0444, d_tracer,
9310                           tr, &tracing_pipe_fops);
9311
9312         trace_create_file("buffer_size_kb", 0644, d_tracer,
9313                           tr, &tracing_entries_fops);
9314
9315         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9316                           tr, &tracing_total_entries_fops);
9317
9318         trace_create_file("free_buffer", 0200, d_tracer,
9319                           tr, &tracing_free_buffer_fops);
9320
9321         trace_create_file("trace_marker", 0220, d_tracer,
9322                           tr, &tracing_mark_fops);
9323
9324         file = __find_event_file(tr, "ftrace", "print");
9325         if (file && file->dir)
9326                 trace_create_file("trigger", 0644, file->dir, file,
9327                                   &event_trigger_fops);
9328         tr->trace_marker_file = file;
9329
9330         trace_create_file("trace_marker_raw", 0220, d_tracer,
9331                           tr, &tracing_mark_raw_fops);
9332
9333         trace_create_file("trace_clock", 0644, d_tracer, tr,
9334                           &trace_clock_fops);
9335
9336         trace_create_file("tracing_on", 0644, d_tracer,
9337                           tr, &rb_simple_fops);
9338
9339         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9340                           &trace_time_stamp_mode_fops);
9341
9342         tr->buffer_percent = 50;
9343
9344         trace_create_file("buffer_percent", 0444, d_tracer,
9345                         tr, &buffer_percent_fops);
9346
9347         create_trace_options_dir(tr);
9348
9349 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9350         trace_create_maxlat_file(tr, d_tracer);
9351 #endif
9352
9353         if (ftrace_create_function_files(tr, d_tracer))
9354                 MEM_FAIL(1, "Could not allocate function filter files");
9355
9356 #ifdef CONFIG_TRACER_SNAPSHOT
9357         trace_create_file("snapshot", 0644, d_tracer,
9358                           tr, &snapshot_fops);
9359 #endif
9360
9361         trace_create_file("error_log", 0644, d_tracer,
9362                           tr, &tracing_err_log_fops);
9363
9364         for_each_tracing_cpu(cpu)
9365                 tracing_init_tracefs_percpu(tr, cpu);
9366
9367         ftrace_init_tracefs(tr, d_tracer);
9368 }
9369
9370 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9371 {
9372         struct vfsmount *mnt;
9373         struct file_system_type *type;
9374
9375         /*
9376          * To maintain backward compatibility for tools that mount
9377          * debugfs to get to the tracing facility, tracefs is automatically
9378          * mounted to the debugfs/tracing directory.
9379          */
9380         type = get_fs_type("tracefs");
9381         if (!type)
9382                 return NULL;
9383         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9384         put_filesystem(type);
9385         if (IS_ERR(mnt))
9386                 return NULL;
9387         mntget(mnt);
9388
9389         return mnt;
9390 }
9391
9392 /**
9393  * tracing_init_dentry - initialize top level trace array
9394  *
9395  * This is called when creating files or directories in the tracing
9396  * directory. It is called via fs_initcall() by any of the boot up code
9397  * and expects to return the dentry of the top level tracing directory.
9398  */
9399 int tracing_init_dentry(void)
9400 {
9401         struct trace_array *tr = &global_trace;
9402
9403         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9404                 pr_warn("Tracing disabled due to lockdown\n");
9405                 return -EPERM;
9406         }
9407
9408         /* The top level trace array uses  NULL as parent */
9409         if (tr->dir)
9410                 return 0;
9411
9412         if (WARN_ON(!tracefs_initialized()))
9413                 return -ENODEV;
9414
9415         /*
9416          * As there may still be users that expect the tracing
9417          * files to exist in debugfs/tracing, we must automount
9418          * the tracefs file system there, so older tools still
9419          * work with the newer kernel.
9420          */
9421         tr->dir = debugfs_create_automount("tracing", NULL,
9422                                            trace_automount, NULL);
9423
9424         return 0;
9425 }
9426
9427 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9428 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9429
9430 static struct workqueue_struct *eval_map_wq __initdata;
9431 static struct work_struct eval_map_work __initdata;
9432
9433 static void __init eval_map_work_func(struct work_struct *work)
9434 {
9435         int len;
9436
9437         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9438         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9439 }
9440
9441 static int __init trace_eval_init(void)
9442 {
9443         INIT_WORK(&eval_map_work, eval_map_work_func);
9444
9445         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9446         if (!eval_map_wq) {
9447                 pr_err("Unable to allocate eval_map_wq\n");
9448                 /* Do work here */
9449                 eval_map_work_func(&eval_map_work);
9450                 return -ENOMEM;
9451         }
9452
9453         queue_work(eval_map_wq, &eval_map_work);
9454         return 0;
9455 }
9456
9457 static int __init trace_eval_sync(void)
9458 {
9459         /* Make sure the eval map updates are finished */
9460         if (eval_map_wq)
9461                 destroy_workqueue(eval_map_wq);
9462         return 0;
9463 }
9464
9465 late_initcall_sync(trace_eval_sync);
9466
9467
9468 #ifdef CONFIG_MODULES
9469 static void trace_module_add_evals(struct module *mod)
9470 {
9471         if (!mod->num_trace_evals)
9472                 return;
9473
9474         /*
9475          * Modules with bad taint do not have events created, do
9476          * not bother with enums either.
9477          */
9478         if (trace_module_has_bad_taint(mod))
9479                 return;
9480
9481         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9482 }
9483
9484 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9485 static void trace_module_remove_evals(struct module *mod)
9486 {
9487         union trace_eval_map_item *map;
9488         union trace_eval_map_item **last = &trace_eval_maps;
9489
9490         if (!mod->num_trace_evals)
9491                 return;
9492
9493         mutex_lock(&trace_eval_mutex);
9494
9495         map = trace_eval_maps;
9496
9497         while (map) {
9498                 if (map->head.mod == mod)
9499                         break;
9500                 map = trace_eval_jmp_to_tail(map);
9501                 last = &map->tail.next;
9502                 map = map->tail.next;
9503         }
9504         if (!map)
9505                 goto out;
9506
9507         *last = trace_eval_jmp_to_tail(map)->tail.next;
9508         kfree(map);
9509  out:
9510         mutex_unlock(&trace_eval_mutex);
9511 }
9512 #else
9513 static inline void trace_module_remove_evals(struct module *mod) { }
9514 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9515
9516 static int trace_module_notify(struct notifier_block *self,
9517                                unsigned long val, void *data)
9518 {
9519         struct module *mod = data;
9520
9521         switch (val) {
9522         case MODULE_STATE_COMING:
9523                 trace_module_add_evals(mod);
9524                 break;
9525         case MODULE_STATE_GOING:
9526                 trace_module_remove_evals(mod);
9527                 break;
9528         }
9529
9530         return NOTIFY_OK;
9531 }
9532
9533 static struct notifier_block trace_module_nb = {
9534         .notifier_call = trace_module_notify,
9535         .priority = 0,
9536 };
9537 #endif /* CONFIG_MODULES */
9538
9539 static __init int tracer_init_tracefs(void)
9540 {
9541         int ret;
9542
9543         trace_access_lock_init();
9544
9545         ret = tracing_init_dentry();
9546         if (ret)
9547                 return 0;
9548
9549         event_trace_init();
9550
9551         init_tracer_tracefs(&global_trace, NULL);
9552         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9553
9554         trace_create_file("tracing_thresh", 0644, NULL,
9555                         &global_trace, &tracing_thresh_fops);
9556
9557         trace_create_file("README", 0444, NULL,
9558                         NULL, &tracing_readme_fops);
9559
9560         trace_create_file("saved_cmdlines", 0444, NULL,
9561                         NULL, &tracing_saved_cmdlines_fops);
9562
9563         trace_create_file("saved_cmdlines_size", 0644, NULL,
9564                           NULL, &tracing_saved_cmdlines_size_fops);
9565
9566         trace_create_file("saved_tgids", 0444, NULL,
9567                         NULL, &tracing_saved_tgids_fops);
9568
9569         trace_eval_init();
9570
9571         trace_create_eval_file(NULL);
9572
9573 #ifdef CONFIG_MODULES
9574         register_module_notifier(&trace_module_nb);
9575 #endif
9576
9577 #ifdef CONFIG_DYNAMIC_FTRACE
9578         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9579                         NULL, &tracing_dyn_info_fops);
9580 #endif
9581
9582         create_trace_instances(NULL);
9583
9584         update_tracer_options(&global_trace);
9585
9586         return 0;
9587 }
9588
9589 fs_initcall(tracer_init_tracefs);
9590
9591 static int trace_panic_handler(struct notifier_block *this,
9592                                unsigned long event, void *unused)
9593 {
9594         if (ftrace_dump_on_oops)
9595                 ftrace_dump(ftrace_dump_on_oops);
9596         return NOTIFY_OK;
9597 }
9598
9599 static struct notifier_block trace_panic_notifier = {
9600         .notifier_call  = trace_panic_handler,
9601         .next           = NULL,
9602         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9603 };
9604
9605 static int trace_die_handler(struct notifier_block *self,
9606                              unsigned long val,
9607                              void *data)
9608 {
9609         switch (val) {
9610         case DIE_OOPS:
9611                 if (ftrace_dump_on_oops)
9612                         ftrace_dump(ftrace_dump_on_oops);
9613                 break;
9614         default:
9615                 break;
9616         }
9617         return NOTIFY_OK;
9618 }
9619
9620 static struct notifier_block trace_die_notifier = {
9621         .notifier_call = trace_die_handler,
9622         .priority = 200
9623 };
9624
9625 /*
9626  * printk is set to max of 1024, we really don't need it that big.
9627  * Nothing should be printing 1000 characters anyway.
9628  */
9629 #define TRACE_MAX_PRINT         1000
9630
9631 /*
9632  * Define here KERN_TRACE so that we have one place to modify
9633  * it if we decide to change what log level the ftrace dump
9634  * should be at.
9635  */
9636 #define KERN_TRACE              KERN_EMERG
9637
9638 void
9639 trace_printk_seq(struct trace_seq *s)
9640 {
9641         /* Probably should print a warning here. */
9642         if (s->seq.len >= TRACE_MAX_PRINT)
9643                 s->seq.len = TRACE_MAX_PRINT;
9644
9645         /*
9646          * More paranoid code. Although the buffer size is set to
9647          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9648          * an extra layer of protection.
9649          */
9650         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9651                 s->seq.len = s->seq.size - 1;
9652
9653         /* should be zero ended, but we are paranoid. */
9654         s->buffer[s->seq.len] = 0;
9655
9656         printk(KERN_TRACE "%s", s->buffer);
9657
9658         trace_seq_init(s);
9659 }
9660
9661 void trace_init_global_iter(struct trace_iterator *iter)
9662 {
9663         iter->tr = &global_trace;
9664         iter->trace = iter->tr->current_trace;
9665         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9666         iter->array_buffer = &global_trace.array_buffer;
9667
9668         if (iter->trace && iter->trace->open)
9669                 iter->trace->open(iter);
9670
9671         /* Annotate start of buffers if we had overruns */
9672         if (ring_buffer_overruns(iter->array_buffer->buffer))
9673                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9674
9675         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9676         if (trace_clocks[iter->tr->clock_id].in_ns)
9677                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9678 }
9679
9680 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9681 {
9682         /* use static because iter can be a bit big for the stack */
9683         static struct trace_iterator iter;
9684         static atomic_t dump_running;
9685         struct trace_array *tr = &global_trace;
9686         unsigned int old_userobj;
9687         unsigned long flags;
9688         int cnt = 0, cpu;
9689
9690         /* Only allow one dump user at a time. */
9691         if (atomic_inc_return(&dump_running) != 1) {
9692                 atomic_dec(&dump_running);
9693                 return;
9694         }
9695
9696         /*
9697          * Always turn off tracing when we dump.
9698          * We don't need to show trace output of what happens
9699          * between multiple crashes.
9700          *
9701          * If the user does a sysrq-z, then they can re-enable
9702          * tracing with echo 1 > tracing_on.
9703          */
9704         tracing_off();
9705
9706         local_irq_save(flags);
9707         printk_nmi_direct_enter();
9708
9709         /* Simulate the iterator */
9710         trace_init_global_iter(&iter);
9711         /* Can not use kmalloc for iter.temp and iter.fmt */
9712         iter.temp = static_temp_buf;
9713         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9714         iter.fmt = static_fmt_buf;
9715         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9716
9717         for_each_tracing_cpu(cpu) {
9718                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9719         }
9720
9721         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9722
9723         /* don't look at user memory in panic mode */
9724         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9725
9726         switch (oops_dump_mode) {
9727         case DUMP_ALL:
9728                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9729                 break;
9730         case DUMP_ORIG:
9731                 iter.cpu_file = raw_smp_processor_id();
9732                 break;
9733         case DUMP_NONE:
9734                 goto out_enable;
9735         default:
9736                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9737                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9738         }
9739
9740         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9741
9742         /* Did function tracer already get disabled? */
9743         if (ftrace_is_dead()) {
9744                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9745                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9746         }
9747
9748         /*
9749          * We need to stop all tracing on all CPUS to read
9750          * the next buffer. This is a bit expensive, but is
9751          * not done often. We fill all what we can read,
9752          * and then release the locks again.
9753          */
9754
9755         while (!trace_empty(&iter)) {
9756
9757                 if (!cnt)
9758                         printk(KERN_TRACE "---------------------------------\n");
9759
9760                 cnt++;
9761
9762                 trace_iterator_reset(&iter);
9763                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9764
9765                 if (trace_find_next_entry_inc(&iter) != NULL) {
9766                         int ret;
9767
9768                         ret = print_trace_line(&iter);
9769                         if (ret != TRACE_TYPE_NO_CONSUME)
9770                                 trace_consume(&iter);
9771                 }
9772                 touch_nmi_watchdog();
9773
9774                 trace_printk_seq(&iter.seq);
9775         }
9776
9777         if (!cnt)
9778                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9779         else
9780                 printk(KERN_TRACE "---------------------------------\n");
9781
9782  out_enable:
9783         tr->trace_flags |= old_userobj;
9784
9785         for_each_tracing_cpu(cpu) {
9786                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9787         }
9788         atomic_dec(&dump_running);
9789         printk_nmi_direct_exit();
9790         local_irq_restore(flags);
9791 }
9792 EXPORT_SYMBOL_GPL(ftrace_dump);
9793
9794 #define WRITE_BUFSIZE  4096
9795
9796 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9797                                 size_t count, loff_t *ppos,
9798                                 int (*createfn)(const char *))
9799 {
9800         char *kbuf, *buf, *tmp;
9801         int ret = 0;
9802         size_t done = 0;
9803         size_t size;
9804
9805         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9806         if (!kbuf)
9807                 return -ENOMEM;
9808
9809         while (done < count) {
9810                 size = count - done;
9811
9812                 if (size >= WRITE_BUFSIZE)
9813                         size = WRITE_BUFSIZE - 1;
9814
9815                 if (copy_from_user(kbuf, buffer + done, size)) {
9816                         ret = -EFAULT;
9817                         goto out;
9818                 }
9819                 kbuf[size] = '\0';
9820                 buf = kbuf;
9821                 do {
9822                         tmp = strchr(buf, '\n');
9823                         if (tmp) {
9824                                 *tmp = '\0';
9825                                 size = tmp - buf + 1;
9826                         } else {
9827                                 size = strlen(buf);
9828                                 if (done + size < count) {
9829                                         if (buf != kbuf)
9830                                                 break;
9831                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9832                                         pr_warn("Line length is too long: Should be less than %d\n",
9833                                                 WRITE_BUFSIZE - 2);
9834                                         ret = -EINVAL;
9835                                         goto out;
9836                                 }
9837                         }
9838                         done += size;
9839
9840                         /* Remove comments */
9841                         tmp = strchr(buf, '#');
9842
9843                         if (tmp)
9844                                 *tmp = '\0';
9845
9846                         ret = createfn(buf);
9847                         if (ret)
9848                                 goto out;
9849                         buf += size;
9850
9851                 } while (done < count);
9852         }
9853         ret = done;
9854
9855 out:
9856         kfree(kbuf);
9857
9858         return ret;
9859 }
9860
9861 __init static int tracer_alloc_buffers(void)
9862 {
9863         int ring_buf_size;
9864         int ret = -ENOMEM;
9865
9866
9867         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9868                 pr_warn("Tracing disabled due to lockdown\n");
9869                 return -EPERM;
9870         }
9871
9872         /*
9873          * Make sure we don't accidentally add more trace options
9874          * than we have bits for.
9875          */
9876         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9877
9878         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9879                 goto out;
9880
9881         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9882                 goto out_free_buffer_mask;
9883
9884         /* Only allocate trace_printk buffers if a trace_printk exists */
9885         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9886                 /* Must be called before global_trace.buffer is allocated */
9887                 trace_printk_init_buffers();
9888
9889         /* To save memory, keep the ring buffer size to its minimum */
9890         if (ring_buffer_expanded)
9891                 ring_buf_size = trace_buf_size;
9892         else
9893                 ring_buf_size = 1;
9894
9895         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9896         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9897
9898         raw_spin_lock_init(&global_trace.start_lock);
9899
9900         /*
9901          * The prepare callbacks allocates some memory for the ring buffer. We
9902          * don't free the buffer if the CPU goes down. If we were to free
9903          * the buffer, then the user would lose any trace that was in the
9904          * buffer. The memory will be removed once the "instance" is removed.
9905          */
9906         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9907                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9908                                       NULL);
9909         if (ret < 0)
9910                 goto out_free_cpumask;
9911         /* Used for event triggers */
9912         ret = -ENOMEM;
9913         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9914         if (!temp_buffer)
9915                 goto out_rm_hp_state;
9916
9917         if (trace_create_savedcmd() < 0)
9918                 goto out_free_temp_buffer;
9919
9920         /* TODO: make the number of buffers hot pluggable with CPUS */
9921         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9922                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9923                 goto out_free_savedcmd;
9924         }
9925
9926         if (global_trace.buffer_disabled)
9927                 tracing_off();
9928
9929         if (trace_boot_clock) {
9930                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9931                 if (ret < 0)
9932                         pr_warn("Trace clock %s not defined, going back to default\n",
9933                                 trace_boot_clock);
9934         }
9935
9936         /*
9937          * register_tracer() might reference current_trace, so it
9938          * needs to be set before we register anything. This is
9939          * just a bootstrap of current_trace anyway.
9940          */
9941         global_trace.current_trace = &nop_trace;
9942
9943         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9944
9945         ftrace_init_global_array_ops(&global_trace);
9946
9947         init_trace_flags_index(&global_trace);
9948
9949         register_tracer(&nop_trace);
9950
9951         /* Function tracing may start here (via kernel command line) */
9952         init_function_trace();
9953
9954         /* All seems OK, enable tracing */
9955         tracing_disabled = 0;
9956
9957         atomic_notifier_chain_register(&panic_notifier_list,
9958                                        &trace_panic_notifier);
9959
9960         register_die_notifier(&trace_die_notifier);
9961
9962         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9963
9964         INIT_LIST_HEAD(&global_trace.systems);
9965         INIT_LIST_HEAD(&global_trace.events);
9966         INIT_LIST_HEAD(&global_trace.hist_vars);
9967         INIT_LIST_HEAD(&global_trace.err_log);
9968         list_add(&global_trace.list, &ftrace_trace_arrays);
9969
9970         apply_trace_boot_options();
9971
9972         register_snapshot_cmd();
9973
9974         test_can_verify();
9975
9976         return 0;
9977
9978 out_free_savedcmd:
9979         free_saved_cmdlines_buffer(savedcmd);
9980 out_free_temp_buffer:
9981         ring_buffer_free(temp_buffer);
9982 out_rm_hp_state:
9983         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9984 out_free_cpumask:
9985         free_cpumask_var(global_trace.tracing_cpumask);
9986 out_free_buffer_mask:
9987         free_cpumask_var(tracing_buffer_mask);
9988 out:
9989         return ret;
9990 }
9991
9992 void __init early_trace_init(void)
9993 {
9994         if (tracepoint_printk) {
9995                 tracepoint_print_iter =
9996                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9997                 if (MEM_FAIL(!tracepoint_print_iter,
9998                              "Failed to allocate trace iterator\n"))
9999                         tracepoint_printk = 0;
10000                 else
10001                         static_key_enable(&tracepoint_printk_key.key);
10002         }
10003         tracer_alloc_buffers();
10004 }
10005
10006 void __init trace_init(void)
10007 {
10008         trace_event_init();
10009 }
10010
10011 __init static void clear_boot_tracer(void)
10012 {
10013         /*
10014          * The default tracer at boot buffer is an init section.
10015          * This function is called in lateinit. If we did not
10016          * find the boot tracer, then clear it out, to prevent
10017          * later registration from accessing the buffer that is
10018          * about to be freed.
10019          */
10020         if (!default_bootup_tracer)
10021                 return;
10022
10023         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10024                default_bootup_tracer);
10025         default_bootup_tracer = NULL;
10026 }
10027
10028 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10029 __init static void tracing_set_default_clock(void)
10030 {
10031         /* sched_clock_stable() is determined in late_initcall */
10032         if (!trace_boot_clock && !sched_clock_stable()) {
10033                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10034                         pr_warn("Can not set tracing clock due to lockdown\n");
10035                         return;
10036                 }
10037
10038                 printk(KERN_WARNING
10039                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10040                        "If you want to keep using the local clock, then add:\n"
10041                        "  \"trace_clock=local\"\n"
10042                        "on the kernel command line\n");
10043                 tracing_set_clock(&global_trace, "global");
10044         }
10045 }
10046 #else
10047 static inline void tracing_set_default_clock(void) { }
10048 #endif
10049
10050 __init static int late_trace_init(void)
10051 {
10052         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10053                 static_key_disable(&tracepoint_printk_key.key);
10054                 tracepoint_printk = 0;
10055         }
10056
10057         tracing_set_default_clock();
10058         clear_boot_tracer();
10059         return 0;
10060 }
10061
10062 late_initcall_sync(late_trace_init);