tracing: Resize tgid_map to pid_max, not PID_MAX_DEFAULT
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 and is set back to zero only if the
113  * initialization of the tracer succeeds. That is the only place that
114  * clears it.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
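
/*
 * Example (following the description above): booting with
 * "ftrace_dump_on_oops" on the kernel command line dumps the buffers of all
 * CPUs when an oops happens, while
 * "echo 2 > /proc/sys/kernel/ftrace_dump_on_oops" at run time switches to
 * dumping only the buffer of the CPU that triggered the oops.
 */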
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187
188 static int __init set_cmdline_ftrace(char *str)
189 {
190         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191         default_bootup_tracer = bootup_tracer_buf;
192         /* We are using ftrace early, expand it */
193         ring_buffer_expanded = true;
194         return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200         if (*str++ != '=' || !*str || !strcmp("1", str)) {
201                 ftrace_dump_on_oops = DUMP_ALL;
202                 return 1;
203         }
204
205         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
206                 ftrace_dump_on_oops = DUMP_ORIG;
207                 return 1;
208         }
209
210         return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213
214 static int __init stop_trace_on_warning(char *str)
215 {
216         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217                 __disable_trace_on_warning = 1;
218         return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221
222 static int __init boot_alloc_snapshot(char *str)
223 {
224         allocate_snapshot = true;
225         /* We also need the main ring buffer expanded */
226         ring_buffer_expanded = true;
227         return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230
231
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233
234 static int __init set_trace_boot_options(char *str)
235 {
236         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237         return 0;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243
244 static int __init set_trace_boot_clock(char *str)
245 {
246         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247         trace_boot_clock = trace_boot_clock_buf;
248         return 0;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251
252 static int __init set_tracepoint_printk(char *str)
253 {
254         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
255                 tracepoint_printk = 1;
256         return 1;
257 }
258 __setup("tp_printk", set_tracepoint_printk);
259
260 static int __init set_tracepoint_printk_stop(char *str)
261 {
262         tracepoint_printk_stop_on_boot = true;
263         return 1;
264 }
265 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
266
267 unsigned long long ns2usecs(u64 nsec)
268 {
269         nsec += 500;
270         do_div(nsec, 1000);
271         return nsec;
272 }
273
274 static void
275 trace_process_export(struct trace_export *export,
276                struct ring_buffer_event *event, int flag)
277 {
278         struct trace_entry *entry;
279         unsigned int size = 0;
280
281         if (export->flags & flag) {
282                 entry = ring_buffer_event_data(event);
283                 size = ring_buffer_event_length(event);
284                 export->write(export, entry, size);
285         }
286 }
287
288 static DEFINE_MUTEX(ftrace_export_lock);
289
290 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
291
292 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
293 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
295
296 static inline void ftrace_exports_enable(struct trace_export *export)
297 {
298         if (export->flags & TRACE_EXPORT_FUNCTION)
299                 static_branch_inc(&trace_function_exports_enabled);
300
301         if (export->flags & TRACE_EXPORT_EVENT)
302                 static_branch_inc(&trace_event_exports_enabled);
303
304         if (export->flags & TRACE_EXPORT_MARKER)
305                 static_branch_inc(&trace_marker_exports_enabled);
306 }
307
308 static inline void ftrace_exports_disable(struct trace_export *export)
309 {
310         if (export->flags & TRACE_EXPORT_FUNCTION)
311                 static_branch_dec(&trace_function_exports_enabled);
312
313         if (export->flags & TRACE_EXPORT_EVENT)
314                 static_branch_dec(&trace_event_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_MARKER)
317                 static_branch_dec(&trace_marker_exports_enabled);
318 }
319
320 static void ftrace_exports(struct ring_buffer_event *event, int flag)
321 {
322         struct trace_export *export;
323
324         preempt_disable_notrace();
325
326         export = rcu_dereference_raw_check(ftrace_exports_list);
327         while (export) {
328                 trace_process_export(export, event, flag);
329                 export = rcu_dereference_raw_check(export->next);
330         }
331
332         preempt_enable_notrace();
333 }
334
335 static inline void
336 add_trace_export(struct trace_export **list, struct trace_export *export)
337 {
338         rcu_assign_pointer(export->next, *list);
339         /*
340          * We are inserting export into the list, but another
341          * CPU might be walking that list. We need to make sure
342          * the export->next pointer is valid before another CPU sees
343          * the export pointer inserted into the list.
344          */
345         rcu_assign_pointer(*list, export);
346 }
347
348 static inline int
349 rm_trace_export(struct trace_export **list, struct trace_export *export)
350 {
351         struct trace_export **p;
352
353         for (p = list; *p != NULL; p = &(*p)->next)
354                 if (*p == export)
355                         break;
356
357         if (*p != export)
358                 return -1;
359
360         rcu_assign_pointer(*p, (*p)->next);
361
362         return 0;
363 }
364
365 static inline void
366 add_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         ftrace_exports_enable(export);
369
370         add_trace_export(list, export);
371 }
372
373 static inline int
374 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
375 {
376         int ret;
377
378         ret = rm_trace_export(list, export);
379         ftrace_exports_disable(export);
380
381         return ret;
382 }
383
384 int register_ftrace_export(struct trace_export *export)
385 {
386         if (WARN_ON_ONCE(!export->write))
387                 return -1;
388
389         mutex_lock(&ftrace_export_lock);
390
391         add_ftrace_export(&ftrace_exports_list, export);
392
393         mutex_unlock(&ftrace_export_lock);
394
395         return 0;
396 }
397 EXPORT_SYMBOL_GPL(register_ftrace_export);
398
399 int unregister_ftrace_export(struct trace_export *export)
400 {
401         int ret;
402
403         mutex_lock(&ftrace_export_lock);
404
405         ret = rm_ftrace_export(&ftrace_exports_list, export);
406
407         mutex_unlock(&ftrace_export_lock);
408
409         return ret;
410 }
411 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
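
/*
 * A minimal, illustrative sketch of a trace_export client (not part of the
 * original file; all example_* names are hypothetical). It receives a copy
 * of every function trace event and merely accounts for the exported bytes.
 */
static atomic_t example_exported_bytes = ATOMIC_INIT(0);

static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        /* A real client would forward @entry (@size bytes) to its sink. */
        atomic_add(size, &example_exported_bytes);
}

static struct trace_export example_export = {
        .write  = example_export_write,
        .flags  = TRACE_EXPORT_FUNCTION,
};

static int __init example_export_init(void)
{
        /* From here on, function events are also handed to ->write() above. */
        return register_ftrace_export(&example_export);
}
late_initcall(example_export_init);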
412
413 /* trace_flags holds trace_options default values */
414 #define TRACE_DEFAULT_FLAGS                                             \
415         (FUNCTION_DEFAULT_FLAGS |                                       \
416          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
417          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
418          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
419          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
420          TRACE_ITER_HASH_PTR)
421
422 /* trace_options that are only supported by global_trace */
423 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
424                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
425
426 /* trace_flags that are default zero for instances */
427 #define ZEROED_TRACE_FLAGS \
428         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
429
430 /*
431  * The global_trace is the descriptor that holds the top-level tracing
432  * buffers for the live tracing.
433  */
434 static struct trace_array global_trace = {
435         .trace_flags = TRACE_DEFAULT_FLAGS,
436 };
437
438 LIST_HEAD(ftrace_trace_arrays);
439
440 int trace_array_get(struct trace_array *this_tr)
441 {
442         struct trace_array *tr;
443         int ret = -ENODEV;
444
445         mutex_lock(&trace_types_lock);
446         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
447                 if (tr == this_tr) {
448                         tr->ref++;
449                         ret = 0;
450                         break;
451                 }
452         }
453         mutex_unlock(&trace_types_lock);
454
455         return ret;
456 }
457
458 static void __trace_array_put(struct trace_array *this_tr)
459 {
460         WARN_ON(!this_tr->ref);
461         this_tr->ref--;
462 }
463
464 /**
465  * trace_array_put - Decrement the reference counter for this trace array.
466  * @this_tr : pointer to the trace array
467  *
468  * NOTE: Use this when we no longer need the trace array returned by
469  * trace_array_get_by_name(). This ensures the trace array can be later
470  * destroyed.
471  *
472  */
473 void trace_array_put(struct trace_array *this_tr)
474 {
475         if (!this_tr)
476                 return;
477
478         mutex_lock(&trace_types_lock);
479         __trace_array_put(this_tr);
480         mutex_unlock(&trace_types_lock);
481 }
482 EXPORT_SYMBOL_GPL(trace_array_put);
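
/*
 * Illustrative pairing sketch (hypothetical caller): code that creates or
 * looks up its own "instances/example" trace array must drop the reference
 * with trace_array_put() once it no longer needs the pointer.
 */
static int __maybe_unused example_use_instance(void)
{
        struct trace_array *tr;

        tr = trace_array_get_by_name("example");
        if (!tr)
                return -ENOMEM;

        /* ... enable events on @tr, write to it, create files under it ... */

        trace_array_put(tr);    /* lets the instance be destroyed later */
        return 0;
}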
483
484 int tracing_check_open_get_tr(struct trace_array *tr)
485 {
486         int ret;
487
488         ret = security_locked_down(LOCKDOWN_TRACEFS);
489         if (ret)
490                 return ret;
491
492         if (tracing_disabled)
493                 return -ENODEV;
494
495         if (tr && trace_array_get(tr) < 0)
496                 return -ENODEV;
497
498         return 0;
499 }
500
501 int call_filter_check_discard(struct trace_event_call *call, void *rec,
502                               struct trace_buffer *buffer,
503                               struct ring_buffer_event *event)
504 {
505         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
506             !filter_match_preds(call->filter, rec)) {
507                 __trace_event_discard_commit(buffer, event);
508                 return 1;
509         }
510
511         return 0;
512 }
513
514 void trace_free_pid_list(struct trace_pid_list *pid_list)
515 {
516         vfree(pid_list->pids);
517         kfree(pid_list);
518 }
519
520 /**
521  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
522  * @filtered_pids: The list of pids to check
523  * @search_pid: The PID to find in @filtered_pids
524  *
525  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
526  */
527 bool
528 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
529 {
530         /*
531          * If pid_max changed after filtered_pids was created, we
532          * by default ignore all pids greater than the previous pid_max.
533          */
534         if (search_pid >= filtered_pids->pid_max)
535                 return false;
536
537         return test_bit(search_pid, filtered_pids->pids);
538 }
539
540 /**
541  * trace_ignore_this_task - should a task be ignored for tracing
542  * @filtered_pids: The list of pids to check
543  * @filtered_no_pids: The list of pids not to be traced
544  * @task: The task that should be ignored if not filtered
545  *
546  * Checks if @task should be traced or not from @filtered_pids.
547  * Returns true if @task should *NOT* be traced.
548  * Returns false if @task should be traced.
549  */
550 bool
551 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
552                        struct trace_pid_list *filtered_no_pids,
553                        struct task_struct *task)
554 {
555         /*
556          * If filtered_no_pids is not empty, and the task's pid is listed
557          * in filtered_no_pids, then return true.
558          * Otherwise, if filtered_pids is empty, that means we can
559          * trace all tasks. If it has content, then only trace pids
560          * within filtered_pids.
561          */
562
563         return (filtered_pids &&
564                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
565                 (filtered_no_pids &&
566                  trace_find_filtered_pid(filtered_no_pids, task->pid));
567 }
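
/*
 * Worked example of the logic above: with filtered_pids = { 42 } and
 * filtered_no_pids empty (NULL), only pid 42 is traced; with filtered_pids
 * empty and filtered_no_pids = { 42 }, every task except pid 42 is traced;
 * if both lists are NULL, no task is ignored.
 */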
568
569 /**
570  * trace_filter_add_remove_task - Add or remove a task from a pid_list
571  * @pid_list: The list to modify
572  * @self: The current task for fork or NULL for exit
573  * @task: The task to add or remove
574  *
575  * If adding a task, if @self is defined, the task is only added if @self
576  * is also included in @pid_list. This happens on fork and tasks should
577  * only be added when the parent is listed. If @self is NULL, then the
578  * @task pid will be removed from the list, which would happen on exit
579  * of a task.
580  */
581 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
582                                   struct task_struct *self,
583                                   struct task_struct *task)
584 {
585         if (!pid_list)
586                 return;
587
588         /* For forks, we only add if the forking task is listed */
589         if (self) {
590                 if (!trace_find_filtered_pid(pid_list, self->pid))
591                         return;
592         }
593
594         /* Sorry, but we don't support pid_max changing after setting */
595         if (task->pid >= pid_list->pid_max)
596                 return;
597
598         /* "self" is set for forks, and NULL for exits */
599         if (self)
600                 set_bit(task->pid, pid_list->pids);
601         else
602                 clear_bit(task->pid, pid_list->pids);
603 }
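
/*
 * Illustrative sketch of how a fork tracepoint probe keeps a pid list in
 * sync with the helper above (hypothetical names; the real probes live in
 * trace_events.c and ftrace.c and fetch their list from the trace_array):
 */
static void example_sched_process_fork_probe(void *data,
                                             struct task_struct *self,
                                             struct task_struct *task)
{
        struct trace_pid_list *pid_list = data;

        /* The child is added only if the forking task is itself listed. */
        trace_filter_add_remove_task(pid_list, self, task);
}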
604
605 /**
606  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
607  * @pid_list: The pid list to show
608  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
609  * @pos: The position of the file
610  *
611  * This is used by the seq_file "next" operation to iterate the pids
612  * listed in a trace_pid_list structure.
613  *
614  * Returns the pid+1 as we want to display pid of zero, but NULL would
615  * stop the iteration.
616  */
617 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
618 {
619         unsigned long pid = (unsigned long)v;
620
621         (*pos)++;
622
623         /* pid already is +1 of the actual previous bit */
624         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
625
626         /* Return pid + 1 to allow zero to be represented */
627         if (pid < pid_list->pid_max)
628                 return (void *)(pid + 1);
629
630         return NULL;
631 }
632
633 /**
634  * trace_pid_start - Used for seq_file to start reading pid lists
635  * @pid_list: The pid list to show
636  * @pos: The position of the file
637  *
638  * This is used by seq_file "start" operation to start the iteration
639  * of listing pids.
640  *
641  * Returns the pid+1 as we want to display pid of zero, but NULL would
642  * stop the iteration.
643  */
644 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
645 {
646         unsigned long pid;
647         loff_t l = 0;
648
649         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
650         if (pid >= pid_list->pid_max)
651                 return NULL;
652
653         /* Return pid + 1 so that zero can be the exit value */
654         for (pid++; pid && l < *pos;
655              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
656                 ;
657         return (void *)pid;
658 }
659
660 /**
661  * trace_pid_show - show the current pid in seq_file processing
662  * @m: The seq_file structure to write into
663  * @v: A void pointer of the pid (+1) value to display
664  *
665  * Can be directly used by seq_file operations to display the current
666  * pid value.
667  */
668 int trace_pid_show(struct seq_file *m, void *v)
669 {
670         unsigned long pid = (unsigned long)v - 1;
671
672         seq_printf(m, "%lu\n", pid);
673         return 0;
674 }
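
/*
 * A minimal seq_file hookup sketch for the three helpers above (everything
 * named example_* is hypothetical; real users such as set_ftrace_pid and
 * set_event_pid look similar but publish their list with RCU):
 */
static DEFINE_MUTEX(example_pids_lock);
static struct trace_pid_list *example_pids;     /* protected by example_pids_lock */

static void *example_pids_seq_start(struct seq_file *m, loff_t *pos)
{
        mutex_lock(&example_pids_lock);

        if (!example_pids)
                return NULL;

        return trace_pid_start(example_pids, pos);
}

static void *example_pids_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        return trace_pid_next(example_pids, v, pos);
}

static void example_pids_seq_stop(struct seq_file *m, void *v)
{
        /* Always called after ->start(), even when it returned NULL. */
        mutex_unlock(&example_pids_lock);
}

static const struct seq_operations example_pids_seq_ops = {
        .start  = example_pids_seq_start,
        .next   = example_pids_seq_next,
        .stop   = example_pids_seq_stop,
        .show   = trace_pid_show,
};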
675
676 /* 128 should be much more than enough */
677 #define PID_BUF_SIZE            127
678
679 int trace_pid_write(struct trace_pid_list *filtered_pids,
680                     struct trace_pid_list **new_pid_list,
681                     const char __user *ubuf, size_t cnt)
682 {
683         struct trace_pid_list *pid_list;
684         struct trace_parser parser;
685         unsigned long val;
686         int nr_pids = 0;
687         ssize_t read = 0;
688         ssize_t ret = 0;
689         loff_t pos;
690         pid_t pid;
691
692         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
693                 return -ENOMEM;
694
695         /*
696          * The write is an all-or-nothing operation: always create a new
697          * array when the user adds new pids rather than modifying the
698          * current one. If the operation fails, then the current list is
699          * left unmodified.
700          */
701         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
702         if (!pid_list) {
703                 trace_parser_put(&parser);
704                 return -ENOMEM;
705         }
706
707         pid_list->pid_max = READ_ONCE(pid_max);
708
709         /* Only truncating will shrink pid_max */
710         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
711                 pid_list->pid_max = filtered_pids->pid_max;
712
713         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
714         if (!pid_list->pids) {
715                 trace_parser_put(&parser);
716                 kfree(pid_list);
717                 return -ENOMEM;
718         }
719
720         if (filtered_pids) {
721                 /* copy the current bits to the new max */
722                 for_each_set_bit(pid, filtered_pids->pids,
723                                  filtered_pids->pid_max) {
724                         set_bit(pid, pid_list->pids);
725                         nr_pids++;
726                 }
727         }
728
729         while (cnt > 0) {
730
731                 pos = 0;
732
733                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
734                 if (ret < 0 || !trace_parser_loaded(&parser))
735                         break;
736
737                 read += ret;
738                 ubuf += ret;
739                 cnt -= ret;
740
741                 ret = -EINVAL;
742                 if (kstrtoul(parser.buffer, 0, &val))
743                         break;
744                 if (val >= pid_list->pid_max)
745                         break;
746
747                 pid = (pid_t)val;
748
749                 set_bit(pid, pid_list->pids);
750                 nr_pids++;
751
752                 trace_parser_clear(&parser);
753                 ret = 0;
754         }
755         trace_parser_put(&parser);
756
757         if (ret < 0) {
758                 trace_free_pid_list(pid_list);
759                 return ret;
760         }
761
762         if (!nr_pids) {
763                 /* Cleared the list of pids */
764                 trace_free_pid_list(pid_list);
765                 read = ret;
766                 pid_list = NULL;
767         }
768
769         *new_pid_list = pid_list;
770
771         return read;
772 }
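
/*
 * Illustrative ->write() handler built on trace_pid_write() above, reusing
 * the hypothetical example_pids list from the seq_file sketch earlier (real
 * users additionally RCU-publish the new list before freeing the old one):
 */
static ssize_t example_pids_fops_write(struct file *filp, const char __user *ubuf,
                                       size_t cnt, loff_t *ppos)
{
        struct trace_pid_list *new_list = NULL;
        ssize_t ret;

        mutex_lock(&example_pids_lock);
        ret = trace_pid_write(example_pids, &new_list, ubuf, cnt);
        if (ret >= 0) {
                if (example_pids)
                        trace_free_pid_list(example_pids);
                /* May be NULL when the write cleared the list. */
                example_pids = new_list;
        }
        mutex_unlock(&example_pids_lock);

        if (ret > 0)
                *ppos += ret;

        return ret;
}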
773
774 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
775 {
776         u64 ts;
777
778         /* Early boot up does not have a buffer yet */
779         if (!buf->buffer)
780                 return trace_clock_local();
781
782         ts = ring_buffer_time_stamp(buf->buffer);
783         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
784
785         return ts;
786 }
787
788 u64 ftrace_now(int cpu)
789 {
790         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
791 }
792
793 /**
794  * tracing_is_enabled - Show if global_trace has been enabled
795  *
796  * Shows if the global trace has been enabled or not. It uses the
797  * mirror flag "buffer_disabled", which is meant for fast paths such
798  * as the irqsoff tracer. But it may be inaccurate due to races. If you
799  * need to know the accurate state, use tracing_is_on() which is a little
800  * slower, but accurate.
801  */
802 int tracing_is_enabled(void)
803 {
804         /*
805          * For quick access (irqsoff uses this in fast path), just
806          * return the mirror variable of the state of the ring buffer.
807          * It's a little racy, but we don't really care.
808          */
809         smp_rmb();
810         return !global_trace.buffer_disabled;
811 }
812
813 /*
814  * trace_buf_size is the size in bytes that is allocated
815  * for a buffer. Note, the number of bytes is always rounded
816  * to page size.
817  *
818  * This number is purposely set to a low number of 16384.
819  * If a dump on oops happens, it is much appreciated not to have to
820  * wait for all that output. In any case, this is configurable at
821  * both boot time and run time.
822  */
823 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
824
825 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
826
827 /* trace_types holds a link list of available tracers. */
828 static struct tracer            *trace_types __read_mostly;
829
830 /*
831  * trace_types_lock is used to protect the trace_types list.
832  */
833 DEFINE_MUTEX(trace_types_lock);
834
835 /*
836  * Serialize access to the ring buffer.
837  *
838  * The ring buffer serializes readers, but that is only low-level protection.
839  * The validity of the events (returned by ring_buffer_peek() etc.)
840  * is not protected by the ring buffer.
841  *
842  * The content of events may become garbage if we allow other processes
843  * to consume these events concurrently:
844  *   A) the page of the consumed events may become a normal page
845  *      (not a reader page) in the ring buffer, and this page will be
846  *      rewritten by the event producer.
847  *   B) the page of the consumed events may become a page for splice_read,
848  *      and this page will be returned to the system.
849  *
850  * These primitives allow multiple processes to access different CPU
851  * ring buffers concurrently.
852  *
853  * These primitives don't distinguish read-only and read-consume access.
854  * Multiple read-only accesses are also serialized.
855  */
856
857 #ifdef CONFIG_SMP
858 static DECLARE_RWSEM(all_cpu_access_lock);
859 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
860
861 static inline void trace_access_lock(int cpu)
862 {
863         if (cpu == RING_BUFFER_ALL_CPUS) {
864                 /* gain it for accessing the whole ring buffer. */
865                 down_write(&all_cpu_access_lock);
866         } else {
867                 /* gain it for accessing a cpu ring buffer. */
868
869                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
870                 down_read(&all_cpu_access_lock);
871
872                 /* Secondly block other access to this @cpu ring buffer. */
873                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
874         }
875 }
876
877 static inline void trace_access_unlock(int cpu)
878 {
879         if (cpu == RING_BUFFER_ALL_CPUS) {
880                 up_write(&all_cpu_access_lock);
881         } else {
882                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
883                 up_read(&all_cpu_access_lock);
884         }
885 }
886
887 static inline void trace_access_lock_init(void)
888 {
889         int cpu;
890
891         for_each_possible_cpu(cpu)
892                 mutex_init(&per_cpu(cpu_access_lock, cpu));
893 }
894
895 #else
896
897 static DEFINE_MUTEX(access_lock);
898
899 static inline void trace_access_lock(int cpu)
900 {
901         (void)cpu;
902         mutex_lock(&access_lock);
903 }
904
905 static inline void trace_access_unlock(int cpu)
906 {
907         (void)cpu;
908         mutex_unlock(&access_lock);
909 }
910
911 static inline void trace_access_lock_init(void)
912 {
913 }
914
915 #endif
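
/*
 * Typical usage sketch for the primitives above (hypothetical consumer):
 * hold the per-cpu access lock while consuming events so that a concurrent
 * splice or read_page user cannot recycle the page underneath us.
 */
static void __maybe_unused example_consume_cpu_buffer(struct trace_buffer *buffer,
                                                      int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        trace_access_lock(cpu);

        while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)) != NULL) {
                /* Decode ring_buffer_event_data(event) here. */
        }

        trace_access_unlock(cpu);
}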
916
917 #ifdef CONFIG_STACKTRACE
918 static void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                  unsigned int trace_ctx,
920                                  int skip, struct pt_regs *regs);
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922                                       struct trace_buffer *buffer,
923                                       unsigned int trace_ctx,
924                                       int skip, struct pt_regs *regs);
925
926 #else
927 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
928                                         unsigned int trace_ctx,
929                                         int skip, struct pt_regs *regs)
930 {
931 }
932 static inline void ftrace_trace_stack(struct trace_array *tr,
933                                       struct trace_buffer *buffer,
934                                       unsigned long trace_ctx,
935                                       int skip, struct pt_regs *regs)
936 {
937 }
938
939 #endif
940
941 static __always_inline void
942 trace_event_setup(struct ring_buffer_event *event,
943                   int type, unsigned int trace_ctx)
944 {
945         struct trace_entry *ent = ring_buffer_event_data(event);
946
947         tracing_generic_entry_update(ent, type, trace_ctx);
948 }
949
950 static __always_inline struct ring_buffer_event *
951 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
952                           int type,
953                           unsigned long len,
954                           unsigned int trace_ctx)
955 {
956         struct ring_buffer_event *event;
957
958         event = ring_buffer_lock_reserve(buffer, len);
959         if (event != NULL)
960                 trace_event_setup(event, type, trace_ctx);
961
962         return event;
963 }
964
965 void tracer_tracing_on(struct trace_array *tr)
966 {
967         if (tr->array_buffer.buffer)
968                 ring_buffer_record_on(tr->array_buffer.buffer);
969         /*
970          * This flag is looked at when buffers haven't been allocated
971          * yet, or by some tracers (like irqsoff) that just want to
972          * know if the ring buffer has been disabled, but can handle
973          * races where it gets disabled while we still do a record.
974          * As the check is in the fast path of the tracers, it is more
975          * important to be fast than accurate.
976          */
977         tr->buffer_disabled = 0;
978         /* Make the flag seen by readers */
979         smp_wmb();
980 }
981
982 /**
983  * tracing_on - enable tracing buffers
984  *
985  * This function enables tracing buffers that may have been
986  * disabled with tracing_off.
987  */
988 void tracing_on(void)
989 {
990         tracer_tracing_on(&global_trace);
991 }
992 EXPORT_SYMBOL_GPL(tracing_on);
993
994
995 static __always_inline void
996 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
997 {
998         __this_cpu_write(trace_taskinfo_save, true);
999
1000         /* If this is the temp buffer, we need to commit fully */
1001         if (this_cpu_read(trace_buffered_event) == event) {
1002                 /* Length is in event->array[0] */
1003                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1004                 /* Release the temp buffer */
1005                 this_cpu_dec(trace_buffered_event_cnt);
1006         } else
1007                 ring_buffer_unlock_commit(buffer, event);
1008 }
1009
1010 /**
1011  * __trace_puts - write a constant string into the trace buffer.
1012  * @ip:    The address of the caller
1013  * @str:   The constant string to write
1014  * @size:  The size of the string.
1015  */
1016 int __trace_puts(unsigned long ip, const char *str, int size)
1017 {
1018         struct ring_buffer_event *event;
1019         struct trace_buffer *buffer;
1020         struct print_entry *entry;
1021         unsigned int trace_ctx;
1022         int alloc;
1023
1024         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1025                 return 0;
1026
1027         if (unlikely(tracing_selftest_running || tracing_disabled))
1028                 return 0;
1029
1030         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1031
1032         trace_ctx = tracing_gen_ctx();
1033         buffer = global_trace.array_buffer.buffer;
1034         ring_buffer_nest_start(buffer);
1035         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1036                                             trace_ctx);
1037         if (!event) {
1038                 size = 0;
1039                 goto out;
1040         }
1041
1042         entry = ring_buffer_event_data(event);
1043         entry->ip = ip;
1044
1045         memcpy(&entry->buf, str, size);
1046
1047         /* Add a newline if necessary */
1048         if (entry->buf[size - 1] != '\n') {
1049                 entry->buf[size] = '\n';
1050                 entry->buf[size + 1] = '\0';
1051         } else
1052                 entry->buf[size] = '\0';
1053
1054         __buffer_unlock_commit(buffer, event);
1055         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1056  out:
1057         ring_buffer_nest_end(buffer);
1058         return size;
1059 }
1060 EXPORT_SYMBOL_GPL(__trace_puts);
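
/*
 * Illustrative caller (hypothetical): the trace_puts() wrapper in
 * include/linux/kernel.h routes constant strings to __trace_bputs() and
 * everything else to __trace_puts() above.
 */
static void __maybe_unused example_mark_slow_path(void)
{
        trace_puts("example: entering slow path\n");
}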
1061
1062 /**
1063  * __trace_bputs - write the pointer to a constant string into trace buffer
1064  * @ip:    The address of the caller
1065  * @str:   The constant string whose address is written into the buffer
1066  */
1067 int __trace_bputs(unsigned long ip, const char *str)
1068 {
1069         struct ring_buffer_event *event;
1070         struct trace_buffer *buffer;
1071         struct bputs_entry *entry;
1072         unsigned int trace_ctx;
1073         int size = sizeof(struct bputs_entry);
1074         int ret = 0;
1075
1076         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1077                 return 0;
1078
1079         if (unlikely(tracing_selftest_running || tracing_disabled))
1080                 return 0;
1081
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = global_trace.array_buffer.buffer;
1084
1085         ring_buffer_nest_start(buffer);
1086         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1087                                             trace_ctx);
1088         if (!event)
1089                 goto out;
1090
1091         entry = ring_buffer_event_data(event);
1092         entry->ip                       = ip;
1093         entry->str                      = str;
1094
1095         __buffer_unlock_commit(buffer, event);
1096         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1097
1098         ret = 1;
1099  out:
1100         ring_buffer_nest_end(buffer);
1101         return ret;
1102 }
1103 EXPORT_SYMBOL_GPL(__trace_bputs);
1104
1105 #ifdef CONFIG_TRACER_SNAPSHOT
1106 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107                                            void *cond_data)
1108 {
1109         struct tracer *tracer = tr->current_trace;
1110         unsigned long flags;
1111
1112         if (in_nmi()) {
1113                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1114                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1115                 return;
1116         }
1117
1118         if (!tr->allocated_snapshot) {
1119                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1120                 internal_trace_puts("*** stopping trace here!   ***\n");
1121                 tracing_off();
1122                 return;
1123         }
1124
1125         /* Note, snapshot can not be used when the tracer uses it */
1126         if (tracer->use_max_tr) {
1127                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1128                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1129                 return;
1130         }
1131
1132         local_irq_save(flags);
1133         update_max_tr(tr, current, smp_processor_id(), cond_data);
1134         local_irq_restore(flags);
1135 }
1136
1137 void tracing_snapshot_instance(struct trace_array *tr)
1138 {
1139         tracing_snapshot_instance_cond(tr, NULL);
1140 }
1141
1142 /**
1143  * tracing_snapshot - take a snapshot of the current buffer.
1144  *
1145  * This causes a swap between the snapshot buffer and the current live
1146  * tracing buffer. You can use this to take snapshots of the live
1147  * trace when some condition is triggered, but continue to trace.
1148  *
1149  * Note, make sure to allocate the snapshot with either
1150  * a tracing_snapshot_alloc(), or by doing it manually
1151  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1152  *
1153  * If the snapshot buffer is not allocated, it will stop tracing.
1154  * Basically making a permanent snapshot.
1155  */
1156 void tracing_snapshot(void)
1157 {
1158         struct trace_array *tr = &global_trace;
1159
1160         tracing_snapshot_instance(tr);
1161 }
1162 EXPORT_SYMBOL_GPL(tracing_snapshot);
1163
1164 /**
1165  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166  * @tr:         The tracing instance to snapshot
1167  * @cond_data:  The data to be tested conditionally, and possibly saved
1168  *
1169  * This is the same as tracing_snapshot() except that the snapshot is
1170  * conditional - the snapshot will only happen if the
1171  * cond_snapshot.update() implementation receiving the cond_data
1172  * returns true, which means that the trace array's cond_snapshot
1173  * update() operation used the cond_data to determine whether the
1174  * snapshot should be taken, and if it was, presumably saved it along
1175  * with the snapshot.
1176  */
1177 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178 {
1179         tracing_snapshot_instance_cond(tr, cond_data);
1180 }
1181 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182
1183 /**
1184  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185  * @tr:         The tracing instance
1186  *
1187  * When the user enables a conditional snapshot using
1188  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189  * with the snapshot.  This accessor is used to retrieve it.
1190  *
1191  * Should not be called from cond_snapshot.update(), since it takes
1192  * the tr->max_lock lock, which the code calling
1193  * cond_snapshot.update() has already taken.
1194  *
1195  * Returns the cond_data associated with the trace array's snapshot.
1196  */
1197 void *tracing_cond_snapshot_data(struct trace_array *tr)
1198 {
1199         void *cond_data = NULL;
1200
1201         arch_spin_lock(&tr->max_lock);
1202
1203         if (tr->cond_snapshot)
1204                 cond_data = tr->cond_snapshot->cond_data;
1205
1206         arch_spin_unlock(&tr->max_lock);
1207
1208         return cond_data;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1211
1212 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1213                                         struct array_buffer *size_buf, int cpu_id);
1214 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1215
1216 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1217 {
1218         int ret;
1219
1220         if (!tr->allocated_snapshot) {
1221
1222                 /* allocate spare buffer */
1223                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1224                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1225                 if (ret < 0)
1226                         return ret;
1227
1228                 tr->allocated_snapshot = true;
1229         }
1230
1231         return 0;
1232 }
1233
1234 static void free_snapshot(struct trace_array *tr)
1235 {
1236         /*
1237          * We don't free the ring buffer; instead, we resize it because
1238          * the max_tr ring buffer has some state (e.g. ring->clock) and
1239          * we want to preserve it.
1240          */
1241         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1242         set_buffer_entries(&tr->max_buffer, 1);
1243         tracing_reset_online_cpus(&tr->max_buffer);
1244         tr->allocated_snapshot = false;
1245 }
1246
1247 /**
1248  * tracing_alloc_snapshot - allocate snapshot buffer.
1249  *
1250  * This only allocates the snapshot buffer if it isn't already
1251  * allocated - it doesn't also take a snapshot.
1252  *
1253  * This is meant to be used in cases where the snapshot buffer needs
1254  * to be set up for events that can't sleep but need to be able to
1255  * trigger a snapshot.
1256  */
1257 int tracing_alloc_snapshot(void)
1258 {
1259         struct trace_array *tr = &global_trace;
1260         int ret;
1261
1262         ret = tracing_alloc_snapshot_instance(tr);
1263         WARN_ON(ret < 0);
1264
1265         return ret;
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1268
1269 /**
1270  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1271  *
1272  * This is similar to tracing_snapshot(), but it will allocate the
1273  * snapshot buffer if it isn't already allocated. Use this only
1274  * where it is safe to sleep, as the allocation may sleep.
1275  *
1276  * This causes a swap between the snapshot buffer and the current live
1277  * tracing buffer. You can use this to take snapshots of the live
1278  * trace when some condition is triggered, but continue to trace.
1279  */
1280 void tracing_snapshot_alloc(void)
1281 {
1282         int ret;
1283
1284         ret = tracing_alloc_snapshot();
1285         if (ret < 0)
1286                 return;
1287
1288         tracing_snapshot();
1289 }
1290 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
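
/*
 * Illustrative pairing (hypothetical caller): allocate the spare buffer once
 * from a context that may sleep, then freeze the interesting trace from a
 * context that cannot, while normal tracing continues.
 */
static int __maybe_unused example_snapshot_setup(void)
{
        /* May sleep: sets up the spare (snapshot) buffer up front. */
        return tracing_alloc_snapshot();
}

static void __maybe_unused example_snapshot_on_error(void)
{
        /* Does not sleep once the spare buffer exists. */
        tracing_snapshot();
}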
1291
1292 /**
1293  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1294  * @tr:         The tracing instance
1295  * @cond_data:  User data to associate with the snapshot
1296  * @update:     Implementation of the cond_snapshot update function
1297  *
1298  * Check whether the conditional snapshot for the given instance has
1299  * already been enabled, or if the current tracer is already using a
1300  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1301  * save the cond_data and update function inside.
1302  *
1303  * Returns 0 if successful, error otherwise.
1304  */
1305 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1306                                  cond_update_fn_t update)
1307 {
1308         struct cond_snapshot *cond_snapshot;
1309         int ret = 0;
1310
1311         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1312         if (!cond_snapshot)
1313                 return -ENOMEM;
1314
1315         cond_snapshot->cond_data = cond_data;
1316         cond_snapshot->update = update;
1317
1318         mutex_lock(&trace_types_lock);
1319
1320         ret = tracing_alloc_snapshot_instance(tr);
1321         if (ret)
1322                 goto fail_unlock;
1323
1324         if (tr->current_trace->use_max_tr) {
1325                 ret = -EBUSY;
1326                 goto fail_unlock;
1327         }
1328
1329         /*
1330          * The cond_snapshot can only change to NULL without the
1331          * trace_types_lock. We don't care if we race with it going
1332          * to NULL, but we want to make sure that it's not set to
1333          * something other than NULL when we get here, which we can
1334          * do safely with only holding the trace_types_lock and not
1335          * having to take the max_lock.
1336          */
1337         if (tr->cond_snapshot) {
1338                 ret = -EBUSY;
1339                 goto fail_unlock;
1340         }
1341
1342         arch_spin_lock(&tr->max_lock);
1343         tr->cond_snapshot = cond_snapshot;
1344         arch_spin_unlock(&tr->max_lock);
1345
1346         mutex_unlock(&trace_types_lock);
1347
1348         return ret;
1349
1350  fail_unlock:
1351         mutex_unlock(&trace_types_lock);
1352         kfree(cond_snapshot);
1353         return ret;
1354 }
1355 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
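
/*
 * Illustrative conditional-snapshot client (hypothetical): the update
 * callback decides, from the cond_data passed to tracing_snapshot_cond(),
 * whether this particular event is worth freezing the buffer for.
 */
static unsigned long example_latency_threshold = 2000; /* usecs, made up */

static bool example_snapshot_update(struct trace_array *tr, void *cond_data)
{
        unsigned long latency = *(unsigned long *)cond_data;

        /* Swap to the snapshot buffer only for outliers. */
        return latency > example_latency_threshold;
}

/*
 * Enabled once with:
 *      tracing_snapshot_cond_enable(tr, NULL, example_snapshot_update);
 * and triggered from the measurement site with:
 *      tracing_snapshot_cond(tr, &measured_latency);
 */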
1356
1357 /**
1358  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1359  * @tr:         The tracing instance
1360  *
1361  * Check whether the conditional snapshot for the given instance is
1362  * enabled; if so, free the cond_snapshot associated with it,
1363  * otherwise return -EINVAL.
1364  *
1365  * Returns 0 if successful, error otherwise.
1366  */
1367 int tracing_snapshot_cond_disable(struct trace_array *tr)
1368 {
1369         int ret = 0;
1370
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381
1382         return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1385 #else
1386 void tracing_snapshot(void)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot);
1391 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1396 int tracing_alloc_snapshot(void)
1397 {
1398         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1399         return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1402 void tracing_snapshot_alloc(void)
1403 {
1404         /* Give warning */
1405         tracing_snapshot();
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1408 void *tracing_cond_snapshot_data(struct trace_array *tr)
1409 {
1410         return NULL;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1413 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1414 {
1415         return -ENODEV;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420         return false;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1423 #endif /* CONFIG_TRACER_SNAPSHOT */
1424
1425 void tracer_tracing_off(struct trace_array *tr)
1426 {
1427         if (tr->array_buffer.buffer)
1428                 ring_buffer_record_off(tr->array_buffer.buffer);
1429         /*
1430          * This flag is looked at when buffers haven't been allocated
1431          * yet, or by some tracers (like irqsoff) that just want to
1432          * know if the ring buffer has been disabled, but can handle
1433          * races where it gets disabled while we still do a record.
1434          * As the check is in the fast path of the tracers, it is more
1435          * important to be fast than accurate.
1436          */
1437         tr->buffer_disabled = 1;
1438         /* Make the flag seen by readers */
1439         smp_wmb();
1440 }
1441
1442 /**
1443  * tracing_off - turn off tracing buffers
1444  *
1445  * This function stops the tracing buffers from recording data.
1446  * It does not disable any overhead the tracers themselves may
1447  * be causing. This function simply causes all recording to
1448  * the ring buffers to fail.
1449  */
1450 void tracing_off(void)
1451 {
1452         tracer_tracing_off(&global_trace);
1453 }
1454 EXPORT_SYMBOL_GPL(tracing_off);
1455
1456 void disable_trace_on_warning(void)
1457 {
1458         if (__disable_trace_on_warning) {
1459                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1460                         "Disabling tracing due to warning\n");
1461                 tracing_off();
1462         }
1463 }
1464
1465 /**
1466  * tracer_tracing_is_on - show real state of ring buffer enabled
1467  * @tr : the trace array to know if ring buffer is enabled
1468  *
1469  * Shows real state of the ring buffer if it is enabled or not.
1470  */
1471 bool tracer_tracing_is_on(struct trace_array *tr)
1472 {
1473         if (tr->array_buffer.buffer)
1474                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1475         return !tr->buffer_disabled;
1476 }
1477
1478 /**
1479  * tracing_is_on - show state of ring buffers enabled
1480  */
1481 int tracing_is_on(void)
1482 {
1483         return tracer_tracing_is_on(&global_trace);
1484 }
1485 EXPORT_SYMBOL_GPL(tracing_is_on);
1486
1487 static int __init set_buf_size(char *str)
1488 {
1489         unsigned long buf_size;
1490
1491         if (!str)
1492                 return 0;
1493         buf_size = memparse(str, &str);
1494         /* nr_entries can not be zero */
1495         if (buf_size == 0)
1496                 return 0;
1497         trace_buf_size = buf_size;
1498         return 1;
1499 }
1500 __setup("trace_buf_size=", set_buf_size);
1501
1502 static int __init set_tracing_thresh(char *str)
1503 {
1504         unsigned long threshold;
1505         int ret;
1506
1507         if (!str)
1508                 return 0;
1509         ret = kstrtoul(str, 0, &threshold);
1510         if (ret < 0)
1511                 return 0;
1512         tracing_thresh = threshold * 1000;
1513         return 1;
1514 }
1515 __setup("tracing_thresh=", set_tracing_thresh);
1516
1517 unsigned long nsecs_to_usecs(unsigned long nsecs)
1518 {
1519         return nsecs / 1000;
1520 }
1521
1522 /*
1523  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1524  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1525  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1526  * of strings in the order that the evals (enum) were defined.
1527  */
1528 #undef C
1529 #define C(a, b) b
1530
1531 /* These must match the bit positions in trace_iterator_flags */
1532 static const char *trace_options[] = {
1533         TRACE_FLAGS
1534         NULL
1535 };
1536
1537 static struct {
1538         u64 (*func)(void);
1539         const char *name;
1540         int in_ns;              /* is this clock in nanoseconds? */
1541 } trace_clocks[] = {
1542         { trace_clock_local,            "local",        1 },
1543         { trace_clock_global,           "global",       1 },
1544         { trace_clock_counter,          "counter",      0 },
1545         { trace_clock_jiffies,          "uptime",       0 },
1546         { trace_clock,                  "perf",         1 },
1547         { ktime_get_mono_fast_ns,       "mono",         1 },
1548         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1549         { ktime_get_boot_fast_ns,       "boot",         1 },
1550         ARCH_TRACE_CLOCKS
1551 };
1552
1553 bool trace_clock_in_ns(struct trace_array *tr)
1554 {
1555         if (trace_clocks[tr->clock_id].in_ns)
1556                 return true;
1557
1558         return false;
1559 }
1560
1561 /*
1562  * trace_parser_get_init - gets the buffer for trace parser
1563  */
1564 int trace_parser_get_init(struct trace_parser *parser, int size)
1565 {
1566         memset(parser, 0, sizeof(*parser));
1567
1568         parser->buffer = kmalloc(size, GFP_KERNEL);
1569         if (!parser->buffer)
1570                 return 1;
1571
1572         parser->size = size;
1573         return 0;
1574 }
1575
1576 /*
1577  * trace_parser_put - frees the buffer for trace parser
1578  */
1579 void trace_parser_put(struct trace_parser *parser)
1580 {
1581         kfree(parser->buffer);
1582         parser->buffer = NULL;
1583 }
1584
1585 /*
1586  * trace_get_user - reads the user input string separated by  space
1587  * (matched by isspace(ch))
1588  *
1589  * For each string found the 'struct trace_parser' is updated,
1590  * and the function returns.
1591  *
1592  * Returns number of bytes read.
1593  *
1594  * See kernel/trace/trace.h for 'struct trace_parser' details.
1595  */
1596 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1597         size_t cnt, loff_t *ppos)
1598 {
1599         char ch;
1600         size_t read = 0;
1601         ssize_t ret;
1602
1603         if (!*ppos)
1604                 trace_parser_clear(parser);
1605
1606         ret = get_user(ch, ubuf++);
1607         if (ret)
1608                 goto out;
1609
1610         read++;
1611         cnt--;
1612
1613         /*
1614          * If the parser is not finished with the last write,
1615          * continue reading the user input without skipping spaces.
1616          */
1617         if (!parser->cont) {
1618                 /* skip white space */
1619                 while (cnt && isspace(ch)) {
1620                         ret = get_user(ch, ubuf++);
1621                         if (ret)
1622                                 goto out;
1623                         read++;
1624                         cnt--;
1625                 }
1626
1627                 parser->idx = 0;
1628
1629                 /* only spaces were written */
1630                 if (isspace(ch) || !ch) {
1631                         *ppos += read;
1632                         ret = read;
1633                         goto out;
1634                 }
1635         }
1636
1637         /* read the non-space input */
1638         while (cnt && !isspace(ch) && ch) {
1639                 if (parser->idx < parser->size - 1)
1640                         parser->buffer[parser->idx++] = ch;
1641                 else {
1642                         ret = -EINVAL;
1643                         goto out;
1644                 }
1645                 ret = get_user(ch, ubuf++);
1646                 if (ret)
1647                         goto out;
1648                 read++;
1649                 cnt--;
1650         }
1651
1652         /* We either got finished input or we have to wait for another call. */
1653         if (isspace(ch) || !ch) {
1654                 parser->buffer[parser->idx] = 0;
1655                 parser->cont = false;
1656         } else if (parser->idx < parser->size - 1) {
1657                 parser->cont = true;
1658                 parser->buffer[parser->idx++] = ch;
1659                 /* Make sure the parsed string always terminates with '\0'. */
1660                 parser->buffer[parser->idx] = 0;
1661         } else {
1662                 ret = -EINVAL;
1663                 goto out;
1664         }
1665
1666         *ppos += read;
1667         ret = read;
1668
1669 out:
1670         return ret;
1671 }
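
/*
 * Illustrative sketch, not part of the original file: a minimal tracefs
 * write() handler using the trace_parser helpers above to pull one
 * whitespace-separated token out of user input.  "example_token_write"
 * and the 64-byte token size are hypothetical and only for illustration.
 */
static ssize_t example_token_write(struct file *filp, const char __user *ubuf,
                                   size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);

        /* A full token is available once the parser is no longer continuing */
        if (read >= 0 && parser.idx && !parser.cont)
                pr_info("example: parsed token '%s'\n", parser.buffer);

        trace_parser_put(&parser);
        return read;
}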
1672
1673 /* TODO add a seq_buf_to_buffer() */
1674 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1675 {
1676         int len;
1677
1678         if (trace_seq_used(s) <= s->seq.readpos)
1679                 return -EBUSY;
1680
1681         len = trace_seq_used(s) - s->seq.readpos;
1682         if (cnt > len)
1683                 cnt = len;
1684         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1685
1686         s->seq.readpos += cnt;
1687         return cnt;
1688 }
1689
1690 unsigned long __read_mostly     tracing_thresh;
1691 static const struct file_operations tracing_max_lat_fops;
1692
1693 #ifdef LATENCY_FS_NOTIFY
1694
1695 static struct workqueue_struct *fsnotify_wq;
1696
1697 static void latency_fsnotify_workfn(struct work_struct *work)
1698 {
1699         struct trace_array *tr = container_of(work, struct trace_array,
1700                                               fsnotify_work);
1701         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1702 }
1703
1704 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1705 {
1706         struct trace_array *tr = container_of(iwork, struct trace_array,
1707                                               fsnotify_irqwork);
1708         queue_work(fsnotify_wq, &tr->fsnotify_work);
1709 }
1710
1711 static void trace_create_maxlat_file(struct trace_array *tr,
1712                                      struct dentry *d_tracer)
1713 {
1714         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1715         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1716         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1717                                               d_tracer, &tr->max_latency,
1718                                               &tracing_max_lat_fops);
1719 }
1720
1721 __init static int latency_fsnotify_init(void)
1722 {
1723         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1724                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1725         if (!fsnotify_wq) {
1726                 pr_err("Unable to allocate tr_max_lat_wq\n");
1727                 return -ENOMEM;
1728         }
1729         return 0;
1730 }
1731
1732 late_initcall_sync(latency_fsnotify_init);
1733
1734 void latency_fsnotify(struct trace_array *tr)
1735 {
1736         if (!fsnotify_wq)
1737                 return;
1738         /*
1739          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1740          * possible that we are called from __schedule() or do_idle(), which
1741          * could cause a deadlock.
1742          */
1743         irq_work_queue(&tr->fsnotify_irqwork);
1744 }
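
/*
 * Illustrative sketch, not part of the original file: the same two-stage
 * deferral used above, applied to a hypothetical notifier.  From contexts
 * where queueing work directly is unsafe (e.g. deep in the scheduler),
 * only irq_work_queue() is called; the irq_work callback then queues the
 * real work onto a workqueue.  All "example_*" names are hypothetical.
 */
static struct workqueue_struct *example_wq;
static struct work_struct example_work;
static struct irq_work example_irq_work;

static void example_workfn(struct work_struct *work)
{
        /* Process context: sleeping and taking locks is fine here */
        pr_info("example: deferred notification ran\n");
}

static void example_irq_workfn(struct irq_work *iwork)
{
        /* Hard interrupt context, but safe to queue work from here */
        queue_work(example_wq, &example_work);
}

static void example_notify(void)
{
        /* Callable even from __schedule()/do_idle()-like contexts */
        irq_work_queue(&example_irq_work);
}

static int __init example_notify_init(void)
{
        example_wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);
        if (!example_wq)
                return -ENOMEM;
        INIT_WORK(&example_work, example_workfn);
        init_irq_work(&example_irq_work, example_irq_workfn);
        return 0;
}
late_initcall(example_notify_init);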
1745
1746 /*
1747  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1748  *  defined(CONFIG_FSNOTIFY)
1749  */
1750 #else
1751
1752 #define trace_create_maxlat_file(tr, d_tracer)                          \
1753         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1754                           &tr->max_latency, &tracing_max_lat_fops)
1755
1756 #endif
1757
1758 #ifdef CONFIG_TRACER_MAX_TRACE
1759 /*
1760  * Copy the new maximum trace into the separate maximum-trace
1761  * structure. (this way the maximum trace is permanently saved,
1762  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1763  */
1764 static void
1765 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766 {
1767         struct array_buffer *trace_buf = &tr->array_buffer;
1768         struct array_buffer *max_buf = &tr->max_buffer;
1769         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771
1772         max_buf->cpu = cpu;
1773         max_buf->time_start = data->preempt_timestamp;
1774
1775         max_data->saved_latency = tr->max_latency;
1776         max_data->critical_start = data->critical_start;
1777         max_data->critical_end = data->critical_end;
1778
1779         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780         max_data->pid = tsk->pid;
1781         /*
1782          * If tsk == current, then use current_uid(), as that does not use
1783          * RCU. The irq tracer can be called out of RCU scope.
1784          */
1785         if (tsk == current)
1786                 max_data->uid = current_uid();
1787         else
1788                 max_data->uid = task_uid(tsk);
1789
1790         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791         max_data->policy = tsk->policy;
1792         max_data->rt_priority = tsk->rt_priority;
1793
1794         /* record this task's comm */
1795         tracing_record_cmdline(tsk);
1796         latency_fsnotify(tr);
1797 }
1798
1799 /**
1800  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1801  * @tr: tracer
1802  * @tsk: the task with the latency
1803  * @cpu: The cpu that initiated the trace.
1804  * @cond_data: User data associated with a conditional snapshot
1805  *
1806  * Flip the buffers between the @tr and the max_tr and record information
1807  * about which task was the cause of this latency.
1808  */
1809 void
1810 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811               void *cond_data)
1812 {
1813         if (tr->stop_count)
1814                 return;
1815
1816         WARN_ON_ONCE(!irqs_disabled());
1817
1818         if (!tr->allocated_snapshot) {
1819                 /* Only the nop tracer should hit this when disabling */
1820                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821                 return;
1822         }
1823
1824         arch_spin_lock(&tr->max_lock);
1825
1826         /* Inherit the recordable setting from array_buffer */
1827         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828                 ring_buffer_record_on(tr->max_buffer.buffer);
1829         else
1830                 ring_buffer_record_off(tr->max_buffer.buffer);
1831
1832 #ifdef CONFIG_TRACER_SNAPSHOT
1833         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834                 goto out_unlock;
1835 #endif
1836         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837
1838         __update_max_tr(tr, tsk, cpu);
1839
1840  out_unlock:
1841         arch_spin_unlock(&tr->max_lock);
1842 }
1843
1844 /**
1845  * update_max_tr_single - only copy one trace over, and reset the rest
1846  * @tr: tracer
1847  * @tsk: task with the latency
1848  * @cpu: the cpu of the buffer to copy.
1849  *
1850  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851  */
1852 void
1853 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854 {
1855         int ret;
1856
1857         if (tr->stop_count)
1858                 return;
1859
1860         WARN_ON_ONCE(!irqs_disabled());
1861         if (!tr->allocated_snapshot) {
1862                 /* Only the nop tracer should hit this when disabling */
1863                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864                 return;
1865         }
1866
1867         arch_spin_lock(&tr->max_lock);
1868
1869         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870
1871         if (ret == -EBUSY) {
1872                 /*
1873                  * We failed to swap the buffer due to a commit taking
1874                  * place on this CPU. We fail to record the latency, but
1875                  * instead write a note into the max trace buffer (no one
1876                  * writes directly to it) so the failure is visible.
1877                  */
1878                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879                         "Failed to swap buffers due to commit in progress\n");
1880         }
1881
1882         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883
1884         __update_max_tr(tr, tsk, cpu);
1885         arch_spin_unlock(&tr->max_lock);
1886 }
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891         /* Iterators are static, they should be filled or empty */
1892         if (trace_buffer_iter(iter, iter->cpu_file))
1893                 return 0;
1894
1895         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896                                 full);
1897 }
1898
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901
1902 struct trace_selftests {
1903         struct list_head                list;
1904         struct tracer                   *type;
1905 };
1906
1907 static LIST_HEAD(postponed_selftests);
1908
1909 static int save_selftest(struct tracer *type)
1910 {
1911         struct trace_selftests *selftest;
1912
1913         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914         if (!selftest)
1915                 return -ENOMEM;
1916
1917         selftest->type = type;
1918         list_add(&selftest->list, &postponed_selftests);
1919         return 0;
1920 }
1921
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924         struct trace_array *tr = &global_trace;
1925         struct tracer *saved_tracer = tr->current_trace;
1926         int ret;
1927
1928         if (!type->selftest || tracing_selftest_disabled)
1929                 return 0;
1930
1931         /*
1932          * If a tracer registers early in boot up (before scheduling is
1933          * initialized and such), then do not run its selftests yet.
1934          * Instead, run it a little later in the boot process.
1935          */
1936         if (!selftests_can_run)
1937                 return save_selftest(type);
1938
1939         if (!tracing_is_on()) {
1940                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941                         type->name);
1942                 return 0;
1943         }
1944
1945         /*
1946          * Run a selftest on this tracer.
1947          * Here we reset the trace buffer, and set the current
1948          * tracer to be this tracer. The tracer can then run some
1949          * internal tracing to verify that everything is in order.
1950          * If we fail, we do not register this tracer.
1951          */
1952         tracing_reset_online_cpus(&tr->array_buffer);
1953
1954         tr->current_trace = type;
1955
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957         if (type->use_max_tr) {
1958                 /* If we expanded the buffers, make sure the max is expanded too */
1959                 if (ring_buffer_expanded)
1960                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961                                            RING_BUFFER_ALL_CPUS);
1962                 tr->allocated_snapshot = true;
1963         }
1964 #endif
1965
1966         /* the test is responsible for initializing and enabling */
1967         pr_info("Testing tracer %s: ", type->name);
1968         ret = type->selftest(type, tr);
1969         /* the test is responsible for resetting too */
1970         tr->current_trace = saved_tracer;
1971         if (ret) {
1972                 printk(KERN_CONT "FAILED!\n");
1973                 /* Add the warning after printing 'FAILED' */
1974                 WARN_ON(1);
1975                 return -1;
1976         }
1977         /* Only reset on passing, to avoid touching corrupted buffers */
1978         tracing_reset_online_cpus(&tr->array_buffer);
1979
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981         if (type->use_max_tr) {
1982                 tr->allocated_snapshot = false;
1983
1984                 /* Shrink the max buffer again */
1985                 if (ring_buffer_expanded)
1986                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1987                                            RING_BUFFER_ALL_CPUS);
1988         }
1989 #endif
1990
1991         printk(KERN_CONT "PASSED\n");
1992         return 0;
1993 }
1994
1995 static __init int init_trace_selftests(void)
1996 {
1997         struct trace_selftests *p, *n;
1998         struct tracer *t, **last;
1999         int ret;
2000
2001         selftests_can_run = true;
2002
2003         mutex_lock(&trace_types_lock);
2004
2005         if (list_empty(&postponed_selftests))
2006                 goto out;
2007
2008         pr_info("Running postponed tracer tests:\n");
2009
2010         tracing_selftest_running = true;
2011         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012                 /* This loop can take minutes when sanitizers are enabled, so
2013                  * let's make sure we allow RCU processing.
2014                  */
2015                 cond_resched();
2016                 ret = run_tracer_selftest(p->type);
2017                 /* If the test fails, then warn and remove from available_tracers */
2018                 if (ret < 0) {
2019                         WARN(1, "tracer: %s failed selftest, disabling\n",
2020                              p->type->name);
2021                         last = &trace_types;
2022                         for (t = trace_types; t; t = t->next) {
2023                                 if (t == p->type) {
2024                                         *last = t->next;
2025                                         break;
2026                                 }
2027                                 last = &t->next;
2028                         }
2029                 }
2030                 list_del(&p->list);
2031                 kfree(p);
2032         }
2033         tracing_selftest_running = false;
2034
2035  out:
2036         mutex_unlock(&trace_types_lock);
2037
2038         return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044         return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049
2050 static void __init apply_trace_boot_options(void);
2051
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060         struct tracer *t;
2061         int ret = 0;
2062
2063         if (!type->name) {
2064                 pr_info("Tracer must have a name\n");
2065                 return -1;
2066         }
2067
2068         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070                 return -1;
2071         }
2072
2073         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074                 pr_warn("Can not register tracer %s due to lockdown\n",
2075                            type->name);
2076                 return -EPERM;
2077         }
2078
2079         mutex_lock(&trace_types_lock);
2080
2081         tracing_selftest_running = true;
2082
2083         for (t = trace_types; t; t = t->next) {
2084                 if (strcmp(type->name, t->name) == 0) {
2085                         /* already found */
2086                         pr_info("Tracer %s already registered\n",
2087                                 type->name);
2088                         ret = -1;
2089                         goto out;
2090                 }
2091         }
2092
2093         if (!type->set_flag)
2094                 type->set_flag = &dummy_set_flag;
2095         if (!type->flags) {
2096                 /* allocate a dummy tracer_flags */
2097                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098                 if (!type->flags) {
2099                         ret = -ENOMEM;
2100                         goto out;
2101                 }
2102                 type->flags->val = 0;
2103                 type->flags->opts = dummy_tracer_opt;
2104         } else
2105                 if (!type->flags->opts)
2106                         type->flags->opts = dummy_tracer_opt;
2107
2108         /* store the tracer for __set_tracer_option */
2109         type->flags->trace = type;
2110
2111         ret = run_tracer_selftest(type);
2112         if (ret < 0)
2113                 goto out;
2114
2115         type->next = trace_types;
2116         trace_types = type;
2117         add_tracer_options(&global_trace, type);
2118
2119  out:
2120         tracing_selftest_running = false;
2121         mutex_unlock(&trace_types_lock);
2122
2123         if (ret || !default_bootup_tracer)
2124                 goto out_unlock;
2125
2126         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127                 goto out_unlock;
2128
2129         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130         /* Do we want this tracer to start on bootup? */
2131         tracing_set_tracer(&global_trace, type->name);
2132         default_bootup_tracer = NULL;
2133
2134         apply_trace_boot_options();
2135
2136         /* Disable other selftests, since this tracer will interfere with them. */
2137         disable_tracing_selftest("running a tracer");
2138
2139  out_unlock:
2140         return ret;
2141 }
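
/*
 * Illustrative sketch, not part of the original file: the minimum a new
 * plugin tracer needs before handing itself to register_tracer() from an
 * __init function.  "example_tracer" and its callbacks are hypothetical;
 * real tracers normally also provide selftest, flag and output callbacks.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static int __init example_tracer_register(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);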
2142
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145         struct trace_buffer *buffer = buf->buffer;
2146
2147         if (!buffer)
2148                 return;
2149
2150         ring_buffer_record_disable(buffer);
2151
2152         /* Make sure all commits have finished */
2153         synchronize_rcu();
2154         ring_buffer_reset_cpu(buffer, cpu);
2155
2156         ring_buffer_record_enable(buffer);
2157 }
2158
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161         struct trace_buffer *buffer = buf->buffer;
2162
2163         if (!buffer)
2164                 return;
2165
2166         ring_buffer_record_disable(buffer);
2167
2168         /* Make sure all commits have finished */
2169         synchronize_rcu();
2170
2171         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172
2173         ring_buffer_reset_online_cpus(buffer);
2174
2175         ring_buffer_record_enable(buffer);
2176 }
2177
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus(void)
2180 {
2181         struct trace_array *tr;
2182
2183         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184                 if (!tr->clear_trace)
2185                         continue;
2186                 tr->clear_trace = false;
2187                 tracing_reset_online_cpus(&tr->array_buffer);
2188 #ifdef CONFIG_TRACER_MAX_TRACE
2189                 tracing_reset_online_cpus(&tr->max_buffer);
2190 #endif
2191         }
2192 }
2193
2194 /*
2195  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2196  * is the tgid last observed corresponding to pid=i.
2197  */
2198 static int *tgid_map;
2199
2200 /* The maximum valid index into tgid_map. */
2201 static size_t tgid_map_max;
2202
2203 #define SAVED_CMDLINES_DEFAULT 128
2204 #define NO_CMDLINE_MAP UINT_MAX
2205 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2206 struct saved_cmdlines_buffer {
2207         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2208         unsigned *map_cmdline_to_pid;
2209         unsigned cmdline_num;
2210         int cmdline_idx;
2211         char *saved_cmdlines;
2212 };
2213 static struct saved_cmdlines_buffer *savedcmd;
2214
2215 /* temporarily disable recording */
2216 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2217
2218 static inline char *get_saved_cmdlines(int idx)
2219 {
2220         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2221 }
2222
2223 static inline void set_cmdline(int idx, const char *cmdline)
2224 {
2225         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2226 }
2227
2228 static int allocate_cmdlines_buffer(unsigned int val,
2229                                     struct saved_cmdlines_buffer *s)
2230 {
2231         s->map_cmdline_to_pid = kmalloc_array(val,
2232                                               sizeof(*s->map_cmdline_to_pid),
2233                                               GFP_KERNEL);
2234         if (!s->map_cmdline_to_pid)
2235                 return -ENOMEM;
2236
2237         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2238         if (!s->saved_cmdlines) {
2239                 kfree(s->map_cmdline_to_pid);
2240                 return -ENOMEM;
2241         }
2242
2243         s->cmdline_idx = 0;
2244         s->cmdline_num = val;
2245         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2246                sizeof(s->map_pid_to_cmdline));
2247         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2248                val * sizeof(*s->map_cmdline_to_pid));
2249
2250         return 0;
2251 }
2252
2253 static int trace_create_savedcmd(void)
2254 {
2255         int ret;
2256
2257         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2258         if (!savedcmd)
2259                 return -ENOMEM;
2260
2261         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2262         if (ret < 0) {
2263                 kfree(savedcmd);
2264                 savedcmd = NULL;
2265                 return -ENOMEM;
2266         }
2267
2268         return 0;
2269 }
2270
2271 int is_tracing_stopped(void)
2272 {
2273         return global_trace.stop_count;
2274 }
2275
2276 /**
2277  * tracing_start - quick start of the tracer
2278  *
2279  * If tracing is enabled but was stopped by tracing_stop,
2280  * this will start the tracer back up.
2281  */
2282 void tracing_start(void)
2283 {
2284         struct trace_buffer *buffer;
2285         unsigned long flags;
2286
2287         if (tracing_disabled)
2288                 return;
2289
2290         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2291         if (--global_trace.stop_count) {
2292                 if (global_trace.stop_count < 0) {
2293                         /* Someone screwed up their debugging */
2294                         WARN_ON_ONCE(1);
2295                         global_trace.stop_count = 0;
2296                 }
2297                 goto out;
2298         }
2299
2300         /* Prevent the buffers from switching */
2301         arch_spin_lock(&global_trace.max_lock);
2302
2303         buffer = global_trace.array_buffer.buffer;
2304         if (buffer)
2305                 ring_buffer_record_enable(buffer);
2306
2307 #ifdef CONFIG_TRACER_MAX_TRACE
2308         buffer = global_trace.max_buffer.buffer;
2309         if (buffer)
2310                 ring_buffer_record_enable(buffer);
2311 #endif
2312
2313         arch_spin_unlock(&global_trace.max_lock);
2314
2315  out:
2316         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2317 }
2318
2319 static void tracing_start_tr(struct trace_array *tr)
2320 {
2321         struct trace_buffer *buffer;
2322         unsigned long flags;
2323
2324         if (tracing_disabled)
2325                 return;
2326
2327         /* If global, we need to also start the max tracer */
2328         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2329                 return tracing_start();
2330
2331         raw_spin_lock_irqsave(&tr->start_lock, flags);
2332
2333         if (--tr->stop_count) {
2334                 if (tr->stop_count < 0) {
2335                         /* Someone screwed up their debugging */
2336                         WARN_ON_ONCE(1);
2337                         tr->stop_count = 0;
2338                 }
2339                 goto out;
2340         }
2341
2342         buffer = tr->array_buffer.buffer;
2343         if (buffer)
2344                 ring_buffer_record_enable(buffer);
2345
2346  out:
2347         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2348 }
2349
2350 /**
2351  * tracing_stop - quick stop of the tracer
2352  *
2353  * Light weight way to stop tracing. Use in conjunction with
2354  * tracing_start.
2355  */
2356 void tracing_stop(void)
2357 {
2358         struct trace_buffer *buffer;
2359         unsigned long flags;
2360
2361         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2362         if (global_trace.stop_count++)
2363                 goto out;
2364
2365         /* Prevent the buffers from switching */
2366         arch_spin_lock(&global_trace.max_lock);
2367
2368         buffer = global_trace.array_buffer.buffer;
2369         if (buffer)
2370                 ring_buffer_record_disable(buffer);
2371
2372 #ifdef CONFIG_TRACER_MAX_TRACE
2373         buffer = global_trace.max_buffer.buffer;
2374         if (buffer)
2375                 ring_buffer_record_disable(buffer);
2376 #endif
2377
2378         arch_spin_unlock(&global_trace.max_lock);
2379
2380  out:
2381         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2382 }
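
/*
 * Illustrative sketch, not part of the original file: pairing the two
 * helpers above to keep a noisy section out of the trace without tearing
 * down any tracer state.  "example_do_untraced_housekeeping" is
 * hypothetical.
 */
static void example_do_untraced_housekeeping(void)
{
        tracing_stop();
        /* ... work that should not show up in the trace ... */
        tracing_start();
}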
2383
2384 static void tracing_stop_tr(struct trace_array *tr)
2385 {
2386         struct trace_buffer *buffer;
2387         unsigned long flags;
2388
2389         /* If global, we need to also stop the max tracer */
2390         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2391                 return tracing_stop();
2392
2393         raw_spin_lock_irqsave(&tr->start_lock, flags);
2394         if (tr->stop_count++)
2395                 goto out;
2396
2397         buffer = tr->array_buffer.buffer;
2398         if (buffer)
2399                 ring_buffer_record_disable(buffer);
2400
2401  out:
2402         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2403 }
2404
2405 static int trace_save_cmdline(struct task_struct *tsk)
2406 {
2407         unsigned tpid, idx;
2408
2409         /* treat recording of idle task as a success */
2410         if (!tsk->pid)
2411                 return 1;
2412
2413         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2414
2415         /*
2416          * It's not the end of the world if we don't get
2417          * the lock, but we also don't want to spin
2418          * nor do we want to disable interrupts,
2419          * so if we miss here, then better luck next time.
2420          */
2421         if (!arch_spin_trylock(&trace_cmdline_lock))
2422                 return 0;
2423
2424         idx = savedcmd->map_pid_to_cmdline[tpid];
2425         if (idx == NO_CMDLINE_MAP) {
2426                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2427
2428                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2429                 savedcmd->cmdline_idx = idx;
2430         }
2431
2432         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2433         set_cmdline(idx, tsk->comm);
2434
2435         arch_spin_unlock(&trace_cmdline_lock);
2436
2437         return 1;
2438 }
2439
2440 static void __trace_find_cmdline(int pid, char comm[])
2441 {
2442         unsigned map;
2443         int tpid;
2444
2445         if (!pid) {
2446                 strcpy(comm, "<idle>");
2447                 return;
2448         }
2449
2450         if (WARN_ON_ONCE(pid < 0)) {
2451                 strcpy(comm, "<XXX>");
2452                 return;
2453         }
2454
2455         tpid = pid & (PID_MAX_DEFAULT - 1);
2456         map = savedcmd->map_pid_to_cmdline[tpid];
2457         if (map != NO_CMDLINE_MAP) {
2458                 tpid = savedcmd->map_cmdline_to_pid[map];
2459                 if (tpid == pid) {
2460                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2461                         return;
2462                 }
2463         }
2464         strcpy(comm, "<...>");
2465 }
2466
2467 void trace_find_cmdline(int pid, char comm[])
2468 {
2469         preempt_disable();
2470         arch_spin_lock(&trace_cmdline_lock);
2471
2472         __trace_find_cmdline(pid, comm);
2473
2474         arch_spin_unlock(&trace_cmdline_lock);
2475         preempt_enable();
2476 }
2477
2478 static int *trace_find_tgid_ptr(int pid)
2479 {
2480         /*
2481          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2482          * if we observe a non-NULL tgid_map then we also observe the correct
2483          * tgid_map_max.
2484          */
2485         int *map = smp_load_acquire(&tgid_map);
2486
2487         if (unlikely(!map || pid > tgid_map_max))
2488                 return NULL;
2489
2490         return &map[pid];
2491 }
2492
2493 int trace_find_tgid(int pid)
2494 {
2495         int *ptr = trace_find_tgid_ptr(pid);
2496
2497         return ptr ? *ptr : 0;
2498 }
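
/*
 * Illustrative sketch, not part of the original file: resolving a pid
 * recorded in an event back to a comm and a tgid using the caches above.
 * Unknown pids come back as "<...>" and 0 respectively, exactly as
 * implemented in __trace_find_cmdline() and trace_find_tgid().
 * "example_resolve_pid" is hypothetical.
 */
static void example_resolve_pid(int pid)
{
        char comm[TASK_COMM_LEN];
        int tgid;

        trace_find_cmdline(pid, comm);
        tgid = trace_find_tgid(pid);

        pr_info("example: pid %d comm '%s' tgid %d\n", pid, comm, tgid);
}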
2499
2500 static int trace_save_tgid(struct task_struct *tsk)
2501 {
2502         int *ptr;
2503
2504         /* treat recording of idle task as a success */
2505         if (!tsk->pid)
2506                 return 1;
2507
2508         ptr = trace_find_tgid_ptr(tsk->pid);
2509         if (!ptr)
2510                 return 0;
2511
2512         *ptr = tsk->tgid;
2513         return 1;
2514 }
2515
2516 static bool tracing_record_taskinfo_skip(int flags)
2517 {
2518         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2519                 return true;
2520         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2521                 return true;
2522         if (!__this_cpu_read(trace_taskinfo_save))
2523                 return true;
2524         return false;
2525 }
2526
2527 /**
2528  * tracing_record_taskinfo - record the task info of a task
2529  *
2530  * @task:  task to record
2531  * @flags: TRACE_RECORD_CMDLINE for recording comm
2532  *         TRACE_RECORD_TGID for recording tgid
2533  */
2534 void tracing_record_taskinfo(struct task_struct *task, int flags)
2535 {
2536         bool done;
2537
2538         if (tracing_record_taskinfo_skip(flags))
2539                 return;
2540
2541         /*
2542          * Record as much task information as possible. If some fail, continue
2543          * to try to record the others.
2544          */
2545         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2547
2548         /* If recording any information failed, retry soon. */
2549         if (!done)
2550                 return;
2551
2552         __this_cpu_write(trace_taskinfo_save, false);
2553 }
2554
2555 /**
2556  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2557  *
2558  * @prev: previous task during sched_switch
2559  * @next: next task during sched_switch
2560  * @flags: TRACE_RECORD_CMDLINE for recording comm
2561  *         TRACE_RECORD_TGID for recording tgid
2562  */
2563 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2564                                           struct task_struct *next, int flags)
2565 {
2566         bool done;
2567
2568         if (tracing_record_taskinfo_skip(flags))
2569                 return;
2570
2571         /*
2572          * Record as much task information as possible. If some fail, continue
2573          * to try to record the others.
2574          */
2575         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2576         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2577         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2578         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2579
2580         /* If recording any information failed, retry soon. */
2581         if (!done)
2582                 return;
2583
2584         __this_cpu_write(trace_taskinfo_save, false);
2585 }
2586
2587 /* Helpers to record a specific task information */
2588 void tracing_record_cmdline(struct task_struct *task)
2589 {
2590         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2591 }
2592
2593 void tracing_record_tgid(struct task_struct *task)
2594 {
2595         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2596 }
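
/*
 * Illustrative sketch, not part of the original file: how a sched_switch
 * probe feeds both caches at once with the helper above, the way the
 * tracing sched_switch code does.  "example_sched_switch_probe" is
 * hypothetical.
 */
static void example_sched_switch_probe(struct task_struct *prev,
                                       struct task_struct *next)
{
        tracing_record_taskinfo_sched_switch(prev, next,
                        TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
}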
2597
2598 /*
2599  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601  * simplifies those functions and keeps them in sync.
2602  */
2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605         return trace_seq_has_overflowed(s) ?
2606                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
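
/*
 * Illustrative sketch, not part of the original file: the pattern that
 * trace_handle_return() is meant to simplify inside an event output
 * callback.  "example_event_print" is hypothetical; real callbacks live
 * in trace_output.c and the generated per-event code.
 */
static enum print_line_t example_event_print(struct trace_iterator *iter)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);

        /* One call instead of an explicit trace_seq_has_overflowed() check */
        return trace_handle_return(s);
}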
2609
2610 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2611 {
2612         unsigned int trace_flags = irqs_status;
2613         unsigned int pc;
2614
2615         pc = preempt_count();
2616
2617         if (pc & NMI_MASK)
2618                 trace_flags |= TRACE_FLAG_NMI;
2619         if (pc & HARDIRQ_MASK)
2620                 trace_flags |= TRACE_FLAG_HARDIRQ;
2621         if (in_serving_softirq())
2622                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2623
2624         if (tif_need_resched())
2625                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2626         if (test_preempt_need_resched())
2627                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2628         return (trace_flags << 16) | (pc & 0xff);
2629 }
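
/*
 * Illustrative sketch, not part of the original file: unpacking the value
 * built above, which keeps the irq/NMI/preempt flags in the upper 16 bits
 * and the low byte of preempt_count() in the bottom byte.
 * "example_decode_trace_ctx" is hypothetical.
 */
static void example_decode_trace_ctx(unsigned int trace_ctx)
{
        unsigned int flags = trace_ctx >> 16;
        unsigned int pc = trace_ctx & 0xff;

        pr_info("example: flags=%#x preempt_count=%u in_nmi=%d\n",
                flags, pc, !!(flags & TRACE_FLAG_NMI));
}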
2630
2631 struct ring_buffer_event *
2632 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2633                           int type,
2634                           unsigned long len,
2635                           unsigned int trace_ctx)
2636 {
2637         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2638 }
2639
2640 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2641 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2642 static int trace_buffered_event_ref;
2643
2644 /**
2645  * trace_buffered_event_enable - enable buffering events
2646  *
2647  * When events are being filtered, it is quicker to write the event
2648  * data into a temporary buffer if there is a good chance that it
2649  * will not be committed. Discarding an event from the ring buffer
2650  * is not as fast as committing it, and is much slower than simply
2651  * copying the data on a successful match.
2652  *
2653  * When an event is to be filtered, allocate per cpu buffers to
2654  * write the event data into, and if the event is filtered and discarded
2655  * it is simply dropped, otherwise, the entire data is to be committed
2656  * in one shot.
2657  */
2658 void trace_buffered_event_enable(void)
2659 {
2660         struct ring_buffer_event *event;
2661         struct page *page;
2662         int cpu;
2663
2664         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2665
2666         if (trace_buffered_event_ref++)
2667                 return;
2668
2669         for_each_tracing_cpu(cpu) {
2670                 page = alloc_pages_node(cpu_to_node(cpu),
2671                                         GFP_KERNEL | __GFP_NORETRY, 0);
2672                 if (!page)
2673                         goto failed;
2674
2675                 event = page_address(page);
2676                 memset(event, 0, sizeof(*event));
2677
2678                 per_cpu(trace_buffered_event, cpu) = event;
2679
2680                 preempt_disable();
2681                 if (cpu == smp_processor_id() &&
2682                     __this_cpu_read(trace_buffered_event) !=
2683                     per_cpu(trace_buffered_event, cpu))
2684                         WARN_ON_ONCE(1);
2685                 preempt_enable();
2686         }
2687
2688         return;
2689  failed:
2690         trace_buffered_event_disable();
2691 }
2692
2693 static void enable_trace_buffered_event(void *data)
2694 {
2695         /* Probably not needed, but do it anyway */
2696         smp_rmb();
2697         this_cpu_dec(trace_buffered_event_cnt);
2698 }
2699
2700 static void disable_trace_buffered_event(void *data)
2701 {
2702         this_cpu_inc(trace_buffered_event_cnt);
2703 }
2704
2705 /**
2706  * trace_buffered_event_disable - disable buffering events
2707  *
2708  * When a filter is removed, it is faster to not use the buffered
2709  * events, and to commit directly into the ring buffer. Free up
2710  * the temp buffers when there are no more users. This requires
2711  * special synchronization with current events.
2712  */
2713 void trace_buffered_event_disable(void)
2714 {
2715         int cpu;
2716
2717         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2718
2719         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2720                 return;
2721
2722         if (--trace_buffered_event_ref)
2723                 return;
2724
2725         preempt_disable();
2726         /* For each CPU, set the buffer as used. */
2727         smp_call_function_many(tracing_buffer_mask,
2728                                disable_trace_buffered_event, NULL, 1);
2729         preempt_enable();
2730
2731         /* Wait for all current users to finish */
2732         synchronize_rcu();
2733
2734         for_each_tracing_cpu(cpu) {
2735                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2736                 per_cpu(trace_buffered_event, cpu) = NULL;
2737         }
2738         /*
2739          * Make sure trace_buffered_event is NULL before clearing
2740          * trace_buffered_event_cnt.
2741          */
2742         smp_wmb();
2743
2744         preempt_disable();
2745         /* Do the work on each cpu */
2746         smp_call_function_many(tracing_buffer_mask,
2747                                enable_trace_buffered_event, NULL, 1);
2748         preempt_enable();
2749 }
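
/*
 * Illustrative sketch, not part of the original file: the reference-counted
 * enable/disable pair above must be called with event_mutex held, roughly
 * the way the event filter code drives it.  "example_set_filtering" is
 * hypothetical.
 */
static void example_set_filtering(bool on)
{
        mutex_lock(&event_mutex);
        if (on)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}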
2750
2751 static struct trace_buffer *temp_buffer;
2752
2753 struct ring_buffer_event *
2754 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2755                           struct trace_event_file *trace_file,
2756                           int type, unsigned long len,
2757                           unsigned int trace_ctx)
2758 {
2759         struct ring_buffer_event *entry;
2760         struct trace_array *tr = trace_file->tr;
2761         int val;
2762
2763         *current_rb = tr->array_buffer.buffer;
2764
2765         if (!tr->no_filter_buffering_ref &&
2766             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2767             (entry = this_cpu_read(trace_buffered_event))) {
2768                 /*
2769                  * Filtering is on, so try to use the per cpu buffer first.
2770                  * This buffer will simulate a ring_buffer_event,
2771                  * where the type_len is zero and the array[0] will
2772                  * hold the full length.
2773                  * (see include/linux/ring_buffer.h for details on
2774                  *  how the ring_buffer_event is structured).
2775                  *
2776                  * Using a temp buffer during filtering and copying it
2777                  * on a matched filter is quicker than writing directly
2778                  * into the ring buffer and then discarding it when
2779                  * it doesn't match. That is because the discard
2780                  * requires several atomic operations to get right.
2781                  * Copying on a match and doing nothing on a failed match
2782                  * is still quicker than skipping the copy on a match but
2783                  * having to discard from the ring buffer on a failed match.
2784                  */
2785                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2786
2787                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2788
2789                 /*
2790                  * Preemption is disabled, but interrupts and NMIs
2791                  * can still come in now. If that happens after
2792                  * the above increment, then it will have to go
2793                  * back to the old method of allocating the event
2794                  * on the ring buffer, and if the filter fails, it
2795                  * will have to call ring_buffer_discard_commit()
2796                  * to remove it.
2797                  *
2798                  * Need to also check the unlikely case that the
2799                  * length is bigger than the temp buffer size.
2800                  * If that happens, then the reserve is pretty much
2801                  * guaranteed to fail, as the ring buffer currently
2802                  * only allows events less than a page. But that may
2803                  * change in the future, so let the ring buffer reserve
2804                  * handle the failure in that case.
2805                  */
2806                 if (val == 1 && likely(len <= max_len)) {
2807                         trace_event_setup(entry, type, trace_ctx);
2808                         entry->array[0] = len;
2809                         return entry;
2810                 }
2811                 this_cpu_dec(trace_buffered_event_cnt);
2812         }
2813
2814         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2815                                             trace_ctx);
2816         /*
2817          * If tracing is off, but we have triggers enabled,
2818          * we still need to look at the event data. Use the temp_buffer
2819          * to store the trace event for the trigger to use. It's recursion
2820          * safe and will not be recorded anywhere.
2821          */
2822         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2823                 *current_rb = temp_buffer;
2824                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825                                                     trace_ctx);
2826         }
2827         return entry;
2828 }
2829 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2830
2831 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2832 static DEFINE_MUTEX(tracepoint_printk_mutex);
2833
2834 static void output_printk(struct trace_event_buffer *fbuffer)
2835 {
2836         struct trace_event_call *event_call;
2837         struct trace_event_file *file;
2838         struct trace_event *event;
2839         unsigned long flags;
2840         struct trace_iterator *iter = tracepoint_print_iter;
2841
2842         /* We should never get here if iter is NULL */
2843         if (WARN_ON_ONCE(!iter))
2844                 return;
2845
2846         event_call = fbuffer->trace_file->event_call;
2847         if (!event_call || !event_call->event.funcs ||
2848             !event_call->event.funcs->trace)
2849                 return;
2850
2851         file = fbuffer->trace_file;
2852         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2853             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2854              !filter_match_preds(file->filter, fbuffer->entry)))
2855                 return;
2856
2857         event = &fbuffer->trace_file->event_call->event;
2858
2859         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2860         trace_seq_init(&iter->seq);
2861         iter->ent = fbuffer->entry;
2862         event_call->event.funcs->trace(iter, 0, event);
2863         trace_seq_putc(&iter->seq, 0);
2864         printk("%s", iter->seq.buffer);
2865
2866         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2867 }
2868
2869 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2870                              void *buffer, size_t *lenp,
2871                              loff_t *ppos)
2872 {
2873         int save_tracepoint_printk;
2874         int ret;
2875
2876         mutex_lock(&tracepoint_printk_mutex);
2877         save_tracepoint_printk = tracepoint_printk;
2878
2879         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2880
2881         /*
2882          * This will force exiting early, as tracepoint_printk
2883          * This will force an early exit, as tracepoint_printk
2884          * is always zero when tracepoint_print_iter is not allocated.
2885         if (!tracepoint_print_iter)
2886                 tracepoint_printk = 0;
2887
2888         if (save_tracepoint_printk == tracepoint_printk)
2889                 goto out;
2890
2891         if (tracepoint_printk)
2892                 static_key_enable(&tracepoint_printk_key.key);
2893         else
2894                 static_key_disable(&tracepoint_printk_key.key);
2895
2896  out:
2897         mutex_unlock(&tracepoint_printk_mutex);
2898
2899         return ret;
2900 }
2901
2902 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2903 {
2904         if (static_key_false(&tracepoint_printk_key.key))
2905                 output_printk(fbuffer);
2906
2907         if (static_branch_unlikely(&trace_event_exports_enabled))
2908                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2909         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2910                                     fbuffer->event, fbuffer->entry,
2911                                     fbuffer->trace_ctx, fbuffer->regs);
2912 }
2913 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2914
2915 /*
2916  * Skip 3:
2917  *
2918  *   trace_buffer_unlock_commit_regs()
2919  *   trace_event_buffer_commit()
2920  *   trace_event_raw_event_xxx()
2921  */
2922 # define STACK_SKIP 3
2923
2924 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2925                                      struct trace_buffer *buffer,
2926                                      struct ring_buffer_event *event,
2927                                      unsigned int trace_ctx,
2928                                      struct pt_regs *regs)
2929 {
2930         __buffer_unlock_commit(buffer, event);
2931
2932         /*
2933          * If regs is not set, then skip the necessary functions.
2934          * Note, we can still get here via blktrace, wakeup tracer
2935          * and mmiotrace, but that's ok if they lose a function or
2936          * two. They are not that meaningful.
2937          */
2938         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2939         ftrace_trace_userstack(tr, buffer, trace_ctx);
2940 }
2941
2942 /*
2943  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2944  */
2945 void
2946 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2947                                    struct ring_buffer_event *event)
2948 {
2949         __buffer_unlock_commit(buffer, event);
2950 }
2951
2952 void
2953 trace_function(struct trace_array *tr, unsigned long ip,
2954                unsigned long parent_ip, unsigned int trace_ctx)
2955 {
2956         struct trace_event_call *call = &event_function;
2957         struct trace_buffer *buffer = tr->array_buffer.buffer;
2958         struct ring_buffer_event *event;
2959         struct ftrace_entry *entry;
2960
2961         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2962                                             trace_ctx);
2963         if (!event)
2964                 return;
2965         entry   = ring_buffer_event_data(event);
2966         entry->ip                       = ip;
2967         entry->parent_ip                = parent_ip;
2968
2969         if (!call_filter_check_discard(call, entry, buffer, event)) {
2970                 if (static_branch_unlikely(&trace_function_exports_enabled))
2971                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2972                 __buffer_unlock_commit(buffer, event);
2973         }
2974 }
2975
2976 #ifdef CONFIG_STACKTRACE
2977
2978 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2979 #define FTRACE_KSTACK_NESTING   4
2980
2981 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2982
2983 struct ftrace_stack {
2984         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2985 };
2986
2987
2988 struct ftrace_stacks {
2989         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2990 };
2991
2992 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2993 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2994
2995 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2996                                  unsigned int trace_ctx,
2997                                  int skip, struct pt_regs *regs)
2998 {
2999         struct trace_event_call *call = &event_kernel_stack;
3000         struct ring_buffer_event *event;
3001         unsigned int size, nr_entries;
3002         struct ftrace_stack *fstack;
3003         struct stack_entry *entry;
3004         int stackidx;
3005
3006         /*
3007          * Add one, for this function and the call to stack_trace_save().
3008          * If regs is set, then these functions will not be in the way.
3009          */
3010 #ifndef CONFIG_UNWINDER_ORC
3011         if (!regs)
3012                 skip++;
3013 #endif
3014
3015         preempt_disable_notrace();
3016
3017         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3018
3019         /* This should never happen. If it does, yell once and skip */
3020         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3021                 goto out;
3022
3023         /*
3024          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3025          * interrupt will either see the value pre-increment or
3026          * post-increment. If the interrupt happens pre-increment, it will
3027          * have restored the counter when it returns. We just need a
3028          * barrier to keep gcc from moving things around.
3029          */
3030         barrier();
3031
3032         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3033         size = ARRAY_SIZE(fstack->calls);
3034
3035         if (regs) {
3036                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3037                                                    size, skip);
3038         } else {
3039                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3040         }
3041
3042         size = nr_entries * sizeof(unsigned long);
3043         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3044                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3045                                     trace_ctx);
3046         if (!event)
3047                 goto out;
3048         entry = ring_buffer_event_data(event);
3049
3050         memcpy(&entry->caller, fstack->calls, size);
3051         entry->size = nr_entries;
3052
3053         if (!call_filter_check_discard(call, entry, buffer, event))
3054                 __buffer_unlock_commit(buffer, event);
3055
3056  out:
3057         /* Again, don't let gcc optimize things here */
3058         barrier();
3059         __this_cpu_dec(ftrace_stack_reserve);
3060         preempt_enable_notrace();
3061
3062 }
3063
3064 static inline void ftrace_trace_stack(struct trace_array *tr,
3065                                       struct trace_buffer *buffer,
3066                                       unsigned int trace_ctx,
3067                                       int skip, struct pt_regs *regs)
3068 {
3069         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3070                 return;
3071
3072         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3073 }
3074
3075 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3076                    int skip)
3077 {
3078         struct trace_buffer *buffer = tr->array_buffer.buffer;
3079
3080         if (rcu_is_watching()) {
3081                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3082                 return;
3083         }
3084
3085         /*
3086          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3087          * but if the above rcu_is_watching() failed, then the NMI
3088          * triggered someplace critical, and rcu_irq_enter() should
3089          * not be called from NMI.
3090          */
3091         if (unlikely(in_nmi()))
3092                 return;
3093
3094         rcu_irq_enter_irqson();
3095         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3096         rcu_irq_exit_irqson();
3097 }
3098
3099 /**
3100  * trace_dump_stack - record a stack back trace in the trace buffer
3101  * @skip: Number of functions to skip (helper handlers)
3102  */
3103 void trace_dump_stack(int skip)
3104 {
3105         if (tracing_disabled || tracing_selftest_running)
3106                 return;
3107
3108 #ifndef CONFIG_UNWINDER_ORC
3109         /* Skip 1 to skip this function. */
3110         skip++;
3111 #endif
3112         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3113                              tracing_gen_ctx(), skip, NULL);
3114 }
3115 EXPORT_SYMBOL_GPL(trace_dump_stack);
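
/*
 * Illustrative sketch, not part of the original file: dropping a kernel
 * backtrace into the trace buffer from a hypothetical driver path.  A skip
 * of 0 keeps the immediate caller at the top of the recorded stack.
 */
static void example_record_backtrace(void)
{
        trace_dump_stack(0);
}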
3116
3117 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3118 static DEFINE_PER_CPU(int, user_stack_count);
3119
3120 static void
3121 ftrace_trace_userstack(struct trace_array *tr,
3122                        struct trace_buffer *buffer, unsigned int trace_ctx)
3123 {
3124         struct trace_event_call *call = &event_user_stack;
3125         struct ring_buffer_event *event;
3126         struct userstack_entry *entry;
3127
3128         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3129                 return;
3130
3131         /*
3132          * NMIs cannot handle page faults, even with fixups.
3133          * Saving the user stack can (and often does) fault.
3134          */
3135         if (unlikely(in_nmi()))
3136                 return;
3137
3138         /*
3139          * prevent recursion, since the user stack tracing may
3140          * trigger other kernel events.
3141          */
3142         preempt_disable();
3143         if (__this_cpu_read(user_stack_count))
3144                 goto out;
3145
3146         __this_cpu_inc(user_stack_count);
3147
3148         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3149                                             sizeof(*entry), trace_ctx);
3150         if (!event)
3151                 goto out_drop_count;
3152         entry   = ring_buffer_event_data(event);
3153
3154         entry->tgid             = current->tgid;
3155         memset(&entry->caller, 0, sizeof(entry->caller));
3156
3157         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3158         if (!call_filter_check_discard(call, entry, buffer, event))
3159                 __buffer_unlock_commit(buffer, event);
3160
3161  out_drop_count:
3162         __this_cpu_dec(user_stack_count);
3163  out:
3164         preempt_enable();
3165 }
3166 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3167 static void ftrace_trace_userstack(struct trace_array *tr,
3168                                    struct trace_buffer *buffer,
3169                                    unsigned int trace_ctx)
3170 {
3171 }
3172 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3173
3174 #endif /* CONFIG_STACKTRACE */
3175
3176 static inline void
3177 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3178                           unsigned long long delta)
3179 {
3180         entry->bottom_delta_ts = delta & U32_MAX;
3181         entry->top_delta_ts = (delta >> 32);
3182 }
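
/*
 * Illustrative sketch, not part of the original file: the inverse of the
 * helper above, rebuilding the 64-bit delta from the two 32-bit halves
 * stored in the entry, as an output routine would when printing the event.
 * "example_func_repeats_get_delta_ts" is hypothetical.
 */
static inline u64
example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
        return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}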
3183
3184 void trace_last_func_repeats(struct trace_array *tr,
3185                              struct trace_func_repeats *last_info,
3186                              unsigned int trace_ctx)
3187 {
3188         struct trace_buffer *buffer = tr->array_buffer.buffer;
3189         struct func_repeats_entry *entry;
3190         struct ring_buffer_event *event;
3191         u64 delta;
3192
3193         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3194                                             sizeof(*entry), trace_ctx);
3195         if (!event)
3196                 return;
3197
3198         delta = ring_buffer_event_time_stamp(buffer, event) -
3199                 last_info->ts_last_call;
3200
3201         entry = ring_buffer_event_data(event);
3202         entry->ip = last_info->ip;
3203         entry->parent_ip = last_info->parent_ip;
3204         entry->count = last_info->count;
3205         func_repeats_set_delta_ts(entry, delta);
3206
3207         __buffer_unlock_commit(buffer, event);
3208 }
3209
3210 /* created for use with alloc_percpu */
3211 struct trace_buffer_struct {
3212         int nesting;
3213         char buffer[4][TRACE_BUF_SIZE];
3214 };
3215
3216 static struct trace_buffer_struct *trace_percpu_buffer;
3217
3218 /*
3219  * This allows for lockless recording. If we're nested too deeply, then
3220  * this returns NULL.
3221  */
3222 static char *get_trace_buf(void)
3223 {
3224         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3225
3226         if (!buffer || buffer->nesting >= 4)
3227                 return NULL;
3228
3229         buffer->nesting++;
3230
3231         /* Interrupts must see nesting incremented before we use the buffer */
3232         barrier();
3233         return &buffer->buffer[buffer->nesting - 1][0];
3234 }
3235
3236 static void put_trace_buf(void)
3237 {
3238         /* Don't let the decrement of nesting leak before this */
3239         barrier();
3240         this_cpu_dec(trace_percpu_buffer->nesting);
3241 }
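/*
 * Illustrative usage sketch (editorial, not part of the original file),
 * mirroring how trace_vbprintk() below uses the per-CPU buffer: the buffer
 * must be claimed and released with preemption disabled so that the nesting
 * count is balanced on the same CPU.
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */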
3242
3243 static int alloc_percpu_trace_buffer(void)
3244 {
3245         struct trace_buffer_struct *buffers;
3246
3247         if (trace_percpu_buffer)
3248                 return 0;
3249
3250         buffers = alloc_percpu(struct trace_buffer_struct);
3251         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3252                 return -ENOMEM;
3253
3254         trace_percpu_buffer = buffers;
3255         return 0;
3256 }
3257
3258 static int buffers_allocated;
3259
3260 void trace_printk_init_buffers(void)
3261 {
3262         if (buffers_allocated)
3263                 return;
3264
3265         if (alloc_percpu_trace_buffer())
3266                 return;
3267
3268         /* trace_printk() is for debug use only. Don't use it in production. */
3269
3270         pr_warn("\n");
3271         pr_warn("**********************************************************\n");
3272         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3273         pr_warn("**                                                      **\n");
3274         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3275         pr_warn("**                                                      **\n");
3276         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3277         pr_warn("** unsafe for production use.                           **\n");
3278         pr_warn("**                                                      **\n");
3279         pr_warn("** If you see this message and you are not debugging    **\n");
3280         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3281         pr_warn("**                                                      **\n");
3282         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3283         pr_warn("**********************************************************\n");
3284
3285         /* Expand the buffers to set size */
3286         tracing_update_buffers();
3287
3288         buffers_allocated = 1;
3289
3290         /*
3291          * trace_printk_init_buffers() can be called by modules.
3292          * If that happens, then we need to start cmdline recording
3293          * directly here. If global_trace.array_buffer.buffer is already
3294          * allocated, then this was called by module code.
3295          */
3296         if (global_trace.array_buffer.buffer)
3297                 tracing_start_cmdline_record();
3298 }
3299 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3300
3301 void trace_printk_start_comm(void)
3302 {
3303         /* Start tracing comms if trace printk is set */
3304         if (!buffers_allocated)
3305                 return;
3306         tracing_start_cmdline_record();
3307 }
3308
3309 static void trace_printk_start_stop_comm(int enabled)
3310 {
3311         if (!buffers_allocated)
3312                 return;
3313
3314         if (enabled)
3315                 tracing_start_cmdline_record();
3316         else
3317                 tracing_stop_cmdline_record();
3318 }
3319
3320 /**
3321  * trace_vbprintk - write binary msg to tracing buffer
3322  * @ip:    The address of the caller
3323  * @fmt:   The string format to write to the buffer
3324  * @args:  Arguments for @fmt
3325  */
3326 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3327 {
3328         struct trace_event_call *call = &event_bprint;
3329         struct ring_buffer_event *event;
3330         struct trace_buffer *buffer;
3331         struct trace_array *tr = &global_trace;
3332         struct bprint_entry *entry;
3333         unsigned int trace_ctx;
3334         char *tbuffer;
3335         int len = 0, size;
3336
3337         if (unlikely(tracing_selftest_running || tracing_disabled))
3338                 return 0;
3339
3340         /* Don't pollute graph traces with trace_vprintk internals */
3341         pause_graph_tracing();
3342
3343         trace_ctx = tracing_gen_ctx();
3344         preempt_disable_notrace();
3345
3346         tbuffer = get_trace_buf();
3347         if (!tbuffer) {
3348                 len = 0;
3349                 goto out_nobuffer;
3350         }
3351
3352         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3353
3354         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3355                 goto out_put;
3356
3357         size = sizeof(*entry) + sizeof(u32) * len;
3358         buffer = tr->array_buffer.buffer;
3359         ring_buffer_nest_start(buffer);
3360         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3361                                             trace_ctx);
3362         if (!event)
3363                 goto out;
3364         entry = ring_buffer_event_data(event);
3365         entry->ip                       = ip;
3366         entry->fmt                      = fmt;
3367
3368         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3369         if (!call_filter_check_discard(call, entry, buffer, event)) {
3370                 __buffer_unlock_commit(buffer, event);
3371                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3372         }
3373
3374 out:
3375         ring_buffer_nest_end(buffer);
3376 out_put:
3377         put_trace_buf();
3378
3379 out_nobuffer:
3380         preempt_enable_notrace();
3381         unpause_graph_tracing();
3382
3383         return len;
3384 }
3385 EXPORT_SYMBOL_GPL(trace_vbprintk);
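/*
 * Illustrative sketch (editorial, not part of the original file): callers
 * normally reach trace_vbprintk() through a varargs front end, along the
 * lines of __trace_bprintk() in kernel/trace/trace_printk.c. The function
 * name below is hypothetical:
 *
 *	int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 *
 * Only the fmt string pointer and the binary-packed arguments end up in
 * the ring buffer; the formatting itself is deferred until read time.
 */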
3386
3387 __printf(3, 0)
3388 static int
3389 __trace_array_vprintk(struct trace_buffer *buffer,
3390                       unsigned long ip, const char *fmt, va_list args)
3391 {
3392         struct trace_event_call *call = &event_print;
3393         struct ring_buffer_event *event;
3394         int len = 0, size;
3395         struct print_entry *entry;
3396         unsigned int trace_ctx;
3397         char *tbuffer;
3398
3399         if (tracing_disabled || tracing_selftest_running)
3400                 return 0;
3401
3402         /* Don't pollute graph traces with trace_vprintk internals */
3403         pause_graph_tracing();
3404
3405         trace_ctx = tracing_gen_ctx();
3406         preempt_disable_notrace();
3407
3408
3409         tbuffer = get_trace_buf();
3410         if (!tbuffer) {
3411                 len = 0;
3412                 goto out_nobuffer;
3413         }
3414
3415         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3416
3417         size = sizeof(*entry) + len + 1;
3418         ring_buffer_nest_start(buffer);
3419         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3420                                             trace_ctx);
3421         if (!event)
3422                 goto out;
3423         entry = ring_buffer_event_data(event);
3424         entry->ip = ip;
3425
3426         memcpy(&entry->buf, tbuffer, len + 1);
3427         if (!call_filter_check_discard(call, entry, buffer, event)) {
3428                 __buffer_unlock_commit(buffer, event);
3429                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3430         }
3431
3432 out:
3433         ring_buffer_nest_end(buffer);
3434         put_trace_buf();
3435
3436 out_nobuffer:
3437         preempt_enable_notrace();
3438         unpause_graph_tracing();
3439
3440         return len;
3441 }
3442
3443 __printf(3, 0)
3444 int trace_array_vprintk(struct trace_array *tr,
3445                         unsigned long ip, const char *fmt, va_list args)
3446 {
3447         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3448 }
3449
3450 /**
3451  * trace_array_printk - Print a message to a specific instance
3452  * @tr: The instance trace_array descriptor
3453  * @ip: The instruction pointer that this is called from.
3454  * @fmt: The format to print (printf format)
3455  *
3456  * If a subsystem sets up its own instance, they have the right to
3457  * printk strings into their tracing instance buffer using this
3458  * function. Note, this function will not write into the top level
3459  * buffer (use trace_printk() for that), as writing into the top level
3460  * buffer should only have events that can be individually disabled.
3461  * trace_printk() is only meant for debugging a kernel, and should never
3462  * be incorporated into normal use.
3463  *
3464  * trace_array_printk() can be used, as it will not add noise to the
3465  * top level tracing buffer.
3466  *
3467  * Note, trace_array_init_printk() must be called on @tr before this
3468  * can be used.
3469  */
3470 __printf(3, 0)
3471 int trace_array_printk(struct trace_array *tr,
3472                        unsigned long ip, const char *fmt, ...)
3473 {
3474         int ret;
3475         va_list ap;
3476
3477         if (!tr)
3478                 return -ENOENT;
3479
3480         /* This is only allowed for created instances */
3481         if (tr == &global_trace)
3482                 return 0;
3483
3484         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3485                 return 0;
3486
3487         va_start(ap, fmt);
3488         ret = trace_array_vprintk(tr, ip, fmt, ap);
3489         va_end(ap);
3490         return ret;
3491 }
3492 EXPORT_SYMBOL_GPL(trace_array_printk);
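/*
 * Illustrative usage sketch (editorial, not part of the original file),
 * assuming a module-created instance named "my_subsys"; the instance name
 * and 'val' are hypothetical:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe hit: %d\n", val);
 *
 * See samples/ftrace/sample-trace-array.c for a fuller example of
 * instance usage.
 */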
3493
3494 /**
3495  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3496  * @tr: The trace array to initialize the buffers for
3497  *
3498  * As trace_array_printk() only writes into instances, such calls are OK
3499  * to have in the kernel (unlike trace_printk()). This needs to be called
3500  * before trace_array_printk() can be used on a trace_array.
3501  */
3502 int trace_array_init_printk(struct trace_array *tr)
3503 {
3504         if (!tr)
3505                 return -ENOENT;
3506
3507         /* This is only allowed for created instances */
3508         if (tr == &global_trace)
3509                 return -EINVAL;
3510
3511         return alloc_percpu_trace_buffer();
3512 }
3513 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3514
3515 __printf(3, 4)
3516 int trace_array_printk_buf(struct trace_buffer *buffer,
3517                            unsigned long ip, const char *fmt, ...)
3518 {
3519         int ret;
3520         va_list ap;
3521
3522         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3523                 return 0;
3524
3525         va_start(ap, fmt);
3526         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3527         va_end(ap);
3528         return ret;
3529 }
3530
3531 __printf(2, 0)
3532 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3533 {
3534         return trace_array_vprintk(&global_trace, ip, fmt, args);
3535 }
3536 EXPORT_SYMBOL_GPL(trace_vprintk);
3537
3538 static void trace_iterator_increment(struct trace_iterator *iter)
3539 {
3540         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3541
3542         iter->idx++;
3543         if (buf_iter)
3544                 ring_buffer_iter_advance(buf_iter);
3545 }
3546
3547 static struct trace_entry *
3548 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3549                 unsigned long *lost_events)
3550 {
3551         struct ring_buffer_event *event;
3552         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3553
3554         if (buf_iter) {
3555                 event = ring_buffer_iter_peek(buf_iter, ts);
3556                 if (lost_events)
3557                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3558                                 (unsigned long)-1 : 0;
3559         } else {
3560                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3561                                          lost_events);
3562         }
3563
3564         if (event) {
3565                 iter->ent_size = ring_buffer_event_length(event);
3566                 return ring_buffer_event_data(event);
3567         }
3568         iter->ent_size = 0;
3569         return NULL;
3570 }
3571
3572 static struct trace_entry *
3573 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3574                   unsigned long *missing_events, u64 *ent_ts)
3575 {
3576         struct trace_buffer *buffer = iter->array_buffer->buffer;
3577         struct trace_entry *ent, *next = NULL;
3578         unsigned long lost_events = 0, next_lost = 0;
3579         int cpu_file = iter->cpu_file;
3580         u64 next_ts = 0, ts;
3581         int next_cpu = -1;
3582         int next_size = 0;
3583         int cpu;
3584
3585         /*
3586          * If we are in a per_cpu trace file, don't bother iterating over
3587          * all CPUs; peek at that CPU directly.
3588          */
3589         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3590                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3591                         return NULL;
3592                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3593                 if (ent_cpu)
3594                         *ent_cpu = cpu_file;
3595
3596                 return ent;
3597         }
3598
3599         for_each_tracing_cpu(cpu) {
3600
3601                 if (ring_buffer_empty_cpu(buffer, cpu))
3602                         continue;
3603
3604                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3605
3606                 /*
3607                  * Pick the entry with the smallest timestamp:
3608                  */
3609                 if (ent && (!next || ts < next_ts)) {
3610                         next = ent;
3611                         next_cpu = cpu;
3612                         next_ts = ts;
3613                         next_lost = lost_events;
3614                         next_size = iter->ent_size;
3615                 }
3616         }
3617
3618         iter->ent_size = next_size;
3619
3620         if (ent_cpu)
3621                 *ent_cpu = next_cpu;
3622
3623         if (ent_ts)
3624                 *ent_ts = next_ts;
3625
3626         if (missing_events)
3627                 *missing_events = next_lost;
3628
3629         return next;
3630 }
3631
3632 #define STATIC_FMT_BUF_SIZE     128
3633 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3634
3635 static char *trace_iter_expand_format(struct trace_iterator *iter)
3636 {
3637         char *tmp;
3638
3639         /*
3640          * iter->tr is NULL when used with tp_printk, in which case this
3641          * can be called from a context where krealloc() is not safe.
3642          */
3643         if (!iter->tr || iter->fmt == static_fmt_buf)
3644                 return NULL;
3645
3646         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3647                        GFP_KERNEL);
3648         if (tmp) {
3649                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3650                 iter->fmt = tmp;
3651         }
3652
3653         return tmp;
3654 }
3655
3656 /* Returns true if the string is safe to dereference from an event */
3657 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3658 {
3659         unsigned long addr = (unsigned long)str;
3660         struct trace_event *trace_event;
3661         struct trace_event_call *event;
3662
3663         /* OK if part of the event data */
3664         if ((addr >= (unsigned long)iter->ent) &&
3665             (addr < (unsigned long)iter->ent + iter->ent_size))
3666                 return true;
3667
3668         /* OK if part of the temp seq buffer */
3669         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3670             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3671                 return true;
3672
3673         /* Core rodata can not be freed */
3674         if (is_kernel_rodata(addr))
3675                 return true;
3676
3677         if (trace_is_tracepoint_string(str))
3678                 return true;
3679
3680         /*
3681          * Now this could be a module event, referencing core module
3682          * data, which is OK.
3683          */
3684         if (!iter->ent)
3685                 return false;
3686
3687         trace_event = ftrace_find_event(iter->ent->type);
3688         if (!trace_event)
3689                 return false;
3690
3691         event = container_of(trace_event, struct trace_event_call, event);
3692         if (!event->mod)
3693                 return false;
3694
3695         /* Would rather have rodata, but this will suffice */
3696         if (within_module_core(addr, event->mod))
3697                 return true;
3698
3699         return false;
3700 }
3701
3702 static const char *show_buffer(struct trace_seq *s)
3703 {
3704         struct seq_buf *seq = &s->seq;
3705
3706         seq_buf_terminate(seq);
3707
3708         return seq->buffer;
3709 }
3710
3711 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3712
3713 static int test_can_verify_check(const char *fmt, ...)
3714 {
3715         char buf[16];
3716         va_list ap;
3717         int ret;
3718
3719         /*
3720          * The verifier depends on vsnprintf() modifying the va_list passed
3721          * to it, i.e. on the va_list being passed by reference. Some
3722          * architectures (like x86_32) pass it by value, which means that
3723          * vsnprintf() does not modify the caller's va_list, and the verifier
3724          * would then need to understand every value that vsnprintf() can
3725          * consume. If the va_list is passed by value, the verifier is
3726          * simply disabled.
3727          */
3728         va_start(ap, fmt);
3729         vsnprintf(buf, 16, "%d", ap);
3730         ret = va_arg(ap, int);
3731         va_end(ap);
3732
3733         return ret;
3734 }
3735
3736 static void test_can_verify(void)
3737 {
3738         if (!test_can_verify_check("%d %d", 0, 1)) {
3739                 pr_info("trace event string verifier disabled\n");
3740                 static_branch_inc(&trace_no_verify);
3741         }
3742 }
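/*
 * Worked example (editorial, not part of the original file) of the check
 * above: test_can_verify_check("%d %d", 0, 1) asks vsnprintf() to format
 * only "%d". If the architecture passes va_list by reference, vsnprintf()
 * consumes the first argument (0) and the following va_arg() returns 1,
 * so the verifier stays enabled. If va_list is passed by value, va_arg()
 * still sees the first argument (0) and the verifier is disabled.
 */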
3743
3744 /**
3745  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3746  * @iter: The iterator that holds the seq buffer and the event being printed
3747  * @fmt: The format used to print the event
3748  * @ap: The va_list holding the data to print from @fmt.
3749  *
3750  * This writes the data into the @iter->seq buffer using the data from
3751  * @fmt and @ap. If the format has a %s, then the source of the string
3752  * is examined to make sure it is safe to print, otherwise it will
3753  * is examined to make sure it is safe to print; otherwise it will
3754  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3755  */
3756 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3757                          va_list ap)
3758 {
3759         const char *p = fmt;
3760         const char *str;
3761         int i, j;
3762
3763         if (WARN_ON_ONCE(!fmt))
3764                 return;
3765
3766         if (static_branch_unlikely(&trace_no_verify))
3767                 goto print;
3768
3769         /* Don't bother checking when doing a ftrace_dump() */
3770         if (iter->fmt == static_fmt_buf)
3771                 goto print;
3772
3773         while (*p) {
3774                 bool star = false;
3775                 int len = 0;
3776
3777                 j = 0;
3778
3779                 /* We only care about %s and variants */
3780                 for (i = 0; p[i]; i++) {
3781                         if (i + 1 >= iter->fmt_size) {
3782                                 /*
3783                                  * If we can't expand the copy buffer,
3784                                  * just print it.
3785                                  */
3786                                 if (!trace_iter_expand_format(iter))
3787                                         goto print;
3788                         }
3789
3790                         if (p[i] == '\\' && p[i+1]) {
3791                                 i++;
3792                                 continue;
3793                         }
3794                         if (p[i] == '%') {
3795                                 /* Need to test cases like %08.*s */
3796                                 for (j = 1; p[i+j]; j++) {
3797                                         if (isdigit(p[i+j]) ||
3798                                             p[i+j] == '.')
3799                                                 continue;
3800                                         if (p[i+j] == '*') {
3801                                                 star = true;
3802                                                 continue;
3803                                         }
3804                                         break;
3805                                 }
3806                                 if (p[i+j] == 's')
3807                                         break;
3808                                 star = false;
3809                         }
3810                         j = 0;
3811                 }
3812                 /* If no %s found then just print normally */
3813                 if (!p[i])
3814                         break;
3815
3816                 /* Copy up to the %s, and print that */
3817                 strncpy(iter->fmt, p, i);
3818                 iter->fmt[i] = '\0';
3819                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3820
3821                 if (star)
3822                         len = va_arg(ap, int);
3823
3824                 /* The ap now points to the string data of the %s */
3825                 str = va_arg(ap, const char *);
3826
3827                 /*
3828                  * If you hit this warning, it is likely that the
3829                  * trace event in question used %s on a string that
3830                  * was saved at the time of the event, but may not be
3831                  * around when the trace is read. Use __string(),
3832                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3833                  * instead. See samples/trace_events/trace-events-sample.h
3834                  * for reference.
3835                  */
3836                 if (WARN_ONCE(!trace_safe_str(iter, str),
3837                               "fmt: '%s' current_buffer: '%s'",
3838                               fmt, show_buffer(&iter->seq))) {
3839                         int ret;
3840
3841                         /* Try to safely read the string */
3842                         if (star) {
3843                                 if (len + 1 > iter->fmt_size)
3844                                         len = iter->fmt_size - 1;
3845                                 if (len < 0)
3846                                         len = 0;
3847                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3848                                 iter->fmt[len] = 0;
3849                                 star = false;
3850                         } else {
3851                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3852                                                                   iter->fmt_size);
3853                         }
3854                         if (ret < 0)
3855                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3856                         else
3857                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3858                                                  str, iter->fmt);
3859                         str = "[UNSAFE-MEMORY]";
3860                         strcpy(iter->fmt, "%s");
3861                 } else {
3862                         strncpy(iter->fmt, p + i, j + 1);
3863                         iter->fmt[j+1] = '\0';
3864                 }
3865                 if (star)
3866                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3867                 else
3868                         trace_seq_printf(&iter->seq, iter->fmt, str);
3869
3870                 p += i + j + 1;
3871         }
3872  print:
3873         if (*p)
3874                 trace_seq_vprintf(&iter->seq, p, ap);
3875 }
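/*
 * Illustrative sketch (editorial, not part of the original file) of the
 * safe pattern the warning above points at; the event and field names are
 * hypothetical:
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * __string()/__assign_str() copy the string into the ring buffer at event
 * time, so the pointer handed to "%s" always satisfies trace_safe_str().
 */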
3876
3877 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3878 {
3879         const char *p, *new_fmt;
3880         char *q;
3881
3882         if (WARN_ON_ONCE(!fmt))
3883                 return fmt;
3884
3885         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3886                 return fmt;
3887
3888         p = fmt;
3889         new_fmt = q = iter->fmt;
3890         while (*p) {
3891                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3892                         if (!trace_iter_expand_format(iter))
3893                                 return fmt;
3894
3895                         q += iter->fmt - new_fmt;
3896                         new_fmt = iter->fmt;
3897                 }
3898
3899                 *q++ = *p++;
3900
3901                 /* Replace %p with %px */
3902                 if (p[-1] == '%') {
3903                         if (p[0] == '%') {
3904                                 *q++ = *p++;
3905                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3906                                 *q++ = *p++;
3907                                 *q++ = 'x';
3908                         }
3909                 }
3910         }
3911         *q = '\0';
3912
3913         return new_fmt;
3914 }
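/*
 * Illustrative example (editorial, not part of the original file) of the
 * rewrite above when the hash-ptr trace option is cleared: a format such as
 *
 *	"comm=%s ptr=%p func=%pS count=%%p"
 *
 * is copied into iter->fmt as
 *
 *	"comm=%s ptr=%px func=%pS count=%%p"
 *
 * Only a bare %p is widened to %px; %pS and friends (an alphanumeric right
 * after the 'p') and the literal "%%p" are left untouched.
 */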
3915
3916 #define STATIC_TEMP_BUF_SIZE    128
3917 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3918
3919 /* Find the next real entry, without updating the iterator itself */
3920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3921                                           int *ent_cpu, u64 *ent_ts)
3922 {
3923         /* __find_next_entry will reset ent_size */
3924         int ent_size = iter->ent_size;
3925         struct trace_entry *entry;
3926
3927         /*
3928          * If called from ftrace_dump(), then the iter->temp buffer
3929          * will be the static_temp_buf and not created from kmalloc.
3930          * If the entry size is greater than the buffer, we can
3931          * not save it. Just return NULL in that case. This is only
3932          * used to add markers when two consecutive events' time
3933          * stamps have a large delta. See trace_print_lat_context()
3934          */
3935         if (iter->temp == static_temp_buf &&
3936             STATIC_TEMP_BUF_SIZE < ent_size)
3937                 return NULL;
3938
3939         /*
3940          * The __find_next_entry() may call peek_next_entry(), which may
3941          * call ring_buffer_peek() that may make the contents of iter->ent
3942          * undefined. Need to copy iter->ent now.
3943          */
3944         if (iter->ent && iter->ent != iter->temp) {
3945                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3946                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3947                         void *temp;
3948                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3949                         if (!temp)
3950                                 return NULL;
3951                         kfree(iter->temp);
3952                         iter->temp = temp;
3953                         iter->temp_size = iter->ent_size;
3954                 }
3955                 memcpy(iter->temp, iter->ent, iter->ent_size);
3956                 iter->ent = iter->temp;
3957         }
3958         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3959         /* Put back the original ent_size */
3960         iter->ent_size = ent_size;
3961
3962         return entry;
3963 }
3964
3965 /* Find the next real entry, and increment the iterator to the next entry */
3966 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3967 {
3968         iter->ent = __find_next_entry(iter, &iter->cpu,
3969                                       &iter->lost_events, &iter->ts);
3970
3971         if (iter->ent)
3972                 trace_iterator_increment(iter);
3973
3974         return iter->ent ? iter : NULL;
3975 }
3976
3977 static void trace_consume(struct trace_iterator *iter)
3978 {
3979         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3980                             &iter->lost_events);
3981 }
3982
3983 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3984 {
3985         struct trace_iterator *iter = m->private;
3986         int i = (int)*pos;
3987         void *ent;
3988
3989         WARN_ON_ONCE(iter->leftover);
3990
3991         (*pos)++;
3992
3993         /* can't go backwards */
3994         if (iter->idx > i)
3995                 return NULL;
3996
3997         if (iter->idx < 0)
3998                 ent = trace_find_next_entry_inc(iter);
3999         else
4000                 ent = iter;
4001
4002         while (ent && iter->idx < i)
4003                 ent = trace_find_next_entry_inc(iter);
4004
4005         iter->pos = *pos;
4006
4007         return ent;
4008 }
4009
4010 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4011 {
4012         struct ring_buffer_iter *buf_iter;
4013         unsigned long entries = 0;
4014         u64 ts;
4015
4016         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4017
4018         buf_iter = trace_buffer_iter(iter, cpu);
4019         if (!buf_iter)
4020                 return;
4021
4022         ring_buffer_iter_reset(buf_iter);
4023
4024         /*
4025          * With the max latency tracers, it can happen that a reset
4026          * never took place on a CPU. This is evident when the
4027          * timestamp is before the start of the buffer.
4028          */
4029         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4030                 if (ts >= iter->array_buffer->time_start)
4031                         break;
4032                 entries++;
4033                 ring_buffer_iter_advance(buf_iter);
4034         }
4035
4036         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4037 }
4038
4039 /*
4040  * The current tracer is copied to avoid holding a global lock
4041  * all around.
4042  */
4043 static void *s_start(struct seq_file *m, loff_t *pos)
4044 {
4045         struct trace_iterator *iter = m->private;
4046         struct trace_array *tr = iter->tr;
4047         int cpu_file = iter->cpu_file;
4048         void *p = NULL;
4049         loff_t l = 0;
4050         int cpu;
4051
4052         /*
4053          * copy the tracer to avoid using a global lock all around.
4054          * iter->trace is a copy of current_trace, the pointer to the
4055          * name may be used instead of a strcmp(), as iter->trace->name
4056          * will point to the same string as current_trace->name.
4057          */
4058         mutex_lock(&trace_types_lock);
4059         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4060                 *iter->trace = *tr->current_trace;
4061         mutex_unlock(&trace_types_lock);
4062
4063 #ifdef CONFIG_TRACER_MAX_TRACE
4064         if (iter->snapshot && iter->trace->use_max_tr)
4065                 return ERR_PTR(-EBUSY);
4066 #endif
4067
4068         if (!iter->snapshot)
4069                 atomic_inc(&trace_record_taskinfo_disabled);
4070
4071         if (*pos != iter->pos) {
4072                 iter->ent = NULL;
4073                 iter->cpu = 0;
4074                 iter->idx = -1;
4075
4076                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4077                         for_each_tracing_cpu(cpu)
4078                                 tracing_iter_reset(iter, cpu);
4079                 } else
4080                         tracing_iter_reset(iter, cpu_file);
4081
4082                 iter->leftover = 0;
4083                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4084                         ;
4085
4086         } else {
4087                 /*
4088                  * If we overflowed the seq_file before, then we want
4089                  * to just reuse the trace_seq buffer again.
4090                  */
4091                 if (iter->leftover)
4092                         p = iter;
4093                 else {
4094                         l = *pos - 1;
4095                         p = s_next(m, p, &l);
4096                 }
4097         }
4098
4099         trace_event_read_lock();
4100         trace_access_lock(cpu_file);
4101         return p;
4102 }
4103
4104 static void s_stop(struct seq_file *m, void *p)
4105 {
4106         struct trace_iterator *iter = m->private;
4107
4108 #ifdef CONFIG_TRACER_MAX_TRACE
4109         if (iter->snapshot && iter->trace->use_max_tr)
4110                 return;
4111 #endif
4112
4113         if (!iter->snapshot)
4114                 atomic_dec(&trace_record_taskinfo_disabled);
4115
4116         trace_access_unlock(iter->cpu_file);
4117         trace_event_read_unlock();
4118 }
4119
4120 static void
4121 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4122                       unsigned long *entries, int cpu)
4123 {
4124         unsigned long count;
4125
4126         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4127         /*
4128          * If this buffer has skipped entries, then we hold all
4129          * entries for the trace and we need to ignore the
4130          * ones before the time stamp.
4131          */
4132         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4133                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4134                 /* total is the same as the entries */
4135                 *total = count;
4136         } else
4137                 *total = count +
4138                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4139         *entries = count;
4140 }
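/*
 * Worked example (editorial, not part of the original file; the numbers are
 * illustrative): with 950 events still in a CPU buffer and 50 older events
 * overwritten (the overrun count), *entries is 950 and *total is 1000.
 * print_event_info() below reports exactly this pair as
 * "entries-in-buffer/entries-written: 950/1000".
 */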
4141
4142 static void
4143 get_total_entries(struct array_buffer *buf,
4144                   unsigned long *total, unsigned long *entries)
4145 {
4146         unsigned long t, e;
4147         int cpu;
4148
4149         *total = 0;
4150         *entries = 0;
4151
4152         for_each_tracing_cpu(cpu) {
4153                 get_total_entries_cpu(buf, &t, &e, cpu);
4154                 *total += t;
4155                 *entries += e;
4156         }
4157 }
4158
4159 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4160 {
4161         unsigned long total, entries;
4162
4163         if (!tr)
4164                 tr = &global_trace;
4165
4166         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4167
4168         return entries;
4169 }
4170
4171 unsigned long trace_total_entries(struct trace_array *tr)
4172 {
4173         unsigned long total, entries;
4174
4175         if (!tr)
4176                 tr = &global_trace;
4177
4178         get_total_entries(&tr->array_buffer, &total, &entries);
4179
4180         return entries;
4181 }
4182
4183 static void print_lat_help_header(struct seq_file *m)
4184 {
4185         seq_puts(m, "#                    _------=> CPU#            \n"
4186                     "#                   / _-----=> irqs-off        \n"
4187                     "#                  | / _----=> need-resched    \n"
4188                     "#                  || / _---=> hardirq/softirq \n"
4189                     "#                  ||| / _--=> preempt-depth   \n"
4190                     "#                  |||| /     delay            \n"
4191                     "#  cmd     pid     ||||| time  |   caller      \n"
4192                     "#     \\   /        |||||  \\    |   /         \n");
4193 }
4194
4195 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4196 {
4197         unsigned long total;
4198         unsigned long entries;
4199
4200         get_total_entries(buf, &total, &entries);
4201         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4202                    entries, total, num_online_cpus());
4203         seq_puts(m, "#\n");
4204 }
4205
4206 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4207                                    unsigned int flags)
4208 {
4209         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4210
4211         print_event_info(buf, m);
4212
4213         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4214         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4215 }
4216
4217 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4218                                        unsigned int flags)
4219 {
4220         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4221         const char *space = "            ";
4222         int prec = tgid ? 12 : 2;
4223
4224         print_event_info(buf, m);
4225
4226         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4227         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4228         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4229         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4230         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4231         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4232         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4233 }
4234
4235 void
4236 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4237 {
4238         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4239         struct array_buffer *buf = iter->array_buffer;
4240         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4241         struct tracer *type = iter->trace;
4242         unsigned long entries;
4243         unsigned long total;
4244         const char *name = "preemption";
4245
4246         name = type->name;
4247
4248         get_total_entries(buf, &total, &entries);
4249
4250         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4251                    name, UTS_RELEASE);
4252         seq_puts(m, "# -----------------------------------"
4253                  "---------------------------------\n");
4254         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4255                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4256                    nsecs_to_usecs(data->saved_latency),
4257                    entries,
4258                    total,
4259                    buf->cpu,
4260 #if defined(CONFIG_PREEMPT_NONE)
4261                    "server",
4262 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4263                    "desktop",
4264 #elif defined(CONFIG_PREEMPT)
4265                    "preempt",
4266 #elif defined(CONFIG_PREEMPT_RT)
4267                    "preempt_rt",
4268 #else
4269                    "unknown",
4270 #endif
4271                    /* These are reserved for later use */
4272                    0, 0, 0, 0);
4273 #ifdef CONFIG_SMP
4274         seq_printf(m, " #P:%d)\n", num_online_cpus());
4275 #else
4276         seq_puts(m, ")\n");
4277 #endif
4278         seq_puts(m, "#    -----------------\n");
4279         seq_printf(m, "#    | task: %.16s-%d "
4280                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4281                    data->comm, data->pid,
4282                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4283                    data->policy, data->rt_priority);
4284         seq_puts(m, "#    -----------------\n");
4285
4286         if (data->critical_start) {
4287                 seq_puts(m, "#  => started at: ");
4288                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4289                 trace_print_seq(m, &iter->seq);
4290                 seq_puts(m, "\n#  => ended at:   ");
4291                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4292                 trace_print_seq(m, &iter->seq);
4293                 seq_puts(m, "\n#\n");
4294         }
4295
4296         seq_puts(m, "#\n");
4297 }
4298
4299 static void test_cpu_buff_start(struct trace_iterator *iter)
4300 {
4301         struct trace_seq *s = &iter->seq;
4302         struct trace_array *tr = iter->tr;
4303
4304         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4305                 return;
4306
4307         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4308                 return;
4309
4310         if (cpumask_available(iter->started) &&
4311             cpumask_test_cpu(iter->cpu, iter->started))
4312                 return;
4313
4314         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4315                 return;
4316
4317         if (cpumask_available(iter->started))
4318                 cpumask_set_cpu(iter->cpu, iter->started);
4319
4320         /* Don't print started cpu buffer for the first entry of the trace */
4321         if (iter->idx > 1)
4322                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4323                                 iter->cpu);
4324 }
4325
4326 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4327 {
4328         struct trace_array *tr = iter->tr;
4329         struct trace_seq *s = &iter->seq;
4330         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4331         struct trace_entry *entry;
4332         struct trace_event *event;
4333
4334         entry = iter->ent;
4335
4336         test_cpu_buff_start(iter);
4337
4338         event = ftrace_find_event(entry->type);
4339
4340         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4341                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4342                         trace_print_lat_context(iter);
4343                 else
4344                         trace_print_context(iter);
4345         }
4346
4347         if (trace_seq_has_overflowed(s))
4348                 return TRACE_TYPE_PARTIAL_LINE;
4349
4350         if (event)
4351                 return event->funcs->trace(iter, sym_flags, event);
4352
4353         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4354
4355         return trace_handle_return(s);
4356 }
4357
4358 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4359 {
4360         struct trace_array *tr = iter->tr;
4361         struct trace_seq *s = &iter->seq;
4362         struct trace_entry *entry;
4363         struct trace_event *event;
4364
4365         entry = iter->ent;
4366
4367         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4368                 trace_seq_printf(s, "%d %d %llu ",
4369                                  entry->pid, iter->cpu, iter->ts);
4370
4371         if (trace_seq_has_overflowed(s))
4372                 return TRACE_TYPE_PARTIAL_LINE;
4373
4374         event = ftrace_find_event(entry->type);
4375         if (event)
4376                 return event->funcs->raw(iter, 0, event);
4377
4378         trace_seq_printf(s, "%d ?\n", entry->type);
4379
4380         return trace_handle_return(s);
4381 }
4382
4383 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4384 {
4385         struct trace_array *tr = iter->tr;
4386         struct trace_seq *s = &iter->seq;
4387         unsigned char newline = '\n';
4388         struct trace_entry *entry;
4389         struct trace_event *event;
4390
4391         entry = iter->ent;
4392
4393         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4394                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4395                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4396                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4397                 if (trace_seq_has_overflowed(s))
4398                         return TRACE_TYPE_PARTIAL_LINE;
4399         }
4400
4401         event = ftrace_find_event(entry->type);
4402         if (event) {
4403                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4404                 if (ret != TRACE_TYPE_HANDLED)
4405                         return ret;
4406         }
4407
4408         SEQ_PUT_FIELD(s, newline);
4409
4410         return trace_handle_return(s);
4411 }
4412
4413 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4414 {
4415         struct trace_array *tr = iter->tr;
4416         struct trace_seq *s = &iter->seq;
4417         struct trace_entry *entry;
4418         struct trace_event *event;
4419
4420         entry = iter->ent;
4421
4422         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4423                 SEQ_PUT_FIELD(s, entry->pid);
4424                 SEQ_PUT_FIELD(s, iter->cpu);
4425                 SEQ_PUT_FIELD(s, iter->ts);
4426                 if (trace_seq_has_overflowed(s))
4427                         return TRACE_TYPE_PARTIAL_LINE;
4428         }
4429
4430         event = ftrace_find_event(entry->type);
4431         return event ? event->funcs->binary(iter, 0, event) :
4432                 TRACE_TYPE_HANDLED;
4433 }
4434
4435 int trace_empty(struct trace_iterator *iter)
4436 {
4437         struct ring_buffer_iter *buf_iter;
4438         int cpu;
4439
4440         /* If we are looking at one CPU buffer, only check that one */
4441         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4442                 cpu = iter->cpu_file;
4443                 buf_iter = trace_buffer_iter(iter, cpu);
4444                 if (buf_iter) {
4445                         if (!ring_buffer_iter_empty(buf_iter))
4446                                 return 0;
4447                 } else {
4448                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4449                                 return 0;
4450                 }
4451                 return 1;
4452         }
4453
4454         for_each_tracing_cpu(cpu) {
4455                 buf_iter = trace_buffer_iter(iter, cpu);
4456                 if (buf_iter) {
4457                         if (!ring_buffer_iter_empty(buf_iter))
4458                                 return 0;
4459                 } else {
4460                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4461                                 return 0;
4462                 }
4463         }
4464
4465         return 1;
4466 }
4467
4468 /*  Called with trace_event_read_lock() held. */
4469 enum print_line_t print_trace_line(struct trace_iterator *iter)
4470 {
4471         struct trace_array *tr = iter->tr;
4472         unsigned long trace_flags = tr->trace_flags;
4473         enum print_line_t ret;
4474
4475         if (iter->lost_events) {
4476                 if (iter->lost_events == (unsigned long)-1)
4477                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4478                                          iter->cpu);
4479                 else
4480                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4481                                          iter->cpu, iter->lost_events);
4482                 if (trace_seq_has_overflowed(&iter->seq))
4483                         return TRACE_TYPE_PARTIAL_LINE;
4484         }
4485
4486         if (iter->trace && iter->trace->print_line) {
4487                 ret = iter->trace->print_line(iter);
4488                 if (ret != TRACE_TYPE_UNHANDLED)
4489                         return ret;
4490         }
4491
4492         if (iter->ent->type == TRACE_BPUTS &&
4493                         trace_flags & TRACE_ITER_PRINTK &&
4494                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495                 return trace_print_bputs_msg_only(iter);
4496
4497         if (iter->ent->type == TRACE_BPRINT &&
4498                         trace_flags & TRACE_ITER_PRINTK &&
4499                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4500                 return trace_print_bprintk_msg_only(iter);
4501
4502         if (iter->ent->type == TRACE_PRINT &&
4503                         trace_flags & TRACE_ITER_PRINTK &&
4504                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4505                 return trace_print_printk_msg_only(iter);
4506
4507         if (trace_flags & TRACE_ITER_BIN)
4508                 return print_bin_fmt(iter);
4509
4510         if (trace_flags & TRACE_ITER_HEX)
4511                 return print_hex_fmt(iter);
4512
4513         if (trace_flags & TRACE_ITER_RAW)
4514                 return print_raw_fmt(iter);
4515
4516         return print_trace_fmt(iter);
4517 }
4518
4519 void trace_latency_header(struct seq_file *m)
4520 {
4521         struct trace_iterator *iter = m->private;
4522         struct trace_array *tr = iter->tr;
4523
4524         /* print nothing if the buffers are empty */
4525         if (trace_empty(iter))
4526                 return;
4527
4528         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4529                 print_trace_header(m, iter);
4530
4531         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4532                 print_lat_help_header(m);
4533 }
4534
4535 void trace_default_header(struct seq_file *m)
4536 {
4537         struct trace_iterator *iter = m->private;
4538         struct trace_array *tr = iter->tr;
4539         unsigned long trace_flags = tr->trace_flags;
4540
4541         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4542                 return;
4543
4544         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4545                 /* print nothing if the buffers are empty */
4546                 if (trace_empty(iter))
4547                         return;
4548                 print_trace_header(m, iter);
4549                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4550                         print_lat_help_header(m);
4551         } else {
4552                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4553                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4554                                 print_func_help_header_irq(iter->array_buffer,
4555                                                            m, trace_flags);
4556                         else
4557                                 print_func_help_header(iter->array_buffer, m,
4558                                                        trace_flags);
4559                 }
4560         }
4561 }
4562
4563 static void test_ftrace_alive(struct seq_file *m)
4564 {
4565         if (!ftrace_is_dead())
4566                 return;
4567         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4568                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4569 }
4570
4571 #ifdef CONFIG_TRACER_MAX_TRACE
4572 static void show_snapshot_main_help(struct seq_file *m)
4573 {
4574         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4575                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4576                     "#                      Takes a snapshot of the main buffer.\n"
4577                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4578                     "#                      (Doesn't have to be '2' works with any number that\n"
4579                     "#                       is not a '0' or '1')\n");
4580 }
4581
4582 static void show_snapshot_percpu_help(struct seq_file *m)
4583 {
4584         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4585 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4586         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4587                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4588 #else
4589         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4590                     "#                     Must use main snapshot file to allocate.\n");
4591 #endif
4592         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4593                     "#                      (Doesn't have to be '2' works with any number that\n"
4594                     "#                       is not a '0' or '1')\n");
4595 }
4596
4597 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4598 {
4599         if (iter->tr->allocated_snapshot)
4600                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4601         else
4602                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4603
4604         seq_puts(m, "# Snapshot commands:\n");
4605         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4606                 show_snapshot_main_help(m);
4607         else
4608                 show_snapshot_percpu_help(m);
4609 }
4610 #else
4611 /* Should never be called */
4612 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4613 #endif
4614
4615 static int s_show(struct seq_file *m, void *v)
4616 {
4617         struct trace_iterator *iter = v;
4618         int ret;
4619
4620         if (iter->ent == NULL) {
4621                 if (iter->tr) {
4622                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4623                         seq_puts(m, "#\n");
4624                         test_ftrace_alive(m);
4625                 }
4626                 if (iter->snapshot && trace_empty(iter))
4627                         print_snapshot_help(m, iter);
4628                 else if (iter->trace && iter->trace->print_header)
4629                         iter->trace->print_header(m);
4630                 else
4631                         trace_default_header(m);
4632
4633         } else if (iter->leftover) {
4634                 /*
4635                  * If we filled the seq_file buffer earlier, we
4636                  * want to just show it now.
4637                  */
4638                 ret = trace_print_seq(m, &iter->seq);
4639
4640                 /* ret should this time be zero, but you never know */
4641                 iter->leftover = ret;
4642
4643         } else {
4644                 print_trace_line(iter);
4645                 ret = trace_print_seq(m, &iter->seq);
4646                 /*
4647                  * If we overflow the seq_file buffer, then it will
4648                  * ask us for this data again at start up.
4649                  * Use that instead.
4650                  *  ret is 0 if seq_file write succeeded.
4651                  *        -1 otherwise.
4652                  */
4653                 iter->leftover = ret;
4654         }
4655
4656         return 0;
4657 }
4658
4659 /*
4660  * Should be used after trace_array_get(); trace_types_lock
4661  * ensures that i_cdev was already initialized.
4662  */
4663 static inline int tracing_get_cpu(struct inode *inode)
4664 {
4665         if (inode->i_cdev) /* See trace_create_cpu_file() */
4666                 return (long)inode->i_cdev - 1;
4667         return RING_BUFFER_ALL_CPUS;
4668 }
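/*
 * Illustrative sketch (editorial, not part of the original file), assuming
 * trace_create_cpu_file() stored (cpu + 1) in i_cdev so that a NULL i_cdev
 * can still mean "all CPUs":
 *
 *	i_cdev == NULL		-> RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)1	-> CPU 0
 *	i_cdev == (void *)3	-> CPU 2
 */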
4669
4670 static const struct seq_operations tracer_seq_ops = {
4671         .start          = s_start,
4672         .next           = s_next,
4673         .stop           = s_stop,
4674         .show           = s_show,
4675 };
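/*
 * Illustrative sketch (editorial, not part of the original file) of how the
 * seq_file core drives these callbacks for one pass over the "trace" file
 * (simplified; the real seq_read() also handles buffer overflow and partial
 * reads):
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * s_start() takes trace_event_read_lock()/trace_access_lock() and
 * repositions the iterator; s_stop() drops them again.
 */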
4676
4677 static struct trace_iterator *
4678 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4679 {
4680         struct trace_array *tr = inode->i_private;
4681         struct trace_iterator *iter;
4682         int cpu;
4683
4684         if (tracing_disabled)
4685                 return ERR_PTR(-ENODEV);
4686
4687         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4688         if (!iter)
4689                 return ERR_PTR(-ENOMEM);
4690
4691         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4692                                     GFP_KERNEL);
4693         if (!iter->buffer_iter)
4694                 goto release;
4695
4696         /*
4697          * trace_find_next_entry() may need to save off iter->ent.
4698          * It will place it into the iter->temp buffer. As most
4699          * events are smaller than 128 bytes, allocate a buffer of that size.
4700          * If one is greater, then trace_find_next_entry() will
4701          * allocate a new buffer to adjust for the bigger iter->ent.
4702          * It's not critical if it fails to get allocated here.
4703          */
4704         iter->temp = kmalloc(128, GFP_KERNEL);
4705         if (iter->temp)
4706                 iter->temp_size = 128;
4707
4708         /*
4709          * trace_event_printf() may need to modify the given format
4710          * string to replace %p with %px so that it shows the real address
4711          * instead of a hashed value. However, that is only needed for
4712          * event tracing; other tracers may not need it. Defer the
4713          * allocation until it is needed.
4714          */
4715         iter->fmt = NULL;
4716         iter->fmt_size = 0;
4717
4718         /*
4719          * We make a copy of the current tracer to avoid concurrent
4720          * changes on it while we are reading.
4721          */
4722         mutex_lock(&trace_types_lock);
4723         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4724         if (!iter->trace)
4725                 goto fail;
4726
4727         *iter->trace = *tr->current_trace;
4728
4729         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4730                 goto fail;
4731
4732         iter->tr = tr;
4733
4734 #ifdef CONFIG_TRACER_MAX_TRACE
4735         /* Currently only the top directory has a snapshot */
4736         if (tr->current_trace->print_max || snapshot)
4737                 iter->array_buffer = &tr->max_buffer;
4738         else
4739 #endif
4740                 iter->array_buffer = &tr->array_buffer;
4741         iter->snapshot = snapshot;
4742         iter->pos = -1;
4743         iter->cpu_file = tracing_get_cpu(inode);
4744         mutex_init(&iter->mutex);
4745
4746         /* Notify the tracer early; before we stop tracing. */
4747         if (iter->trace->open)
4748                 iter->trace->open(iter);
4749
4750         /* Annotate start of buffers if we had overruns */
4751         if (ring_buffer_overruns(iter->array_buffer->buffer))
4752                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4753
4754         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4755         if (trace_clocks[tr->clock_id].in_ns)
4756                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4757
4758         /*
4759          * If pause-on-trace is enabled, then stop the trace while
4760          * dumping, unless this is the "snapshot" file
4761          */
4762         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4763                 tracing_stop_tr(tr);
4764
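             /*
              * Prepare an iterator for each CPU first so that a single
              * ring_buffer_read_prepare_sync() call can cover all of them
              * before reading is started.
              */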
4765         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4766                 for_each_tracing_cpu(cpu) {
4767                         iter->buffer_iter[cpu] =
4768                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4769                                                          cpu, GFP_KERNEL);
4770                 }
4771                 ring_buffer_read_prepare_sync();
4772                 for_each_tracing_cpu(cpu) {
4773                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4774                         tracing_iter_reset(iter, cpu);
4775                 }
4776         } else {
4777                 cpu = iter->cpu_file;
4778                 iter->buffer_iter[cpu] =
4779                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4780                                                  cpu, GFP_KERNEL);
4781                 ring_buffer_read_prepare_sync();
4782                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4783                 tracing_iter_reset(iter, cpu);
4784         }
4785
4786         mutex_unlock(&trace_types_lock);
4787
4788         return iter;
4789
4790  fail:
4791         mutex_unlock(&trace_types_lock);
4792         kfree(iter->trace);
4793         kfree(iter->temp);
4794         kfree(iter->buffer_iter);
4795 release:
4796         seq_release_private(inode, file);
4797         return ERR_PTR(-ENOMEM);
4798 }
4799
4800 int tracing_open_generic(struct inode *inode, struct file *filp)
4801 {
4802         int ret;
4803
4804         ret = tracing_check_open_get_tr(NULL);
4805         if (ret)
4806                 return ret;
4807
4808         filp->private_data = inode->i_private;
4809         return 0;
4810 }
4811
4812 bool tracing_is_disabled(void)
4813 {
4814         return (tracing_disabled) ? true : false;
4815 }
4816
4817 /*
4818  * Open and update trace_array ref count.
4819  * Must have the current trace_array passed to it.
4820  */
4821 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4822 {
4823         struct trace_array *tr = inode->i_private;
4824         int ret;
4825
4826         ret = tracing_check_open_get_tr(tr);
4827         if (ret)
4828                 return ret;
4829
4830         filp->private_data = inode->i_private;
4831
4832         return 0;
4833 }
4834
4835 static int tracing_release(struct inode *inode, struct file *file)
4836 {
4837         struct trace_array *tr = inode->i_private;
4838         struct seq_file *m = file->private_data;
4839         struct trace_iterator *iter;
4840         int cpu;
4841
4842         if (!(file->f_mode & FMODE_READ)) {
4843                 trace_array_put(tr);
4844                 return 0;
4845         }
4846
4847         /* Writes do not use seq_file */
4848         iter = m->private;
4849         mutex_lock(&trace_types_lock);
4850
4851         for_each_tracing_cpu(cpu) {
4852                 if (iter->buffer_iter[cpu])
4853                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4854         }
4855
4856         if (iter->trace && iter->trace->close)
4857                 iter->trace->close(iter);
4858
4859         if (!iter->snapshot && tr->stop_count)
4860                 /* reenable tracing if it was previously enabled */
4861                 tracing_start_tr(tr);
4862
4863         __trace_array_put(tr);
4864
4865         mutex_unlock(&trace_types_lock);
4866
4867         mutex_destroy(&iter->mutex);
4868         free_cpumask_var(iter->started);
4869         kfree(iter->fmt);
4870         kfree(iter->temp);
4871         kfree(iter->trace);
4872         kfree(iter->buffer_iter);
4873         seq_release_private(inode, file);
4874
4875         return 0;
4876 }
4877
4878 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4879 {
4880         struct trace_array *tr = inode->i_private;
4881
4882         trace_array_put(tr);
4883         return 0;
4884 }
4885
4886 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4887 {
4888         struct trace_array *tr = inode->i_private;
4889
4890         trace_array_put(tr);
4891
4892         return single_release(inode, file);
4893 }
4894
4895 static int tracing_open(struct inode *inode, struct file *file)
4896 {
4897         struct trace_array *tr = inode->i_private;
4898         struct trace_iterator *iter;
4899         int ret;
4900
4901         ret = tracing_check_open_get_tr(tr);
4902         if (ret)
4903                 return ret;
4904
4905         /* If this file was open for write, then erase contents */
4906         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4907                 int cpu = tracing_get_cpu(inode);
4908                 struct array_buffer *trace_buf = &tr->array_buffer;
4909
4910 #ifdef CONFIG_TRACER_MAX_TRACE
4911                 if (tr->current_trace->print_max)
4912                         trace_buf = &tr->max_buffer;
4913 #endif
4914
4915                 if (cpu == RING_BUFFER_ALL_CPUS)
4916                         tracing_reset_online_cpus(trace_buf);
4917                 else
4918                         tracing_reset_cpu(trace_buf, cpu);
4919         }
4920
4921         if (file->f_mode & FMODE_READ) {
4922                 iter = __tracing_open(inode, file, false);
4923                 if (IS_ERR(iter))
4924                         ret = PTR_ERR(iter);
4925                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4926                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4927         }
4928
4929         if (ret < 0)
4930                 trace_array_put(tr);
4931
4932         return ret;
4933 }
4934
4935 /*
4936  * Some tracers are not suitable for instance buffers.
4937  * A tracer is always available for the global array (toplevel)
4938  * or if it explicitly states that it is.
4939  */
4940 static bool
4941 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4942 {
4943         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4944 }
4945
4946 /* Find the next tracer that this trace array may use */
4947 static struct tracer *
4948 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4949 {
4950         while (t && !trace_ok_for_array(t, tr))
4951                 t = t->next;
4952
4953         return t;
4954 }
4955
4956 static void *
4957 t_next(struct seq_file *m, void *v, loff_t *pos)
4958 {
4959         struct trace_array *tr = m->private;
4960         struct tracer *t = v;
4961
4962         (*pos)++;
4963
4964         if (t)
4965                 t = get_tracer_for_array(tr, t->next);
4966
4967         return t;
4968 }
4969
4970 static void *t_start(struct seq_file *m, loff_t *pos)
4971 {
4972         struct trace_array *tr = m->private;
4973         struct tracer *t;
4974         loff_t l = 0;
4975
4976         mutex_lock(&trace_types_lock);
4977
4978         t = get_tracer_for_array(tr, trace_types);
4979         for (; t && l < *pos; t = t_next(m, t, &l))
4980                         ;
4981
4982         return t;
4983 }
4984
4985 static void t_stop(struct seq_file *m, void *p)
4986 {
4987         mutex_unlock(&trace_types_lock);
4988 }
4989
4990 static int t_show(struct seq_file *m, void *v)
4991 {
4992         struct tracer *t = v;
4993
4994         if (!t)
4995                 return 0;
4996
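             /* Separate tracer names with spaces; the last entry gets the newline */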
4997         seq_puts(m, t->name);
4998         if (t->next)
4999                 seq_putc(m, ' ');
5000         else
5001                 seq_putc(m, '\n');
5002
5003         return 0;
5004 }
5005
5006 static const struct seq_operations show_traces_seq_ops = {
5007         .start          = t_start,
5008         .next           = t_next,
5009         .stop           = t_stop,
5010         .show           = t_show,
5011 };
5012
5013 static int show_traces_open(struct inode *inode, struct file *file)
5014 {
5015         struct trace_array *tr = inode->i_private;
5016         struct seq_file *m;
5017         int ret;
5018
5019         ret = tracing_check_open_get_tr(tr);
5020         if (ret)
5021                 return ret;
5022
5023         ret = seq_open(file, &show_traces_seq_ops);
5024         if (ret) {
5025                 trace_array_put(tr);
5026                 return ret;
5027         }
5028
5029         m = file->private_data;
5030         m->private = tr;
5031
5032         return 0;
5033 }
5034
5035 static int show_traces_release(struct inode *inode, struct file *file)
5036 {
5037         struct trace_array *tr = inode->i_private;
5038
5039         trace_array_put(tr);
5040         return seq_release(inode, file);
5041 }
5042
5043 static ssize_t
5044 tracing_write_stub(struct file *filp, const char __user *ubuf,
5045                    size_t count, loff_t *ppos)
5046 {
5047         return count;
5048 }
5049
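     /*
      * Reads go through seq_file and support normal seeking. Write-only
      * opens have no seq_file backing them, so their position is pinned
      * at zero.
      */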
5050 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5051 {
5052         int ret;
5053
5054         if (file->f_mode & FMODE_READ)
5055                 ret = seq_lseek(file, offset, whence);
5056         else
5057                 file->f_pos = ret = 0;
5058
5059         return ret;
5060 }
5061
5062 static const struct file_operations tracing_fops = {
5063         .open           = tracing_open,
5064         .read           = seq_read,
5065         .write          = tracing_write_stub,
5066         .llseek         = tracing_lseek,
5067         .release        = tracing_release,
5068 };
5069
5070 static const struct file_operations show_traces_fops = {
5071         .open           = show_traces_open,
5072         .read           = seq_read,
5073         .llseek         = seq_lseek,
5074         .release        = show_traces_release,
5075 };
5076
5077 static ssize_t
5078 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5079                      size_t count, loff_t *ppos)
5080 {
5081         struct trace_array *tr = file_inode(filp)->i_private;
5082         char *mask_str;
5083         int len;
5084
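             /* A first snprintf() with a NULL buffer just computes the required length */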
5085         len = snprintf(NULL, 0, "%*pb\n",
5086                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5087         mask_str = kmalloc(len, GFP_KERNEL);
5088         if (!mask_str)
5089                 return -ENOMEM;
5090
5091         len = snprintf(mask_str, len, "%*pb\n",
5092                        cpumask_pr_args(tr->tracing_cpumask));
5093         if (len >= count) {
5094                 count = -EINVAL;
5095                 goto out_err;
5096         }
5097         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5098
5099 out_err:
5100         kfree(mask_str);
5101
5102         return count;
5103 }
5104
5105 int tracing_set_cpumask(struct trace_array *tr,
5106                         cpumask_var_t tracing_cpumask_new)
5107 {
5108         int cpu;
5109
5110         if (!tr)
5111                 return -EINVAL;
5112
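             /*
              * arch_spin_lock() does not disable interrupts itself, so IRQs
              * must be off while max_lock is held.
              */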
5113         local_irq_disable();
5114         arch_spin_lock(&tr->max_lock);
5115         for_each_tracing_cpu(cpu) {
5116                 /*
5117                  * Increase/decrease the disabled counter if we are
5118                  * about to flip a bit in the cpumask:
5119                  */
5120                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5121                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5122                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5123                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5124                 }
5125                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5126                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5127                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5128                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5129                 }
5130         }
5131         arch_spin_unlock(&tr->max_lock);
5132         local_irq_enable();
5133
5134         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5135
5136         return 0;
5137 }
5138
5139 static ssize_t
5140 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5141                       size_t count, loff_t *ppos)
5142 {
5143         struct trace_array *tr = file_inode(filp)->i_private;
5144         cpumask_var_t tracing_cpumask_new;
5145         int err;
5146
5147         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5148                 return -ENOMEM;
5149
5150         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5151         if (err)
5152                 goto err_free;
5153
5154         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5155         if (err)
5156                 goto err_free;
5157
5158         free_cpumask_var(tracing_cpumask_new);
5159
5160         return count;
5161
5162 err_free:
5163         free_cpumask_var(tracing_cpumask_new);
5164
5165         return err;
5166 }
5167
5168 static const struct file_operations tracing_cpumask_fops = {
5169         .open           = tracing_open_generic_tr,
5170         .read           = tracing_cpumask_read,
5171         .write          = tracing_cpumask_write,
5172         .release        = tracing_release_generic_tr,
5173         .llseek         = generic_file_llseek,
5174 };
5175
5176 static int tracing_trace_options_show(struct seq_file *m, void *v)
5177 {
5178         struct tracer_opt *trace_opts;
5179         struct trace_array *tr = m->private;
5180         u32 tracer_flags;
5181         int i;
5182
5183         mutex_lock(&trace_types_lock);
5184         tracer_flags = tr->current_trace->flags->val;
5185         trace_opts = tr->current_trace->flags->opts;
5186
5187         for (i = 0; trace_options[i]; i++) {
5188                 if (tr->trace_flags & (1 << i))
5189                         seq_printf(m, "%s\n", trace_options[i]);
5190                 else
5191                         seq_printf(m, "no%s\n", trace_options[i]);
5192         }
5193
5194         for (i = 0; trace_opts[i].name; i++) {
5195                 if (tracer_flags & trace_opts[i].bit)
5196                         seq_printf(m, "%s\n", trace_opts[i].name);
5197                 else
5198                         seq_printf(m, "no%s\n", trace_opts[i].name);
5199         }
5200         mutex_unlock(&trace_types_lock);
5201
5202         return 0;
5203 }
5204
5205 static int __set_tracer_option(struct trace_array *tr,
5206                                struct tracer_flags *tracer_flags,
5207                                struct tracer_opt *opts, int neg)
5208 {
5209         struct tracer *trace = tracer_flags->trace;
5210         int ret;
5211
5212         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5213         if (ret)
5214                 return ret;
5215
5216         if (neg)
5217                 tracer_flags->val &= ~opts->bit;
5218         else
5219                 tracer_flags->val |= opts->bit;
5220         return 0;
5221 }
5222
5223 /* Try to assign a tracer specific option */
5224 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5225 {
5226         struct tracer *trace = tr->current_trace;
5227         struct tracer_flags *tracer_flags = trace->flags;
5228         struct tracer_opt *opts = NULL;
5229         int i;
5230
5231         for (i = 0; tracer_flags->opts[i].name; i++) {
5232                 opts = &tracer_flags->opts[i];
5233
5234                 if (strcmp(cmp, opts->name) == 0)
5235                         return __set_tracer_option(tr, trace->flags, opts, neg);
5236         }
5237
5238         return -EINVAL;
5239 }
5240
5241 /* Some tracers require overwrite to stay enabled */
5242 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5243 {
5244         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5245                 return -1;
5246
5247         return 0;
5248 }
5249
5250 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5251 {
5252         int *map;
5253
5254         if ((mask == TRACE_ITER_RECORD_TGID) ||
5255             (mask == TRACE_ITER_RECORD_CMD))
5256                 lockdep_assert_held(&event_mutex);
5257
5258         /* do nothing if flag is already set */
5259         if (!!(tr->trace_flags & mask) == !!enabled)
5260                 return 0;
5261
5262         /* Give the tracer a chance to approve the change */
5263         if (tr->current_trace->flag_changed)
5264                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5265                         return -EINVAL;
5266
5267         if (enabled)
5268                 tr->trace_flags |= mask;
5269         else
5270                 tr->trace_flags &= ~mask;
5271
5272         if (mask == TRACE_ITER_RECORD_CMD)
5273                 trace_event_enable_cmd_record(enabled);
5274
5275         if (mask == TRACE_ITER_RECORD_TGID) {
5276                 if (!tgid_map) {
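                             /*
                              * One entry per possible PID (0 .. pid_max), so a
                              * PID can be used directly as an index into the map.
                              */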
5277                         tgid_map_max = pid_max;
5278                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5279                                        GFP_KERNEL);
5280
5281                         /*
5282                          * Pairs with smp_load_acquire() in
5283                          * trace_find_tgid_ptr() to ensure that if it observes
5284                          * the tgid_map we just allocated then it also observes
5285                          * the corresponding tgid_map_max value.
5286                          */
5287                         smp_store_release(&tgid_map, map);
5288                 }
5289                 if (!tgid_map) {
5290                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5291                         return -ENOMEM;
5292                 }
5293
5294                 trace_event_enable_tgid_record(enabled);
5295         }
5296
5297         if (mask == TRACE_ITER_EVENT_FORK)
5298                 trace_event_follow_fork(tr, enabled);
5299
5300         if (mask == TRACE_ITER_FUNC_FORK)
5301                 ftrace_pid_follow_fork(tr, enabled);
5302
5303         if (mask == TRACE_ITER_OVERWRITE) {
5304                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5307 #endif
5308         }
5309
5310         if (mask == TRACE_ITER_PRINTK) {
5311                 trace_printk_start_stop_comm(enabled);
5312                 trace_printk_control(enabled);
5313         }
5314
5315         return 0;
5316 }
5317
5318 int trace_set_options(struct trace_array *tr, char *option)
5319 {
5320         char *cmp;
5321         int neg = 0;
5322         int ret;
5323         size_t orig_len = strlen(option);
5324         int len;
5325
5326         cmp = strstrip(option);
5327
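             /* str_has_prefix() returns the length of "no" if present, else 0 */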
5328         len = str_has_prefix(cmp, "no");
5329         if (len)
5330                 neg = 1;
5331
5332         cmp += len;
5333
5334         mutex_lock(&event_mutex);
5335         mutex_lock(&trace_types_lock);
5336
5337         ret = match_string(trace_options, -1, cmp);
5338         /* If no global option matched, try the tracer-specific options */
5339         if (ret < 0)
5340                 ret = set_tracer_option(tr, cmp, neg);
5341         else
5342                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5343
5344         mutex_unlock(&trace_types_lock);
5345         mutex_unlock(&event_mutex);
5346
5347         /*
5348          * If the first trailing whitespace is replaced with '\0' by strstrip,
5349          * turn it back into a space.
5350          */
5351         if (orig_len > strlen(option))
5352                 option[strlen(option)] = ' ';
5353
5354         return ret;
5355 }
5356
5357 static void __init apply_trace_boot_options(void)
5358 {
5359         char *buf = trace_boot_options_buf;
5360         char *option;
5361
5362         while (true) {
5363                 option = strsep(&buf, ",");
5364
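                     /* strsep() returns NULL once the whole buffer has been consumed */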
5365                 if (!option)
5366                         break;
5367
5368                 if (*option)
5369                         trace_set_options(&global_trace, option);
5370
5371                 /* Put back the comma to allow this to be called again */
5372                 if (buf)
5373                         *(buf - 1) = ',';
5374         }
5375 }
5376
5377 static ssize_t
5378 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5379                         size_t cnt, loff_t *ppos)
5380 {
5381         struct seq_file *m = filp->private_data;
5382         struct trace_array *tr = m->private;
5383         char buf[64];
5384         int ret;
5385
5386         if (cnt >= sizeof(buf))
5387                 return -EINVAL;
5388
5389         if (copy_from_user(buf, ubuf, cnt))
5390                 return -EFAULT;
5391
5392         buf[cnt] = 0;
5393
5394         ret = trace_set_options(tr, buf);
5395         if (ret < 0)
5396                 return ret;
5397
5398         *ppos += cnt;
5399
5400         return cnt;
5401 }
5402
5403 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5404 {
5405         struct trace_array *tr = inode->i_private;
5406         int ret;
5407
5408         ret = tracing_check_open_get_tr(tr);
5409         if (ret)
5410                 return ret;
5411
5412         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5413         if (ret < 0)
5414                 trace_array_put(tr);
5415
5416         return ret;
5417 }
5418
5419 static const struct file_operations tracing_iter_fops = {
5420         .open           = tracing_trace_options_open,
5421         .read           = seq_read,
5422         .llseek         = seq_lseek,
5423         .release        = tracing_single_release_tr,
5424         .write          = tracing_trace_options_write,
5425 };
5426
5427 static const char readme_msg[] =
5428         "tracing mini-HOWTO:\n\n"
5429         "# echo 0 > tracing_on : quick way to disable tracing\n"
5430         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5431         " Important files:\n"
5432         "  trace\t\t\t- The static contents of the buffer\n"
5433         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5434         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5435         "  current_tracer\t- function and latency tracers\n"
5436         "  available_tracers\t- list of configured tracers for current_tracer\n"
5437         "  error_log\t- error log for failed commands (that support it)\n"
5438         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5439         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5440         "  trace_clock\t\t- change the clock used to order events\n"
5441         "       local:   Per cpu clock but may not be synced across CPUs\n"
5442         "      global:   Synced across CPUs but slows tracing down.\n"
5443         "     counter:   Not a clock, but just an increment\n"
5444         "      uptime:   Jiffy counter from time of boot\n"
5445         "        perf:   Same clock that perf events use\n"
5446 #ifdef CONFIG_X86_64
5447         "     x86-tsc:   TSC cycle counter\n"
5448 #endif
5449         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5450         "       delta:   Delta difference against a buffer-wide timestamp\n"
5451         "    absolute:   Absolute (standalone) timestamp\n"
5452         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5453         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5454         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5455         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5456         "\t\t\t  Remove sub-buffer with rmdir\n"
5457         "  trace_options\t\t- Set format or modify how tracing happens\n"
5458         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5459         "\t\t\t  option name\n"
5460         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store\n"
5461 #ifdef CONFIG_DYNAMIC_FTRACE
5462         "\n  available_filter_functions - list of functions that can be filtered on\n"
5463         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5464         "\t\t\t  functions\n"
5465         "\t     accepts: func_full_name or glob-matching-pattern\n"
5466         "\t     modules: Can select a group via module\n"
5467         "\t      Format: :mod:<module-name>\n"
5468         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5469         "\t    triggers: a command to perform when function is hit\n"
5470         "\t      Format: <function>:<trigger>[:count]\n"
5471         "\t     trigger: traceon, traceoff\n"
5472         "\t\t      enable_event:<system>:<event>\n"
5473         "\t\t      disable_event:<system>:<event>\n"
5474 #ifdef CONFIG_STACKTRACE
5475         "\t\t      stacktrace\n"
5476 #endif
5477 #ifdef CONFIG_TRACER_SNAPSHOT
5478         "\t\t      snapshot\n"
5479 #endif
5480         "\t\t      dump\n"
5481         "\t\t      cpudump\n"
5482         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5483         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5484         "\t     The first one will disable tracing every time do_fault is hit\n"
5485         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5486         "\t       The first time do_trap is hit and it disables tracing, the\n"
5487         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5488         "\t       the counter will not decrement. It only decrements when the\n"
5489         "\t       trigger did work\n"
5490         "\t     To remove trigger without count:\n"
5491         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5492         "\t     To remove trigger with a count:\n"
5493         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5494         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5495         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5496         "\t    modules: Can select a group via module command :mod:\n"
5497         "\t    Does not accept triggers\n"
5498 #endif /* CONFIG_DYNAMIC_FTRACE */
5499 #ifdef CONFIG_FUNCTION_TRACER
5500         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5501         "\t\t    (function)\n"
5502         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5503         "\t\t    (function)\n"
5504 #endif
5505 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5506         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5507         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5508         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5512         "\t\t\t  snapshot buffer. Read the contents for more\n"
5513         "\t\t\t  information\n"
5514 #endif
5515 #ifdef CONFIG_STACK_TRACER
5516         "  stack_trace\t\t- Shows the max stack trace when active\n"
5517         "  stack_max_size\t- Shows current max stack size that was traced\n"
5518         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5519         "\t\t\t  new trace)\n"
5520 #ifdef CONFIG_DYNAMIC_FTRACE
5521         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5522         "\t\t\t  traces\n"
5523 #endif
5524 #endif /* CONFIG_STACK_TRACER */
5525 #ifdef CONFIG_DYNAMIC_EVENTS
5526         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5527         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5528 #endif
5529 #ifdef CONFIG_KPROBE_EVENTS
5530         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5531         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5532 #endif
5533 #ifdef CONFIG_UPROBE_EVENTS
5534         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5535         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5536 #endif
5537 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5538         "\t  accepts: event-definitions (one definition per line)\n"
5539         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5540         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5541 #ifdef CONFIG_HIST_TRIGGERS
5542         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5543 #endif
5544         "\t           -:[<group>/]<event>\n"
5545 #ifdef CONFIG_KPROBE_EVENTS
5546         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5547   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5548 #endif
5549 #ifdef CONFIG_UPROBE_EVENTS
5550   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5551 #endif
5552         "\t     args: <name>=fetcharg[:type]\n"
5553         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5554 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5555         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5556 #else
5557         "\t           $stack<index>, $stack, $retval, $comm,\n"
5558 #endif
5559         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5560         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5561         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5562         "\t           <type>\\[<array-size>\\]\n"
5563 #ifdef CONFIG_HIST_TRIGGERS
5564         "\t    field: <stype> <name>;\n"
5565         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5566         "\t           [unsigned] char/int/long\n"
5567 #endif
5568 #endif
5569         "  events/\t\t- Directory containing all trace event subsystems:\n"
5570         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5571         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5572         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5573         "\t\t\t  events\n"
5574         "      filter\t\t- If set, only events passing filter are traced\n"
5575         "  events/<system>/<event>/\t- Directory containing control files for\n"
5576         "\t\t\t  <event>:\n"
5577         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5578         "      filter\t\t- If set, only events passing filter are traced\n"
5579         "      trigger\t\t- If set, a command to perform when event is hit\n"
5580         "\t    Format: <trigger>[:count][if <filter>]\n"
5581         "\t   trigger: traceon, traceoff\n"
5582         "\t            enable_event:<system>:<event>\n"
5583         "\t            disable_event:<system>:<event>\n"
5584 #ifdef CONFIG_HIST_TRIGGERS
5585         "\t            enable_hist:<system>:<event>\n"
5586         "\t            disable_hist:<system>:<event>\n"
5587 #endif
5588 #ifdef CONFIG_STACKTRACE
5589         "\t\t    stacktrace\n"
5590 #endif
5591 #ifdef CONFIG_TRACER_SNAPSHOT
5592         "\t\t    snapshot\n"
5593 #endif
5594 #ifdef CONFIG_HIST_TRIGGERS
5595         "\t\t    hist (see below)\n"
5596 #endif
5597         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5598         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5599         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5600         "\t                  events/block/block_unplug/trigger\n"
5601         "\t   The first disables tracing every time block_unplug is hit.\n"
5602         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5603         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5604         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5605         "\t   Like function triggers, the counter is only decremented if it\n"
5606         "\t    enabled or disabled tracing.\n"
5607         "\t   To remove a trigger without a count:\n"
5608         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5609         "\t   To remove a trigger with a count:\n"
5610         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5611         "\t   Filters can be ignored when removing a trigger.\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5614         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5615         "\t            [:values=<field1[,field2,...]>]\n"
5616         "\t            [:sort=<field1[,field2,...]>]\n"
5617         "\t            [:size=#entries]\n"
5618         "\t            [:pause][:continue][:clear]\n"
5619         "\t            [:name=histname1]\n"
5620         "\t            [:<handler>.<action>]\n"
5621         "\t            [if <filter>]\n\n"
5622         "\t    When a matching event is hit, an entry is added to a hash\n"
5623         "\t    table using the key(s) and value(s) named, and the value of a\n"
5624         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5625         "\t    correspond to fields in the event's format description.  Keys\n"
5626         "\t    can be any field, or the special string 'stacktrace'.\n"
5627         "\t    Compound keys consisting of up to two fields can be specified\n"
5628         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5629         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5630         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5631         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5632         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5633         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5634         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5635         "\t    its histogram data will be shared with other triggers of the\n"
5636         "\t    same name, and trigger hits will update this common data.\n\n"
5637         "\t    Reading the 'hist' file for the event will dump the hash\n"
5638         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5639         "\t    triggers attached to an event, there will be a table for each\n"
5640         "\t    trigger in the output.  The table displayed for a named\n"
5641         "\t    trigger will be the same as any other instance having the\n"
5642         "\t    same name.  The default format used to display a given field\n"
5643         "\t    can be modified by appending any of the following modifiers\n"
5644         "\t    to the field name, as applicable:\n\n"
5645         "\t            .hex        display a number as a hex value\n"
5646         "\t            .sym        display an address as a symbol\n"
5647         "\t            .sym-offset display an address as a symbol and offset\n"
5648         "\t            .execname   display a common_pid as a program name\n"
5649         "\t            .syscall    display a syscall id as a syscall name\n"
5650         "\t            .log2       display log2 value rather than raw number\n"
5651         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5652         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5653         "\t    trigger or to start a hist trigger but not log any events\n"
5654         "\t    until told to do so.  'continue' can be used to start or\n"
5655         "\t    restart a paused hist trigger.\n\n"
5656         "\t    The 'clear' parameter will clear the contents of a running\n"
5657         "\t    hist trigger and leave its current paused/active state\n"
5658         "\t    unchanged.\n\n"
5659         "\t    The enable_hist and disable_hist triggers can be used to\n"
5660         "\t    have one event conditionally start and stop another event's\n"
5661         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5662         "\t    the enable_event and disable_event triggers.\n\n"
5663         "\t    Hist trigger handlers and actions are executed whenever a\n"
5664         "\t    histogram entry is added or updated.  They take the form:\n\n"
5665         "\t        <handler>.<action>\n\n"
5666         "\t    The available handlers are:\n\n"
5667         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5668         "\t        onmax(var)               - invoke if var exceeds current max\n"
5669         "\t        onchange(var)            - invoke action if var changes\n\n"
5670         "\t    The available actions are:\n\n"
5671         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5672         "\t        save(field,...)                      - save current event fields\n"
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5674         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5675 #endif
5676 #ifdef CONFIG_SYNTH_EVENTS
5677         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5678         "\t  Write into this file to define/undefine new synthetic events.\n"
5679         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5680 #endif
5681 #endif
5682 ;
5683
5684 static ssize_t
5685 tracing_readme_read(struct file *filp, char __user *ubuf,
5686                        size_t cnt, loff_t *ppos)
5687 {
5688         return simple_read_from_buffer(ubuf, cnt, ppos,
5689                                         readme_msg, strlen(readme_msg));
5690 }
5691
5692 static const struct file_operations tracing_readme_fops = {
5693         .open           = tracing_open_generic,
5694         .read           = tracing_readme_read,
5695         .llseek         = generic_file_llseek,
5696 };
5697
5698 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5699 {
5700         int pid = ++(*pos);
5701
5702         return trace_find_tgid_ptr(pid);
5703 }
5704
5705 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5706 {
5707         int pid = *pos;
5708
5709         return trace_find_tgid_ptr(pid);
5710 }
5711
5712 static void saved_tgids_stop(struct seq_file *m, void *v)
5713 {
5714 }
5715
5716 static int saved_tgids_show(struct seq_file *m, void *v)
5717 {
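             /* v points into tgid_map; its offset from the base is the PID */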
5718         int *entry = (int *)v;
5719         int pid = entry - tgid_map;
5720         int tgid = *entry;
5721
5722         if (tgid == 0)
5723                 return SEQ_SKIP;
5724
5725         seq_printf(m, "%d %d\n", pid, tgid);
5726         return 0;
5727 }
5728
5729 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5730         .start          = saved_tgids_start,
5731         .stop           = saved_tgids_stop,
5732         .next           = saved_tgids_next,
5733         .show           = saved_tgids_show,
5734 };
5735
5736 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5737 {
5738         int ret;
5739
5740         ret = tracing_check_open_get_tr(NULL);
5741         if (ret)
5742                 return ret;
5743
5744         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5745 }
5746
5747
5748 static const struct file_operations tracing_saved_tgids_fops = {
5749         .open           = tracing_saved_tgids_open,
5750         .read           = seq_read,
5751         .llseek         = seq_lseek,
5752         .release        = seq_release,
5753 };
5754
5755 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5756 {
5757         unsigned int *ptr = v;
5758
5759         if (*pos || m->count)
5760                 ptr++;
5761
5762         (*pos)++;
5763
5764         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5765              ptr++) {
5766                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5767                         continue;
5768
5769                 return ptr;
5770         }
5771
5772         return NULL;
5773 }
5774
5775 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5776 {
5777         void *v;
5778         loff_t l = 0;
5779
5780         preempt_disable();
5781         arch_spin_lock(&trace_cmdline_lock);
5782
5783         v = &savedcmd->map_cmdline_to_pid[0];
5784         while (l <= *pos) {
5785                 v = saved_cmdlines_next(m, v, &l);
5786                 if (!v)
5787                         return NULL;
5788         }
5789
5790         return v;
5791 }
5792
5793 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5794 {
5795         arch_spin_unlock(&trace_cmdline_lock);
5796         preempt_enable();
5797 }
5798
5799 static int saved_cmdlines_show(struct seq_file *m, void *v)
5800 {
5801         char buf[TASK_COMM_LEN];
5802         unsigned int *pid = v;
5803
5804         __trace_find_cmdline(*pid, buf);
5805         seq_printf(m, "%d %s\n", *pid, buf);
5806         return 0;
5807 }
5808
5809 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5810         .start          = saved_cmdlines_start,
5811         .next           = saved_cmdlines_next,
5812         .stop           = saved_cmdlines_stop,
5813         .show           = saved_cmdlines_show,
5814 };
5815
5816 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5817 {
5818         int ret;
5819
5820         ret = tracing_check_open_get_tr(NULL);
5821         if (ret)
5822                 return ret;
5823
5824         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5825 }
5826
5827 static const struct file_operations tracing_saved_cmdlines_fops = {
5828         .open           = tracing_saved_cmdlines_open,
5829         .read           = seq_read,
5830         .llseek         = seq_lseek,
5831         .release        = seq_release,
5832 };
5833
5834 static ssize_t
5835 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5836                                  size_t cnt, loff_t *ppos)
5837 {
5838         char buf[64];
5839         int r;
5840
5841         arch_spin_lock(&trace_cmdline_lock);
5842         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5843         arch_spin_unlock(&trace_cmdline_lock);
5844
5845         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5846 }
5847
5848 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5849 {
5850         kfree(s->saved_cmdlines);
5851         kfree(s->map_cmdline_to_pid);
5852         kfree(s);
5853 }
5854
5855 static int tracing_resize_saved_cmdlines(unsigned int val)
5856 {
5857         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5858
5859         s = kmalloc(sizeof(*s), GFP_KERNEL);
5860         if (!s)
5861                 return -ENOMEM;
5862
5863         if (allocate_cmdlines_buffer(val, s) < 0) {
5864                 kfree(s);
5865                 return -ENOMEM;
5866         }
5867
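             /* Swap in the new buffer under the lock; free the old one after dropping it */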
5868         arch_spin_lock(&trace_cmdline_lock);
5869         savedcmd_temp = savedcmd;
5870         savedcmd = s;
5871         arch_spin_unlock(&trace_cmdline_lock);
5872         free_saved_cmdlines_buffer(savedcmd_temp);
5873
5874         return 0;
5875 }
5876
5877 static ssize_t
5878 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5879                                   size_t cnt, loff_t *ppos)
5880 {
5881         unsigned long val;
5882         int ret;
5883
5884         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5885         if (ret)
5886                 return ret;
5887
5888         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5889         if (!val || val > PID_MAX_DEFAULT)
5890                 return -EINVAL;
5891
5892         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5893         if (ret < 0)
5894                 return ret;
5895
5896         *ppos += cnt;
5897
5898         return cnt;
5899 }
5900
5901 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5902         .open           = tracing_open_generic,
5903         .read           = tracing_saved_cmdlines_size_read,
5904         .write          = tracing_saved_cmdlines_size_write,
5905 };
5906
5907 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5908 static union trace_eval_map_item *
5909 update_eval_map(union trace_eval_map_item *ptr)
5910 {
5911         if (!ptr->map.eval_string) {
5912                 if (ptr->tail.next) {
5913                         ptr = ptr->tail.next;
5914                         /* Set ptr to the next real item (skip head) */
5915                         ptr++;
5916                 } else
5917                         return NULL;
5918         }
5919         return ptr;
5920 }
5921
5922 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5923 {
5924         union trace_eval_map_item *ptr = v;
5925
5926         /*
5927          * Paranoid! If ptr points to end, we don't want to increment past it.
5928          * This really should never happen.
5929          */
5930         (*pos)++;
5931         ptr = update_eval_map(ptr);
5932         if (WARN_ON_ONCE(!ptr))
5933                 return NULL;
5934
5935         ptr++;
5936         ptr = update_eval_map(ptr);
5937
5938         return ptr;
5939 }
5940
5941 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5942 {
5943         union trace_eval_map_item *v;
5944         loff_t l = 0;
5945
5946         mutex_lock(&trace_eval_mutex);
5947
5948         v = trace_eval_maps;
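             /* Skip the head item, which holds the owning module and array length */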
5949         if (v)
5950                 v++;
5951
5952         while (v && l < *pos) {
5953                 v = eval_map_next(m, v, &l);
5954         }
5955
5956         return v;
5957 }
5958
5959 static void eval_map_stop(struct seq_file *m, void *v)
5960 {
5961         mutex_unlock(&trace_eval_mutex);
5962 }
5963
5964 static int eval_map_show(struct seq_file *m, void *v)
5965 {
5966         union trace_eval_map_item *ptr = v;
5967
5968         seq_printf(m, "%s %ld (%s)\n",
5969                    ptr->map.eval_string, ptr->map.eval_value,
5970                    ptr->map.system);
5971
5972         return 0;
5973 }
5974
5975 static const struct seq_operations tracing_eval_map_seq_ops = {
5976         .start          = eval_map_start,
5977         .next           = eval_map_next,
5978         .stop           = eval_map_stop,
5979         .show           = eval_map_show,
5980 };
5981
5982 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5983 {
5984         int ret;
5985
5986         ret = tracing_check_open_get_tr(NULL);
5987         if (ret)
5988                 return ret;
5989
5990         return seq_open(filp, &tracing_eval_map_seq_ops);
5991 }
5992
5993 static const struct file_operations tracing_eval_map_fops = {
5994         .open           = tracing_eval_map_open,
5995         .read           = seq_read,
5996         .llseek         = seq_lseek,
5997         .release        = seq_release,
5998 };
5999
6000 static inline union trace_eval_map_item *
6001 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6002 {
6003         /* Return tail of array given the head */
6004         return ptr + ptr->head.length + 1;
6005 }
6006
6007 static void
6008 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6009                            int len)
6010 {
6011         struct trace_eval_map **stop;
6012         struct trace_eval_map **map;
6013         union trace_eval_map_item *map_array;
6014         union trace_eval_map_item *ptr;
6015
6016         stop = start + len;
6017
6018         /*
6019          * The trace_eval_maps contains the map plus a head and tail item,
6020          * where the head holds the module and length of array, and the
6021          * tail holds a pointer to the next list.
6022          */
6023         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6024         if (!map_array) {
6025                 pr_warn("Unable to allocate trace eval mapping\n");
6026                 return;
6027         }
6028
6029         mutex_lock(&trace_eval_mutex);
6030
6031         if (!trace_eval_maps)
6032                 trace_eval_maps = map_array;
6033         else {
6034                 ptr = trace_eval_maps;
6035                 for (;;) {
6036                         ptr = trace_eval_jmp_to_tail(ptr);
6037                         if (!ptr->tail.next)
6038                                 break;
6039                         ptr = ptr->tail.next;
6040
6041                 }
6042                 ptr->tail.next = map_array;
6043         }
6044         map_array->head.mod = mod;
6045         map_array->head.length = len;
6046         map_array++;
6047
6048         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6049                 map_array->map = **map;
6050                 map_array++;
6051         }
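             /*
              * Zero the final slot: it becomes the tail item whose
              * ->tail.next can link in the next module's map array.
              */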
6052         memset(map_array, 0, sizeof(*map_array));
6053
6054         mutex_unlock(&trace_eval_mutex);
6055 }
6056
6057 static void trace_create_eval_file(struct dentry *d_tracer)
6058 {
6059         trace_create_file("eval_map", 0444, d_tracer,
6060                           NULL, &tracing_eval_map_fops);
6061 }
6062
6063 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6064 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6065 static inline void trace_insert_eval_map_file(struct module *mod,
6066                               struct trace_eval_map **start, int len) { }
6067 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6068
6069 static void trace_insert_eval_map(struct module *mod,
6070                                   struct trace_eval_map **start, int len)
6071 {
6072         struct trace_eval_map **map;
6073
6074         if (len <= 0)
6075                 return;
6076
6077         map = start;
6078
6079         trace_event_eval_update(map, len);
6080
6081         trace_insert_eval_map_file(mod, start, len);
6082 }
6083
6084 static ssize_t
6085 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6086                        size_t cnt, loff_t *ppos)
6087 {
6088         struct trace_array *tr = filp->private_data;
6089         char buf[MAX_TRACER_SIZE+2];
6090         int r;
6091
6092         mutex_lock(&trace_types_lock);
6093         r = sprintf(buf, "%s\n", tr->current_trace->name);
6094         mutex_unlock(&trace_types_lock);
6095
6096         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6097 }
6098
6099 int tracer_init(struct tracer *t, struct trace_array *tr)
6100 {
6101         tracing_reset_online_cpus(&tr->array_buffer);
6102         return t->init(tr);
6103 }
6104
6105 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6106 {
6107         int cpu;
6108
6109         for_each_tracing_cpu(cpu)
6110                 per_cpu_ptr(buf->data, cpu)->entries = val;
6111 }
6112
6113 #ifdef CONFIG_TRACER_MAX_TRACE
6114 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6115 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6116                                         struct array_buffer *size_buf, int cpu_id)
6117 {
6118         int cpu, ret = 0;
6119
6120         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6121                 for_each_tracing_cpu(cpu) {
6122                         ret = ring_buffer_resize(trace_buf->buffer,
6123                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6124                         if (ret < 0)
6125                                 break;
6126                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6127                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6128                 }
6129         } else {
6130                 ret = ring_buffer_resize(trace_buf->buffer,
6131                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6132                 if (ret == 0)
6133                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6134                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6135         }
6136
6137         return ret;
6138 }
6139 #endif /* CONFIG_TRACER_MAX_TRACE */
6140
6141 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6142                                         unsigned long size, int cpu)
6143 {
6144         int ret;
6145
6146         /*
6147          * If kernel or user changes the size of the ring buffer
6148          * we use the size that was given, and we can forget about
6149          * expanding it later.
6150          */
6151         ring_buffer_expanded = true;
6152
6153         /* May be called before buffers are initialized */
6154         if (!tr->array_buffer.buffer)
6155                 return 0;
6156
6157         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6158         if (ret < 0)
6159                 return ret;
6160
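             /*
              * The latency tracers swap the snapshot (max) buffer with the
              * main buffer, so the two must be kept the same size whenever
              * the current tracer makes use of the max buffer.
              */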
6161 #ifdef CONFIG_TRACER_MAX_TRACE
6162         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6163             !tr->current_trace->use_max_tr)
6164                 goto out;
6165
6166         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6167         if (ret < 0) {
6168                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6169                                                      &tr->array_buffer, cpu);
6170                 if (r < 0) {
6171                         /*
6172                          * AARGH! We are left with different
6173                          * size max buffer!!!!
6174                          * The max buffer is our "snapshot" buffer.
6175                          * When a tracer needs a snapshot (one of the
6176                          * latency tracers), it swaps the max buffer
6177                          * with the saved snapshot. We succeeded in updating
6178                          * the size of the main buffer, but failed to
6179                          * update the size of the max buffer. But when we tried
6180                          * to reset the main buffer to the original size, we
6181                          * failed there too. This is very unlikely to
6182                          * happen, but if it does, warn and kill all
6183                          * tracing.
6184                          */
6185                         WARN_ON(1);
6186                         tracing_disabled = 1;
6187                 }
6188                 return ret;
6189         }
6190
6191         if (cpu == RING_BUFFER_ALL_CPUS)
6192                 set_buffer_entries(&tr->max_buffer, size);
6193         else
6194                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6195
6196  out:
6197 #endif /* CONFIG_TRACER_MAX_TRACE */
6198
6199         if (cpu == RING_BUFFER_ALL_CPUS)
6200                 set_buffer_entries(&tr->array_buffer, size);
6201         else
6202                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6203
6204         return ret;
6205 }
6206
6207 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6208                                   unsigned long size, int cpu_id)
6209 {
6210         int ret;
6211
6212         mutex_lock(&trace_types_lock);
6213
6214         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6215                 /* make sure this CPU is enabled in the mask */
6216                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6217                         ret = -EINVAL;
6218                         goto out;
6219                 }
6220         }
6221
6222         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6223         if (ret < 0)
6224                 ret = -ENOMEM;
6225
6226 out:
6227         mutex_unlock(&trace_types_lock);
6228
6229         return ret;
6230 }
6231
6232
6233 /**
6234  * tracing_update_buffers - used by tracing facility to expand ring buffers
6235  *
6236  * To save memory when tracing is never used on a system that has it
6237  * configured in, the ring buffers are set to a minimum size. But once
6238  * a user starts to use the tracing facility, they need to grow
6239  * to their default size.
6240  *
6241  * This function is to be called when a tracer is about to be used.
6242  */
6243 int tracing_update_buffers(void)
6244 {
6245         int ret = 0;
6246
6247         mutex_lock(&trace_types_lock);
6248         if (!ring_buffer_expanded)
6249                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6250                                                 RING_BUFFER_ALL_CPUS);
6251         mutex_unlock(&trace_types_lock);
6252
6253         return ret;
6254 }
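/*
 * A minimal usage sketch (not copied from a specific caller): code that is
 * about to start using the ring buffer typically expands it first and
 * bails out on failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */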
6255
6256 struct trace_option_dentry;
6257
6258 static void
6259 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6260
6261 /*
6262  * Used to clear out the tracer before deletion of an instance.
6263  * Must have trace_types_lock held.
6264  */
6265 static void tracing_set_nop(struct trace_array *tr)
6266 {
6267         if (tr->current_trace == &nop_trace)
6268                 return;
6269
6270         tr->current_trace->enabled--;
6271
6272         if (tr->current_trace->reset)
6273                 tr->current_trace->reset(tr);
6274
6275         tr->current_trace = &nop_trace;
6276 }
6277
6278 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6279 {
6280         /* Only enable if the directory has been created already. */
6281         if (!tr->dir)
6282                 return;
6283
6284         create_trace_option_files(tr, t);
6285 }
6286
6287 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6288 {
6289         struct tracer *t;
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291         bool had_max_tr;
6292 #endif
6293         int ret = 0;
6294
6295         mutex_lock(&trace_types_lock);
6296
6297         if (!ring_buffer_expanded) {
6298                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6299                                                 RING_BUFFER_ALL_CPUS);
6300                 if (ret < 0)
6301                         goto out;
6302                 ret = 0;
6303         }
6304
6305         for (t = trace_types; t; t = t->next) {
6306                 if (strcmp(t->name, buf) == 0)
6307                         break;
6308         }
6309         if (!t) {
6310                 ret = -EINVAL;
6311                 goto out;
6312         }
6313         if (t == tr->current_trace)
6314                 goto out;
6315
6316 #ifdef CONFIG_TRACER_SNAPSHOT
6317         if (t->use_max_tr) {
6318                 arch_spin_lock(&tr->max_lock);
6319                 if (tr->cond_snapshot)
6320                         ret = -EBUSY;
6321                 arch_spin_unlock(&tr->max_lock);
6322                 if (ret)
6323                         goto out;
6324         }
6325 #endif
6326         /* Some tracers won't work on kernel command line */
6327         if (system_state < SYSTEM_RUNNING && t->noboot) {
6328                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6329                         t->name);
6330                 goto out;
6331         }
6332
6333         /* Some tracers are only allowed for the top level buffer */
6334         if (!trace_ok_for_array(t, tr)) {
6335                 ret = -EINVAL;
6336                 goto out;
6337         }
6338
6339         /* If trace pipe files are being read, we can't change the tracer */
6340         if (tr->trace_ref) {
6341                 ret = -EBUSY;
6342                 goto out;
6343         }
6344
6345         trace_branch_disable();
6346
6347         tr->current_trace->enabled--;
6348
6349         if (tr->current_trace->reset)
6350                 tr->current_trace->reset(tr);
6351
6352         /* Current trace needs to be nop_trace before synchronize_rcu */
6353         tr->current_trace = &nop_trace;
6354
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356         had_max_tr = tr->allocated_snapshot;
6357
6358         if (had_max_tr && !t->use_max_tr) {
6359                 /*
6360                  * We need to make sure that the update_max_tr sees that
6361                  * current_trace changed to nop_trace to keep it from
6362                  * swapping the buffers after we resize it.
6363                  * The update_max_tr is called with interrupts disabled,
6364                  * so a synchronize_rcu() is sufficient.
6365                  */
6366                 synchronize_rcu();
6367                 free_snapshot(tr);
6368         }
6369 #endif
6370
6371 #ifdef CONFIG_TRACER_MAX_TRACE
6372         if (t->use_max_tr && !had_max_tr) {
6373                 ret = tracing_alloc_snapshot_instance(tr);
6374                 if (ret < 0)
6375                         goto out;
6376         }
6377 #endif
6378
6379         if (t->init) {
6380                 ret = tracer_init(t, tr);
6381                 if (ret)
6382                         goto out;
6383         }
6384
6385         tr->current_trace = t;
6386         tr->current_trace->enabled++;
6387         trace_branch_enable(tr);
6388  out:
6389         mutex_unlock(&trace_types_lock);
6390
6391         return ret;
6392 }
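/*
 * From user space, tracing_set_tracer() is reached by writing a tracer
 * name to the "current_tracer" tracefs file. Illustrative only (the
 * tracefs mount point may differ):
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 */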
6393
6394 static ssize_t
6395 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6396                         size_t cnt, loff_t *ppos)
6397 {
6398         struct trace_array *tr = filp->private_data;
6399         char buf[MAX_TRACER_SIZE+1];
6400         int i;
6401         size_t ret;
6402         int err;
6403
6404         ret = cnt;
6405
6406         if (cnt > MAX_TRACER_SIZE)
6407                 cnt = MAX_TRACER_SIZE;
6408
6409         if (copy_from_user(buf, ubuf, cnt))
6410                 return -EFAULT;
6411
6412         buf[cnt] = 0;
6413
6414         /* strip ending whitespace. */
6415         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6416                 buf[i] = 0;
6417
6418         err = tracing_set_tracer(tr, buf);
6419         if (err)
6420                 return err;
6421
6422         *ppos += ret;
6423
6424         return ret;
6425 }
6426
6427 static ssize_t
6428 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6429                    size_t cnt, loff_t *ppos)
6430 {
6431         char buf[64];
6432         int r;
6433
6434         r = snprintf(buf, sizeof(buf), "%ld\n",
6435                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6436         if (r > sizeof(buf))
6437                 r = sizeof(buf);
6438         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6439 }
6440
6441 static ssize_t
6442 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6443                     size_t cnt, loff_t *ppos)
6444 {
6445         unsigned long val;
6446         int ret;
6447
6448         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6449         if (ret)
6450                 return ret;
6451
6452         *ptr = val * 1000;
6453
6454         return cnt;
6455 }
6456
6457 static ssize_t
6458 tracing_thresh_read(struct file *filp, char __user *ubuf,
6459                     size_t cnt, loff_t *ppos)
6460 {
6461         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6462 }
6463
6464 static ssize_t
6465 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6466                      size_t cnt, loff_t *ppos)
6467 {
6468         struct trace_array *tr = filp->private_data;
6469         int ret;
6470
6471         mutex_lock(&trace_types_lock);
6472         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6473         if (ret < 0)
6474                 goto out;
6475
6476         if (tr->current_trace->update_thresh) {
6477                 ret = tr->current_trace->update_thresh(tr);
6478                 if (ret < 0)
6479                         goto out;
6480         }
6481
6482         ret = cnt;
6483 out:
6484         mutex_unlock(&trace_types_lock);
6485
6486         return ret;
6487 }
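/*
 * These handlers back the "tracing_thresh" file. Values are read and
 * written in microseconds and stored in nanoseconds (see the * 1000 and
 * nsecs_to_usecs() conversions above). Illustrative only:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 */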
6488
6489 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6490
6491 static ssize_t
6492 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6493                      size_t cnt, loff_t *ppos)
6494 {
6495         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6496 }
6497
6498 static ssize_t
6499 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6500                       size_t cnt, loff_t *ppos)
6501 {
6502         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6503 }
6504
6505 #endif
6506
6507 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6508 {
6509         struct trace_array *tr = inode->i_private;
6510         struct trace_iterator *iter;
6511         int ret;
6512
6513         ret = tracing_check_open_get_tr(tr);
6514         if (ret)
6515                 return ret;
6516
6517         mutex_lock(&trace_types_lock);
6518
6519         /* create a buffer to store the information to pass to userspace */
6520         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6521         if (!iter) {
6522                 ret = -ENOMEM;
6523                 __trace_array_put(tr);
6524                 goto out;
6525         }
6526
6527         trace_seq_init(&iter->seq);
6528         iter->trace = tr->current_trace;
6529
6530         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6531                 ret = -ENOMEM;
6532                 goto fail;
6533         }
6534
6535         /* trace pipe does not show start of buffer */
6536         cpumask_setall(iter->started);
6537
6538         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6539                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6540
6541         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6542         if (trace_clocks[tr->clock_id].in_ns)
6543                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6544
6545         iter->tr = tr;
6546         iter->array_buffer = &tr->array_buffer;
6547         iter->cpu_file = tracing_get_cpu(inode);
6548         mutex_init(&iter->mutex);
6549         filp->private_data = iter;
6550
6551         if (iter->trace->pipe_open)
6552                 iter->trace->pipe_open(iter);
6553
6554         nonseekable_open(inode, filp);
6555
6556         tr->trace_ref++;
6557 out:
6558         mutex_unlock(&trace_types_lock);
6559         return ret;
6560
6561 fail:
6562         kfree(iter);
6563         __trace_array_put(tr);
6564         mutex_unlock(&trace_types_lock);
6565         return ret;
6566 }
6567
6568 static int tracing_release_pipe(struct inode *inode, struct file *file)
6569 {
6570         struct trace_iterator *iter = file->private_data;
6571         struct trace_array *tr = inode->i_private;
6572
6573         mutex_lock(&trace_types_lock);
6574
6575         tr->trace_ref--;
6576
6577         if (iter->trace->pipe_close)
6578                 iter->trace->pipe_close(iter);
6579
6580         mutex_unlock(&trace_types_lock);
6581
6582         free_cpumask_var(iter->started);
6583         mutex_destroy(&iter->mutex);
6584         kfree(iter);
6585
6586         trace_array_put(tr);
6587
6588         return 0;
6589 }
6590
6591 static __poll_t
6592 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6593 {
6594         struct trace_array *tr = iter->tr;
6595
6596         /* Iterators are static, they should be filled or empty */
6597         if (trace_buffer_iter(iter, iter->cpu_file))
6598                 return EPOLLIN | EPOLLRDNORM;
6599
6600         if (tr->trace_flags & TRACE_ITER_BLOCK)
6601                 /*
6602                  * Always select as readable when in blocking mode
6603                  */
6604                 return EPOLLIN | EPOLLRDNORM;
6605         else
6606                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6607                                              filp, poll_table);
6608 }
6609
6610 static __poll_t
6611 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6612 {
6613         struct trace_iterator *iter = filp->private_data;
6614
6615         return trace_poll(iter, filp, poll_table);
6616 }
6617
6618 /* Must be called with iter->mutex held. */
6619 static int tracing_wait_pipe(struct file *filp)
6620 {
6621         struct trace_iterator *iter = filp->private_data;
6622         int ret;
6623
6624         while (trace_empty(iter)) {
6625
6626                 if ((filp->f_flags & O_NONBLOCK)) {
6627                         return -EAGAIN;
6628                 }
6629
6630                 /*
6631                  * We only return after we have read something and tracing
6632                  * has been disabled. If tracing is disabled but we have not
6633                  * read anything yet, we keep blocking. This allows a user to
6634                  * cat this file, and then enable tracing. But after we have
6635                  * read something, we give an EOF when tracing is again disabled.
6636                  *
6637                  * iter->pos will be 0 if we haven't read anything.
6638                  */
6639                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6640                         break;
6641
6642                 mutex_unlock(&iter->mutex);
6643
6644                 ret = wait_on_pipe(iter, 0);
6645
6646                 mutex_lock(&iter->mutex);
6647
6648                 if (ret)
6649                         return ret;
6650         }
6651
6652         return 1;
6653 }
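/*
 * Illustrative use of the blocking behaviour described above: a reader can
 * attach to "trace_pipe" before tracing produces anything and will simply
 * wait (paths are the usual tracefs ones and may vary):
 *
 *	# cat /sys/kernel/tracing/trace_pipe &
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *
 * The reader starts emitting lines once events arrive, and only sees EOF
 * after it has read something and tracing is disabled again.
 */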
6654
6655 /*
6656  * Consumer reader.
6657  */
6658 static ssize_t
6659 tracing_read_pipe(struct file *filp, char __user *ubuf,
6660                   size_t cnt, loff_t *ppos)
6661 {
6662         struct trace_iterator *iter = filp->private_data;
6663         ssize_t sret;
6664
6665         /*
6666          * Avoid more than one consumer on a single file descriptor
6667          * This is just a matter of traces coherency, the ring buffer itself
6668          * is protected.
6669          */
6670         mutex_lock(&iter->mutex);
6671
6672         /* return any leftover data */
6673         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6674         if (sret != -EBUSY)
6675                 goto out;
6676
6677         trace_seq_init(&iter->seq);
6678
6679         if (iter->trace->read) {
6680                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6681                 if (sret)
6682                         goto out;
6683         }
6684
6685 waitagain:
6686         sret = tracing_wait_pipe(filp);
6687         if (sret <= 0)
6688                 goto out;
6689
6690         /* stop when tracing is finished */
6691         if (trace_empty(iter)) {
6692                 sret = 0;
6693                 goto out;
6694         }
6695
6696         if (cnt >= PAGE_SIZE)
6697                 cnt = PAGE_SIZE - 1;
6698
6699         /* reset all but tr, trace, and overruns */
6700         memset(&iter->seq, 0,
6701                sizeof(struct trace_iterator) -
6702                offsetof(struct trace_iterator, seq));
6703         cpumask_clear(iter->started);
6704         trace_seq_init(&iter->seq);
6705         iter->pos = -1;
6706
6707         trace_event_read_lock();
6708         trace_access_lock(iter->cpu_file);
6709         while (trace_find_next_entry_inc(iter) != NULL) {
6710                 enum print_line_t ret;
6711                 int save_len = iter->seq.seq.len;
6712
6713                 ret = print_trace_line(iter);
6714                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6715                         /* don't print partial lines */
6716                         iter->seq.seq.len = save_len;
6717                         break;
6718                 }
6719                 if (ret != TRACE_TYPE_NO_CONSUME)
6720                         trace_consume(iter);
6721
6722                 if (trace_seq_used(&iter->seq) >= cnt)
6723                         break;
6724
6725                 /*
6726                  * A set full flag means we reached the trace_seq buffer size
6727                  * and should have left via the partial-line check above.
6728                  * One of the trace_seq_* functions is not being used properly.
6729                  */
6730                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6731                           iter->ent->type);
6732         }
6733         trace_access_unlock(iter->cpu_file);
6734         trace_event_read_unlock();
6735
6736         /* Now copy what we have to the user */
6737         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6738         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6739                 trace_seq_init(&iter->seq);
6740
6741         /*
6742          * If there was nothing to send to user, in spite of consuming trace
6743          * entries, go back to wait for more entries.
6744          */
6745         if (sret == -EBUSY)
6746                 goto waitagain;
6747
6748 out:
6749         mutex_unlock(&iter->mutex);
6750
6751         return sret;
6752 }
6753
6754 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6755                                      unsigned int idx)
6756 {
6757         __free_page(spd->pages[idx]);
6758 }
6759
6760 static size_t
6761 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6762 {
6763         size_t count;
6764         int save_len;
6765         int ret;
6766
6767         /* Seq buffer is page-sized, exactly what we need. */
6768         for (;;) {
6769                 save_len = iter->seq.seq.len;
6770                 ret = print_trace_line(iter);
6771
6772                 if (trace_seq_has_overflowed(&iter->seq)) {
6773                         iter->seq.seq.len = save_len;
6774                         break;
6775                 }
6776
6777                 /*
6778                  * This should not be hit, because it should only
6779                  * be set if the iter->seq overflowed. But check it
6780                  * anyway to be safe.
6781                  */
6782                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6783                         iter->seq.seq.len = save_len;
6784                         break;
6785                 }
6786
6787                 count = trace_seq_used(&iter->seq) - save_len;
6788                 if (rem < count) {
6789                         rem = 0;
6790                         iter->seq.seq.len = save_len;
6791                         break;
6792                 }
6793
6794                 if (ret != TRACE_TYPE_NO_CONSUME)
6795                         trace_consume(iter);
6796                 rem -= count;
6797                 if (!trace_find_next_entry_inc(iter))   {
6798                         rem = 0;
6799                         iter->ent = NULL;
6800                         break;
6801                 }
6802         }
6803
6804         return rem;
6805 }
6806
6807 static ssize_t tracing_splice_read_pipe(struct file *filp,
6808                                         loff_t *ppos,
6809                                         struct pipe_inode_info *pipe,
6810                                         size_t len,
6811                                         unsigned int flags)
6812 {
6813         struct page *pages_def[PIPE_DEF_BUFFERS];
6814         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6815         struct trace_iterator *iter = filp->private_data;
6816         struct splice_pipe_desc spd = {
6817                 .pages          = pages_def,
6818                 .partial        = partial_def,
6819                 .nr_pages       = 0, /* This gets updated below. */
6820                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6821                 .ops            = &default_pipe_buf_ops,
6822                 .spd_release    = tracing_spd_release_pipe,
6823         };
6824         ssize_t ret;
6825         size_t rem;
6826         unsigned int i;
6827
6828         if (splice_grow_spd(pipe, &spd))
6829                 return -ENOMEM;
6830
6831         mutex_lock(&iter->mutex);
6832
6833         if (iter->trace->splice_read) {
6834                 ret = iter->trace->splice_read(iter, filp,
6835                                                ppos, pipe, len, flags);
6836                 if (ret)
6837                         goto out_err;
6838         }
6839
6840         ret = tracing_wait_pipe(filp);
6841         if (ret <= 0)
6842                 goto out_err;
6843
6844         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6845                 ret = -EFAULT;
6846                 goto out_err;
6847         }
6848
6849         trace_event_read_lock();
6850         trace_access_lock(iter->cpu_file);
6851
6852         /* Fill as many pages as possible. */
6853         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6854                 spd.pages[i] = alloc_page(GFP_KERNEL);
6855                 if (!spd.pages[i])
6856                         break;
6857
6858                 rem = tracing_fill_pipe_page(rem, iter);
6859
6860                 /* Copy the data into the page, so we can start over. */
6861                 ret = trace_seq_to_buffer(&iter->seq,
6862                                           page_address(spd.pages[i]),
6863                                           trace_seq_used(&iter->seq));
6864                 if (ret < 0) {
6865                         __free_page(spd.pages[i]);
6866                         break;
6867                 }
6868                 spd.partial[i].offset = 0;
6869                 spd.partial[i].len = trace_seq_used(&iter->seq);
6870
6871                 trace_seq_init(&iter->seq);
6872         }
6873
6874         trace_access_unlock(iter->cpu_file);
6875         trace_event_read_unlock();
6876         mutex_unlock(&iter->mutex);
6877
6878         spd.nr_pages = i;
6879
6880         if (i)
6881                 ret = splice_to_pipe(pipe, &spd);
6882         else
6883                 ret = 0;
6884 out:
6885         splice_shrink_spd(&spd);
6886         return ret;
6887
6888 out_err:
6889         mutex_unlock(&iter->mutex);
6890         goto out;
6891 }
6892
6893 static ssize_t
6894 tracing_entries_read(struct file *filp, char __user *ubuf,
6895                      size_t cnt, loff_t *ppos)
6896 {
6897         struct inode *inode = file_inode(filp);
6898         struct trace_array *tr = inode->i_private;
6899         int cpu = tracing_get_cpu(inode);
6900         char buf[64];
6901         int r = 0;
6902         ssize_t ret;
6903
6904         mutex_lock(&trace_types_lock);
6905
6906         if (cpu == RING_BUFFER_ALL_CPUS) {
6907                 int cpu, buf_size_same;
6908                 unsigned long size;
6909
6910                 size = 0;
6911                 buf_size_same = 1;
6912                 /* check if all CPU sizes are the same */
6913                 for_each_tracing_cpu(cpu) {
6914                         /* fill in the size from first enabled cpu */
6915                         if (size == 0)
6916                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6917                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6918                                 buf_size_same = 0;
6919                                 break;
6920                         }
6921                 }
6922
6923                 if (buf_size_same) {
6924                         if (!ring_buffer_expanded)
6925                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6926                                             size >> 10,
6927                                             trace_buf_size >> 10);
6928                         else
6929                                 r = sprintf(buf, "%lu\n", size >> 10);
6930                 } else
6931                         r = sprintf(buf, "X\n");
6932         } else
6933                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6934
6935         mutex_unlock(&trace_types_lock);
6936
6937         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6938         return ret;
6939 }
6940
6941 static ssize_t
6942 tracing_entries_write(struct file *filp, const char __user *ubuf,
6943                       size_t cnt, loff_t *ppos)
6944 {
6945         struct inode *inode = file_inode(filp);
6946         struct trace_array *tr = inode->i_private;
6947         unsigned long val;
6948         int ret;
6949
6950         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6951         if (ret)
6952                 return ret;
6953
6954         /* must have at least 1 entry */
6955         if (!val)
6956                 return -EINVAL;
6957
6958         /* value is in KB */
6959         val <<= 10;
6960         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6961         if (ret < 0)
6962                 return ret;
6963
6964         *ppos += cnt;
6965
6966         return cnt;
6967 }
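/*
 * These handlers back the "buffer_size_kb" files. Values are per-CPU and
 * in KiB (note the "value is in KB" shift above). Illustrative only:
 *
 *	# echo 1408 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1408 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */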
6968
6969 static ssize_t
6970 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6971                                 size_t cnt, loff_t *ppos)
6972 {
6973         struct trace_array *tr = filp->private_data;
6974         char buf[64];
6975         int r, cpu;
6976         unsigned long size = 0, expanded_size = 0;
6977
6978         mutex_lock(&trace_types_lock);
6979         for_each_tracing_cpu(cpu) {
6980                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6981                 if (!ring_buffer_expanded)
6982                         expanded_size += trace_buf_size >> 10;
6983         }
6984         if (ring_buffer_expanded)
6985                 r = sprintf(buf, "%lu\n", size);
6986         else
6987                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6988         mutex_unlock(&trace_types_lock);
6989
6990         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6991 }
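/*
 * This read backs the "buffer_total_size_kb" file, which reports the sum
 * of all per-CPU buffer sizes. Illustrative only:
 *
 *	# cat /sys/kernel/tracing/buffer_total_size_kb
 */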
6992
6993 static ssize_t
6994 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6995                           size_t cnt, loff_t *ppos)
6996 {
6997         /*
6998          * There is no need to read what the user has written; this function
6999          * only exists so that using "echo" on the file does not return an error.
7000          */
7001
7002         *ppos += cnt;
7003
7004         return cnt;
7005 }
7006
7007 static int
7008 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7009 {
7010         struct trace_array *tr = inode->i_private;
7011
7012         /* disable tracing? */
7013         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7014                 tracer_tracing_off(tr);
7015         /* resize the ring buffer to 0 */
7016         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7017
7018         trace_array_put(tr);
7019
7020         return 0;
7021 }
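/*
 * These handlers back the "free_buffer" file: on the final close the ring
 * buffer is resized back down (a size of 0 is requested above), and with
 * the stop-on-free option set tracing is turned off first. Illustrative
 * only:
 *
 *	# echo > /sys/kernel/tracing/free_buffer
 */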
7022
7023 static ssize_t
7024 tracing_mark_write(struct file *filp, const char __user *ubuf,
7025                                         size_t cnt, loff_t *fpos)
7026 {
7027         struct trace_array *tr = filp->private_data;
7028         struct ring_buffer_event *event;
7029         enum event_trigger_type tt = ETT_NONE;
7030         struct trace_buffer *buffer;
7031         struct print_entry *entry;
7032         ssize_t written;
7033         int size;
7034         int len;
7035
7036 /* Used in tracing_mark_raw_write() as well */
7037 #define FAULTED_STR "<faulted>"
7038 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7039
7040         if (tracing_disabled)
7041                 return -EINVAL;
7042
7043         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7044                 return -EINVAL;
7045
7046         if (cnt > TRACE_BUF_SIZE)
7047                 cnt = TRACE_BUF_SIZE;
7048
7049         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7050
7051         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7052
7053         /* If less than "<faulted>", then make sure we can still add that */
7054         if (cnt < FAULTED_SIZE)
7055                 size += FAULTED_SIZE - cnt;
7056
7057         buffer = tr->array_buffer.buffer;
7058         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7059                                             tracing_gen_ctx());
7060         if (unlikely(!event))
7061                 /* Ring buffer disabled, return as if not open for write */
7062                 return -EBADF;
7063
7064         entry = ring_buffer_event_data(event);
7065         entry->ip = _THIS_IP_;
7066
7067         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7068         if (len) {
7069                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7070                 cnt = FAULTED_SIZE;
7071                 written = -EFAULT;
7072         } else
7073                 written = cnt;
7074
7075         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7076                 /* do not add \n before testing triggers, but add \0 */
7077                 entry->buf[cnt] = '\0';
7078                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7079         }
7080
7081         if (entry->buf[cnt - 1] != '\n') {
7082                 entry->buf[cnt] = '\n';
7083                 entry->buf[cnt + 1] = '\0';
7084         } else
7085                 entry->buf[cnt] = '\0';
7086
7087         if (static_branch_unlikely(&trace_marker_exports_enabled))
7088                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7089         __buffer_unlock_commit(buffer, event);
7090
7091         if (tt)
7092                 event_triggers_post_call(tr->trace_marker_file, tt);
7093
7094         if (written > 0)
7095                 *fpos += written;
7096
7097         return written;
7098 }
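/*
 * tracing_mark_write() backs the "trace_marker" file; any string written
 * to it shows up in the trace as a print event. Illustrative only:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */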
7099
7100 /* Limit it for now to 3K (including tag) */
7101 #define RAW_DATA_MAX_SIZE (1024*3)
7102
7103 static ssize_t
7104 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7105                                         size_t cnt, loff_t *fpos)
7106 {
7107         struct trace_array *tr = filp->private_data;
7108         struct ring_buffer_event *event;
7109         struct trace_buffer *buffer;
7110         struct raw_data_entry *entry;
7111         ssize_t written;
7112         int size;
7113         int len;
7114
7115 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7116
7117         if (tracing_disabled)
7118                 return -EINVAL;
7119
7120         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7121                 return -EINVAL;
7122
7123         /* The marker must at least have a tag id */
7124         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7125                 return -EINVAL;
7126
7127         if (cnt > TRACE_BUF_SIZE)
7128                 cnt = TRACE_BUF_SIZE;
7129
7130         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7131
7132         size = sizeof(*entry) + cnt;
7133         if (cnt < FAULT_SIZE_ID)
7134                 size += FAULT_SIZE_ID - cnt;
7135
7136         buffer = tr->array_buffer.buffer;
7137         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7138                                             tracing_gen_ctx());
7139         if (!event)
7140                 /* Ring buffer disabled, return as if not open for write */
7141                 return -EBADF;
7142
7143         entry = ring_buffer_event_data(event);
7144
7145         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7146         if (len) {
7147                 entry->id = -1;
7148                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7149                 written = -EFAULT;
7150         } else
7151                 written = cnt;
7152
7153         __buffer_unlock_commit(buffer, event);
7154
7155         if (written > 0)
7156                 *fpos += written;
7157
7158         return written;
7159 }
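/*
 * tracing_mark_raw_write() backs the "trace_marker_raw" file. A write must
 * start with a sizeof(unsigned int) tag id followed by raw payload bytes.
 * A rough user-space sketch (struct layout and values are illustrative):
 *
 *	struct { unsigned int id; char payload[8]; } raw = { 42, "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &raw, sizeof(raw));
 */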
7160
7161 static int tracing_clock_show(struct seq_file *m, void *v)
7162 {
7163         struct trace_array *tr = m->private;
7164         int i;
7165
7166         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7167                 seq_printf(m,
7168                         "%s%s%s%s", i ? " " : "",
7169                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7170                         i == tr->clock_id ? "]" : "");
7171         seq_putc(m, '\n');
7172
7173         return 0;
7174 }
7175
7176 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7177 {
7178         int i;
7179
7180         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7181                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7182                         break;
7183         }
7184         if (i == ARRAY_SIZE(trace_clocks))
7185                 return -EINVAL;
7186
7187         mutex_lock(&trace_types_lock);
7188
7189         tr->clock_id = i;
7190
7191         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7192
7193         /*
7194          * New clock may not be consistent with the previous clock.
7195          * Reset the buffer so that it doesn't have incomparable timestamps.
7196          */
7197         tracing_reset_online_cpus(&tr->array_buffer);
7198
7199 #ifdef CONFIG_TRACER_MAX_TRACE
7200         if (tr->max_buffer.buffer)
7201                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7202         tracing_reset_online_cpus(&tr->max_buffer);
7203 #endif
7204
7205         mutex_unlock(&trace_types_lock);
7206
7207         return 0;
7208 }
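/*
 * tracing_set_clock() is reached by writing a clock name to the
 * "trace_clock" file; reading it lists the available clocks with the
 * active one bracketed. Illustrative only (the exact list depends on the
 * architecture and configuration):
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo global > /sys/kernel/tracing/trace_clock
 */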
7209
7210 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7211                                    size_t cnt, loff_t *fpos)
7212 {
7213         struct seq_file *m = filp->private_data;
7214         struct trace_array *tr = m->private;
7215         char buf[64];
7216         const char *clockstr;
7217         int ret;
7218
7219         if (cnt >= sizeof(buf))
7220                 return -EINVAL;
7221
7222         if (copy_from_user(buf, ubuf, cnt))
7223                 return -EFAULT;
7224
7225         buf[cnt] = 0;
7226
7227         clockstr = strstrip(buf);
7228
7229         ret = tracing_set_clock(tr, clockstr);
7230         if (ret)
7231                 return ret;
7232
7233         *fpos += cnt;
7234
7235         return cnt;
7236 }
7237
7238 static int tracing_clock_open(struct inode *inode, struct file *file)
7239 {
7240         struct trace_array *tr = inode->i_private;
7241         int ret;
7242
7243         ret = tracing_check_open_get_tr(tr);
7244         if (ret)
7245                 return ret;
7246
7247         ret = single_open(file, tracing_clock_show, inode->i_private);
7248         if (ret < 0)
7249                 trace_array_put(tr);
7250
7251         return ret;
7252 }
7253
7254 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7255 {
7256         struct trace_array *tr = m->private;
7257
7258         mutex_lock(&trace_types_lock);
7259
7260         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7261                 seq_puts(m, "delta [absolute]\n");
7262         else
7263                 seq_puts(m, "[delta] absolute\n");
7264
7265         mutex_unlock(&trace_types_lock);
7266
7267         return 0;
7268 }
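/*
 * This seq handler backs the read-only "timestamp_mode" file, which shows
 * whether delta or absolute timestamps are in use. Illustrative only:
 *
 *	# cat /sys/kernel/tracing/timestamp_mode
 *	[delta] absolute
 */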
7269
7270 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7271 {
7272         struct trace_array *tr = inode->i_private;
7273         int ret;
7274
7275         ret = tracing_check_open_get_tr(tr);
7276         if (ret)
7277                 return ret;
7278
7279         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7280         if (ret < 0)
7281                 trace_array_put(tr);
7282
7283         return ret;
7284 }
7285
7286 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7287 {
7288         if (rbe == this_cpu_read(trace_buffered_event))
7289                 return ring_buffer_time_stamp(buffer);
7290
7291         return ring_buffer_event_time_stamp(buffer, rbe);
7292 }
7293
7294 /*
7295  * Set or disable using the per CPU trace_buffered_event when possible.
7296  */
7297 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7298 {
7299         int ret = 0;
7300
7301         mutex_lock(&trace_types_lock);
7302
7303         if (set && tr->no_filter_buffering_ref++)
7304                 goto out;
7305
7306         if (!set) {
7307                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7308                         ret = -EINVAL;
7309                         goto out;
7310                 }
7311
7312                 --tr->no_filter_buffering_ref;
7313         }
7314  out:
7315         mutex_unlock(&trace_types_lock);
7316
7317         return ret;
7318 }
7319
7320 struct ftrace_buffer_info {
7321         struct trace_iterator   iter;
7322         void                    *spare;
7323         unsigned int            spare_cpu;
7324         unsigned int            read;
7325 };
7326
7327 #ifdef CONFIG_TRACER_SNAPSHOT
7328 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7329 {
7330         struct trace_array *tr = inode->i_private;
7331         struct trace_iterator *iter;
7332         struct seq_file *m;
7333         int ret;
7334
7335         ret = tracing_check_open_get_tr(tr);
7336         if (ret)
7337                 return ret;
7338
7339         if (file->f_mode & FMODE_READ) {
7340                 iter = __tracing_open(inode, file, true);
7341                 if (IS_ERR(iter))
7342                         ret = PTR_ERR(iter);
7343         } else {
7344                 /* Writes still need the seq_file to hold the private data */
7345                 ret = -ENOMEM;
7346                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7347                 if (!m)
7348                         goto out;
7349                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7350                 if (!iter) {
7351                         kfree(m);
7352                         goto out;
7353                 }
7354                 ret = 0;
7355
7356                 iter->tr = tr;
7357                 iter->array_buffer = &tr->max_buffer;
7358                 iter->cpu_file = tracing_get_cpu(inode);
7359                 m->private = iter;
7360                 file->private_data = m;
7361         }
7362 out:
7363         if (ret < 0)
7364                 trace_array_put(tr);
7365
7366         return ret;
7367 }
7368
7369 static ssize_t
7370 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7371                        loff_t *ppos)
7372 {
7373         struct seq_file *m = filp->private_data;
7374         struct trace_iterator *iter = m->private;
7375         struct trace_array *tr = iter->tr;
7376         unsigned long val;
7377         int ret;
7378
7379         ret = tracing_update_buffers();
7380         if (ret < 0)
7381                 return ret;
7382
7383         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7384         if (ret)
7385                 return ret;
7386
7387         mutex_lock(&trace_types_lock);
7388
7389         if (tr->current_trace->use_max_tr) {
7390                 ret = -EBUSY;
7391                 goto out;
7392         }
7393
7394         arch_spin_lock(&tr->max_lock);
7395         if (tr->cond_snapshot)
7396                 ret = -EBUSY;
7397         arch_spin_unlock(&tr->max_lock);
7398         if (ret)
7399                 goto out;
7400
7401         switch (val) {
7402         case 0:
7403                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7404                         ret = -EINVAL;
7405                         break;
7406                 }
7407                 if (tr->allocated_snapshot)
7408                         free_snapshot(tr);
7409                 break;
7410         case 1:
7411 /* Only allow per-cpu swap if the ring buffer supports it */
7412 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7413                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7414                         ret = -EINVAL;
7415                         break;
7416                 }
7417 #endif
7418                 if (tr->allocated_snapshot)
7419                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7420                                         &tr->array_buffer, iter->cpu_file);
7421                 else
7422                         ret = tracing_alloc_snapshot_instance(tr);
7423                 if (ret < 0)
7424                         break;
7425                 local_irq_disable();
7426                 /* Now, we're going to swap */
7427                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7428                         update_max_tr(tr, current, smp_processor_id(), NULL);
7429                 else
7430                         update_max_tr_single(tr, current, iter->cpu_file);
7431                 local_irq_enable();
7432                 break;
7433         default:
7434                 if (tr->allocated_snapshot) {
7435                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7436                                 tracing_reset_online_cpus(&tr->max_buffer);
7437                         else
7438                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7439                 }
7440                 break;
7441         }
7442
7443         if (ret >= 0) {
7444                 *ppos += cnt;
7445                 ret = cnt;
7446         }
7447 out:
7448         mutex_unlock(&trace_types_lock);
7449         return ret;
7450 }
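/*
 * The switch above gives the "snapshot" file its semantics: writing 0
 * frees the snapshot buffer, 1 allocates it if needed and swaps it with
 * the live buffer, and any other value clears its contents. Illustrative
 * only:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot      # take a snapshot
 *	# cat /sys/kernel/tracing/snapshot
 *	# echo 0 > /sys/kernel/tracing/snapshot      # free it again
 */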
7451
7452 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7453 {
7454         struct seq_file *m = file->private_data;
7455         int ret;
7456
7457         ret = tracing_release(inode, file);
7458
7459         if (file->f_mode & FMODE_READ)
7460                 return ret;
7461
7462         /* If write only, the seq_file is just a stub */
7463         if (m)
7464                 kfree(m->private);
7465         kfree(m);
7466
7467         return 0;
7468 }
7469
7470 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7471 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7472                                     size_t count, loff_t *ppos);
7473 static int tracing_buffers_release(struct inode *inode, struct file *file);
7474 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7475                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7476
7477 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7478 {
7479         struct ftrace_buffer_info *info;
7480         int ret;
7481
7482         /* The following checks for tracefs lockdown */
7483         ret = tracing_buffers_open(inode, filp);
7484         if (ret < 0)
7485                 return ret;
7486
7487         info = filp->private_data;
7488
7489         if (info->iter.trace->use_max_tr) {
7490                 tracing_buffers_release(inode, filp);
7491                 return -EBUSY;
7492         }
7493
7494         info->iter.snapshot = true;
7495         info->iter.array_buffer = &info->iter.tr->max_buffer;
7496
7497         return ret;
7498 }
7499
7500 #endif /* CONFIG_TRACER_SNAPSHOT */
7501
7502
7503 static const struct file_operations tracing_thresh_fops = {
7504         .open           = tracing_open_generic,
7505         .read           = tracing_thresh_read,
7506         .write          = tracing_thresh_write,
7507         .llseek         = generic_file_llseek,
7508 };
7509
7510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7511 static const struct file_operations tracing_max_lat_fops = {
7512         .open           = tracing_open_generic,
7513         .read           = tracing_max_lat_read,
7514         .write          = tracing_max_lat_write,
7515         .llseek         = generic_file_llseek,
7516 };
7517 #endif
7518
7519 static const struct file_operations set_tracer_fops = {
7520         .open           = tracing_open_generic,
7521         .read           = tracing_set_trace_read,
7522         .write          = tracing_set_trace_write,
7523         .llseek         = generic_file_llseek,
7524 };
7525
7526 static const struct file_operations tracing_pipe_fops = {
7527         .open           = tracing_open_pipe,
7528         .poll           = tracing_poll_pipe,
7529         .read           = tracing_read_pipe,
7530         .splice_read    = tracing_splice_read_pipe,
7531         .release        = tracing_release_pipe,
7532         .llseek         = no_llseek,
7533 };
7534
7535 static const struct file_operations tracing_entries_fops = {
7536         .open           = tracing_open_generic_tr,
7537         .read           = tracing_entries_read,
7538         .write          = tracing_entries_write,
7539         .llseek         = generic_file_llseek,
7540         .release        = tracing_release_generic_tr,
7541 };
7542
7543 static const struct file_operations tracing_total_entries_fops = {
7544         .open           = tracing_open_generic_tr,
7545         .read           = tracing_total_entries_read,
7546         .llseek         = generic_file_llseek,
7547         .release        = tracing_release_generic_tr,
7548 };
7549
7550 static const struct file_operations tracing_free_buffer_fops = {
7551         .open           = tracing_open_generic_tr,
7552         .write          = tracing_free_buffer_write,
7553         .release        = tracing_free_buffer_release,
7554 };
7555
7556 static const struct file_operations tracing_mark_fops = {
7557         .open           = tracing_open_generic_tr,
7558         .write          = tracing_mark_write,
7559         .llseek         = generic_file_llseek,
7560         .release        = tracing_release_generic_tr,
7561 };
7562
7563 static const struct file_operations tracing_mark_raw_fops = {
7564         .open           = tracing_open_generic_tr,
7565         .write          = tracing_mark_raw_write,
7566         .llseek         = generic_file_llseek,
7567         .release        = tracing_release_generic_tr,
7568 };
7569
7570 static const struct file_operations trace_clock_fops = {
7571         .open           = tracing_clock_open,
7572         .read           = seq_read,
7573         .llseek         = seq_lseek,
7574         .release        = tracing_single_release_tr,
7575         .write          = tracing_clock_write,
7576 };
7577
7578 static const struct file_operations trace_time_stamp_mode_fops = {
7579         .open           = tracing_time_stamp_mode_open,
7580         .read           = seq_read,
7581         .llseek         = seq_lseek,
7582         .release        = tracing_single_release_tr,
7583 };
7584
7585 #ifdef CONFIG_TRACER_SNAPSHOT
7586 static const struct file_operations snapshot_fops = {
7587         .open           = tracing_snapshot_open,
7588         .read           = seq_read,
7589         .write          = tracing_snapshot_write,
7590         .llseek         = tracing_lseek,
7591         .release        = tracing_snapshot_release,
7592 };
7593
7594 static const struct file_operations snapshot_raw_fops = {
7595         .open           = snapshot_raw_open,
7596         .read           = tracing_buffers_read,
7597         .release        = tracing_buffers_release,
7598         .splice_read    = tracing_buffers_splice_read,
7599         .llseek         = no_llseek,
7600 };
7601
7602 #endif /* CONFIG_TRACER_SNAPSHOT */
7603
7604 /*
7605  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7606  * @filp: The active open file structure
7607  * @ubuf: The userspace provided buffer to read the value from
7608  * @cnt: The maximum number of bytes to read
7609  * @ppos: The current "file" position
7610  *
7611  * This function implements the write interface for a struct trace_min_max_param.
7612  * The filp->private_data must point to a trace_min_max_param structure that
7613  * defines where to write the value, the min and the max acceptable values,
7614  * and a lock to protect the write.
7615  */
7616 static ssize_t
7617 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7618 {
7619         struct trace_min_max_param *param = filp->private_data;
7620         u64 val;
7621         int err;
7622
7623         if (!param)
7624                 return -EFAULT;
7625
7626         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7627         if (err)
7628                 return err;
7629
7630         if (param->lock)
7631                 mutex_lock(param->lock);
7632
7633         if (param->min && val < *param->min)
7634                 err = -EINVAL;
7635
7636         if (param->max && val > *param->max)
7637                 err = -EINVAL;
7638
7639         if (!err)
7640                 *param->val = val;
7641
7642         if (param->lock)
7643                 mutex_unlock(param->lock);
7644
7645         if (err)
7646                 return err;
7647
7648         return cnt;
7649 }
7650
7651 /*
7652  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7653  * @filp: The active open file structure
7654  * @ubuf: The userspace provided buffer to read value into
7655  * @cnt: The maximum number of bytes to read
7656  * @ppos: The current "file" position
7657  *
7658  * This function implements the read interface for a struct trace_min_max_param.
7659  * The filp->private_data must point to a trace_min_max_param struct with valid
7660  * data.
7661  */
7662 static ssize_t
7663 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7664 {
7665         struct trace_min_max_param *param = filp->private_data;
7666         char buf[U64_STR_SIZE];
7667         int len;
7668         u64 val;
7669
7670         if (!param)
7671                 return -EFAULT;
7672
7673         val = *param->val;
7674
7675         if (cnt > sizeof(buf))
7676                 cnt = sizeof(buf);
7677
7678         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7679
7680         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7681 }
7682
7683 const struct file_operations trace_min_max_fops = {
7684         .open           = tracing_open_generic,
7685         .read           = trace_min_max_read,
7686         .write          = trace_min_max_write,
7687 };
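/*
 * A rough sketch of how a user of trace_min_max_fops might wire things up.
 * The field names follow the accesses above; the file name, mode and the
 * trace_create_file() call are illustrative only:
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val, my_min = 1, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", 0644, parent, &my_param,
 *			  &trace_min_max_fops);
 */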
7688
7689 #define TRACING_LOG_ERRS_MAX    8
7690 #define TRACING_LOG_LOC_MAX     128
7691
7692 #define CMD_PREFIX "  Command: "
7693
7694 struct err_info {
7695         const char      **errs; /* ptr to loc-specific array of err strings */
7696         u8              type;   /* index into errs -> specific err string */
7697         u8              pos;    /* caret position in cmd (< MAX_FILTER_STR_VAL = 256) */
7698         u64             ts;
7699 };
7700
7701 struct tracing_log_err {
7702         struct list_head        list;
7703         struct err_info         info;
7704         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7705         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7706 };
7707
7708 static DEFINE_MUTEX(tracing_err_log_lock);
7709
7710 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7711 {
7712         struct tracing_log_err *err;
7713
7714         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7715                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7716                 if (!err)
7717                         err = ERR_PTR(-ENOMEM);
7718                 tr->n_err_log_entries++;
7719
7720                 return err;
7721         }
7722
7723         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7724         list_del(&err->list);
7725
7726         return err;
7727 }
7728
7729 /**
7730  * err_pos - find the position of a string within a command for caret placement
7731  * @cmd: The tracing command that caused the error
7732  * @str: The string to position the caret at within @cmd
7733  *
7734  * Finds the position of the first occurrence of @str within @cmd.  The
7735  * return value can be passed to tracing_log_err() for caret placement
7736  * within @cmd.
7737  *
7738  * Returns the index within @cmd of the first occurrence of @str or 0
7739  * if @str was not found.
7740  */
7741 unsigned int err_pos(char *cmd, const char *str)
7742 {
7743         char *found;
7744
7745         if (WARN_ON(!strlen(cmd)))
7746                 return 0;
7747
7748         found = strstr(cmd, str);
7749         if (found)
7750                 return found - cmd;
7751
7752         return 0;
7753 }
7754
7755 /**
7756  * tracing_log_err - write an error to the tracing error log
7757  * @tr: The associated trace array for the error (NULL for top level array)
7758  * @loc: A string describing where the error occurred
7759  * @cmd: The tracing command that caused the error
7760  * @errs: The array of loc-specific static error strings
7761  * @type: The index into errs[], which produces the specific static err string
7762  * @pos: The position the caret should be placed in the cmd
7763  *
7764  * Writes an error into tracing/error_log of the form:
7765  *
7766  * <loc>: error: <text>
7767  *   Command: <cmd>
7768  *              ^
7769  *
7770  * tracing/error_log is a small log file containing the last
7771  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7772  * unless there has been a tracing error, and the error log can be
7773  * cleared and have its memory freed by writing the empty string in
7774  * truncation mode to it i.e. echo > tracing/error_log.
7775  *
7776  * NOTE: the @errs array along with the @type param are used to
7777  * produce a static error string - this string is not copied and saved
7778  * when the error is logged - only a pointer to it is saved.  See
7779  * existing callers for examples of how static strings are typically
7780  * defined for use with tracing_log_err().
7781  */
7782 void tracing_log_err(struct trace_array *tr,
7783                      const char *loc, const char *cmd,
7784                      const char **errs, u8 type, u8 pos)
7785 {
7786         struct tracing_log_err *err;
7787
7788         if (!tr)
7789                 tr = &global_trace;
7790
7791         mutex_lock(&tracing_err_log_lock);
7792         err = get_tracing_log_err(tr);
7793         if (PTR_ERR(err) == -ENOMEM) {
7794                 mutex_unlock(&tracing_err_log_lock);
7795                 return;
7796         }
7797
7798         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7799         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7800
7801         err->info.errs = errs;
7802         err->info.type = type;
7803         err->info.pos = pos;
7804         err->info.ts = local_clock();
7805
7806         list_add_tail(&err->list, &tr->err_log);
7807         mutex_unlock(&tracing_err_log_lock);
7808 }
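/*
 * A rough sketch of a typical caller (the names here are illustrative, not
 * taken from an existing call site): a static array of error strings is
 * indexed by @type, and err_pos() supplies the caret position:
 *
 *	static const char *my_errs[] = { "Duplicate field", "Bad operator" };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs, 1,
 *			err_pos(cmd, "=="));
 */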
7809
7810 static void clear_tracing_err_log(struct trace_array *tr)
7811 {
7812         struct tracing_log_err *err, *next;
7813
7814         mutex_lock(&tracing_err_log_lock);
7815         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7816                 list_del(&err->list);
7817                 kfree(err);
7818         }
7819
7820         tr->n_err_log_entries = 0;
7821         mutex_unlock(&tracing_err_log_lock);
7822 }
7823
7824 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7825 {
7826         struct trace_array *tr = m->private;
7827
7828         mutex_lock(&tracing_err_log_lock);
7829
7830         return seq_list_start(&tr->err_log, *pos);
7831 }
7832
7833 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7834 {
7835         struct trace_array *tr = m->private;
7836
7837         return seq_list_next(v, &tr->err_log, pos);
7838 }
7839
7840 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7841 {
7842         mutex_unlock(&tracing_err_log_lock);
7843 }
7844
7845 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7846 {
7847         u8 i;
7848
7849         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7850                 seq_putc(m, ' ');
7851         for (i = 0; i < pos; i++)
7852                 seq_putc(m, ' ');
7853         seq_puts(m, "^\n");
7854 }
7855
7856 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7857 {
7858         struct tracing_log_err *err = v;
7859
7860         if (err) {
7861                 const char *err_text = err->info.errs[err->info.type];
7862                 u64 sec = err->info.ts;
7863                 u32 nsec;
7864
7865                 nsec = do_div(sec, NSEC_PER_SEC);
7866                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7867                            err->loc, err_text);
7868                 seq_printf(m, "%s", err->cmd);
7869                 tracing_err_log_show_pos(m, err->info.pos);
7870         }
7871
7872         return 0;
7873 }
7874
7875 static const struct seq_operations tracing_err_log_seq_ops = {
7876         .start  = tracing_err_log_seq_start,
7877         .next   = tracing_err_log_seq_next,
7878         .stop   = tracing_err_log_seq_stop,
7879         .show   = tracing_err_log_seq_show
7880 };
7881
7882 static int tracing_err_log_open(struct inode *inode, struct file *file)
7883 {
7884         struct trace_array *tr = inode->i_private;
7885         int ret = 0;
7886
7887         ret = tracing_check_open_get_tr(tr);
7888         if (ret)
7889                 return ret;
7890
7891         /* If this file was opened for write, then erase contents */
7892         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7893                 clear_tracing_err_log(tr);
7894
7895         if (file->f_mode & FMODE_READ) {
7896                 ret = seq_open(file, &tracing_err_log_seq_ops);
7897                 if (!ret) {
7898                         struct seq_file *m = file->private_data;
7899                         m->private = tr;
7900                 } else {
7901                         trace_array_put(tr);
7902                 }
7903         }
7904         return ret;
7905 }
7906
7907 static ssize_t tracing_err_log_write(struct file *file,
7908                                      const char __user *buffer,
7909                                      size_t count, loff_t *ppos)
7910 {
7911         return count;
7912 }
7913
7914 static int tracing_err_log_release(struct inode *inode, struct file *file)
7915 {
7916         struct trace_array *tr = inode->i_private;
7917
7918         trace_array_put(tr);
7919
7920         if (file->f_mode & FMODE_READ)
7921                 seq_release(inode, file);
7922
7923         return 0;
7924 }
7925
7926 static const struct file_operations tracing_err_log_fops = {
7927         .open           = tracing_err_log_open,
7928         .write          = tracing_err_log_write,
7929         .read           = seq_read,
7930         .llseek         = seq_lseek,
7931         .release        = tracing_err_log_release,
7932 };
7933
7934 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7935 {
7936         struct trace_array *tr = inode->i_private;
7937         struct ftrace_buffer_info *info;
7938         int ret;
7939
7940         ret = tracing_check_open_get_tr(tr);
7941         if (ret)
7942                 return ret;
7943
7944         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7945         if (!info) {
7946                 trace_array_put(tr);
7947                 return -ENOMEM;
7948         }
7949
7950         mutex_lock(&trace_types_lock);
7951
7952         info->iter.tr           = tr;
7953         info->iter.cpu_file     = tracing_get_cpu(inode);
7954         info->iter.trace        = tr->current_trace;
7955         info->iter.array_buffer = &tr->array_buffer;
7956         info->spare             = NULL;
7957         /* Force reading ring buffer for first read */
7958         info->read              = (unsigned int)-1;
7959
7960         filp->private_data = info;
7961
7962         tr->trace_ref++;
7963
7964         mutex_unlock(&trace_types_lock);
7965
7966         ret = nonseekable_open(inode, filp);
7967         if (ret < 0)
7968                 trace_array_put(tr);
7969
7970         return ret;
7971 }
7972
7973 static __poll_t
7974 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7975 {
7976         struct ftrace_buffer_info *info = filp->private_data;
7977         struct trace_iterator *iter = &info->iter;
7978
7979         return trace_poll(iter, filp, poll_table);
7980 }
7981
7982 static ssize_t
7983 tracing_buffers_read(struct file *filp, char __user *ubuf,
7984                      size_t count, loff_t *ppos)
7985 {
7986         struct ftrace_buffer_info *info = filp->private_data;
7987         struct trace_iterator *iter = &info->iter;
7988         ssize_t ret = 0;
7989         ssize_t size;
7990
7991         if (!count)
7992                 return 0;
7993
7994 #ifdef CONFIG_TRACER_MAX_TRACE
7995         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7996                 return -EBUSY;
7997 #endif
7998
7999         if (!info->spare) {
8000                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8001                                                           iter->cpu_file);
8002                 if (IS_ERR(info->spare)) {
8003                         ret = PTR_ERR(info->spare);
8004                         info->spare = NULL;
8005                 } else {
8006                         info->spare_cpu = iter->cpu_file;
8007                 }
8008         }
8009         if (!info->spare)
8010                 return ret;
8011
8012         /* Do we have previous read data to read? */
8013         if (info->read < PAGE_SIZE)
8014                 goto read;
8015
8016  again:
8017         trace_access_lock(iter->cpu_file);
8018         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8019                                     &info->spare,
8020                                     count,
8021                                     iter->cpu_file, 0);
8022         trace_access_unlock(iter->cpu_file);
8023
8024         if (ret < 0) {
8025                 if (trace_empty(iter)) {
8026                         if ((filp->f_flags & O_NONBLOCK))
8027                                 return -EAGAIN;
8028
8029                         ret = wait_on_pipe(iter, 0);
8030                         if (ret)
8031                                 return ret;
8032
8033                         goto again;
8034                 }
8035                 return 0;
8036         }
8037
8038         info->read = 0;
8039  read:
8040         size = PAGE_SIZE - info->read;
8041         if (size > count)
8042                 size = count;
8043
8044         ret = copy_to_user(ubuf, info->spare + info->read, size);
8045         if (ret == size)
8046                 return -EFAULT;
8047
8048         size -= ret;
8049
8050         *ppos += size;
8051         info->read += size;
8052
8053         return size;
8054 }
8055
8056 static int tracing_buffers_release(struct inode *inode, struct file *file)
8057 {
8058         struct ftrace_buffer_info *info = file->private_data;
8059         struct trace_iterator *iter = &info->iter;
8060
8061         mutex_lock(&trace_types_lock);
8062
8063         iter->tr->trace_ref--;
8064
8065         __trace_array_put(iter->tr);
8066
8067         if (info->spare)
8068                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8069                                            info->spare_cpu, info->spare);
8070         kvfree(info);
8071
8072         mutex_unlock(&trace_types_lock);
8073
8074         return 0;
8075 }
8076
8077 struct buffer_ref {
8078         struct trace_buffer     *buffer;
8079         void                    *page;
8080         int                     cpu;
8081         refcount_t              refcount;
8082 };
8083
8084 static void buffer_ref_release(struct buffer_ref *ref)
8085 {
8086         if (!refcount_dec_and_test(&ref->refcount))
8087                 return;
8088         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8089         kfree(ref);
8090 }
8091
8092 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8093                                     struct pipe_buffer *buf)
8094 {
8095         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8096
8097         buffer_ref_release(ref);
8098         buf->private = 0;
8099 }
8100
8101 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8102                                 struct pipe_buffer *buf)
8103 {
8104         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8105
8106         if (refcount_read(&ref->refcount) > INT_MAX/2)
8107                 return false;
8108
8109         refcount_inc(&ref->refcount);
8110         return true;
8111 }
8112
8113 /* Pipe buffer operations for a buffer. */
8114 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8115         .release                = buffer_pipe_buf_release,
8116         .get                    = buffer_pipe_buf_get,
8117 };
8118
8119 /*
8120  * Callback from splice_to_pipe(), used to release the pages left in
8121  * the spd if we errored out while filling the pipe.
8122  */
8123 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8124 {
8125         struct buffer_ref *ref =
8126                 (struct buffer_ref *)spd->partial[i].private;
8127
8128         buffer_ref_release(ref);
8129         spd->partial[i].private = 0;
8130 }
8131
8132 static ssize_t
8133 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8134                             struct pipe_inode_info *pipe, size_t len,
8135                             unsigned int flags)
8136 {
8137         struct ftrace_buffer_info *info = file->private_data;
8138         struct trace_iterator *iter = &info->iter;
8139         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8140         struct page *pages_def[PIPE_DEF_BUFFERS];
8141         struct splice_pipe_desc spd = {
8142                 .pages          = pages_def,
8143                 .partial        = partial_def,
8144                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8145                 .ops            = &buffer_pipe_buf_ops,
8146                 .spd_release    = buffer_spd_release,
8147         };
8148         struct buffer_ref *ref;
8149         int entries, i;
8150         ssize_t ret = 0;
8151
8152 #ifdef CONFIG_TRACER_MAX_TRACE
8153         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8154                 return -EBUSY;
8155 #endif
8156
8157         if (*ppos & (PAGE_SIZE - 1))
8158                 return -EINVAL;
8159
8160         if (len & (PAGE_SIZE - 1)) {
8161                 if (len < PAGE_SIZE)
8162                         return -EINVAL;
8163                 len &= PAGE_MASK;
8164         }
8165
8166         if (splice_grow_spd(pipe, &spd))
8167                 return -ENOMEM;
8168
8169  again:
8170         trace_access_lock(iter->cpu_file);
8171         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8172
8173         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8174                 struct page *page;
8175                 int r;
8176
8177                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8178                 if (!ref) {
8179                         ret = -ENOMEM;
8180                         break;
8181                 }
8182
8183                 refcount_set(&ref->refcount, 1);
8184                 ref->buffer = iter->array_buffer->buffer;
8185                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8186                 if (IS_ERR(ref->page)) {
8187                         ret = PTR_ERR(ref->page);
8188                         ref->page = NULL;
8189                         kfree(ref);
8190                         break;
8191                 }
8192                 ref->cpu = iter->cpu_file;
8193
8194                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8195                                           len, iter->cpu_file, 1);
8196                 if (r < 0) {
8197                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8198                                                    ref->page);
8199                         kfree(ref);
8200                         break;
8201                 }
8202
8203                 page = virt_to_page(ref->page);
8204
8205                 spd.pages[i] = page;
8206                 spd.partial[i].len = PAGE_SIZE;
8207                 spd.partial[i].offset = 0;
8208                 spd.partial[i].private = (unsigned long)ref;
8209                 spd.nr_pages++;
8210                 *ppos += PAGE_SIZE;
8211
8212                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8213         }
8214
8215         trace_access_unlock(iter->cpu_file);
8216         spd.nr_pages = i;
8217
8218         /* did we read anything? */
8219         if (!spd.nr_pages) {
8220                 if (ret)
8221                         goto out;
8222
8223                 ret = -EAGAIN;
8224                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8225                         goto out;
8226
8227                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8228                 if (ret)
8229                         goto out;
8230
8231                 goto again;
8232         }
8233
8234         ret = splice_to_pipe(pipe, &spd);
8235 out:
8236         splice_shrink_spd(&spd);
8237
8238         return ret;
8239 }
8240
8241 static const struct file_operations tracing_buffers_fops = {
8242         .open           = tracing_buffers_open,
8243         .read           = tracing_buffers_read,
8244         .poll           = tracing_buffers_poll,
8245         .release        = tracing_buffers_release,
8246         .splice_read    = tracing_buffers_splice_read,
8247         .llseek         = no_llseek,
8248 };
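/*
 * These fops back the per_cpu/cpu<N>/trace_pipe_raw files, which hand
 * out whole ring-buffer pages rather than formatted text.  A minimal
 * user-space sketch of a reader follows; the tracefs mount point is an
 * assumption, and decoding the binary page format is left to tools
 * such as trace-cmd:
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           char page[4096];
 *           ssize_t r;
 *           int fd;
 *
 *           fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                     O_RDONLY);
 *           if (fd < 0)
 *                   return 1;
 *           while ((r = read(fd, page, sizeof(page))) > 0)
 *                   fprintf(stderr, "got %zd bytes of raw buffer data\n", r);
 *           close(fd);
 *           return 0;
 *   }
 */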
8249
8250 static ssize_t
8251 tracing_stats_read(struct file *filp, char __user *ubuf,
8252                    size_t count, loff_t *ppos)
8253 {
8254         struct inode *inode = file_inode(filp);
8255         struct trace_array *tr = inode->i_private;
8256         struct array_buffer *trace_buf = &tr->array_buffer;
8257         int cpu = tracing_get_cpu(inode);
8258         struct trace_seq *s;
8259         unsigned long cnt;
8260         unsigned long long t;
8261         unsigned long usec_rem;
8262
8263         s = kmalloc(sizeof(*s), GFP_KERNEL);
8264         if (!s)
8265                 return -ENOMEM;
8266
8267         trace_seq_init(s);
8268
8269         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8270         trace_seq_printf(s, "entries: %ld\n", cnt);
8271
8272         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8273         trace_seq_printf(s, "overrun: %ld\n", cnt);
8274
8275         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8276         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8277
8278         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8279         trace_seq_printf(s, "bytes: %ld\n", cnt);
8280
8281         if (trace_clocks[tr->clock_id].in_ns) {
8282                 /* local or global for trace_clock */
8283                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8284                 usec_rem = do_div(t, USEC_PER_SEC);
8285                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8286                                                                 t, usec_rem);
8287
8288                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8289                 usec_rem = do_div(t, USEC_PER_SEC);
8290                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8291         } else {
8292                 /* counter or tsc mode for trace_clock */
8293                 trace_seq_printf(s, "oldest event ts: %llu\n",
8294                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8295
8296                 trace_seq_printf(s, "now ts: %llu\n",
8297                                 ring_buffer_time_stamp(trace_buf->buffer));
8298         }
8299
8300         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8301         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8302
8303         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8304         trace_seq_printf(s, "read events: %ld\n", cnt);
8305
8306         count = simple_read_from_buffer(ubuf, count, ppos,
8307                                         s->buffer, trace_seq_used(s));
8308
8309         kfree(s);
8310
8311         return count;
8312 }
8313
8314 static const struct file_operations tracing_stats_fops = {
8315         .open           = tracing_open_generic_tr,
8316         .read           = tracing_stats_read,
8317         .llseek         = generic_file_llseek,
8318         .release        = tracing_release_generic_tr,
8319 };
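/*
 * tracing_stats_read() emits one "name: value" line per counter for a
 * single CPU.  Sketch of what per_cpu/cpu0/stats might look like with
 * a nanosecond trace clock (all numbers are made up):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts: 10353.730901
 *   now ts: 10357.152045
 *   dropped events: 0
 *   read events: 256
 */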
8320
8321 #ifdef CONFIG_DYNAMIC_FTRACE
8322
8323 static ssize_t
8324 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8325                   size_t cnt, loff_t *ppos)
8326 {
8327         ssize_t ret;
8328         char *buf;
8329         int r;
8330
8331         /* 256 should be plenty to hold the amount needed */
8332         buf = kmalloc(256, GFP_KERNEL);
8333         if (!buf)
8334                 return -ENOMEM;
8335
8336         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8337                       ftrace_update_tot_cnt,
8338                       ftrace_number_of_pages,
8339                       ftrace_number_of_groups);
8340
8341         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8342         kfree(buf);
8343         return ret;
8344 }
8345
8346 static const struct file_operations tracing_dyn_info_fops = {
8347         .open           = tracing_open_generic,
8348         .read           = tracing_read_dyn_info,
8349         .llseek         = generic_file_llseek,
8350 };
8351 #endif /* CONFIG_DYNAMIC_FTRACE */
8352
8353 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8354 static void
8355 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8356                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8357                 void *data)
8358 {
8359         tracing_snapshot_instance(tr);
8360 }
8361
8362 static void
8363 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8364                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8365                       void *data)
8366 {
8367         struct ftrace_func_mapper *mapper = data;
8368         long *count = NULL;
8369
8370         if (mapper)
8371                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8372
8373         if (count) {
8375                 if (*count <= 0)
8376                         return;
8377
8378                 (*count)--;
8379         }
8380
8381         tracing_snapshot_instance(tr);
8382 }
8383
8384 static int
8385 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8386                       struct ftrace_probe_ops *ops, void *data)
8387 {
8388         struct ftrace_func_mapper *mapper = data;
8389         long *count = NULL;
8390
8391         seq_printf(m, "%ps:", (void *)ip);
8392
8393         seq_puts(m, "snapshot");
8394
8395         if (mapper)
8396                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8397
8398         if (count)
8399                 seq_printf(m, ":count=%ld\n", *count);
8400         else
8401                 seq_puts(m, ":unlimited\n");
8402
8403         return 0;
8404 }
8405
8406 static int
8407 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8408                      unsigned long ip, void *init_data, void **data)
8409 {
8410         struct ftrace_func_mapper *mapper = *data;
8411
8412         if (!mapper) {
8413                 mapper = allocate_ftrace_func_mapper();
8414                 if (!mapper)
8415                         return -ENOMEM;
8416                 *data = mapper;
8417         }
8418
8419         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8420 }
8421
8422 static void
8423 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8424                      unsigned long ip, void *data)
8425 {
8426         struct ftrace_func_mapper *mapper = data;
8427
8428         if (!ip) {
8429                 if (!mapper)
8430                         return;
8431                 free_ftrace_func_mapper(mapper, NULL);
8432                 return;
8433         }
8434
8435         ftrace_func_mapper_remove_ip(mapper, ip);
8436 }
8437
8438 static struct ftrace_probe_ops snapshot_probe_ops = {
8439         .func                   = ftrace_snapshot,
8440         .print                  = ftrace_snapshot_print,
8441 };
8442
8443 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8444         .func                   = ftrace_count_snapshot,
8445         .print                  = ftrace_snapshot_print,
8446         .init                   = ftrace_snapshot_init,
8447         .free                   = ftrace_snapshot_free,
8448 };
8449
8450 static int
8451 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8452                                char *glob, char *cmd, char *param, int enable)
8453 {
8454         struct ftrace_probe_ops *ops;
8455         void *count = (void *)-1;
8456         char *number;
8457         int ret;
8458
8459         if (!tr)
8460                 return -ENODEV;
8461
8462         /* hash funcs only work with set_ftrace_filter */
8463         if (!enable)
8464                 return -EINVAL;
8465
8466         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8467
8468         if (glob[0] == '!')
8469                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8470
8471         if (!param)
8472                 goto out_reg;
8473
8474         number = strsep(&param, ":");
8475
8476         if (!strlen(number))
8477                 goto out_reg;
8478
8479         /*
8480          * We use the callback data field (which is a pointer)
8481          * as our counter.
8482          */
8483         ret = kstrtoul(number, 0, (unsigned long *)&count);
8484         if (ret)
8485                 return ret;
8486
8487  out_reg:
8488         ret = tracing_alloc_snapshot_instance(tr);
8489         if (ret < 0)
8490                 goto out;
8491
8492         ret = register_ftrace_function_probe(glob, tr, ops, count);
8493
8494  out:
8495         return ret < 0 ? ret : 0;
8496 }
8497
8498 static struct ftrace_func_command ftrace_snapshot_cmd = {
8499         .name                   = "snapshot",
8500         .func                   = ftrace_trace_snapshot_callback,
8501 };
8502
8503 static __init int register_snapshot_cmd(void)
8504 {
8505         return register_ftrace_command(&ftrace_snapshot_cmd);
8506 }
8507 #else
8508 static inline __init int register_snapshot_cmd(void) { return 0; }
8509 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
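/*
 * Usage sketch for the "snapshot" command registered above, as parsed
 * by ftrace_trace_snapshot_callback() (the function name "schedule" is
 * only an example):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter    # only the first 5 hits
 *   echo '!schedule:snapshot' >> set_ftrace_filter    # remove the probe
 */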
8510
8511 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8512 {
8513         if (WARN_ON(!tr->dir))
8514                 return ERR_PTR(-ENODEV);
8515
8516         /* Top directory uses NULL as the parent */
8517         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8518                 return NULL;
8519
8520         /* All sub buffers have a descriptor */
8521         return tr->dir;
8522 }
8523
8524 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8525 {
8526         struct dentry *d_tracer;
8527
8528         if (tr->percpu_dir)
8529                 return tr->percpu_dir;
8530
8531         d_tracer = tracing_get_dentry(tr);
8532         if (IS_ERR(d_tracer))
8533                 return NULL;
8534
8535         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8536
8537         MEM_FAIL(!tr->percpu_dir,
8538                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8539
8540         return tr->percpu_dir;
8541 }
8542
8543 static struct dentry *
8544 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8545                       void *data, long cpu, const struct file_operations *fops)
8546 {
8547         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8548
8549         if (ret) /* See tracing_get_cpu() */
8550                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8551         return ret;
8552 }
8553
8554 static void
8555 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8556 {
8557         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8558         struct dentry *d_cpu;
8559         char cpu_dir[30]; /* 30 characters should be more than enough */
8560
8561         if (!d_percpu)
8562                 return;
8563
8564         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8565         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8566         if (!d_cpu) {
8567                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8568                 return;
8569         }
8570
8571         /* per cpu trace_pipe */
8572         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8573                                 tr, cpu, &tracing_pipe_fops);
8574
8575         /* per cpu trace */
8576         trace_create_cpu_file("trace", 0644, d_cpu,
8577                                 tr, cpu, &tracing_fops);
8578
8579         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8580                                 tr, cpu, &tracing_buffers_fops);
8581
8582         trace_create_cpu_file("stats", 0444, d_cpu,
8583                                 tr, cpu, &tracing_stats_fops);
8584
8585         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8586                                 tr, cpu, &tracing_entries_fops);
8587
8588 #ifdef CONFIG_TRACER_SNAPSHOT
8589         trace_create_cpu_file("snapshot", 0644, d_cpu,
8590                                 tr, cpu, &snapshot_fops);
8591
8592         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8593                                 tr, cpu, &snapshot_raw_fops);
8594 #endif
8595 }
8596
8597 #ifdef CONFIG_FTRACE_SELFTEST
8598 /* Let selftest have access to static functions in this file */
8599 #include "trace_selftest.c"
8600 #endif
8601
8602 static ssize_t
8603 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8604                         loff_t *ppos)
8605 {
8606         struct trace_option_dentry *topt = filp->private_data;
8607         char *buf;
8608
8609         if (topt->flags->val & topt->opt->bit)
8610                 buf = "1\n";
8611         else
8612                 buf = "0\n";
8613
8614         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8615 }
8616
8617 static ssize_t
8618 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8619                          loff_t *ppos)
8620 {
8621         struct trace_option_dentry *topt = filp->private_data;
8622         unsigned long val;
8623         int ret;
8624
8625         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8626         if (ret)
8627                 return ret;
8628
8629         if (val != 0 && val != 1)
8630                 return -EINVAL;
8631
8632         if (!!(topt->flags->val & topt->opt->bit) != val) {
8633                 mutex_lock(&trace_types_lock);
8634                 ret = __set_tracer_option(topt->tr, topt->flags,
8635                                           topt->opt, !val);
8636                 mutex_unlock(&trace_types_lock);
8637                 if (ret)
8638                         return ret;
8639         }
8640
8641         *ppos += cnt;
8642
8643         return cnt;
8644 }
8645
8647 static const struct file_operations trace_options_fops = {
8648         .open = tracing_open_generic,
8649         .read = trace_options_read,
8650         .write = trace_options_write,
8651         .llseek = generic_file_llseek,
8652 };
8653
8654 /*
8655  * In order to pass in both the trace_array descriptor as well as the index
8656  * to the flag that the trace option file represents, the trace_array
8657  * has a character array of trace_flags_index[], which holds the index
8658  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8659  * The address of this character array is passed to the flag option file
8660  * read/write callbacks.
8661  *
8662  * In order to extract both the index and the trace_array descriptor,
8663  * get_tr_index() uses the following algorithm.
8664  *
8665  *   idx = *ptr;
8666  *
8667  * The pointer passed in is the address of one index entry, and since
8668  * index[i] == i, dereferencing it yields the index value itself.
8669  *
8670  * Then, to get the trace_array descriptor, subtract that index from
8671  * the pointer, which lands back at the start of the index array.
8672  *
8673  *   ptr - idx == &index[0]
8674  *
8675  * Then a simple container_of() from that pointer gets us to the
8676  * trace_array descriptor.
8677  */
8678 static void get_tr_index(void *data, struct trace_array **ptr,
8679                          unsigned int *pindex)
8680 {
8681         *pindex = *(unsigned char *)data;
8682
8683         *ptr = container_of(data - *pindex, struct trace_array,
8684                             trace_flags_index);
8685 }
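/*
 * Worked example of the scheme above: if data points at
 * tr->trace_flags_index[3], then *pindex == 3 (because index[3] == 3),
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that
 * address recovers tr itself.
 */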
8686
8687 static ssize_t
8688 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8689                         loff_t *ppos)
8690 {
8691         void *tr_index = filp->private_data;
8692         struct trace_array *tr;
8693         unsigned int index;
8694         char *buf;
8695
8696         get_tr_index(tr_index, &tr, &index);
8697
8698         if (tr->trace_flags & (1 << index))
8699                 buf = "1\n";
8700         else
8701                 buf = "0\n";
8702
8703         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8704 }
8705
8706 static ssize_t
8707 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8708                          loff_t *ppos)
8709 {
8710         void *tr_index = filp->private_data;
8711         struct trace_array *tr;
8712         unsigned int index;
8713         unsigned long val;
8714         int ret;
8715
8716         get_tr_index(tr_index, &tr, &index);
8717
8718         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8719         if (ret)
8720                 return ret;
8721
8722         if (val != 0 && val != 1)
8723                 return -EINVAL;
8724
8725         mutex_lock(&event_mutex);
8726         mutex_lock(&trace_types_lock);
8727         ret = set_tracer_flag(tr, 1 << index, val);
8728         mutex_unlock(&trace_types_lock);
8729         mutex_unlock(&event_mutex);
8730
8731         if (ret < 0)
8732                 return ret;
8733
8734         *ppos += cnt;
8735
8736         return cnt;
8737 }
8738
8739 static const struct file_operations trace_options_core_fops = {
8740         .open = tracing_open_generic,
8741         .read = trace_options_core_read,
8742         .write = trace_options_core_write,
8743         .llseek = generic_file_llseek,
8744 };
8745
8746 struct dentry *trace_create_file(const char *name,
8747                                  umode_t mode,
8748                                  struct dentry *parent,
8749                                  void *data,
8750                                  const struct file_operations *fops)
8751 {
8752         struct dentry *ret;
8753
8754         ret = tracefs_create_file(name, mode, parent, data, fops);
8755         if (!ret)
8756                 pr_warn("Could not create tracefs '%s' entry\n", name);
8757
8758         return ret;
8759 }
8760
8761
8762 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8763 {
8764         struct dentry *d_tracer;
8765
8766         if (tr->options)
8767                 return tr->options;
8768
8769         d_tracer = tracing_get_dentry(tr);
8770         if (IS_ERR(d_tracer))
8771                 return NULL;
8772
8773         tr->options = tracefs_create_dir("options", d_tracer);
8774         if (!tr->options) {
8775                 pr_warn("Could not create tracefs directory 'options'\n");
8776                 return NULL;
8777         }
8778
8779         return tr->options;
8780 }
8781
8782 static void
8783 create_trace_option_file(struct trace_array *tr,
8784                          struct trace_option_dentry *topt,
8785                          struct tracer_flags *flags,
8786                          struct tracer_opt *opt)
8787 {
8788         struct dentry *t_options;
8789
8790         t_options = trace_options_init_dentry(tr);
8791         if (!t_options)
8792                 return;
8793
8794         topt->flags = flags;
8795         topt->opt = opt;
8796         topt->tr = tr;
8797
8798         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8799                                     &trace_options_fops);
8800
8801 }
8802
8803 static void
8804 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8805 {
8806         struct trace_option_dentry *topts;
8807         struct trace_options *tr_topts;
8808         struct tracer_flags *flags;
8809         struct tracer_opt *opts;
8810         int cnt;
8811         int i;
8812
8813         if (!tracer)
8814                 return;
8815
8816         flags = tracer->flags;
8817
8818         if (!flags || !flags->opts)
8819                 return;
8820
8821         /*
8822          * If this is an instance, only create flags for tracers
8823          * the instance may have.
8824          */
8825         if (!trace_ok_for_array(tracer, tr))
8826                 return;
8827
8828         for (i = 0; i < tr->nr_topts; i++) {
8829                 /* Make sure there are no duplicate flags. */
8830                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8831                         return;
8832         }
8833
8834         opts = flags->opts;
8835
8836         for (cnt = 0; opts[cnt].name; cnt++)
8837                 ;
8838
8839         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8840         if (!topts)
8841                 return;
8842
8843         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8844                             GFP_KERNEL);
8845         if (!tr_topts) {
8846                 kfree(topts);
8847                 return;
8848         }
8849
8850         tr->topts = tr_topts;
8851         tr->topts[tr->nr_topts].tracer = tracer;
8852         tr->topts[tr->nr_topts].topts = topts;
8853         tr->nr_topts++;
8854
8855         for (cnt = 0; opts[cnt].name; cnt++) {
8856                 create_trace_option_file(tr, &topts[cnt], flags,
8857                                          &opts[cnt]);
8858                 MEM_FAIL(topts[cnt].entry == NULL,
8859                           "Failed to create trace option: %s",
8860                           opts[cnt].name);
8861         }
8862 }
8863
8864 static struct dentry *
8865 create_trace_option_core_file(struct trace_array *tr,
8866                               const char *option, long index)
8867 {
8868         struct dentry *t_options;
8869
8870         t_options = trace_options_init_dentry(tr);
8871         if (!t_options)
8872                 return NULL;
8873
8874         return trace_create_file(option, 0644, t_options,
8875                                  (void *)&tr->trace_flags_index[index],
8876                                  &trace_options_core_fops);
8877 }
8878
8879 static void create_trace_options_dir(struct trace_array *tr)
8880 {
8881         struct dentry *t_options;
8882         bool top_level = tr == &global_trace;
8883         int i;
8884
8885         t_options = trace_options_init_dentry(tr);
8886         if (!t_options)
8887                 return;
8888
8889         for (i = 0; trace_options[i]; i++) {
8890                 if (top_level ||
8891                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8892                         create_trace_option_core_file(tr, trace_options[i], i);
8893         }
8894 }
8895
8896 static ssize_t
8897 rb_simple_read(struct file *filp, char __user *ubuf,
8898                size_t cnt, loff_t *ppos)
8899 {
8900         struct trace_array *tr = filp->private_data;
8901         char buf[64];
8902         int r;
8903
8904         r = tracer_tracing_is_on(tr);
8905         r = sprintf(buf, "%d\n", r);
8906
8907         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8908 }
8909
8910 static ssize_t
8911 rb_simple_write(struct file *filp, const char __user *ubuf,
8912                 size_t cnt, loff_t *ppos)
8913 {
8914         struct trace_array *tr = filp->private_data;
8915         struct trace_buffer *buffer = tr->array_buffer.buffer;
8916         unsigned long val;
8917         int ret;
8918
8919         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8920         if (ret)
8921                 return ret;
8922
8923         if (buffer) {
8924                 mutex_lock(&trace_types_lock);
8925                 if (!!val == tracer_tracing_is_on(tr)) {
8926                         val = 0; /* do nothing */
8927                 } else if (val) {
8928                         tracer_tracing_on(tr);
8929                         if (tr->current_trace->start)
8930                                 tr->current_trace->start(tr);
8931                 } else {
8932                         tracer_tracing_off(tr);
8933                         if (tr->current_trace->stop)
8934                                 tr->current_trace->stop(tr);
8935                 }
8936                 mutex_unlock(&trace_types_lock);
8937         }
8938
8939         (*ppos)++;
8940
8941         return cnt;
8942 }
8943
8944 static const struct file_operations rb_simple_fops = {
8945         .open           = tracing_open_generic_tr,
8946         .read           = rb_simple_read,
8947         .write          = rb_simple_write,
8948         .release        = tracing_release_generic_tr,
8949         .llseek         = default_llseek,
8950 };
8951
8952 static ssize_t
8953 buffer_percent_read(struct file *filp, char __user *ubuf,
8954                     size_t cnt, loff_t *ppos)
8955 {
8956         struct trace_array *tr = filp->private_data;
8957         char buf[64];
8958         int r;
8959
8960         r = tr->buffer_percent;
8961         r = sprintf(buf, "%d\n", r);
8962
8963         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8964 }
8965
8966 static ssize_t
8967 buffer_percent_write(struct file *filp, const char __user *ubuf,
8968                      size_t cnt, loff_t *ppos)
8969 {
8970         struct trace_array *tr = filp->private_data;
8971         unsigned long val;
8972         int ret;
8973
8974         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8975         if (ret)
8976                 return ret;
8977
8978         if (val > 100)
8979                 return -EINVAL;
8980
8981         if (!val)
8982                 val = 1;
8983
8984         tr->buffer_percent = val;
8985
8986         (*ppos)++;
8987
8988         return cnt;
8989 }
8990
8991 static const struct file_operations buffer_percent_fops = {
8992         .open           = tracing_open_generic_tr,
8993         .read           = buffer_percent_read,
8994         .write          = buffer_percent_write,
8995         .release        = tracing_release_generic_tr,
8996         .llseek         = default_llseek,
8997 };
8998
8999 static struct dentry *trace_instance_dir;
9000
9001 static void
9002 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9003
9004 static int
9005 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9006 {
9007         enum ring_buffer_flags rb_flags;
9008
9009         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9010
9011         buf->tr = tr;
9012
9013         buf->buffer = ring_buffer_alloc(size, rb_flags);
9014         if (!buf->buffer)
9015                 return -ENOMEM;
9016
9017         buf->data = alloc_percpu(struct trace_array_cpu);
9018         if (!buf->data) {
9019                 ring_buffer_free(buf->buffer);
9020                 buf->buffer = NULL;
9021                 return -ENOMEM;
9022         }
9023
9024         /* Allocate the first page for all buffers */
9025         set_buffer_entries(&tr->array_buffer,
9026                            ring_buffer_size(tr->array_buffer.buffer, 0));
9027
9028         return 0;
9029 }
9030
9031 static int allocate_trace_buffers(struct trace_array *tr, int size)
9032 {
9033         int ret;
9034
9035         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9036         if (ret)
9037                 return ret;
9038
9039 #ifdef CONFIG_TRACER_MAX_TRACE
9040         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9041                                     allocate_snapshot ? size : 1);
9042         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9043                 ring_buffer_free(tr->array_buffer.buffer);
9044                 tr->array_buffer.buffer = NULL;
9045                 free_percpu(tr->array_buffer.data);
9046                 tr->array_buffer.data = NULL;
9047                 return -ENOMEM;
9048         }
9049         tr->allocated_snapshot = allocate_snapshot;
9050
9051         /*
9052          * Only the top level trace array gets its snapshot allocated
9053          * from the kernel command line.
9054          */
9055         allocate_snapshot = false;
9056 #endif
9057
9058         return 0;
9059 }
9060
9061 static void free_trace_buffer(struct array_buffer *buf)
9062 {
9063         if (buf->buffer) {
9064                 ring_buffer_free(buf->buffer);
9065                 buf->buffer = NULL;
9066                 free_percpu(buf->data);
9067                 buf->data = NULL;
9068         }
9069 }
9070
9071 static void free_trace_buffers(struct trace_array *tr)
9072 {
9073         if (!tr)
9074                 return;
9075
9076         free_trace_buffer(&tr->array_buffer);
9077
9078 #ifdef CONFIG_TRACER_MAX_TRACE
9079         free_trace_buffer(&tr->max_buffer);
9080 #endif
9081 }
9082
9083 static void init_trace_flags_index(struct trace_array *tr)
9084 {
9085         int i;
9086
9087         /* Used by the trace options files */
9088         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9089                 tr->trace_flags_index[i] = i;
9090 }
9091
9092 static void __update_tracer_options(struct trace_array *tr)
9093 {
9094         struct tracer *t;
9095
9096         for (t = trace_types; t; t = t->next)
9097                 add_tracer_options(tr, t);
9098 }
9099
9100 static void update_tracer_options(struct trace_array *tr)
9101 {
9102         mutex_lock(&trace_types_lock);
9103         __update_tracer_options(tr);
9104         mutex_unlock(&trace_types_lock);
9105 }
9106
9107 /* Must have trace_types_lock held */
9108 struct trace_array *trace_array_find(const char *instance)
9109 {
9110         struct trace_array *tr, *found = NULL;
9111
9112         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9113                 if (tr->name && strcmp(tr->name, instance) == 0) {
9114                         found = tr;
9115                         break;
9116                 }
9117         }
9118
9119         return found;
9120 }
9121
9122 struct trace_array *trace_array_find_get(const char *instance)
9123 {
9124         struct trace_array *tr;
9125
9126         mutex_lock(&trace_types_lock);
9127         tr = trace_array_find(instance);
9128         if (tr)
9129                 tr->ref++;
9130         mutex_unlock(&trace_types_lock);
9131
9132         return tr;
9133 }
9134
9135 static int trace_array_create_dir(struct trace_array *tr)
9136 {
9137         int ret;
9138
9139         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9140         if (!tr->dir)
9141                 return -EINVAL;
9142
9143         ret = event_trace_add_tracer(tr->dir, tr);
9144         if (ret)
9145                 tracefs_remove(tr->dir);
9146
9147         init_tracer_tracefs(tr, tr->dir);
9148         __update_tracer_options(tr);
9149
9150         return ret;
9151 }
9152
9153 static struct trace_array *trace_array_create(const char *name)
9154 {
9155         struct trace_array *tr;
9156         int ret;
9157
9158         ret = -ENOMEM;
9159         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9160         if (!tr)
9161                 return ERR_PTR(ret);
9162
9163         tr->name = kstrdup(name, GFP_KERNEL);
9164         if (!tr->name)
9165                 goto out_free_tr;
9166
9167         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9168                 goto out_free_tr;
9169
9170         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9171
9172         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9173
9174         raw_spin_lock_init(&tr->start_lock);
9175
9176         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9177
9178         tr->current_trace = &nop_trace;
9179
9180         INIT_LIST_HEAD(&tr->systems);
9181         INIT_LIST_HEAD(&tr->events);
9182         INIT_LIST_HEAD(&tr->hist_vars);
9183         INIT_LIST_HEAD(&tr->err_log);
9184
9185         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9186                 goto out_free_tr;
9187
9188         if (ftrace_allocate_ftrace_ops(tr) < 0)
9189                 goto out_free_tr;
9190
9191         ftrace_init_trace_array(tr);
9192
9193         init_trace_flags_index(tr);
9194
9195         if (trace_instance_dir) {
9196                 ret = trace_array_create_dir(tr);
9197                 if (ret)
9198                         goto out_free_tr;
9199         } else
9200                 __trace_early_add_events(tr);
9201
9202         list_add(&tr->list, &ftrace_trace_arrays);
9203
9204         tr->ref++;
9205
9206         return tr;
9207
9208  out_free_tr:
9209         ftrace_free_ftrace_ops(tr);
9210         free_trace_buffers(tr);
9211         free_cpumask_var(tr->tracing_cpumask);
9212         kfree(tr->name);
9213         kfree(tr);
9214
9215         return ERR_PTR(ret);
9216 }
9217
9218 static int instance_mkdir(const char *name)
9219 {
9220         struct trace_array *tr;
9221         int ret;
9222
9223         mutex_lock(&event_mutex);
9224         mutex_lock(&trace_types_lock);
9225
9226         ret = -EEXIST;
9227         if (trace_array_find(name))
9228                 goto out_unlock;
9229
9230         tr = trace_array_create(name);
9231
9232         ret = PTR_ERR_OR_ZERO(tr);
9233
9234 out_unlock:
9235         mutex_unlock(&trace_types_lock);
9236         mutex_unlock(&event_mutex);
9237         return ret;
9238 }
9239
9240 /**
9241  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9242  * @name: The name of the trace array to be looked up/created.
9243  *
9244  * Returns a pointer to the trace array with the given name, or
9245  * NULL if it cannot be created.
9246  *
9247  * NOTE: This function increments the reference counter associated with the
9248  * trace array returned. This makes sure it cannot be freed while in use.
9249  * Use trace_array_put() once the trace array is no longer needed.
9250  * If the trace_array is to be freed, trace_array_destroy() needs to
9251  * be called after the trace_array_put(), or simply let user space delete
9252  * it from the tracefs instances directory. But until the
9253  * trace_array_put() is called, user space cannot delete it.
9254  *
9255  */
9256 struct trace_array *trace_array_get_by_name(const char *name)
9257 {
9258         struct trace_array *tr;
9259
9260         mutex_lock(&event_mutex);
9261         mutex_lock(&trace_types_lock);
9262
9263         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9264                 if (tr->name && strcmp(tr->name, name) == 0)
9265                         goto out_unlock;
9266         }
9267
9268         tr = trace_array_create(name);
9269
9270         if (IS_ERR(tr))
9271                 tr = NULL;
9272 out_unlock:
9273         if (tr)
9274                 tr->ref++;
9275
9276         mutex_unlock(&trace_types_lock);
9277         mutex_unlock(&event_mutex);
9278         return tr;
9279 }
9280 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
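/*
 * Minimal sketch of how a module might use this API, following the
 * reference-counting rules spelled out in the comment above (error
 * handling trimmed, "my_instance" is an arbitrary example name):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (!tr)
 *           return -ENOMEM;
 *
 *   // ... use the instance ...
 *
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);  // or let user space rmdir the instance
 */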
9281
9282 static int __remove_instance(struct trace_array *tr)
9283 {
9284         int i;
9285
9286         /* Reference counter for a newly created trace array = 1. */
9287         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9288                 return -EBUSY;
9289
9290         list_del(&tr->list);
9291
9292         /* Disable all the flags that were enabled coming in */
9293         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9294                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9295                         set_tracer_flag(tr, 1 << i, 0);
9296         }
9297
9298         tracing_set_nop(tr);
9299         clear_ftrace_function_probes(tr);
9300         event_trace_del_tracer(tr);
9301         ftrace_clear_pids(tr);
9302         ftrace_destroy_function_files(tr);
9303         tracefs_remove(tr->dir);
9304         free_percpu(tr->last_func_repeats);
9305         free_trace_buffers(tr);
9306
9307         for (i = 0; i < tr->nr_topts; i++) {
9308                 kfree(tr->topts[i].topts);
9309         }
9310         kfree(tr->topts);
9311
9312         free_cpumask_var(tr->tracing_cpumask);
9313         kfree(tr->name);
9314         kfree(tr);
9315
9316         return 0;
9317 }
9318
9319 int trace_array_destroy(struct trace_array *this_tr)
9320 {
9321         struct trace_array *tr;
9322         int ret;
9323
9324         if (!this_tr)
9325                 return -EINVAL;
9326
9327         mutex_lock(&event_mutex);
9328         mutex_lock(&trace_types_lock);
9329
9330         ret = -ENODEV;
9331
9332         /* Make sure the trace array exists before destroying it. */
9333         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9334                 if (tr == this_tr) {
9335                         ret = __remove_instance(tr);
9336                         break;
9337                 }
9338         }
9339
9340         mutex_unlock(&trace_types_lock);
9341         mutex_unlock(&event_mutex);
9342
9343         return ret;
9344 }
9345 EXPORT_SYMBOL_GPL(trace_array_destroy);
9346
9347 static int instance_rmdir(const char *name)
9348 {
9349         struct trace_array *tr;
9350         int ret;
9351
9352         mutex_lock(&event_mutex);
9353         mutex_lock(&trace_types_lock);
9354
9355         ret = -ENODEV;
9356         tr = trace_array_find(name);
9357         if (tr)
9358                 ret = __remove_instance(tr);
9359
9360         mutex_unlock(&trace_types_lock);
9361         mutex_unlock(&event_mutex);
9362
9363         return ret;
9364 }
9365
9366 static __init void create_trace_instances(struct dentry *d_tracer)
9367 {
9368         struct trace_array *tr;
9369
9370         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9371                                                          instance_mkdir,
9372                                                          instance_rmdir);
9373         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9374                 return;
9375
9376         mutex_lock(&event_mutex);
9377         mutex_lock(&trace_types_lock);
9378
9379         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9380                 if (!tr->name)
9381                         continue;
9382                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9383                              "Failed to create instance directory\n"))
9384                         break;
9385         }
9386
9387         mutex_unlock(&trace_types_lock);
9388         mutex_unlock(&event_mutex);
9389 }
9390
9391 static void
9392 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9393 {
9394         struct trace_event_file *file;
9395         int cpu;
9396
9397         trace_create_file("available_tracers", 0444, d_tracer,
9398                         tr, &show_traces_fops);
9399
9400         trace_create_file("current_tracer", 0644, d_tracer,
9401                         tr, &set_tracer_fops);
9402
9403         trace_create_file("tracing_cpumask", 0644, d_tracer,
9404                           tr, &tracing_cpumask_fops);
9405
9406         trace_create_file("trace_options", 0644, d_tracer,
9407                           tr, &tracing_iter_fops);
9408
9409         trace_create_file("trace", 0644, d_tracer,
9410                           tr, &tracing_fops);
9411
9412         trace_create_file("trace_pipe", 0444, d_tracer,
9413                           tr, &tracing_pipe_fops);
9414
9415         trace_create_file("buffer_size_kb", 0644, d_tracer,
9416                           tr, &tracing_entries_fops);
9417
9418         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9419                           tr, &tracing_total_entries_fops);
9420
9421         trace_create_file("free_buffer", 0200, d_tracer,
9422                           tr, &tracing_free_buffer_fops);
9423
9424         trace_create_file("trace_marker", 0220, d_tracer,
9425                           tr, &tracing_mark_fops);
9426
9427         file = __find_event_file(tr, "ftrace", "print");
9428         if (file && file->dir)
9429                 trace_create_file("trigger", 0644, file->dir, file,
9430                                   &event_trigger_fops);
9431         tr->trace_marker_file = file;
9432
9433         trace_create_file("trace_marker_raw", 0220, d_tracer,
9434                           tr, &tracing_mark_raw_fops);
9435
9436         trace_create_file("trace_clock", 0644, d_tracer, tr,
9437                           &trace_clock_fops);
9438
9439         trace_create_file("tracing_on", 0644, d_tracer,
9440                           tr, &rb_simple_fops);
9441
9442         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9443                           &trace_time_stamp_mode_fops);
9444
9445         tr->buffer_percent = 50;
9446
9447         trace_create_file("buffer_percent", 0444, d_tracer,
9448                         tr, &buffer_percent_fops);
9449
9450         create_trace_options_dir(tr);
9451
9452 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9453         trace_create_maxlat_file(tr, d_tracer);
9454 #endif
9455
9456         if (ftrace_create_function_files(tr, d_tracer))
9457                 MEM_FAIL(1, "Could not allocate function filter files");
9458
9459 #ifdef CONFIG_TRACER_SNAPSHOT
9460         trace_create_file("snapshot", 0644, d_tracer,
9461                           tr, &snapshot_fops);
9462 #endif
9463
9464         trace_create_file("error_log", 0644, d_tracer,
9465                           tr, &tracing_err_log_fops);
9466
9467         for_each_tracing_cpu(cpu)
9468                 tracing_init_tracefs_percpu(tr, cpu);
9469
9470         ftrace_init_tracefs(tr, d_tracer);
9471 }
9472
9473 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9474 {
9475         struct vfsmount *mnt;
9476         struct file_system_type *type;
9477
9478         /*
9479          * To maintain backward compatibility for tools that mount
9480          * debugfs to get to the tracing facility, tracefs is automatically
9481          * mounted to the debugfs/tracing directory.
9482          */
9483         type = get_fs_type("tracefs");
9484         if (!type)
9485                 return NULL;
9486         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9487         put_filesystem(type);
9488         if (IS_ERR(mnt))
9489                 return NULL;
9490         mntget(mnt);
9491
9492         return mnt;
9493 }
9494
9495 /**
9496  * tracing_init_dentry - initialize top level trace array
9497  *
9498  * This is called when creating files or directories in the tracing
9499  * directory. It is called via fs_initcall() by any of the boot up code
9500  * and expects to return the dentry of the top level tracing directory.
9501  */
9502 int tracing_init_dentry(void)
9503 {
9504         struct trace_array *tr = &global_trace;
9505
9506         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9507                 pr_warn("Tracing disabled due to lockdown\n");
9508                 return -EPERM;
9509         }
9510
9511         /* The top level trace array uses NULL as parent */
9512         if (tr->dir)
9513                 return 0;
9514
9515         if (WARN_ON(!tracefs_initialized()))
9516                 return -ENODEV;
9517
9518         /*
9519          * As there may still be users that expect the tracing
9520          * files to exist in debugfs/tracing, we must automount
9521          * the tracefs file system there, so older tools still
9522          * work with the newer kernel.
9523          */
9524         tr->dir = debugfs_create_automount("tracing", NULL,
9525                                            trace_automount, NULL);
9526
9527         return 0;
9528 }
9529
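/*
 * Eval ("enum") maps: TP_printk() format strings can reference enum and
 * sizeof() symbols that user space has no way to resolve by itself.  Each
 * eval map pairs such a symbol name with its numeric value so the event
 * "format" files can expose the resolved value.  The core kernel's maps
 * sit between the two linker section markers declared below; modules
 * provide their own (see trace_module_add_evals() further down).
 */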
9530 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9531 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9532
9533 static struct workqueue_struct *eval_map_wq __initdata;
9534 static struct work_struct eval_map_work __initdata;
9535
9536 static void __init eval_map_work_func(struct work_struct *work)
9537 {
9538         int len;
9539
9540         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9541         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9542 }
9543
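/*
 * Inserting the core eval maps is deferred to an unbound workqueue so it
 * does not have to run inline at initcall time.  If the workqueue cannot
 * be allocated, the work is simply done synchronously as a fallback.
 */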
9544 static int __init trace_eval_init(void)
9545 {
9546         INIT_WORK(&eval_map_work, eval_map_work_func);
9547
9548         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9549         if (!eval_map_wq) {
9550                 pr_err("Unable to allocate eval_map_wq\n");
9551                 /* Fall back and run the work synchronously here */
9552                 eval_map_work_func(&eval_map_work);
9553                 return -ENOMEM;
9554         }
9555
9556         queue_work(eval_map_wq, &eval_map_work);
9557         return 0;
9558 }
9559
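/*
 * destroy_workqueue() drains any work still pending, so once this
 * late_initcall_sync() has run, the deferred eval map processing above
 * is guaranteed to be complete.
 */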
9560 static int __init trace_eval_sync(void)
9561 {
9562         /* Make sure the eval map updates are finished */
9563         if (eval_map_wq)
9564                 destroy_workqueue(eval_map_wq);
9565         return 0;
9566 }
9567
9568 late_initcall_sync(trace_eval_sync);
9569
9570
9571 #ifdef CONFIG_MODULES
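/*
 * Modules can carry their own eval maps.  They are merged into the global
 * list when a module is loaded and, when CONFIG_TRACE_EVAL_MAP_FILE keeps
 * the maps around, unlinked again when the module goes away (see
 * trace_module_notify() below).
 */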
9572 static void trace_module_add_evals(struct module *mod)
9573 {
9574         if (!mod->num_trace_evals)
9575                 return;
9576
9577         /*
9578          * Modules with a bad taint do not have events created; do not
9579          * bother with their eval maps (enums) either.
9580          */
9581         if (trace_module_has_bad_taint(mod))
9582                 return;
9583
9584         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9585 }
9586
9587 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9588 static void trace_module_remove_evals(struct module *mod)
9589 {
9590         union trace_eval_map_item *map;
9591         union trace_eval_map_item **last = &trace_eval_maps;
9592
9593         if (!mod->num_trace_evals)
9594                 return;
9595
9596         mutex_lock(&trace_eval_mutex);
9597
9598         map = trace_eval_maps;
9599
9600         while (map) {
9601                 if (map->head.mod == mod)
9602                         break;
9603                 map = trace_eval_jmp_to_tail(map);
9604                 last = &map->tail.next;
9605                 map = map->tail.next;
9606         }
9607         if (!map)
9608                 goto out;
9609
9610         *last = trace_eval_jmp_to_tail(map)->tail.next;
9611         kfree(map);
9612  out:
9613         mutex_unlock(&trace_eval_mutex);
9614 }
9615 #else
9616 static inline void trace_module_remove_evals(struct module *mod) { }
9617 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9618
9619 static int trace_module_notify(struct notifier_block *self,
9620                                unsigned long val, void *data)
9621 {
9622         struct module *mod = data;
9623
9624         switch (val) {
9625         case MODULE_STATE_COMING:
9626                 trace_module_add_evals(mod);
9627                 break;
9628         case MODULE_STATE_GOING:
9629                 trace_module_remove_evals(mod);
9630                 break;
9631         }
9632
9633         return NOTIFY_OK;
9634 }
9635
9636 static struct notifier_block trace_module_nb = {
9637         .notifier_call = trace_module_notify,
9638         .priority = 0,
9639 };
9640 #endif /* CONFIG_MODULES */
9641
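/*
 * Populate the top level tracefs directory.  This runs as an fs_initcall()
 * and expects tracefs itself to already be registered (tracing_init_dentry()
 * warns otherwise); it creates the global files below as well as the
 * per-instance files via init_tracer_tracefs().
 */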
9642 static __init int tracer_init_tracefs(void)
9643 {
9644         int ret;
9645
9646         trace_access_lock_init();
9647
9648         ret = tracing_init_dentry();
9649         if (ret)
9650                 return 0;
9651
9652         event_trace_init();
9653
9654         init_tracer_tracefs(&global_trace, NULL);
9655         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9656
9657         trace_create_file("tracing_thresh", 0644, NULL,
9658                         &global_trace, &tracing_thresh_fops);
9659
9660         trace_create_file("README", 0444, NULL,
9661                         NULL, &tracing_readme_fops);
9662
9663         trace_create_file("saved_cmdlines", 0444, NULL,
9664                         NULL, &tracing_saved_cmdlines_fops);
9665
9666         trace_create_file("saved_cmdlines_size", 0644, NULL,
9667                           NULL, &tracing_saved_cmdlines_size_fops);
9668
9669         trace_create_file("saved_tgids", 0444, NULL,
9670                         NULL, &tracing_saved_tgids_fops);
9671
9672         trace_eval_init();
9673
9674         trace_create_eval_file(NULL);
9675
9676 #ifdef CONFIG_MODULES
9677         register_module_notifier(&trace_module_nb);
9678 #endif
9679
9680 #ifdef CONFIG_DYNAMIC_FTRACE
9681         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9682                         NULL, &tracing_dyn_info_fops);
9683 #endif
9684
9685         create_trace_instances(NULL);
9686
9687         update_tracer_options(&global_trace);
9688
9689         return 0;
9690 }
9691
9692 fs_initcall(tracer_init_tracefs);
9693
9694 static int trace_panic_handler(struct notifier_block *this,
9695                                unsigned long event, void *unused)
9696 {
9697         if (ftrace_dump_on_oops)
9698                 ftrace_dump(ftrace_dump_on_oops);
9699         return NOTIFY_OK;
9700 }
9701
9702 static struct notifier_block trace_panic_notifier = {
9703         .notifier_call  = trace_panic_handler,
9704         .next           = NULL,
9705         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9706 };
9707
9708 static int trace_die_handler(struct notifier_block *self,
9709                              unsigned long val,
9710                              void *data)
9711 {
9712         switch (val) {
9713         case DIE_OOPS:
9714                 if (ftrace_dump_on_oops)
9715                         ftrace_dump(ftrace_dump_on_oops);
9716                 break;
9717         default:
9718                 break;
9719         }
9720         return NOTIFY_OK;
9721 }
9722
9723 static struct notifier_block trace_die_notifier = {
9724         .notifier_call = trace_die_handler,
9725         .priority = 200
9726 };
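/*
 * Both notifiers above only act when ftrace_dump_on_oops is set.  As an
 * illustration (assuming a standard configuration), that is typically done
 * with the "ftrace_dump_on_oops" kernel command line option or through
 * /proc/sys/kernel/ftrace_dump_on_oops, so the ring buffer contents get
 * printed to the console when the kernel dies.
 */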
9727
9728 /*
9729  * printk handles lines of at most 1024 characters; we really don't need
9730  * it that big, as nothing should be printing 1000 characters anyway.
9731  */
9732 #define TRACE_MAX_PRINT         1000
9733
9734 /*
9735  * Define here KERN_TRACE so that we have one place to modify
9736  * it if we decide to change what log level the ftrace dump
9737  * should be at.
9738  */
9739 #define KERN_TRACE              KERN_EMERG
9740
9741 void
9742 trace_printk_seq(struct trace_seq *s)
9743 {
9744         /* Probably should print a warning here. */
9745         if (s->seq.len >= TRACE_MAX_PRINT)
9746                 s->seq.len = TRACE_MAX_PRINT;
9747
9748         /*
9749          * More paranoid code. Although the buffer size is set to
9750          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9751          * an extra layer of protection.
9752          */
9753         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9754                 s->seq.len = s->seq.size - 1;
9755
9756         /* should be zero terminated, but we are paranoid. */
9757         s->buffer[s->seq.len] = 0;
9758
9759         printk(KERN_TRACE "%s", s->buffer);
9760
9761         trace_seq_init(s);
9762 }
9763
9764 void trace_init_global_iter(struct trace_iterator *iter)
9765 {
9766         iter->tr = &global_trace;
9767         iter->trace = iter->tr->current_trace;
9768         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9769         iter->array_buffer = &global_trace.array_buffer;
9770
9771         if (iter->trace && iter->trace->open)
9772                 iter->trace->open(iter);
9773
9774         /* Annotate start of buffers if we had overruns */
9775         if (ring_buffer_overruns(iter->array_buffer->buffer))
9776                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9777
9778         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9779         if (trace_clocks[iter->tr->clock_id].in_ns)
9780                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9781 }
9782
9783 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9784 {
9785         /* use static because iter can be a bit big for the stack */
9786         static struct trace_iterator iter;
9787         static atomic_t dump_running;
9788         struct trace_array *tr = &global_trace;
9789         unsigned int old_userobj;
9790         unsigned long flags;
9791         int cnt = 0, cpu;
9792
9793         /* Only allow one dump user at a time. */
9794         if (atomic_inc_return(&dump_running) != 1) {
9795                 atomic_dec(&dump_running);
9796                 return;
9797         }
9798
9799         /*
9800          * Always turn off tracing when we dump.
9801          * We don't need to show trace output of what happens
9802          * between multiple crashes.
9803          *
9804          * If the user does a sysrq-z, then they can re-enable
9805          * tracing with echo 1 > tracing_on.
9806          */
9807         tracing_off();
9808
9809         local_irq_save(flags);
9810         printk_nmi_direct_enter();
9811
9812         /* Simulate the iterator */
9813         trace_init_global_iter(&iter);
9814         /* Can not use kmalloc for iter.temp and iter.fmt */
9815         iter.temp = static_temp_buf;
9816         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9817         iter.fmt = static_fmt_buf;
9818         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9819
9820         for_each_tracing_cpu(cpu) {
9821                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9822         }
9823
9824         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9825
9826         /* don't look at user memory in panic mode */
9827         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9828
9829         switch (oops_dump_mode) {
9830         case DUMP_ALL:
9831                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9832                 break;
9833         case DUMP_ORIG:
9834                 iter.cpu_file = raw_smp_processor_id();
9835                 break;
9836         case DUMP_NONE:
9837                 goto out_enable;
9838         default:
9839                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9840                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9841         }
9842
9843         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9844
9845         /* Did function tracer already get disabled? */
9846         if (ftrace_is_dead()) {
9847                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9848                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9849         }
9850
9851         /*
9852          * We need to stop all tracing on all CPUs to read
9853          * the next buffer. This is a bit expensive, but is
9854          * not done often. We read out everything we can,
9855          * and then release the locks again.
9856          */
9857
9858         while (!trace_empty(&iter)) {
9859
9860                 if (!cnt)
9861                         printk(KERN_TRACE "---------------------------------\n");
9862
9863                 cnt++;
9864
9865                 trace_iterator_reset(&iter);
9866                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9867
9868                 if (trace_find_next_entry_inc(&iter) != NULL) {
9869                         int ret;
9870
9871                         ret = print_trace_line(&iter);
9872                         if (ret != TRACE_TYPE_NO_CONSUME)
9873                                 trace_consume(&iter);
9874                 }
9875                 touch_nmi_watchdog();
9876
9877                 trace_printk_seq(&iter.seq);
9878         }
9879
9880         if (!cnt)
9881                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9882         else
9883                 printk(KERN_TRACE "---------------------------------\n");
9884
9885  out_enable:
9886         tr->trace_flags |= old_userobj;
9887
9888         for_each_tracing_cpu(cpu) {
9889                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9890         }
9891         atomic_dec(&dump_running);
9892         printk_nmi_direct_exit();
9893         local_irq_restore(flags);
9894 }
9895 EXPORT_SYMBOL_GPL(ftrace_dump);
9896
9897 #define WRITE_BUFSIZE  4096
9898
9899 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9900                                 size_t count, loff_t *ppos,
9901                                 int (*createfn)(const char *))
9902 {
9903         char *kbuf, *buf, *tmp;
9904         int ret = 0;
9905         size_t done = 0;
9906         size_t size;
9907
9908         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9909         if (!kbuf)
9910                 return -ENOMEM;
9911
9912         while (done < count) {
9913                 size = count - done;
9914
9915                 if (size >= WRITE_BUFSIZE)
9916                         size = WRITE_BUFSIZE - 1;
9917
9918                 if (copy_from_user(kbuf, buffer + done, size)) {
9919                         ret = -EFAULT;
9920                         goto out;
9921                 }
9922                 kbuf[size] = '\0';
9923                 buf = kbuf;
9924                 do {
9925                         tmp = strchr(buf, '\n');
9926                         if (tmp) {
9927                                 *tmp = '\0';
9928                                 size = tmp - buf + 1;
9929                         } else {
9930                                 size = strlen(buf);
9931                                 if (done + size < count) {
9932                                         if (buf != kbuf)
9933                                                 break;
9934                                         /* A line can hold at most WRITE_BUFSIZE - 2 chars ('\n' + '\0') */
9935                                         pr_warn("Line length is too long: Should be less than %d\n",
9936                                                 WRITE_BUFSIZE - 2);
9937                                         ret = -EINVAL;
9938                                         goto out;
9939                                 }
9940                         }
9941                         done += size;
9942
9943                         /* Remove comments */
9944                         tmp = strchr(buf, '#');
9945
9946                         if (tmp)
9947                                 *tmp = '\0';
9948
9949                         ret = createfn(buf);
9950                         if (ret)
9951                                 goto out;
9952                         buf += size;
9953
9954                 } while (done < count);
9955         }
9956         ret = done;
9957
9958 out:
9959         kfree(kbuf);
9960
9961         return ret;
9962 }
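/*
 * Illustrative only (the names below are hypothetical, not part of this
 * file): a write handler for a command-style tracefs file would typically
 * delegate to trace_parse_run_command() with a callback that is invoked
 * once per parsed line, roughly the way the probe event interfaces do:
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		if (!raw_command[0])
 *			return 0;	// skip empty lines
 *		return example_register_cmd(raw_command);
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buf,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       example_create_cmd);
 *	}
 */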
9963
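/*
 * Core early-boot setup: allocate the cpumasks and ring buffers for the
 * global trace array, register the nop tracer and hook up the panic/die
 * notifiers.  Called from early_trace_init() below, long before
 * tracer_init_tracefs() creates any files.
 */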
9964 __init static int tracer_alloc_buffers(void)
9965 {
9966         int ring_buf_size;
9967         int ret = -ENOMEM;
9968
9969
9970         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9971                 pr_warn("Tracing disabled due to lockdown\n");
9972                 return -EPERM;
9973         }
9974
9975         /*
9976          * Make sure we don't accidentally add more trace options
9977          * than we have bits for.
9978          */
9979         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9980
9981         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9982                 goto out;
9983
9984         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9985                 goto out_free_buffer_mask;
9986
9987         /* Only allocate trace_printk buffers if a trace_printk exists */
9988         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9989                 /* Must be called before global_trace.buffer is allocated */
9990                 trace_printk_init_buffers();
9991
9992         /* To save memory, keep the ring buffer size to its minimum */
9993         if (ring_buffer_expanded)
9994                 ring_buf_size = trace_buf_size;
9995         else
9996                 ring_buf_size = 1;
9997
9998         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9999         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10000
10001         raw_spin_lock_init(&global_trace.start_lock);
10002
10003         /*
10004          * The prepare callback allocates some memory for the ring buffer. We
10005          * don't free the buffer if the CPU goes down. If we were to free
10006          * the buffer, then the user would lose any trace that was in the
10007          * buffer. The memory will be removed once the "instance" is removed.
10008          */
10009         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10010                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10011                                       NULL);
10012         if (ret < 0)
10013                 goto out_free_cpumask;
10014         /* Used for event triggers */
10015         ret = -ENOMEM;
10016         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10017         if (!temp_buffer)
10018                 goto out_rm_hp_state;
10019
10020         if (trace_create_savedcmd() < 0)
10021                 goto out_free_temp_buffer;
10022
10023         /* TODO: make the number of buffers hot pluggable with CPUS */
10024         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10025                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10026                 goto out_free_savedcmd;
10027         }
10028
10029         if (global_trace.buffer_disabled)
10030                 tracing_off();
10031
10032         if (trace_boot_clock) {
10033                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10034                 if (ret < 0)
10035                         pr_warn("Trace clock %s not defined, going back to default\n",
10036                                 trace_boot_clock);
10037         }
10038
10039         /*
10040          * register_tracer() might reference current_trace, so it
10041          * needs to be set before we register anything. This is
10042          * just a bootstrap of current_trace anyway.
10043          */
10044         global_trace.current_trace = &nop_trace;
10045
10046         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10047
10048         ftrace_init_global_array_ops(&global_trace);
10049
10050         init_trace_flags_index(&global_trace);
10051
10052         register_tracer(&nop_trace);
10053
10054         /* Function tracing may start here (via kernel command line) */
10055         init_function_trace();
10056
10057         /* All seems OK, enable tracing */
10058         tracing_disabled = 0;
10059
10060         atomic_notifier_chain_register(&panic_notifier_list,
10061                                        &trace_panic_notifier);
10062
10063         register_die_notifier(&trace_die_notifier);
10064
10065         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10066
10067         INIT_LIST_HEAD(&global_trace.systems);
10068         INIT_LIST_HEAD(&global_trace.events);
10069         INIT_LIST_HEAD(&global_trace.hist_vars);
10070         INIT_LIST_HEAD(&global_trace.err_log);
10071         list_add(&global_trace.list, &ftrace_trace_arrays);
10072
10073         apply_trace_boot_options();
10074
10075         register_snapshot_cmd();
10076
10077         test_can_verify();
10078
10079         return 0;
10080
10081 out_free_savedcmd:
10082         free_saved_cmdlines_buffer(savedcmd);
10083 out_free_temp_buffer:
10084         ring_buffer_free(temp_buffer);
10085 out_rm_hp_state:
10086         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10087 out_free_cpumask:
10088         free_cpumask_var(global_trace.tracing_cpumask);
10089 out_free_buffer_mask:
10090         free_cpumask_var(tracing_buffer_mask);
10091 out:
10092         return ret;
10093 }
10094
10095 void __init early_trace_init(void)
10096 {
10097         if (tracepoint_printk) {
10098                 tracepoint_print_iter =
10099                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10100                 if (MEM_FAIL(!tracepoint_print_iter,
10101                              "Failed to allocate trace iterator\n"))
10102                         tracepoint_printk = 0;
10103                 else
10104                         static_key_enable(&tracepoint_printk_key.key);
10105         }
10106         tracer_alloc_buffers();
10107 }
10108
10109 void __init trace_init(void)
10110 {
10111         trace_event_init();
10112 }
10113
10114 __init static void clear_boot_tracer(void)
10115 {
10116         /*
10117          * The default boot-up tracer name lives in an init section and
10118          * this function is called from a late initcall. If the boot
10119          * tracer was never found and registered, clear the pointer out
10120          * to prevent a later registration from accessing the buffer
10121          * that is about to be freed.
10122          */
10123         if (!default_bootup_tracer)
10124                 return;
10125
10126         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10127                default_bootup_tracer);
10128         default_bootup_tracer = NULL;
10129 }
10130
10131 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10132 __init static void tracing_set_default_clock(void)
10133 {
10134         /* sched_clock_stable() is determined in late_initcall */
10135         if (!trace_boot_clock && !sched_clock_stable()) {
10136                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10137                         pr_warn("Can not set tracing clock due to lockdown\n");
10138                         return;
10139                 }
10140
10141                 printk(KERN_WARNING
10142                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10143                        "If you want to keep using the local clock, then add:\n"
10144                        "  \"trace_clock=local\"\n"
10145                        "on the kernel command line\n");
10146                 tracing_set_clock(&global_trace, "global");
10147         }
10148 }
10149 #else
10150 static inline void tracing_set_default_clock(void) { }
10151 #endif
10152
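/*
 * Final boot-time adjustments, run as a late_initcall_sync(): if the user
 * asked for tracepoint printk output to stop once booting is done, the
 * static key is disabled here; then the default trace clock may be
 * switched and any boot tracer name that never got registered is cleared
 * before its init-section string is freed.
 */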
10153 __init static int late_trace_init(void)
10154 {
10155         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10156                 static_key_disable(&tracepoint_printk_key.key);
10157                 tracepoint_printk = 0;
10158         }
10159
10160         tracing_set_default_clock();
10161         clear_boot_tracer();
10162         return 0;
10163 }
10164
10165 late_initcall_sync(late_trace_init);