kernel/trace/trace.c  (linux-2.6-microblaze.git @ 60492464281e22e4daf881699578967227cc4a98)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 and is set back to zero only if the
113  * initialization of the tracer is successful; that is the only
114  * place that clears it.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing the trace that led to a crash and outputting it to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * from "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187
188 static int __init set_cmdline_ftrace(char *str)
189 {
190         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
191         default_bootup_tracer = bootup_tracer_buf;
192         /* We are using ftrace early, expand it */
193         ring_buffer_expanded = true;
194         return 1;
195 }
196 __setup("ftrace=", set_cmdline_ftrace);
197
198 static int __init set_ftrace_dump_on_oops(char *str)
199 {
200         if (*str++ != '=' || !*str || !strcmp("1", str)) {
201                 ftrace_dump_on_oops = DUMP_ALL;
202                 return 1;
203         }
204
205         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
206                 ftrace_dump_on_oops = DUMP_ORIG;
207                 return 1;
208         }
209
210         return 0;
211 }
212 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213
214 static int __init stop_trace_on_warning(char *str)
215 {
216         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
217                 __disable_trace_on_warning = 1;
218         return 1;
219 }
220 __setup("traceoff_on_warning", stop_trace_on_warning);
221
222 static int __init boot_alloc_snapshot(char *str)
223 {
224         allocate_snapshot = true;
225         /* We also need the main ring buffer expanded */
226         ring_buffer_expanded = true;
227         return 1;
228 }
229 __setup("alloc_snapshot", boot_alloc_snapshot);
230
231
232 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233
234 static int __init set_trace_boot_options(char *str)
235 {
236         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
237         return 0;
238 }
239 __setup("trace_options=", set_trace_boot_options);
240
241 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
242 static char *trace_boot_clock __initdata;
243
244 static int __init set_trace_boot_clock(char *str)
245 {
246         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
247         trace_boot_clock = trace_boot_clock_buf;
248         return 0;
249 }
250 __setup("trace_clock=", set_trace_boot_clock);
251
252 static int __init set_tracepoint_printk(char *str)
253 {
254         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
255                 tracepoint_printk = 1;
256         return 1;
257 }
258 __setup("tp_printk", set_tracepoint_printk);
259
260 static int __init set_tracepoint_printk_stop(char *str)
261 {
262         tracepoint_printk_stop_on_boot = true;
263         return 1;
264 }
265 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
266
267 unsigned long long ns2usecs(u64 nsec)
268 {
269         nsec += 500;
270         do_div(nsec, 1000);
271         return nsec;
272 }
273
274 static void
275 trace_process_export(struct trace_export *export,
276                struct ring_buffer_event *event, int flag)
277 {
278         struct trace_entry *entry;
279         unsigned int size = 0;
280
281         if (export->flags & flag) {
282                 entry = ring_buffer_event_data(event);
283                 size = ring_buffer_event_length(event);
284                 export->write(export, entry, size);
285         }
286 }
287
288 static DEFINE_MUTEX(ftrace_export_lock);
289
290 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
291
292 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
293 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
295
296 static inline void ftrace_exports_enable(struct trace_export *export)
297 {
298         if (export->flags & TRACE_EXPORT_FUNCTION)
299                 static_branch_inc(&trace_function_exports_enabled);
300
301         if (export->flags & TRACE_EXPORT_EVENT)
302                 static_branch_inc(&trace_event_exports_enabled);
303
304         if (export->flags & TRACE_EXPORT_MARKER)
305                 static_branch_inc(&trace_marker_exports_enabled);
306 }
307
308 static inline void ftrace_exports_disable(struct trace_export *export)
309 {
310         if (export->flags & TRACE_EXPORT_FUNCTION)
311                 static_branch_dec(&trace_function_exports_enabled);
312
313         if (export->flags & TRACE_EXPORT_EVENT)
314                 static_branch_dec(&trace_event_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_MARKER)
317                 static_branch_dec(&trace_marker_exports_enabled);
318 }
319
320 static void ftrace_exports(struct ring_buffer_event *event, int flag)
321 {
322         struct trace_export *export;
323
324         preempt_disable_notrace();
325
326         export = rcu_dereference_raw_check(ftrace_exports_list);
327         while (export) {
328                 trace_process_export(export, event, flag);
329                 export = rcu_dereference_raw_check(export->next);
330         }
331
332         preempt_enable_notrace();
333 }
334
335 static inline void
336 add_trace_export(struct trace_export **list, struct trace_export *export)
337 {
338         rcu_assign_pointer(export->next, *list);
339         /*
340          * We are adding export to the list, but another
341          * CPU might be walking that list. We need to make sure
342          * the export->next pointer is valid before another CPU sees
343          * the export pointer added to the list.
344          */
345         rcu_assign_pointer(*list, export);
346 }
347
348 static inline int
349 rm_trace_export(struct trace_export **list, struct trace_export *export)
350 {
351         struct trace_export **p;
352
353         for (p = list; *p != NULL; p = &(*p)->next)
354                 if (*p == export)
355                         break;
356
357         if (*p != export)
358                 return -1;
359
360         rcu_assign_pointer(*p, (*p)->next);
361
362         return 0;
363 }
364
365 static inline void
366 add_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         ftrace_exports_enable(export);
369
370         add_trace_export(list, export);
371 }
372
373 static inline int
374 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
375 {
376         int ret;
377
378         ret = rm_trace_export(list, export);
379         ftrace_exports_disable(export);
380
381         return ret;
382 }
383
384 int register_ftrace_export(struct trace_export *export)
385 {
386         if (WARN_ON_ONCE(!export->write))
387                 return -1;
388
389         mutex_lock(&ftrace_export_lock);
390
391         add_ftrace_export(&ftrace_exports_list, export);
392
393         mutex_unlock(&ftrace_export_lock);
394
395         return 0;
396 }
397 EXPORT_SYMBOL_GPL(register_ftrace_export);
398
399 int unregister_ftrace_export(struct trace_export *export)
400 {
401         int ret;
402
403         mutex_lock(&ftrace_export_lock);
404
405         ret = rm_ftrace_export(&ftrace_exports_list, export);
406
407         mutex_unlock(&ftrace_export_lock);
408
409         return ret;
410 }
411 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
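
/*
 * Illustrative sketch, not part of the original file: a minimal
 * trace_export user built only on the API shown above (the ->write()
 * callback invoked from trace_process_export() and the TRACE_EXPORT_*
 * flags).  The names and the callback body are made-up examples; see
 * <linux/trace.h> for the exact trace_export definition.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Copy the binary trace entry to some out-of-band channel here. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
};

/*
 * A module would call register_ftrace_export(&example_export) on load
 * and unregister_ftrace_export(&example_export) on unload.
 */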
412
413 /* trace_flags holds trace_options default values */
414 #define TRACE_DEFAULT_FLAGS                                             \
415         (FUNCTION_DEFAULT_FLAGS |                                       \
416          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
417          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
418          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
419          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
420          TRACE_ITER_HASH_PTR)
421
422 /* trace_options that are only supported by global_trace */
423 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
424                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
425
426 /* trace_flags that are default zero for instances */
427 #define ZEROED_TRACE_FLAGS \
428         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
429
430 /*
431  * The global_trace is the descriptor that holds the top-level tracing
432  * buffers for the live tracing.
433  */
434 static struct trace_array global_trace = {
435         .trace_flags = TRACE_DEFAULT_FLAGS,
436 };
437
438 LIST_HEAD(ftrace_trace_arrays);
439
440 int trace_array_get(struct trace_array *this_tr)
441 {
442         struct trace_array *tr;
443         int ret = -ENODEV;
444
445         mutex_lock(&trace_types_lock);
446         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
447                 if (tr == this_tr) {
448                         tr->ref++;
449                         ret = 0;
450                         break;
451                 }
452         }
453         mutex_unlock(&trace_types_lock);
454
455         return ret;
456 }
457
458 static void __trace_array_put(struct trace_array *this_tr)
459 {
460         WARN_ON(!this_tr->ref);
461         this_tr->ref--;
462 }
463
464 /**
465  * trace_array_put - Decrement the reference counter for this trace array.
466  * @this_tr : pointer to the trace array
467  *
468  * NOTE: Use this when we no longer need the trace array returned by
469  * trace_array_get_by_name(). This ensures the trace array can be later
470  * destroyed.
471  *
472  */
473 void trace_array_put(struct trace_array *this_tr)
474 {
475         if (!this_tr)
476                 return;
477
478         mutex_lock(&trace_types_lock);
479         __trace_array_put(this_tr);
480         mutex_unlock(&trace_types_lock);
481 }
482 EXPORT_SYMBOL_GPL(trace_array_put);
483
484 int tracing_check_open_get_tr(struct trace_array *tr)
485 {
486         int ret;
487
488         ret = security_locked_down(LOCKDOWN_TRACEFS);
489         if (ret)
490                 return ret;
491
492         if (tracing_disabled)
493                 return -ENODEV;
494
495         if (tr && trace_array_get(tr) < 0)
496                 return -ENODEV;
497
498         return 0;
499 }
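
/*
 * Illustrative sketch, not part of the original file: the usual
 * open/release pairing for a tracefs file backed by a trace_array.
 * tracing_check_open_get_tr() honours lockdown and tracing_disabled and
 * takes a reference; trace_array_put() drops it.  The function names
 * here are examples only.
 */
static int example_tr_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int example_tr_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}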
500
501 int call_filter_check_discard(struct trace_event_call *call, void *rec,
502                               struct trace_buffer *buffer,
503                               struct ring_buffer_event *event)
504 {
505         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
506             !filter_match_preds(call->filter, rec)) {
507                 __trace_event_discard_commit(buffer, event);
508                 return 1;
509         }
510
511         return 0;
512 }
513
514 void trace_free_pid_list(struct trace_pid_list *pid_list)
515 {
516         vfree(pid_list->pids);
517         kfree(pid_list);
518 }
519
520 /**
521  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
522  * @filtered_pids: The list of pids to check
523  * @search_pid: The PID to find in @filtered_pids
524  *
525  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
526  */
527 bool
528 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
529 {
530         /*
531          * If pid_max changed after filtered_pids was created, we
532          * by default ignore all pids greater than the previous pid_max.
533          */
534         if (search_pid >= filtered_pids->pid_max)
535                 return false;
536
537         return test_bit(search_pid, filtered_pids->pids);
538 }
539
540 /**
541  * trace_ignore_this_task - should a task be ignored for tracing
542  * @filtered_pids: The list of pids to check
543  * @filtered_no_pids: The list of pids not to be traced
544  * @task: The task that should be ignored if not filtered
545  *
546  * Checks if @task should be traced or not from @filtered_pids.
547  * Returns true if @task should *NOT* be traced.
548  * Returns false if @task should be traced.
549  */
550 bool
551 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
552                        struct trace_pid_list *filtered_no_pids,
553                        struct task_struct *task)
554 {
555         /*
556          * If filtered_no_pids is not empty, and the task's pid is listed
557          * in filtered_no_pids, then return true.
558          * Otherwise, if filtered_pids is empty, that means we can
559          * trace all tasks. If it has content, then only trace pids
560          * within filtered_pids.
561          */
562
563         return (filtered_pids &&
564                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
565                 (filtered_no_pids &&
566                  trace_find_filtered_pid(filtered_no_pids, task->pid));
567 }
568
569 /**
570  * trace_filter_add_remove_task - Add or remove a task from a pid_list
571  * @pid_list: The list to modify
572  * @self: The current task for fork or NULL for exit
573  * @task: The task to add or remove
574  *
575  * When adding a task, if @self is defined, the task is only added if @self
576  * is also included in @pid_list. This happens on fork and tasks should
577  * only be added when the parent is listed. If @self is NULL, then the
578  * @task pid will be removed from the list, which would happen on exit
579  * of a task.
580  */
581 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
582                                   struct task_struct *self,
583                                   struct task_struct *task)
584 {
585         if (!pid_list)
586                 return;
587
588         /* For forks, we only add if the forking task is listed */
589         if (self) {
590                 if (!trace_find_filtered_pid(pid_list, self->pid))
591                         return;
592         }
593
594         /* Sorry, but we don't support pid_max changing after setting */
595         if (task->pid >= pid_list->pid_max)
596                 return;
597
598         /* "self" is set for forks, and NULL for exits */
599         if (self)
600                 set_bit(task->pid, pid_list->pids);
601         else
602                 clear_bit(task->pid, pid_list->pids);
603 }
604
605 /**
606  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
607  * @pid_list: The pid list to show
608  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
609  * @pos: The position of the file
610  *
611  * This is used by the seq_file "next" operation to iterate the pids
612  * listed in a trace_pid_list structure.
613  *
614  * Returns the pid+1 as we want to display pid of zero, but NULL would
615  * stop the iteration.
616  */
617 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
618 {
619         unsigned long pid = (unsigned long)v;
620
621         (*pos)++;
622
623         /* pid already is +1 of the actual previous bit */
624         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
625
626         /* Return pid + 1 to allow zero to be represented */
627         if (pid < pid_list->pid_max)
628                 return (void *)(pid + 1);
629
630         return NULL;
631 }
632
633 /**
634  * trace_pid_start - Used for seq_file to start reading pid lists
635  * @pid_list: The pid list to show
636  * @pos: The position of the file
637  *
638  * This is used by seq_file "start" operation to start the iteration
639  * of listing pids.
640  *
641  * Returns the pid+1 as we want to display pid of zero, but NULL would
642  * stop the iteration.
643  */
644 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
645 {
646         unsigned long pid;
647         loff_t l = 0;
648
649         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
650         if (pid >= pid_list->pid_max)
651                 return NULL;
652
653         /* Return pid + 1 so that zero can be the exit value */
654         for (pid++; pid && l < *pos;
655              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
656                 ;
657         return (void *)pid;
658 }
659
660 /**
661  * trace_pid_show - show the current pid in seq_file processing
662  * @m: The seq_file structure to write into
663  * @v: A void pointer of the pid (+1) value to display
664  *
665  * Can be directly used by seq_file operations to display the current
666  * pid value.
667  */
668 int trace_pid_show(struct seq_file *m, void *v)
669 {
670         unsigned long pid = (unsigned long)v - 1;
671
672         seq_printf(m, "%lu\n", pid);
673         return 0;
674 }
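
/*
 * Illustrative sketch, not part of the original file: wiring
 * trace_pid_start()/trace_pid_next()/trace_pid_show() into
 * seq_operations for a pid-list file.  Real users also take the proper
 * locks and rcu_dereference() the pid list; how the list reaches
 * m->private here is an assumption for brevity.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};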
675
676 /* 128 should be much more than enough */
677 #define PID_BUF_SIZE            127
678
679 int trace_pid_write(struct trace_pid_list *filtered_pids,
680                     struct trace_pid_list **new_pid_list,
681                     const char __user *ubuf, size_t cnt)
682 {
683         struct trace_pid_list *pid_list;
684         struct trace_parser parser;
685         unsigned long val;
686         int nr_pids = 0;
687         ssize_t read = 0;
688         ssize_t ret = 0;
689         loff_t pos;
690         pid_t pid;
691
692         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
693                 return -ENOMEM;
694
695         /*
696          * Always create a new array. The write is an all-or-nothing
697          * operation: when the user adds new pids, a new array is built,
698          * and if the operation fails, the current list is
699          * not modified.
700          */
701         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
702         if (!pid_list) {
703                 trace_parser_put(&parser);
704                 return -ENOMEM;
705         }
706
707         pid_list->pid_max = READ_ONCE(pid_max);
708
709         /* Only truncating will shrink pid_max */
710         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
711                 pid_list->pid_max = filtered_pids->pid_max;
712
713         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
714         if (!pid_list->pids) {
715                 trace_parser_put(&parser);
716                 kfree(pid_list);
717                 return -ENOMEM;
718         }
719
720         if (filtered_pids) {
721                 /* copy the current bits to the new max */
722                 for_each_set_bit(pid, filtered_pids->pids,
723                                  filtered_pids->pid_max) {
724                         set_bit(pid, pid_list->pids);
725                         nr_pids++;
726                 }
727         }
728
729         while (cnt > 0) {
730
731                 pos = 0;
732
733                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
734                 if (ret < 0 || !trace_parser_loaded(&parser))
735                         break;
736
737                 read += ret;
738                 ubuf += ret;
739                 cnt -= ret;
740
741                 ret = -EINVAL;
742                 if (kstrtoul(parser.buffer, 0, &val))
743                         break;
744                 if (val >= pid_list->pid_max)
745                         break;
746
747                 pid = (pid_t)val;
748
749                 set_bit(pid, pid_list->pids);
750                 nr_pids++;
751
752                 trace_parser_clear(&parser);
753                 ret = 0;
754         }
755         trace_parser_put(&parser);
756
757         if (ret < 0) {
758                 trace_free_pid_list(pid_list);
759                 return ret;
760         }
761
762         if (!nr_pids) {
763                 /* Cleared the list of pids */
764                 trace_free_pid_list(pid_list);
765                 read = ret;
766                 pid_list = NULL;
767         }
768
769         *new_pid_list = pid_list;
770
771         return read;
772 }
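
/*
 * Illustrative sketch, not part of the original file: a write handler
 * that feeds user input to trace_pid_write() and publishes the new list
 * with RCU.  The use of tr->filtered_pids and trace_types_lock here is
 * a simplification of what the real pid-filter files do.
 */
static ssize_t example_pids_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	struct trace_pid_list *filtered_pids, *pid_list;
	ssize_t ret;

	mutex_lock(&trace_types_lock);
	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
					lockdep_is_held(&trace_types_lock));

	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
	if (ret < 0)
		goto out;

	rcu_assign_pointer(tr->filtered_pids, pid_list);
	synchronize_rcu();
	if (filtered_pids)
		trace_free_pid_list(filtered_pids);
 out:
	mutex_unlock(&trace_types_lock);
	if (ret > 0)
		*ppos += ret;
	return ret;
}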
773
774 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
775 {
776         u64 ts;
777
778         /* Early boot up does not have a buffer yet */
779         if (!buf->buffer)
780                 return trace_clock_local();
781
782         ts = ring_buffer_time_stamp(buf->buffer);
783         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
784
785         return ts;
786 }
787
788 u64 ftrace_now(int cpu)
789 {
790         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
791 }
792
793 /**
794  * tracing_is_enabled - Show if global_trace has been enabled
795  *
796  * Shows if the global trace has been enabled or not. It uses the
797  * mirror flag "buffer_disabled", which is meant for fast paths such as
798  * the irqsoff tracer. But it may be inaccurate due to races. If you
799  * need to know the accurate state, use tracing_is_on() which is a little
800  * slower, but accurate.
801  */
802 int tracing_is_enabled(void)
803 {
804         /*
805          * For quick access (irqsoff uses this in fast path), just
806          * return the mirror variable of the state of the ring buffer.
807          * It's a little racy, but we don't really care.
808          */
809         smp_rmb();
810         return !global_trace.buffer_disabled;
811 }
812
813 /*
814  * trace_buf_size is the size in bytes that is allocated
815  * for a buffer. Note, the number of bytes is always rounded
816  * to page size.
817  *
818  * This number is purposely set to a low number of 16384.
819  * If the dump on oops happens, it will be much appreciated
820  * to not have to wait for all that output. In any case, this is
821  * configurable at both boot time and run time.
822  */
823 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
824
825 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
826
827 /* trace_types holds a link list of available tracers. */
828 static struct tracer            *trace_types __read_mostly;
829
830 /*
831  * trace_types_lock is used to protect the trace_types list.
832  */
833 DEFINE_MUTEX(trace_types_lock);
834
835 /*
836  * serialize the access of the ring buffer
837  *
838  * The ring buffer serializes readers, but that is only low-level protection.
839  * The validity of the events (returned by ring_buffer_peek() etc.)
840  * is not protected by the ring buffer.
841  *
842  * The content of events may become garbage if we allow other processes
843  * to consume these events concurrently:
844  *   A) the page of the consumed events may become a normal page
845  *      (not a reader page) in the ring buffer, and this page will be
846  *      rewritten by the event producer.
847  *   B) the page of the consumed events may become a page for splice_read,
848  *      and this page will be returned to the system.
849  *
850  * These primitives allow multiple processes to access different per-CPU
851  * ring buffers concurrently.
852  *
853  * These primitives don't distinguish read-only and read-consume access.
854  * Multiple read-only accesses are also serialized.
855  */
856
857 #ifdef CONFIG_SMP
858 static DECLARE_RWSEM(all_cpu_access_lock);
859 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
860
861 static inline void trace_access_lock(int cpu)
862 {
863         if (cpu == RING_BUFFER_ALL_CPUS) {
864                 /* gain it for accessing the whole ring buffer. */
865                 down_write(&all_cpu_access_lock);
866         } else {
867                 /* gain it for accessing a cpu ring buffer. */
868
869                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
870                 down_read(&all_cpu_access_lock);
871
872                 /* Secondly block other access to this @cpu ring buffer. */
873                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
874         }
875 }
876
877 static inline void trace_access_unlock(int cpu)
878 {
879         if (cpu == RING_BUFFER_ALL_CPUS) {
880                 up_write(&all_cpu_access_lock);
881         } else {
882                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
883                 up_read(&all_cpu_access_lock);
884         }
885 }
886
887 static inline void trace_access_lock_init(void)
888 {
889         int cpu;
890
891         for_each_possible_cpu(cpu)
892                 mutex_init(&per_cpu(cpu_access_lock, cpu));
893 }
894
895 #else
896
897 static DEFINE_MUTEX(access_lock);
898
899 static inline void trace_access_lock(int cpu)
900 {
901         (void)cpu;
902         mutex_lock(&access_lock);
903 }
904
905 static inline void trace_access_unlock(int cpu)
906 {
907         (void)cpu;
908         mutex_unlock(&access_lock);
909 }
910
911 static inline void trace_access_lock_init(void)
912 {
913 }
914
915 #endif
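
/*
 * Illustrative sketch, not part of the original file: a reader using
 * the primitives above around a consuming read of one CPU buffer.  The
 * choice of ring_buffer_consume() is an example; any ring buffer read
 * API could sit inside the locked region.
 */
static void example_consume_cpu(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)) != NULL) {
		/* Decode ring_buffer_event_data(event) here. */
	}
	trace_access_unlock(cpu);
}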
916
917 #ifdef CONFIG_STACKTRACE
918 static void __ftrace_trace_stack(struct trace_buffer *buffer,
919                                  unsigned int trace_ctx,
920                                  int skip, struct pt_regs *regs);
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922                                       struct trace_buffer *buffer,
923                                       unsigned int trace_ctx,
924                                       int skip, struct pt_regs *regs);
925
926 #else
927 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
928                                         unsigned int trace_ctx,
929                                         int skip, struct pt_regs *regs)
930 {
931 }
932 static inline void ftrace_trace_stack(struct trace_array *tr,
933                                       struct trace_buffer *buffer,
934                                       unsigned long trace_ctx,
935                                       int skip, struct pt_regs *regs)
936 {
937 }
938
939 #endif
940
941 static __always_inline void
942 trace_event_setup(struct ring_buffer_event *event,
943                   int type, unsigned int trace_ctx)
944 {
945         struct trace_entry *ent = ring_buffer_event_data(event);
946
947         tracing_generic_entry_update(ent, type, trace_ctx);
948 }
949
950 static __always_inline struct ring_buffer_event *
951 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
952                           int type,
953                           unsigned long len,
954                           unsigned int trace_ctx)
955 {
956         struct ring_buffer_event *event;
957
958         event = ring_buffer_lock_reserve(buffer, len);
959         if (event != NULL)
960                 trace_event_setup(event, type, trace_ctx);
961
962         return event;
963 }
964
965 void tracer_tracing_on(struct trace_array *tr)
966 {
967         if (tr->array_buffer.buffer)
968                 ring_buffer_record_on(tr->array_buffer.buffer);
969         /*
970          * This flag is looked at when buffers haven't been allocated
971          * yet, or by some tracers (like irqsoff) that just want to
972          * know if the ring buffer has been disabled, but can handle
973          * races where it gets disabled while we still do a record.
974          * As the check is in the fast path of the tracers, it is more
975          * important to be fast than accurate.
976          */
977         tr->buffer_disabled = 0;
978         /* Make the flag seen by readers */
979         smp_wmb();
980 }
981
982 /**
983  * tracing_on - enable tracing buffers
984  *
985  * This function enables tracing buffers that may have been
986  * disabled with tracing_off.
987  */
988 void tracing_on(void)
989 {
990         tracer_tracing_on(&global_trace);
991 }
992 EXPORT_SYMBOL_GPL(tracing_on);
993
994
995 static __always_inline void
996 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
997 {
998         __this_cpu_write(trace_taskinfo_save, true);
999
1000         /* If this is the temp buffer, we need to commit fully */
1001         if (this_cpu_read(trace_buffered_event) == event) {
1002                 /* Length is in event->array[0] */
1003                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1004                 /* Release the temp buffer */
1005                 this_cpu_dec(trace_buffered_event_cnt);
1006         } else
1007                 ring_buffer_unlock_commit(buffer, event);
1008 }
1009
1010 /**
1011  * __trace_puts - write a constant string into the trace buffer.
1012  * @ip:    The address of the caller
1013  * @str:   The constant string to write
1014  * @size:  The size of the string.
1015  */
1016 int __trace_puts(unsigned long ip, const char *str, int size)
1017 {
1018         struct ring_buffer_event *event;
1019         struct trace_buffer *buffer;
1020         struct print_entry *entry;
1021         unsigned int trace_ctx;
1022         int alloc;
1023
1024         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1025                 return 0;
1026
1027         if (unlikely(tracing_selftest_running || tracing_disabled))
1028                 return 0;
1029
1030         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1031
1032         trace_ctx = tracing_gen_ctx();
1033         buffer = global_trace.array_buffer.buffer;
1034         ring_buffer_nest_start(buffer);
1035         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1036                                             trace_ctx);
1037         if (!event) {
1038                 size = 0;
1039                 goto out;
1040         }
1041
1042         entry = ring_buffer_event_data(event);
1043         entry->ip = ip;
1044
1045         memcpy(&entry->buf, str, size);
1046
1047         /* Add a newline if necessary */
1048         if (entry->buf[size - 1] != '\n') {
1049                 entry->buf[size] = '\n';
1050                 entry->buf[size + 1] = '\0';
1051         } else
1052                 entry->buf[size] = '\0';
1053
1054         __buffer_unlock_commit(buffer, event);
1055         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1056  out:
1057         ring_buffer_nest_end(buffer);
1058         return size;
1059 }
1060 EXPORT_SYMBOL_GPL(__trace_puts);
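
/*
 * Illustrative usage, not part of the original file: kernel code
 * normally reaches __trace_puts()/__trace_bputs() through the
 * trace_puts() macro (and formatted output through trace_printk()).
 * The function below is a made-up example.
 */
static void example_mark_slow_path(int retries)
{
	/* Constant strings are the cheap case and end up here. */
	trace_puts("entered the slow path\n");

	/* Formatted output goes through trace_printk() instead. */
	trace_printk("retries=%d\n", retries);
}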
1061
1062 /**
1063  * __trace_bputs - write the pointer to a constant string into trace buffer
1064  * @ip:    The address of the caller
1065  * @str:   The constant string to write to the buffer to
1066  */
1067 int __trace_bputs(unsigned long ip, const char *str)
1068 {
1069         struct ring_buffer_event *event;
1070         struct trace_buffer *buffer;
1071         struct bputs_entry *entry;
1072         unsigned int trace_ctx;
1073         int size = sizeof(struct bputs_entry);
1074         int ret = 0;
1075
1076         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1077                 return 0;
1078
1079         if (unlikely(tracing_selftest_running || tracing_disabled))
1080                 return 0;
1081
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = global_trace.array_buffer.buffer;
1084
1085         ring_buffer_nest_start(buffer);
1086         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1087                                             trace_ctx);
1088         if (!event)
1089                 goto out;
1090
1091         entry = ring_buffer_event_data(event);
1092         entry->ip                       = ip;
1093         entry->str                      = str;
1094
1095         __buffer_unlock_commit(buffer, event);
1096         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1097
1098         ret = 1;
1099  out:
1100         ring_buffer_nest_end(buffer);
1101         return ret;
1102 }
1103 EXPORT_SYMBOL_GPL(__trace_bputs);
1104
1105 #ifdef CONFIG_TRACER_SNAPSHOT
1106 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107                                            void *cond_data)
1108 {
1109         struct tracer *tracer = tr->current_trace;
1110         unsigned long flags;
1111
1112         if (in_nmi()) {
1113                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1114                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1115                 return;
1116         }
1117
1118         if (!tr->allocated_snapshot) {
1119                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1120                 internal_trace_puts("*** stopping trace here!   ***\n");
1121                 tracing_off();
1122                 return;
1123         }
1124
1125         /* Note, snapshot can not be used when the tracer uses it */
1126         if (tracer->use_max_tr) {
1127                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1128                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1129                 return;
1130         }
1131
1132         local_irq_save(flags);
1133         update_max_tr(tr, current, smp_processor_id(), cond_data);
1134         local_irq_restore(flags);
1135 }
1136
1137 void tracing_snapshot_instance(struct trace_array *tr)
1138 {
1139         tracing_snapshot_instance_cond(tr, NULL);
1140 }
1141
1142 /**
1143  * tracing_snapshot - take a snapshot of the current buffer.
1144  *
1145  * This causes a swap between the snapshot buffer and the current live
1146  * tracing buffer. You can use this to take snapshots of the live
1147  * trace when some condition is triggered, but continue to trace.
1148  *
1149  * Note, make sure to allocate the snapshot either with
1150  * tracing_snapshot_alloc(), or by doing it manually
1151  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1152  *
1153  * If the snapshot buffer is not allocated, this will stop tracing,
1154  * basically making a permanent snapshot.
1155  */
1156 void tracing_snapshot(void)
1157 {
1158         struct trace_array *tr = &global_trace;
1159
1160         tracing_snapshot_instance(tr);
1161 }
1162 EXPORT_SYMBOL_GPL(tracing_snapshot);
1163
1164 /**
1165  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166  * @tr:         The tracing instance to snapshot
1167  * @cond_data:  The data to be tested conditionally, and possibly saved
1168  *
1169  * This is the same as tracing_snapshot() except that the snapshot is
1170  * conditional - the snapshot will only happen if the
1171  * cond_snapshot.update() implementation receiving the cond_data
1172  * returns true, which means that the trace array's cond_snapshot
1173  * update() operation used the cond_data to determine whether the
1174  * snapshot should be taken, and if it was, presumably saved it along
1175  * with the snapshot.
1176  */
1177 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178 {
1179         tracing_snapshot_instance_cond(tr, cond_data);
1180 }
1181 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182
1183 /**
1184  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185  * @tr:         The tracing instance
1186  *
1187  * When the user enables a conditional snapshot using
1188  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189  * with the snapshot.  This accessor is used to retrieve it.
1190  *
1191  * Should not be called from cond_snapshot.update(), since it takes
1192  * the tr->max_lock lock, which the code calling
1193  * cond_snapshot.update() already holds.
1194  *
1195  * Returns the cond_data associated with the trace array's snapshot.
1196  */
1197 void *tracing_cond_snapshot_data(struct trace_array *tr)
1198 {
1199         void *cond_data = NULL;
1200
1201         arch_spin_lock(&tr->max_lock);
1202
1203         if (tr->cond_snapshot)
1204                 cond_data = tr->cond_snapshot->cond_data;
1205
1206         arch_spin_unlock(&tr->max_lock);
1207
1208         return cond_data;
1209 }
1210 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1211
1212 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1213                                         struct array_buffer *size_buf, int cpu_id);
1214 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1215
1216 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1217 {
1218         int ret;
1219
1220         if (!tr->allocated_snapshot) {
1221
1222                 /* allocate spare buffer */
1223                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1224                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1225                 if (ret < 0)
1226                         return ret;
1227
1228                 tr->allocated_snapshot = true;
1229         }
1230
1231         return 0;
1232 }
1233
1234 static void free_snapshot(struct trace_array *tr)
1235 {
1236         /*
1237          * We don't free the ring buffer; instead, we resize it because
1238          * the max_tr ring buffer has some state (e.g. ring->clock) and
1239          * we want to preserve it.
1240          */
1241         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1242         set_buffer_entries(&tr->max_buffer, 1);
1243         tracing_reset_online_cpus(&tr->max_buffer);
1244         tr->allocated_snapshot = false;
1245 }
1246
1247 /**
1248  * tracing_alloc_snapshot - allocate snapshot buffer.
1249  *
1250  * This only allocates the snapshot buffer if it isn't already
1251  * allocated - it doesn't also take a snapshot.
1252  *
1253  * This is meant to be used in cases where the snapshot buffer needs
1254  * to be set up for events that can't sleep but need to be able to
1255  * trigger a snapshot.
1256  */
1257 int tracing_alloc_snapshot(void)
1258 {
1259         struct trace_array *tr = &global_trace;
1260         int ret;
1261
1262         ret = tracing_alloc_snapshot_instance(tr);
1263         WARN_ON(ret < 0);
1264
1265         return ret;
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1268
1269 /**
1270  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1271  *
1272  * This is similar to tracing_snapshot(), but it will allocate the
1273  * snapshot buffer if it isn't already allocated. Use this only
1274  * where it is safe to sleep, as the allocation may sleep.
1275  *
1276  * This causes a swap between the snapshot buffer and the current live
1277  * tracing buffer. You can use this to take snapshots of the live
1278  * trace when some condition is triggered, but continue to trace.
1279  */
1280 void tracing_snapshot_alloc(void)
1281 {
1282         int ret;
1283
1284         ret = tracing_alloc_snapshot();
1285         if (ret < 0)
1286                 return;
1287
1288         tracing_snapshot();
1289 }
1290 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
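
/*
 * Illustrative usage, not part of the original file: a caller that
 * wants to freeze the trace around a rare error.  The allocation is
 * done once from sleepable context; the snapshot itself can then be
 * taken from the error path.  Function names are examples.
 */
static int example_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void example_on_rare_error(void)
{
	tracing_snapshot();	/* swap the live buffer with the spare one */
}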
1291
1292 /**
1293  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1294  * @tr:         The tracing instance
1295  * @cond_data:  User data to associate with the snapshot
1296  * @update:     Implementation of the cond_snapshot update function
1297  *
1298  * Check whether the conditional snapshot for the given instance has
1299  * already been enabled, or if the current tracer is already using a
1300  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1301  * save the cond_data and update function inside.
1302  *
1303  * Returns 0 if successful, error otherwise.
1304  */
1305 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1306                                  cond_update_fn_t update)
1307 {
1308         struct cond_snapshot *cond_snapshot;
1309         int ret = 0;
1310
1311         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1312         if (!cond_snapshot)
1313                 return -ENOMEM;
1314
1315         cond_snapshot->cond_data = cond_data;
1316         cond_snapshot->update = update;
1317
1318         mutex_lock(&trace_types_lock);
1319
1320         ret = tracing_alloc_snapshot_instance(tr);
1321         if (ret)
1322                 goto fail_unlock;
1323
1324         if (tr->current_trace->use_max_tr) {
1325                 ret = -EBUSY;
1326                 goto fail_unlock;
1327         }
1328
1329         /*
1330          * The cond_snapshot can only change to NULL without the
1331          * trace_types_lock. We don't care if we race with it going
1332          * to NULL, but we want to make sure that it's not set to
1333          * something other than NULL when we get here, which we can
1334          * do safely with only holding the trace_types_lock and not
1335          * having to take the max_lock.
1336          */
1337         if (tr->cond_snapshot) {
1338                 ret = -EBUSY;
1339                 goto fail_unlock;
1340         }
1341
1342         arch_spin_lock(&tr->max_lock);
1343         tr->cond_snapshot = cond_snapshot;
1344         arch_spin_unlock(&tr->max_lock);
1345
1346         mutex_unlock(&trace_types_lock);
1347
1348         return ret;
1349
1350  fail_unlock:
1351         mutex_unlock(&trace_types_lock);
1352         kfree(cond_snapshot);
1353         return ret;
1354 }
1355 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
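
/*
 * Illustrative sketch, not part of the original file: a conditional
 * snapshot user.  The condition, the cond_data layout and the function
 * names are made-up examples; the callback prototype follows
 * cond_update_fn_t.
 */
static bool example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *watermark = cond_data;

	/* Example condition: only snapshot while the watermark is zero. */
	return *watermark == 0;
}

static unsigned long example_watermark;

static int example_enable_cond_snapshot(struct trace_array *tr)
{
	return tracing_snapshot_cond_enable(tr, &example_watermark,
					    example_snapshot_update);
}

/*
 * The point of interest would then call
 * tracing_snapshot_cond(tr, &example_watermark).
 */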
1356
1357 /**
1358  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1359  * @tr:         The tracing instance
1360  *
1361  * Check whether the conditional snapshot for the given instance is
1362  * enabled; if so, free the cond_snapshot associated with it,
1363  * otherwise return -EINVAL.
1364  *
1365  * Returns 0 if successful, error otherwise.
1366  */
1367 int tracing_snapshot_cond_disable(struct trace_array *tr)
1368 {
1369         int ret = 0;
1370
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381
1382         return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1385 #else
1386 void tracing_snapshot(void)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot);
1391 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1396 int tracing_alloc_snapshot(void)
1397 {
1398         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1399         return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1402 void tracing_snapshot_alloc(void)
1403 {
1404         /* Give warning */
1405         tracing_snapshot();
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1408 void *tracing_cond_snapshot_data(struct trace_array *tr)
1409 {
1410         return NULL;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1413 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1414 {
1415         return -ENODEV;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420         return false;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1423 #endif /* CONFIG_TRACER_SNAPSHOT */
1424
1425 void tracer_tracing_off(struct trace_array *tr)
1426 {
1427         if (tr->array_buffer.buffer)
1428                 ring_buffer_record_off(tr->array_buffer.buffer);
1429         /*
1430          * This flag is looked at when buffers haven't been allocated
1431          * yet, or by some tracers (like irqsoff) that just want to
1432          * know if the ring buffer has been disabled, but can handle
1433          * races where it gets disabled while we still do a record.
1434          * As the check is in the fast path of the tracers, it is more
1435          * important to be fast than accurate.
1436          */
1437         tr->buffer_disabled = 1;
1438         /* Make the flag seen by readers */
1439         smp_wmb();
1440 }
1441
1442 /**
1443  * tracing_off - turn off tracing buffers
1444  *
1445  * This function stops the tracing buffers from recording data.
1446  * It does not disable any overhead the tracers themselves may
1447  * be causing. This function simply causes all recording to
1448  * the ring buffers to fail.
1449  */
1450 void tracing_off(void)
1451 {
1452         tracer_tracing_off(&global_trace);
1453 }
1454 EXPORT_SYMBOL_GPL(tracing_off);
1455
1456 void disable_trace_on_warning(void)
1457 {
1458         if (__disable_trace_on_warning) {
1459                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1460                         "Disabling tracing due to warning\n");
1461                 tracing_off();
1462         }
1463 }
1464
1465 /**
1466  * tracer_tracing_is_on - show real state of ring buffer enabled
1467  * @tr : the trace array to know if ring buffer is enabled
1468  *
1469  * Shows real state of the ring buffer if it is enabled or not.
1470  */
1471 bool tracer_tracing_is_on(struct trace_array *tr)
1472 {
1473         if (tr->array_buffer.buffer)
1474                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1475         return !tr->buffer_disabled;
1476 }
1477
1478 /**
1479  * tracing_is_on - show state of ring buffers enabled
1480  */
1481 int tracing_is_on(void)
1482 {
1483         return tracer_tracing_is_on(&global_trace);
1484 }
1485 EXPORT_SYMBOL_GPL(tracing_is_on);
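
/*
 * Illustrative usage, not part of the original file: freezing the ring
 * buffer when a problem is detected, so the events leading up to it can
 * be read afterwards.  The function name is an example.
 */
static void example_freeze_trace_on_problem(bool problem_detected)
{
	if (problem_detected && tracing_is_on())
		tracing_off();	/* keep everything recorded up to here */
}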
1486
1487 static int __init set_buf_size(char *str)
1488 {
1489         unsigned long buf_size;
1490
1491         if (!str)
1492                 return 0;
1493         buf_size = memparse(str, &str);
1494         /* nr_entries can not be zero */
1495         if (buf_size == 0)
1496                 return 0;
1497         trace_buf_size = buf_size;
1498         return 1;
1499 }
1500 __setup("trace_buf_size=", set_buf_size);
1501
1502 static int __init set_tracing_thresh(char *str)
1503 {
1504         unsigned long threshold;
1505         int ret;
1506
1507         if (!str)
1508                 return 0;
1509         ret = kstrtoul(str, 0, &threshold);
1510         if (ret < 0)
1511                 return 0;
1512         tracing_thresh = threshold * 1000;
1513         return 1;
1514 }
1515 __setup("tracing_thresh=", set_tracing_thresh);
1516
1517 unsigned long nsecs_to_usecs(unsigned long nsecs)
1518 {
1519         return nsecs / 1000;
1520 }
1521
1522 /*
1523  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1524  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1525  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1526  * of strings in the order that the evals (enum) were defined.
1527  */
1528 #undef C
1529 #define C(a, b) b
1530
1531 /* These must match the bit positions in trace_iterator_flags */
1532 static const char *trace_options[] = {
1533         TRACE_FLAGS
1534         NULL
1535 };
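
/*
 * Illustrative sketch, not part of the original file, of the C(a, b)
 * trick used above: one X-macro list can expand to both an enum of
 * names and the matching string table, keeping the two in sync.  The
 * EXAMPLE_FLAGS list below is made up for demonstration.
 */
#define EXAMPLE_FLAGS		\
	C(FOO,	"foo"),		\
	C(BAR,	"bar"),

#undef C
#define C(a, b) EXAMPLE_ITER_##a
enum { EXAMPLE_FLAGS };

/* Restore the "string" expansion, matching the state left by the file above. */
#undef C
#define C(a, b) b
static const char *example_options[] = { EXAMPLE_FLAGS NULL };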
1536
1537 static struct {
1538         u64 (*func)(void);
1539         const char *name;
1540         int in_ns;              /* is this clock in nanoseconds? */
1541 } trace_clocks[] = {
1542         { trace_clock_local,            "local",        1 },
1543         { trace_clock_global,           "global",       1 },
1544         { trace_clock_counter,          "counter",      0 },
1545         { trace_clock_jiffies,          "uptime",       0 },
1546         { trace_clock,                  "perf",         1 },
1547         { ktime_get_mono_fast_ns,       "mono",         1 },
1548         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1549         { ktime_get_boot_fast_ns,       "boot",         1 },
1550         ARCH_TRACE_CLOCKS
1551 };
1552
1553 bool trace_clock_in_ns(struct trace_array *tr)
1554 {
1555         if (trace_clocks[tr->clock_id].in_ns)
1556                 return true;
1557
1558         return false;
1559 }
1560
1561 /*
1562  * trace_parser_get_init - gets the buffer for trace parser
1563  */
1564 int trace_parser_get_init(struct trace_parser *parser, int size)
1565 {
1566         memset(parser, 0, sizeof(*parser));
1567
1568         parser->buffer = kmalloc(size, GFP_KERNEL);
1569         if (!parser->buffer)
1570                 return 1;
1571
1572         parser->size = size;
1573         return 0;
1574 }
1575
1576 /*
1577  * trace_parser_put - frees the buffer for trace parser
1578  */
1579 void trace_parser_put(struct trace_parser *parser)
1580 {
1581         kfree(parser->buffer);
1582         parser->buffer = NULL;
1583 }
1584
1585 /*
1586  * trace_get_user - reads the user input string separated by space
1587  * (matched by isspace(ch))
1588  *
1589  * For each string found the 'struct trace_parser' is updated,
1590  * and the function returns.
1591  *
1592  * Returns number of bytes read.
1593  *
1594  * See kernel/trace/trace.h for 'struct trace_parser' details.
1595  */
1596 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1597         size_t cnt, loff_t *ppos)
1598 {
1599         char ch;
1600         size_t read = 0;
1601         ssize_t ret;
1602
1603         if (!*ppos)
1604                 trace_parser_clear(parser);
1605
1606         ret = get_user(ch, ubuf++);
1607         if (ret)
1608                 goto out;
1609
1610         read++;
1611         cnt--;
1612
1613         /*
1614          * If the parser is not finished with the last write,
1615          * continue reading the user input without skipping spaces.
1616          */
1617         if (!parser->cont) {
1618                 /* skip white space */
1619                 while (cnt && isspace(ch)) {
1620                         ret = get_user(ch, ubuf++);
1621                         if (ret)
1622                                 goto out;
1623                         read++;
1624                         cnt--;
1625                 }
1626
1627                 parser->idx = 0;
1628
1629                 /* only spaces were written */
1630                 if (isspace(ch) || !ch) {
1631                         *ppos += read;
1632                         ret = read;
1633                         goto out;
1634                 }
1635         }
1636
1637         /* read the non-space input */
1638         while (cnt && !isspace(ch) && ch) {
1639                 if (parser->idx < parser->size - 1)
1640                         parser->buffer[parser->idx++] = ch;
1641                 else {
1642                         ret = -EINVAL;
1643                         goto out;
1644                 }
1645                 ret = get_user(ch, ubuf++);
1646                 if (ret)
1647                         goto out;
1648                 read++;
1649                 cnt--;
1650         }
1651
1652         /* We either got finished input or we have to wait for another call. */
1653         if (isspace(ch) || !ch) {
1654                 parser->buffer[parser->idx] = 0;
1655                 parser->cont = false;
1656         } else if (parser->idx < parser->size - 1) {
1657                 parser->cont = true;
1658                 parser->buffer[parser->idx++] = ch;
1659                 /* Make sure the parsed string always terminates with '\0'. */
1660                 parser->buffer[parser->idx] = 0;
1661         } else {
1662                 ret = -EINVAL;
1663                 goto out;
1664         }
1665
1666         *ppos += read;
1667         ret = read;
1668
1669 out:
1670         return ret;
1671 }
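
/*
 * Illustrative sketch (not part of this file's code paths) of the parser
 * life cycle as a write() handler might use it; process_token() is a
 * hypothetical helper and 128 an arbitrary buffer size:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, 128))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		process_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */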
1672
1673 /* TODO add a seq_buf_to_buffer() */
1674 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1675 {
1676         int len;
1677
1678         if (trace_seq_used(s) <= s->seq.readpos)
1679                 return -EBUSY;
1680
1681         len = trace_seq_used(s) - s->seq.readpos;
1682         if (cnt > len)
1683                 cnt = len;
1684         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1685
1686         s->seq.readpos += cnt;
1687         return cnt;
1688 }
1689
1690 unsigned long __read_mostly     tracing_thresh;
1691 static const struct file_operations tracing_max_lat_fops;
1692
1693 #ifdef LATENCY_FS_NOTIFY
1694
1695 static struct workqueue_struct *fsnotify_wq;
1696
1697 static void latency_fsnotify_workfn(struct work_struct *work)
1698 {
1699         struct trace_array *tr = container_of(work, struct trace_array,
1700                                               fsnotify_work);
1701         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1702 }
1703
1704 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1705 {
1706         struct trace_array *tr = container_of(iwork, struct trace_array,
1707                                               fsnotify_irqwork);
1708         queue_work(fsnotify_wq, &tr->fsnotify_work);
1709 }
1710
1711 static void trace_create_maxlat_file(struct trace_array *tr,
1712                                      struct dentry *d_tracer)
1713 {
1714         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1715         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1716         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1717                                               d_tracer, &tr->max_latency,
1718                                               &tracing_max_lat_fops);
1719 }
1720
1721 __init static int latency_fsnotify_init(void)
1722 {
1723         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1724                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1725         if (!fsnotify_wq) {
1726                 pr_err("Unable to allocate tr_max_lat_wq\n");
1727                 return -ENOMEM;
1728         }
1729         return 0;
1730 }
1731
1732 late_initcall_sync(latency_fsnotify_init);
1733
1734 void latency_fsnotify(struct trace_array *tr)
1735 {
1736         if (!fsnotify_wq)
1737                 return;
1738         /*
1739          * We cannot call queue_work(fsnotify_wq, &tr->fsnotify_work) from here because it's
1740          * possible that we are called from __schedule() or do_idle(), which
1741          * could cause a deadlock.
1742          */
1743         irq_work_queue(&tr->fsnotify_irqwork);
1744 }
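
/*
 * The notification is therefore deferred twice: the irq_work above gets us
 * out of the scheduler/idle path, its handler latency_fsnotify_workfn_irq()
 * only queues the work item, and the workqueue then runs
 * latency_fsnotify_workfn(), where fsnotify_inode() can run in normal
 * process context.
 */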
1745
1746 /*
1747  * LATENCY_FS_NOTIFY is only defined when:
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && defined(CONFIG_FSNOTIFY)
1749  */
1750 #else
1751
1752 #define trace_create_maxlat_file(tr, d_tracer)                          \
1753         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1754                           &tr->max_latency, &tracing_max_lat_fops)
1755
1756 #endif
1757
1758 #ifdef CONFIG_TRACER_MAX_TRACE
1759 /*
1760  * Copy the new maximum trace into the separate maximum-trace
1761  * structure. (this way the maximum trace is permanently saved,
1762  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1763  */
1764 static void
1765 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766 {
1767         struct array_buffer *trace_buf = &tr->array_buffer;
1768         struct array_buffer *max_buf = &tr->max_buffer;
1769         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771
1772         max_buf->cpu = cpu;
1773         max_buf->time_start = data->preempt_timestamp;
1774
1775         max_data->saved_latency = tr->max_latency;
1776         max_data->critical_start = data->critical_start;
1777         max_data->critical_end = data->critical_end;
1778
1779         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780         max_data->pid = tsk->pid;
1781         /*
1782          * If tsk == current, then use current_uid(), as that does not use
1783          * RCU. The irq tracer can be called out of RCU scope.
1784          */
1785         if (tsk == current)
1786                 max_data->uid = current_uid();
1787         else
1788                 max_data->uid = task_uid(tsk);
1789
1790         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791         max_data->policy = tsk->policy;
1792         max_data->rt_priority = tsk->rt_priority;
1793
1794         /* record this task's comm */
1795         tracing_record_cmdline(tsk);
1796         latency_fsnotify(tr);
1797 }
1798
1799 /**
1800  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1801  * @tr: tracer
1802  * @tsk: the task with the latency
1803  * @cpu: The cpu that initiated the trace.
1804  * @cond_data: User data associated with a conditional snapshot
1805  *
1806  * Flip the buffers between the @tr and the max_tr and record information
1807  * about which task was the cause of this latency.
1808  */
1809 void
1810 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811               void *cond_data)
1812 {
1813         if (tr->stop_count)
1814                 return;
1815
1816         WARN_ON_ONCE(!irqs_disabled());
1817
1818         if (!tr->allocated_snapshot) {
1819                 /* Only the nop tracer should hit this when disabling */
1820                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821                 return;
1822         }
1823
1824         arch_spin_lock(&tr->max_lock);
1825
1826         /* Inherit the recordable setting from array_buffer */
1827         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828                 ring_buffer_record_on(tr->max_buffer.buffer);
1829         else
1830                 ring_buffer_record_off(tr->max_buffer.buffer);
1831
1832 #ifdef CONFIG_TRACER_SNAPSHOT
1833         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834                 goto out_unlock;
1835 #endif
1836         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837
1838         __update_max_tr(tr, tsk, cpu);
1839
1840  out_unlock:
1841         arch_spin_unlock(&tr->max_lock);
1842 }
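
/*
 * Illustrative sketch of how a latency tracer typically uses this once it
 * has measured a new latency 'delta' (the locking and the way delta is
 * computed are omitted; see the irqsoff and wakeup tracers for the real
 * callers):
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */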
1843
1844 /**
1845  * update_max_tr_single - only copy one trace over, and reset the rest
1846  * @tr: tracer
1847  * @tsk: task with the latency
1848  * @cpu: the cpu of the buffer to copy.
1849  *
1850  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851  */
1852 void
1853 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854 {
1855         int ret;
1856
1857         if (tr->stop_count)
1858                 return;
1859
1860         WARN_ON_ONCE(!irqs_disabled());
1861         if (!tr->allocated_snapshot) {
1862                 /* Only the nop tracer should hit this when disabling */
1863                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864                 return;
1865         }
1866
1867         arch_spin_lock(&tr->max_lock);
1868
1869         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870
1871         if (ret == -EBUSY) {
1872                 /*
1873                  * We failed to swap the buffer due to a commit taking
1874                  * place on this CPU. We fail to record, but we reset
1875                  * the max trace buffer (no one writes directly to it)
1876                  * and flag that it failed.
1877                  */
1878                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879                         "Failed to swap buffers due to commit in progress\n");
1880         }
1881
1882         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883
1884         __update_max_tr(tr, tsk, cpu);
1885         arch_spin_unlock(&tr->max_lock);
1886 }
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891         /* Iterators are static, they should be filled or empty */
1892         if (trace_buffer_iter(iter, iter->cpu_file))
1893                 return 0;
1894
1895         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896                                 full);
1897 }
1898
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901
1902 struct trace_selftests {
1903         struct list_head                list;
1904         struct tracer                   *type;
1905 };
1906
1907 static LIST_HEAD(postponed_selftests);
1908
1909 static int save_selftest(struct tracer *type)
1910 {
1911         struct trace_selftests *selftest;
1912
1913         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914         if (!selftest)
1915                 return -ENOMEM;
1916
1917         selftest->type = type;
1918         list_add(&selftest->list, &postponed_selftests);
1919         return 0;
1920 }
1921
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924         struct trace_array *tr = &global_trace;
1925         struct tracer *saved_tracer = tr->current_trace;
1926         int ret;
1927
1928         if (!type->selftest || tracing_selftest_disabled)
1929                 return 0;
1930
1931         /*
1932          * If a tracer registers early in boot up (before scheduling is
1933          * initialized and such), then do not run its selftests yet.
1934          * Instead, run it a little later in the boot process.
1935          */
1936         if (!selftests_can_run)
1937                 return save_selftest(type);
1938
1939         if (!tracing_is_on()) {
1940                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941                         type->name);
1942                 return 0;
1943         }
1944
1945         /*
1946          * Run a selftest on this tracer.
1947          * Here we reset the trace buffer, and set the current
1948          * tracer to be this tracer. The tracer can then run some
1949          * internal tracing to verify that everything is in order.
1950          * If we fail, we do not register this tracer.
1951          */
1952         tracing_reset_online_cpus(&tr->array_buffer);
1953
1954         tr->current_trace = type;
1955
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957         if (type->use_max_tr) {
1958                 /* If we expanded the buffers, make sure the max is expanded too */
1959                 if (ring_buffer_expanded)
1960                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961                                            RING_BUFFER_ALL_CPUS);
1962                 tr->allocated_snapshot = true;
1963         }
1964 #endif
1965
1966         /* the test is responsible for initializing and enabling */
1967         pr_info("Testing tracer %s: ", type->name);
1968         ret = type->selftest(type, tr);
1969         /* the test is responsible for resetting too */
1970         tr->current_trace = saved_tracer;
1971         if (ret) {
1972                 printk(KERN_CONT "FAILED!\n");
1973                 /* Add the warning after printing 'FAILED' */
1974                 WARN_ON(1);
1975                 return -1;
1976         }
1977         /* Only reset on passing, to avoid touching corrupted buffers */
1978         tracing_reset_online_cpus(&tr->array_buffer);
1979
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981         if (type->use_max_tr) {
1982                 tr->allocated_snapshot = false;
1983
1984                 /* Shrink the max buffer again */
1985                 if (ring_buffer_expanded)
1986                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1987                                            RING_BUFFER_ALL_CPUS);
1988         }
1989 #endif
1990
1991         printk(KERN_CONT "PASSED\n");
1992         return 0;
1993 }
1994
1995 static __init int init_trace_selftests(void)
1996 {
1997         struct trace_selftests *p, *n;
1998         struct tracer *t, **last;
1999         int ret;
2000
2001         selftests_can_run = true;
2002
2003         mutex_lock(&trace_types_lock);
2004
2005         if (list_empty(&postponed_selftests))
2006                 goto out;
2007
2008         pr_info("Running postponed tracer tests:\n");
2009
2010         tracing_selftest_running = true;
2011         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012                 /* This loop can take minutes when sanitizers are enabled, so
2013                  * let's make sure we allow RCU processing.
2014                  */
2015                 cond_resched();
2016                 ret = run_tracer_selftest(p->type);
2017                 /* If the test fails, then warn and remove from available_tracers */
2018                 if (ret < 0) {
2019                         WARN(1, "tracer: %s failed selftest, disabling\n",
2020                              p->type->name);
2021                         last = &trace_types;
2022                         for (t = trace_types; t; t = t->next) {
2023                                 if (t == p->type) {
2024                                         *last = t->next;
2025                                         break;
2026                                 }
2027                                 last = &t->next;
2028                         }
2029                 }
2030                 list_del(&p->list);
2031                 kfree(p);
2032         }
2033         tracing_selftest_running = false;
2034
2035  out:
2036         mutex_unlock(&trace_types_lock);
2037
2038         return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044         return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049
2050 static void __init apply_trace_boot_options(void);
2051
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060         struct tracer *t;
2061         int ret = 0;
2062
2063         if (!type->name) {
2064                 pr_info("Tracer must have a name\n");
2065                 return -1;
2066         }
2067
2068         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070                 return -1;
2071         }
2072
2073         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074                 pr_warn("Can not register tracer %s due to lockdown\n",
2075                            type->name);
2076                 return -EPERM;
2077         }
2078
2079         mutex_lock(&trace_types_lock);
2080
2081         tracing_selftest_running = true;
2082
2083         for (t = trace_types; t; t = t->next) {
2084                 if (strcmp(type->name, t->name) == 0) {
2085                         /* already found */
2086                         pr_info("Tracer %s already registered\n",
2087                                 type->name);
2088                         ret = -1;
2089                         goto out;
2090                 }
2091         }
2092
2093         if (!type->set_flag)
2094                 type->set_flag = &dummy_set_flag;
2095         if (!type->flags) {
2096                 /* allocate a dummy tracer_flags */
2097                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098                 if (!type->flags) {
2099                         ret = -ENOMEM;
2100                         goto out;
2101                 }
2102                 type->flags->val = 0;
2103                 type->flags->opts = dummy_tracer_opt;
2104         } else
2105                 if (!type->flags->opts)
2106                         type->flags->opts = dummy_tracer_opt;
2107
2108         /* store the tracer for __set_tracer_option */
2109         type->flags->trace = type;
2110
2111         ret = run_tracer_selftest(type);
2112         if (ret < 0)
2113                 goto out;
2114
2115         type->next = trace_types;
2116         trace_types = type;
2117         add_tracer_options(&global_trace, type);
2118
2119  out:
2120         tracing_selftest_running = false;
2121         mutex_unlock(&trace_types_lock);
2122
2123         if (ret || !default_bootup_tracer)
2124                 goto out_unlock;
2125
2126         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127                 goto out_unlock;
2128
2129         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130         /* Do we want this tracer to start on bootup? */
2131         tracing_set_tracer(&global_trace, type->name);
2132         default_bootup_tracer = NULL;
2133
2134         apply_trace_boot_options();
2135
2136         /* disable other selftests, since this will break them. */
2137         disable_tracing_selftest("running a tracer");
2138
2139  out_unlock:
2140         return ret;
2141 }
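
/*
 * Illustrative sketch of a minimal registration (my_tracer_init(),
 * my_tracer_reset() and the "my_tracer" name are hypothetical; the real
 * tracers in this directory follow the same shape):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */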
2142
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145         struct trace_buffer *buffer = buf->buffer;
2146
2147         if (!buffer)
2148                 return;
2149
2150         ring_buffer_record_disable(buffer);
2151
2152         /* Make sure all commits have finished */
2153         synchronize_rcu();
2154         ring_buffer_reset_cpu(buffer, cpu);
2155
2156         ring_buffer_record_enable(buffer);
2157 }
2158
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161         struct trace_buffer *buffer = buf->buffer;
2162
2163         if (!buffer)
2164                 return;
2165
2166         ring_buffer_record_disable(buffer);
2167
2168         /* Make sure all commits have finished */
2169         synchronize_rcu();
2170
2171         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172
2173         ring_buffer_reset_online_cpus(buffer);
2174
2175         ring_buffer_record_enable(buffer);
2176 }
2177
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus(void)
2180 {
2181         struct trace_array *tr;
2182
2183         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184                 if (!tr->clear_trace)
2185                         continue;
2186                 tr->clear_trace = false;
2187                 tracing_reset_online_cpus(&tr->array_buffer);
2188 #ifdef CONFIG_TRACER_MAX_TRACE
2189                 tracing_reset_online_cpus(&tr->max_buffer);
2190 #endif
2191         }
2192 }
2193
2194 static int *tgid_map;
2195
2196 #define SAVED_CMDLINES_DEFAULT 128
2197 #define NO_CMDLINE_MAP UINT_MAX
2198 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2199 struct saved_cmdlines_buffer {
2200         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2201         unsigned *map_cmdline_to_pid;
2202         unsigned cmdline_num;
2203         int cmdline_idx;
2204         char *saved_cmdlines;
2205 };
2206 static struct saved_cmdlines_buffer *savedcmd;
2207
2208 /* temporarily disable recording */
2209 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2210
2211 static inline char *get_saved_cmdlines(int idx)
2212 {
2213         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2214 }
2215
2216 static inline void set_cmdline(int idx, const char *cmdline)
2217 {
2218         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2219 }
2220
2221 static int allocate_cmdlines_buffer(unsigned int val,
2222                                     struct saved_cmdlines_buffer *s)
2223 {
2224         s->map_cmdline_to_pid = kmalloc_array(val,
2225                                               sizeof(*s->map_cmdline_to_pid),
2226                                               GFP_KERNEL);
2227         if (!s->map_cmdline_to_pid)
2228                 return -ENOMEM;
2229
2230         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2231         if (!s->saved_cmdlines) {
2232                 kfree(s->map_cmdline_to_pid);
2233                 return -ENOMEM;
2234         }
2235
2236         s->cmdline_idx = 0;
2237         s->cmdline_num = val;
2238         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2239                sizeof(s->map_pid_to_cmdline));
2240         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2241                val * sizeof(*s->map_cmdline_to_pid));
2242
2243         return 0;
2244 }
2245
2246 static int trace_create_savedcmd(void)
2247 {
2248         int ret;
2249
2250         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2251         if (!savedcmd)
2252                 return -ENOMEM;
2253
2254         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2255         if (ret < 0) {
2256                 kfree(savedcmd);
2257                 savedcmd = NULL;
2258                 return -ENOMEM;
2259         }
2260
2261         return 0;
2262 }
2263
2264 int is_tracing_stopped(void)
2265 {
2266         return global_trace.stop_count;
2267 }
2268
2269 /**
2270  * tracing_start - quick start of the tracer
2271  *
2272  * If tracing is enabled but was stopped by tracing_stop,
2273  * this will start the tracer back up.
2274  */
2275 void tracing_start(void)
2276 {
2277         struct trace_buffer *buffer;
2278         unsigned long flags;
2279
2280         if (tracing_disabled)
2281                 return;
2282
2283         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2284         if (--global_trace.stop_count) {
2285                 if (global_trace.stop_count < 0) {
2286                         /* Someone screwed up their debugging */
2287                         WARN_ON_ONCE(1);
2288                         global_trace.stop_count = 0;
2289                 }
2290                 goto out;
2291         }
2292
2293         /* Prevent the buffers from switching */
2294         arch_spin_lock(&global_trace.max_lock);
2295
2296         buffer = global_trace.array_buffer.buffer;
2297         if (buffer)
2298                 ring_buffer_record_enable(buffer);
2299
2300 #ifdef CONFIG_TRACER_MAX_TRACE
2301         buffer = global_trace.max_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304 #endif
2305
2306         arch_spin_unlock(&global_trace.max_lock);
2307
2308  out:
2309         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2310 }
2311
2312 static void tracing_start_tr(struct trace_array *tr)
2313 {
2314         struct trace_buffer *buffer;
2315         unsigned long flags;
2316
2317         if (tracing_disabled)
2318                 return;
2319
2320         /* If global, we need to also start the max tracer */
2321         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2322                 return tracing_start();
2323
2324         raw_spin_lock_irqsave(&tr->start_lock, flags);
2325
2326         if (--tr->stop_count) {
2327                 if (tr->stop_count < 0) {
2328                         /* Someone screwed up their debugging */
2329                         WARN_ON_ONCE(1);
2330                         tr->stop_count = 0;
2331                 }
2332                 goto out;
2333         }
2334
2335         buffer = tr->array_buffer.buffer;
2336         if (buffer)
2337                 ring_buffer_record_enable(buffer);
2338
2339  out:
2340         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2341 }
2342
2343 /**
2344  * tracing_stop - quick stop of the tracer
2345  *
2346  * Light weight way to stop tracing. Use in conjunction with
2347  * tracing_start.
2348  */
2349 void tracing_stop(void)
2350 {
2351         struct trace_buffer *buffer;
2352         unsigned long flags;
2353
2354         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2355         if (global_trace.stop_count++)
2356                 goto out;
2357
2358         /* Prevent the buffers from switching */
2359         arch_spin_lock(&global_trace.max_lock);
2360
2361         buffer = global_trace.array_buffer.buffer;
2362         if (buffer)
2363                 ring_buffer_record_disable(buffer);
2364
2365 #ifdef CONFIG_TRACER_MAX_TRACE
2366         buffer = global_trace.max_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369 #endif
2370
2371         arch_spin_unlock(&global_trace.max_lock);
2372
2373  out:
2374         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2375 }
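
/*
 * Typical pairing (illustrative): quiesce the writers, look at the
 * buffers, then resume:
 *
 *	tracing_stop();
 *	... read or dump the ring buffer ...
 *	tracing_start();
 */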
2376
2377 static void tracing_stop_tr(struct trace_array *tr)
2378 {
2379         struct trace_buffer *buffer;
2380         unsigned long flags;
2381
2382         /* If global, we need to also stop the max tracer */
2383         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2384                 return tracing_stop();
2385
2386         raw_spin_lock_irqsave(&tr->start_lock, flags);
2387         if (tr->stop_count++)
2388                 goto out;
2389
2390         buffer = tr->array_buffer.buffer;
2391         if (buffer)
2392                 ring_buffer_record_disable(buffer);
2393
2394  out:
2395         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2396 }
2397
2398 static int trace_save_cmdline(struct task_struct *tsk)
2399 {
2400         unsigned tpid, idx;
2401
2402         /* treat recording of idle task as a success */
2403         if (!tsk->pid)
2404                 return 1;
2405
2406         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2407
2408         /*
2409          * It's not the end of the world if we don't get
2410          * the lock, but we also don't want to spin
2411          * nor do we want to disable interrupts,
2412          * so if we miss here, then better luck next time.
2413          */
2414         if (!arch_spin_trylock(&trace_cmdline_lock))
2415                 return 0;
2416
2417         idx = savedcmd->map_pid_to_cmdline[tpid];
2418         if (idx == NO_CMDLINE_MAP) {
2419                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2420
2421                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2422                 savedcmd->cmdline_idx = idx;
2423         }
2424
2425         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2426         set_cmdline(idx, tsk->comm);
2427
2428         arch_spin_unlock(&trace_cmdline_lock);
2429
2430         return 1;
2431 }
2432
2433 static void __trace_find_cmdline(int pid, char comm[])
2434 {
2435         unsigned map;
2436         int tpid;
2437
2438         if (!pid) {
2439                 strcpy(comm, "<idle>");
2440                 return;
2441         }
2442
2443         if (WARN_ON_ONCE(pid < 0)) {
2444                 strcpy(comm, "<XXX>");
2445                 return;
2446         }
2447
2448         tpid = pid & (PID_MAX_DEFAULT - 1);
2449         map = savedcmd->map_pid_to_cmdline[tpid];
2450         if (map != NO_CMDLINE_MAP) {
2451                 tpid = savedcmd->map_cmdline_to_pid[map];
2452                 if (tpid == pid) {
2453                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2454                         return;
2455                 }
2456         }
2457         strcpy(comm, "<...>");
2458 }
2459
2460 void trace_find_cmdline(int pid, char comm[])
2461 {
2462         preempt_disable();
2463         arch_spin_lock(&trace_cmdline_lock);
2464
2465         __trace_find_cmdline(pid, comm);
2466
2467         arch_spin_unlock(&trace_cmdline_lock);
2468         preempt_enable();
2469 }
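
/*
 * Illustrative use from the output side: resolve a recorded pid back to a
 * comm using a TASK_COMM_LEN sized buffer, e.g.:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d", comm, entry->pid);
 */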
2470
2471 int trace_find_tgid(int pid)
2472 {
2473         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2474                 return 0;
2475
2476         return tgid_map[pid];
2477 }
2478
2479 static int trace_save_tgid(struct task_struct *tsk)
2480 {
2481         /* treat recording of idle task as a success */
2482         if (!tsk->pid)
2483                 return 1;
2484
2485         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2486                 return 0;
2487
2488         tgid_map[tsk->pid] = tsk->tgid;
2489         return 1;
2490 }
2491
2492 static bool tracing_record_taskinfo_skip(int flags)
2493 {
2494         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2495                 return true;
2496         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2497                 return true;
2498         if (!__this_cpu_read(trace_taskinfo_save))
2499                 return true;
2500         return false;
2501 }
2502
2503 /**
2504  * tracing_record_taskinfo - record the task info of a task
2505  *
2506  * @task:  task to record
2507  * @flags: TRACE_RECORD_CMDLINE for recording comm
2508  *         TRACE_RECORD_TGID for recording tgid
2509  */
2510 void tracing_record_taskinfo(struct task_struct *task, int flags)
2511 {
2512         bool done;
2513
2514         if (tracing_record_taskinfo_skip(flags))
2515                 return;
2516
2517         /*
2518          * Record as much task information as possible. If some fail, continue
2519          * to try to record the others.
2520          */
2521         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2522         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2523
2524         /* If recording any information failed, retry again soon. */
2525         if (!done)
2526                 return;
2527
2528         __this_cpu_write(trace_taskinfo_save, false);
2529 }
2530
2531 /**
2532  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2533  *
2534  * @prev: previous task during sched_switch
2535  * @next: next task during sched_switch
2536  * @flags: TRACE_RECORD_CMDLINE for recording comm
2537  *         TRACE_RECORD_TGID for recording tgid
2538  */
2539 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2540                                           struct task_struct *next, int flags)
2541 {
2542         bool done;
2543
2544         if (tracing_record_taskinfo_skip(flags))
2545                 return;
2546
2547         /*
2548          * Record as much task information as possible. If some fail, continue
2549          * to try to record the others.
2550          */
2551         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2552         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2553         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2555
2556         /* If recording any information failed, retry again soon. */
2557         if (!done)
2558                 return;
2559
2560         __this_cpu_write(trace_taskinfo_save, false);
2561 }
2562
2563 /* Helpers to record a specific task information */
2564 void tracing_record_cmdline(struct task_struct *task)
2565 {
2566         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2567 }
2568
2569 void tracing_record_tgid(struct task_struct *task)
2570 {
2571         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2572 }
2573
2574 /*
2575  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2576  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2577  * simplifies those functions and keeps them in sync.
2578  */
2579 enum print_line_t trace_handle_return(struct trace_seq *s)
2580 {
2581         return trace_seq_has_overflowed(s) ?
2582                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2583 }
2584 EXPORT_SYMBOL_GPL(trace_handle_return);
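
/*
 * Illustrative use in an event's print callback (field is assumed to be
 * the event's decoded entry): write into the trace_seq and let this helper
 * pick the return value:
 *
 *	trace_seq_printf(s, "%ps <-- %ps\n", (void *)field->ip,
 *			 (void *)field->parent_ip);
 *	return trace_handle_return(s);
 */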
2585
2586 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2587 {
2588         unsigned int trace_flags = irqs_status;
2589         unsigned int pc;
2590
2591         pc = preempt_count();
2592
2593         if (pc & NMI_MASK)
2594                 trace_flags |= TRACE_FLAG_NMI;
2595         if (pc & HARDIRQ_MASK)
2596                 trace_flags |= TRACE_FLAG_HARDIRQ;
2597         if (in_serving_softirq())
2598                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2599
2600         if (tif_need_resched())
2601                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2602         if (test_preempt_need_resched())
2603                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2604         return (trace_flags << 16) | (pc & 0xff);
2605 }
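
/*
 * The returned trace_ctx packs the irq/preempt flags in the upper 16 bits
 * and the preemption count in the low byte, so consumers recover them as:
 *
 *	preempt_count = trace_ctx & 0xff;
 *	flags         = trace_ctx >> 16;
 */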
2606
2607 struct ring_buffer_event *
2608 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2609                           int type,
2610                           unsigned long len,
2611                           unsigned int trace_ctx)
2612 {
2613         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2614 }
2615
2616 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2617 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2618 static int trace_buffered_event_ref;
2619
2620 /**
2621  * trace_buffered_event_enable - enable buffering events
2622  *
2623  * When events are being filtered, it is quicker to use a temporary
2624  * buffer to write the event data into if there's a likely chance
2625  * that it will not be committed. Discarding an event from the ring
2626  * buffer is not as fast as committing one, and is much slower than
2627  * copying the data into a buffer and committing that copy.
2628  *
2629  * When an event is to be filtered, allocate per cpu buffers to
2630  * write the event data into, and if the event is filtered and discarded
2631  * it is simply dropped, otherwise, the entire data is to be committed
2632  * in one shot.
2633  */
2634 void trace_buffered_event_enable(void)
2635 {
2636         struct ring_buffer_event *event;
2637         struct page *page;
2638         int cpu;
2639
2640         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2641
2642         if (trace_buffered_event_ref++)
2643                 return;
2644
2645         for_each_tracing_cpu(cpu) {
2646                 page = alloc_pages_node(cpu_to_node(cpu),
2647                                         GFP_KERNEL | __GFP_NORETRY, 0);
2648                 if (!page)
2649                         goto failed;
2650
2651                 event = page_address(page);
2652                 memset(event, 0, sizeof(*event));
2653
2654                 per_cpu(trace_buffered_event, cpu) = event;
2655
2656                 preempt_disable();
2657                 if (cpu == smp_processor_id() &&
2658                     __this_cpu_read(trace_buffered_event) !=
2659                     per_cpu(trace_buffered_event, cpu))
2660                         WARN_ON_ONCE(1);
2661                 preempt_enable();
2662         }
2663
2664         return;
2665  failed:
2666         trace_buffered_event_disable();
2667 }
2668
2669 static void enable_trace_buffered_event(void *data)
2670 {
2671         /* Probably not needed, but do it anyway */
2672         smp_rmb();
2673         this_cpu_dec(trace_buffered_event_cnt);
2674 }
2675
2676 static void disable_trace_buffered_event(void *data)
2677 {
2678         this_cpu_inc(trace_buffered_event_cnt);
2679 }
2680
2681 /**
2682  * trace_buffered_event_disable - disable buffering events
2683  *
2684  * When a filter is removed, it is faster to not use the buffered
2685  * events, and to commit directly into the ring buffer. Free up
2686  * the temp buffers when there are no more users. This requires
2687  * special synchronization with current events.
2688  */
2689 void trace_buffered_event_disable(void)
2690 {
2691         int cpu;
2692
2693         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2694
2695         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2696                 return;
2697
2698         if (--trace_buffered_event_ref)
2699                 return;
2700
2701         preempt_disable();
2702         /* For each CPU, set the buffer as used. */
2703         smp_call_function_many(tracing_buffer_mask,
2704                                disable_trace_buffered_event, NULL, 1);
2705         preempt_enable();
2706
2707         /* Wait for all current users to finish */
2708         synchronize_rcu();
2709
2710         for_each_tracing_cpu(cpu) {
2711                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2712                 per_cpu(trace_buffered_event, cpu) = NULL;
2713         }
2714         /*
2715          * Make sure trace_buffered_event is NULL before clearing
2716          * trace_buffered_event_cnt.
2717          */
2718         smp_wmb();
2719
2720         preempt_disable();
2721         /* Do the work on each cpu */
2722         smp_call_function_many(tracing_buffer_mask,
2723                                enable_trace_buffered_event, NULL, 1);
2724         preempt_enable();
2725 }
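
/*
 * Note that enable/disable are reference counted and both must be called
 * with event_mutex held (see the WARN_ON_ONCE() checks above); the event
 * core pairs them when an event starts and stops needing the per-cpu
 * buffered path.
 */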
2726
2727 static struct trace_buffer *temp_buffer;
2728
2729 struct ring_buffer_event *
2730 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2731                           struct trace_event_file *trace_file,
2732                           int type, unsigned long len,
2733                           unsigned int trace_ctx)
2734 {
2735         struct ring_buffer_event *entry;
2736         struct trace_array *tr = trace_file->tr;
2737         int val;
2738
2739         *current_rb = tr->array_buffer.buffer;
2740
2741         if (!tr->no_filter_buffering_ref &&
2742             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2743             (entry = this_cpu_read(trace_buffered_event))) {
2744                 /*
2745                  * Filtering is on, so try to use the per cpu buffer first.
2746                  * This buffer will simulate a ring_buffer_event,
2747                  * where the type_len is zero and the array[0] will
2748                  * hold the full length.
2749                  * (see include/linux/ring_buffer.h for details on
2750                  *  how the ring_buffer_event is structured).
2751                  *
2752                  * Using a temp buffer during filtering and copying it
2753                  * on a matched filter is quicker than writing directly
2754                  * into the ring buffer and then discarding it when
2755                  * it doesn't match. That is because the discard
2756                  * requires several atomic operations to get right.
2757                  * Copying on match and doing nothing on a failed match
2758                  * is still quicker than no copy on match, but having
2759                  * to discard out of the ring buffer on a failed match.
2760                  */
2761                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2762
2763                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2764
2765                 /*
2766                  * Preemption is disabled, but interrupts and NMIs
2767                  * can still come in now. If that happens after
2768                  * the above increment, then it will have to go
2769                  * back to the old method of allocating the event
2770                  * on the ring buffer, and if the filter fails, it
2771                  * will have to call ring_buffer_discard_commit()
2772                  * to remove it.
2773                  *
2774                  * Need to also check the unlikely case that the
2775                  * length is bigger than the temp buffer size.
2776                  * If that happens, then the reserve is pretty much
2777                  * guaranteed to fail, as the ring buffer currently
2778                  * only allows events less than a page. But that may
2779                  * change in the future, so let the ring buffer reserve
2780                  * handle the failure in that case.
2781                  */
2782                 if (val == 1 && likely(len <= max_len)) {
2783                         trace_event_setup(entry, type, trace_ctx);
2784                         entry->array[0] = len;
2785                         return entry;
2786                 }
2787                 this_cpu_dec(trace_buffered_event_cnt);
2788         }
2789
2790         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2791                                             trace_ctx);
2792         /*
2793          * If tracing is off, but we have triggers enabled
2794          * we still need to look at the event data. Use the temp_buffer
2795          * to store the trace event for the trigger to use. It's recursion
2796          * safe and will not be recorded anywhere.
2797          */
2798         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2799                 *current_rb = temp_buffer;
2800                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2801                                                     trace_ctx);
2802         }
2803         return entry;
2804 }
2805 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2806
2807 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2808 static DEFINE_MUTEX(tracepoint_printk_mutex);
2809
2810 static void output_printk(struct trace_event_buffer *fbuffer)
2811 {
2812         struct trace_event_call *event_call;
2813         struct trace_event_file *file;
2814         struct trace_event *event;
2815         unsigned long flags;
2816         struct trace_iterator *iter = tracepoint_print_iter;
2817
2818         /* We should never get here if iter is NULL */
2819         if (WARN_ON_ONCE(!iter))
2820                 return;
2821
2822         event_call = fbuffer->trace_file->event_call;
2823         if (!event_call || !event_call->event.funcs ||
2824             !event_call->event.funcs->trace)
2825                 return;
2826
2827         file = fbuffer->trace_file;
2828         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2829             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2830              !filter_match_preds(file->filter, fbuffer->entry)))
2831                 return;
2832
2833         event = &fbuffer->trace_file->event_call->event;
2834
2835         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2836         trace_seq_init(&iter->seq);
2837         iter->ent = fbuffer->entry;
2838         event_call->event.funcs->trace(iter, 0, event);
2839         trace_seq_putc(&iter->seq, 0);
2840         printk("%s", iter->seq.buffer);
2841
2842         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2843 }
2844
2845 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2846                              void *buffer, size_t *lenp,
2847                              loff_t *ppos)
2848 {
2849         int save_tracepoint_printk;
2850         int ret;
2851
2852         mutex_lock(&tracepoint_printk_mutex);
2853         save_tracepoint_printk = tracepoint_printk;
2854
2855         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2856
2857         /*
2858          * This will force exiting early, as tracepoint_printk
2859          * is always zero when tracepoint_print_iter is not allocated.
2860          */
2861         if (!tracepoint_print_iter)
2862                 tracepoint_printk = 0;
2863
2864         if (save_tracepoint_printk == tracepoint_printk)
2865                 goto out;
2866
2867         if (tracepoint_printk)
2868                 static_key_enable(&tracepoint_printk_key.key);
2869         else
2870                 static_key_disable(&tracepoint_printk_key.key);
2871
2872  out:
2873         mutex_unlock(&tracepoint_printk_mutex);
2874
2875         return ret;
2876 }
2877
2878 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2879 {
2880         if (static_key_false(&tracepoint_printk_key.key))
2881                 output_printk(fbuffer);
2882
2883         if (static_branch_unlikely(&trace_event_exports_enabled))
2884                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2885         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2886                                     fbuffer->event, fbuffer->entry,
2887                                     fbuffer->trace_ctx, fbuffer->regs);
2888 }
2889 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2890
2891 /*
2892  * Skip 3:
2893  *
2894  *   trace_buffer_unlock_commit_regs()
2895  *   trace_event_buffer_commit()
2896  *   trace_event_raw_event_xxx()
2897  */
2898 # define STACK_SKIP 3
2899
2900 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2901                                      struct trace_buffer *buffer,
2902                                      struct ring_buffer_event *event,
2903                                      unsigned int trace_ctx,
2904                                      struct pt_regs *regs)
2905 {
2906         __buffer_unlock_commit(buffer, event);
2907
2908         /*
2909          * If regs is not set, then skip the necessary functions.
2910          * Note, we can still get here via blktrace, wakeup tracer
2911          * and mmiotrace, but that's ok if they lose a function or
2912          * two. They are not that meaningful.
2913          */
2914         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2915         ftrace_trace_userstack(tr, buffer, trace_ctx);
2916 }
2917
2918 /*
2919  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2920  */
2921 void
2922 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2923                                    struct ring_buffer_event *event)
2924 {
2925         __buffer_unlock_commit(buffer, event);
2926 }
2927
2928 void
2929 trace_function(struct trace_array *tr, unsigned long ip,
2930                unsigned long parent_ip, unsigned int trace_ctx)
2931 {
2932         struct trace_event_call *call = &event_function;
2933         struct trace_buffer *buffer = tr->array_buffer.buffer;
2934         struct ring_buffer_event *event;
2935         struct ftrace_entry *entry;
2936
2937         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2938                                             trace_ctx);
2939         if (!event)
2940                 return;
2941         entry   = ring_buffer_event_data(event);
2942         entry->ip                       = ip;
2943         entry->parent_ip                = parent_ip;
2944
2945         if (!call_filter_check_discard(call, entry, buffer, event)) {
2946                 if (static_branch_unlikely(&trace_function_exports_enabled))
2947                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2948                 __buffer_unlock_commit(buffer, event);
2949         }
2950 }
2951
2952 #ifdef CONFIG_STACKTRACE
2953
2954 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2955 #define FTRACE_KSTACK_NESTING   4
2956
2957 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2958
2959 struct ftrace_stack {
2960         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2961 };
2962
2963
2964 struct ftrace_stacks {
2965         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2966 };
2967
2968 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2969 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2970
2971 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2972                                  unsigned int trace_ctx,
2973                                  int skip, struct pt_regs *regs)
2974 {
2975         struct trace_event_call *call = &event_kernel_stack;
2976         struct ring_buffer_event *event;
2977         unsigned int size, nr_entries;
2978         struct ftrace_stack *fstack;
2979         struct stack_entry *entry;
2980         int stackidx;
2981
2982         /*
2983          * Add one, for this function and the call to stack_trace_save().
2984          * If regs is set, then these functions will not be in the way.
2985          */
2986 #ifndef CONFIG_UNWINDER_ORC
2987         if (!regs)
2988                 skip++;
2989 #endif
2990
2991         preempt_disable_notrace();
2992
2993         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2994
2995         /* This should never happen. If it does, yell once and skip */
2996         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2997                 goto out;
2998
2999         /*
3000          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3001          * interrupt will either see the value pre increment or post
3002          * increment. If the interrupt happens pre increment it will have
3003          * restored the counter when it returns.  We just need a barrier to
3004          * keep gcc from moving things around.
3005          */
3006         barrier();
3007
3008         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3009         size = ARRAY_SIZE(fstack->calls);
3010
3011         if (regs) {
3012                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3013                                                    size, skip);
3014         } else {
3015                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3016         }
3017
3018         size = nr_entries * sizeof(unsigned long);
3019         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3020                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3021                                     trace_ctx);
3022         if (!event)
3023                 goto out;
3024         entry = ring_buffer_event_data(event);
3025
3026         memcpy(&entry->caller, fstack->calls, size);
3027         entry->size = nr_entries;
3028
3029         if (!call_filter_check_discard(call, entry, buffer, event))
3030                 __buffer_unlock_commit(buffer, event);
3031
3032  out:
3033         /* Again, don't let gcc optimize things here */
3034         barrier();
3035         __this_cpu_dec(ftrace_stack_reserve);
3036         preempt_enable_notrace();
3037
3038 }
3039
3040 static inline void ftrace_trace_stack(struct trace_array *tr,
3041                                       struct trace_buffer *buffer,
3042                                       unsigned int trace_ctx,
3043                                       int skip, struct pt_regs *regs)
3044 {
3045         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3046                 return;
3047
3048         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3049 }
3050
3051 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3052                    int skip)
3053 {
3054         struct trace_buffer *buffer = tr->array_buffer.buffer;
3055
3056         if (rcu_is_watching()) {
3057                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3058                 return;
3059         }
3060
3061         /*
3062          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3063          * but if the above rcu_is_watching() failed, then the NMI
3064          * triggered someplace critical, and rcu_irq_enter() should
3065          * not be called from NMI.
3066          */
3067         if (unlikely(in_nmi()))
3068                 return;
3069
3070         rcu_irq_enter_irqson();
3071         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3072         rcu_irq_exit_irqson();
3073 }
3074
3075 /**
3076  * trace_dump_stack - record a stack back trace in the trace buffer
3077  * @skip: Number of functions to skip (helper handlers)
3078  */
3079 void trace_dump_stack(int skip)
3080 {
3081         if (tracing_disabled || tracing_selftest_running)
3082                 return;
3083
3084 #ifndef CONFIG_UNWINDER_ORC
3085         /* Skip 1 to skip this function. */
3086         skip++;
3087 #endif
3088         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3089                              tracing_gen_ctx(), skip, NULL);
3090 }
3091 EXPORT_SYMBOL_GPL(trace_dump_stack);
3092
3093 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3094 static DEFINE_PER_CPU(int, user_stack_count);
3095
3096 static void
3097 ftrace_trace_userstack(struct trace_array *tr,
3098                        struct trace_buffer *buffer, unsigned int trace_ctx)
3099 {
3100         struct trace_event_call *call = &event_user_stack;
3101         struct ring_buffer_event *event;
3102         struct userstack_entry *entry;
3103
3104         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3105                 return;
3106
3107         /*
3108          * NMIs can not handle page faults, even with fix ups.
3109          * The save user stack can (and often does) fault.
3110          * NMIs cannot handle page faults, even with fixups.
3111          * Saving the user stack can (and often does) fault.
3112                 return;
3113
3114         /*
3115          * prevent recursion, since the user stack tracing may
3116          * trigger other kernel events.
3117          */
3118         preempt_disable();
3119         if (__this_cpu_read(user_stack_count))
3120                 goto out;
3121
3122         __this_cpu_inc(user_stack_count);
3123
3124         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3125                                             sizeof(*entry), trace_ctx);
3126         if (!event)
3127                 goto out_drop_count;
3128         entry   = ring_buffer_event_data(event);
3129
3130         entry->tgid             = current->tgid;
3131         memset(&entry->caller, 0, sizeof(entry->caller));
3132
3133         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3134         if (!call_filter_check_discard(call, entry, buffer, event))
3135                 __buffer_unlock_commit(buffer, event);
3136
3137  out_drop_count:
3138         __this_cpu_dec(user_stack_count);
3139  out:
3140         preempt_enable();
3141 }
3142 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3143 static void ftrace_trace_userstack(struct trace_array *tr,
3144                                    struct trace_buffer *buffer,
3145                                    unsigned int trace_ctx)
3146 {
3147 }
3148 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3149
3150 #endif /* CONFIG_STACKTRACE */
3151
3152 static inline void
3153 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3154                           unsigned long long delta)
3155 {
3156         entry->bottom_delta_ts = delta & U32_MAX;
3157         entry->top_delta_ts = (delta >> 32);
3158 }
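
/*
 * The 64-bit delta is stored as two 32-bit halves in the entry; the output
 * side puts it back together the obvious way:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */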
3159
3160 void trace_last_func_repeats(struct trace_array *tr,
3161                              struct trace_func_repeats *last_info,
3162                              unsigned int trace_ctx)
3163 {
3164         struct trace_buffer *buffer = tr->array_buffer.buffer;
3165         struct func_repeats_entry *entry;
3166         struct ring_buffer_event *event;
3167         u64 delta;
3168
3169         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3170                                             sizeof(*entry), trace_ctx);
3171         if (!event)
3172                 return;
3173
3174         delta = ring_buffer_event_time_stamp(buffer, event) -
3175                 last_info->ts_last_call;
3176
3177         entry = ring_buffer_event_data(event);
3178         entry->ip = last_info->ip;
3179         entry->parent_ip = last_info->parent_ip;
3180         entry->count = last_info->count;
3181         func_repeats_set_delta_ts(entry, delta);
3182
3183         __buffer_unlock_commit(buffer, event);
3184 }
3185
3186 /* created for use with alloc_percpu */
3187 struct trace_buffer_struct {
3188         int nesting;
3189         char buffer[4][TRACE_BUF_SIZE];
3190 };
3191
3192 static struct trace_buffer_struct *trace_percpu_buffer;
3193
3194 /*
3195  * This allows for lockless recording.  If we're nested too deeply, then
3196  * this returns NULL.
3197  */
3198 static char *get_trace_buf(void)
3199 {
3200         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3201
3202         if (!buffer || buffer->nesting >= 4)
3203                 return NULL;
3204
3205         buffer->nesting++;
3206
3207         /* Interrupts must see nesting incremented before we use the buffer */
3208         barrier();
3209         return &buffer->buffer[buffer->nesting - 1][0];
3210 }
3211
3212 static void put_trace_buf(void)
3213 {
3214         /* Don't let the decrement of nesting leak before this */
3215         barrier();
3216         this_cpu_dec(trace_percpu_buffer->nesting);
3217 }
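
/*
 * This is the pattern the trace_printk() paths below follow: grab a
 * per-cpu buffer, bail out if we are nested too deeply, and always release
 * it from the same context (the label name is just illustrative):
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out_nobuffer;
 *	... format the message into tbuffer ...
 *	put_trace_buf();
 */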
3218
3219 static int alloc_percpu_trace_buffer(void)
3220 {
3221         struct trace_buffer_struct *buffers;
3222
3223         if (trace_percpu_buffer)
3224                 return 0;
3225
3226         buffers = alloc_percpu(struct trace_buffer_struct);
3227         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3228                 return -ENOMEM;
3229
3230         trace_percpu_buffer = buffers;
3231         return 0;
3232 }
3233
3234 static int buffers_allocated;
3235
3236 void trace_printk_init_buffers(void)
3237 {
3238         if (buffers_allocated)
3239                 return;
3240
3241         if (alloc_percpu_trace_buffer())
3242                 return;
3243
3244         /* trace_printk() is for debug use only. Don't use it in production. */
3245
3246         pr_warn("\n");
3247         pr_warn("**********************************************************\n");
3248         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3249         pr_warn("**                                                      **\n");
3250         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3251         pr_warn("**                                                      **\n");
3252         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3253         pr_warn("** unsafe for production use.                           **\n");
3254         pr_warn("**                                                      **\n");
3255         pr_warn("** If you see this message and you are not debugging    **\n");
3256         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3257         pr_warn("**                                                      **\n");
3258         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3259         pr_warn("**********************************************************\n");
3260
3261         /* Expand the buffers to set size */
3262         tracing_update_buffers();
3263
3264         buffers_allocated = 1;
3265
3266         /*
3267          * trace_printk_init_buffers() can be called by modules.
3268          * If that happens, then we need to start cmdline recording
3269          * directly here. If global_trace.array_buffer.buffer is already
3270          * allocated here, then this was called by module code.
3271          */
3272         if (global_trace.array_buffer.buffer)
3273                 tracing_start_cmdline_record();
3274 }
3275 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3276
3277 void trace_printk_start_comm(void)
3278 {
3279         /* Start tracing comms if trace printk is set */
3280         if (!buffers_allocated)
3281                 return;
3282         tracing_start_cmdline_record();
3283 }
3284
3285 static void trace_printk_start_stop_comm(int enabled)
3286 {
3287         if (!buffers_allocated)
3288                 return;
3289
3290         if (enabled)
3291                 tracing_start_cmdline_record();
3292         else
3293                 tracing_stop_cmdline_record();
3294 }
3295
3296 /**
3297  * trace_vbprintk - write binary msg to tracing buffer
3298  * @ip:    The address of the caller
3299  * @fmt:   The string format to write to the buffer
3300  * @args:  Arguments for @fmt
3301  */
3302 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3303 {
3304         struct trace_event_call *call = &event_bprint;
3305         struct ring_buffer_event *event;
3306         struct trace_buffer *buffer;
3307         struct trace_array *tr = &global_trace;
3308         struct bprint_entry *entry;
3309         unsigned int trace_ctx;
3310         char *tbuffer;
3311         int len = 0, size;
3312
3313         if (unlikely(tracing_selftest_running || tracing_disabled))
3314                 return 0;
3315
3316         /* Don't pollute graph traces with trace_vprintk internals */
3317         pause_graph_tracing();
3318
3319         trace_ctx = tracing_gen_ctx();
3320         preempt_disable_notrace();
3321
3322         tbuffer = get_trace_buf();
3323         if (!tbuffer) {
3324                 len = 0;
3325                 goto out_nobuffer;
3326         }
3327
3328         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3329
3330         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3331                 goto out_put;
3332
3333         size = sizeof(*entry) + sizeof(u32) * len;
3334         buffer = tr->array_buffer.buffer;
3335         ring_buffer_nest_start(buffer);
3336         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3337                                             trace_ctx);
3338         if (!event)
3339                 goto out;
3340         entry = ring_buffer_event_data(event);
3341         entry->ip                       = ip;
3342         entry->fmt                      = fmt;
3343
3344         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3345         if (!call_filter_check_discard(call, entry, buffer, event)) {
3346                 __buffer_unlock_commit(buffer, event);
3347                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3348         }
3349
3350 out:
3351         ring_buffer_nest_end(buffer);
3352 out_put:
3353         put_trace_buf();
3354
3355 out_nobuffer:
3356         preempt_enable_notrace();
3357         unpause_graph_tracing();
3358
3359         return len;
3360 }
3361 EXPORT_SYMBOL_GPL(trace_vbprintk);
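
/*
 * Layout sketch of what trace_vbprintk() reserves above (illustration only):
 *
 *	size = sizeof(struct bprint_entry) + sizeof(u32) * len;
 *
 *	[ bprint_entry: ip, fmt pointer ][ len 32-bit words from vbin_printf() ]
 *
 * Note that the format string itself is not copied into the ring buffer;
 * only its pointer is stored, so it must still be valid when the trace is
 * read (trace_printk() passes constant format strings).
 */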
3362
3363 __printf(3, 0)
3364 static int
3365 __trace_array_vprintk(struct trace_buffer *buffer,
3366                       unsigned long ip, const char *fmt, va_list args)
3367 {
3368         struct trace_event_call *call = &event_print;
3369         struct ring_buffer_event *event;
3370         int len = 0, size;
3371         struct print_entry *entry;
3372         unsigned int trace_ctx;
3373         char *tbuffer;
3374
3375         if (tracing_disabled || tracing_selftest_running)
3376                 return 0;
3377
3378         /* Don't pollute graph traces with trace_vprintk internals */
3379         pause_graph_tracing();
3380
3381         trace_ctx = tracing_gen_ctx();
3382         preempt_disable_notrace();
3383
3384
3385         tbuffer = get_trace_buf();
3386         if (!tbuffer) {
3387                 len = 0;
3388                 goto out_nobuffer;
3389         }
3390
3391         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3392
3393         size = sizeof(*entry) + len + 1;
3394         ring_buffer_nest_start(buffer);
3395         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3396                                             trace_ctx);
3397         if (!event)
3398                 goto out;
3399         entry = ring_buffer_event_data(event);
3400         entry->ip = ip;
3401
3402         memcpy(&entry->buf, tbuffer, len + 1);
3403         if (!call_filter_check_discard(call, entry, buffer, event)) {
3404                 __buffer_unlock_commit(buffer, event);
3405                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3406         }
3407
3408 out:
3409         ring_buffer_nest_end(buffer);
3410         put_trace_buf();
3411
3412 out_nobuffer:
3413         preempt_enable_notrace();
3414         unpause_graph_tracing();
3415
3416         return len;
3417 }
3418
3419 __printf(3, 0)
3420 int trace_array_vprintk(struct trace_array *tr,
3421                         unsigned long ip, const char *fmt, va_list args)
3422 {
3423         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3424 }
3425
3426 /**
3427  * trace_array_printk - Print a message to a specific instance
3428  * @tr: The instance trace_array descriptor
3429  * @ip: The instruction pointer that this is called from.
3430  * @fmt: The format to print (printf format)
3431  *
3432  * If a subsystem sets up its own instance, it has the right to
3433  * printk strings into its tracing instance buffer using this
3434  * function. Note, this function will not write into the top level
3435  * buffer (use trace_printk() for that), as the top level buffer
3436  * should only contain events that can be individually disabled.
3437  * trace_printk() is only meant for debugging a kernel, and should
3438  * never be incorporated in normal use.
3439  *
3440  * trace_array_printk() can be used, as it will not add noise to the
3441  * top level tracing buffer.
3442  *
3443  * Note, trace_array_init_printk() must be called on @tr before this
3444  * can be used.
3445  */
3446 __printf(3, 0)
3447 int trace_array_printk(struct trace_array *tr,
3448                        unsigned long ip, const char *fmt, ...)
3449 {
3450         int ret;
3451         va_list ap;
3452
3453         if (!tr)
3454                 return -ENOENT;
3455
3456         /* This is only allowed for created instances */
3457         if (tr == &global_trace)
3458                 return 0;
3459
3460         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3461                 return 0;
3462
3463         va_start(ap, fmt);
3464         ret = trace_array_vprintk(tr, ip, fmt, ap);
3465         va_end(ap);
3466         return ret;
3467 }
3468 EXPORT_SYMBOL_GPL(trace_array_printk);
3469
3470 /**
3471  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3472  * @tr: The trace array to initialize the buffers for
3473  *
3474  * As trace_array_printk() only writes into instances, calls to it are
3475  * OK to have in the kernel (unlike trace_printk()). This needs to be
3476  * called before trace_array_printk() can be used on a trace_array.
3477  */
3478 int trace_array_init_printk(struct trace_array *tr)
3479 {
3480         if (!tr)
3481                 return -ENOENT;
3482
3483         /* This is only allowed for created instances */
3484         if (tr == &global_trace)
3485                 return -EINVAL;
3486
3487         return alloc_percpu_trace_buffer();
3488 }
3489 EXPORT_SYMBOL_GPL(trace_array_init_printk);
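
/*
 * Usage sketch for the instance printk API above (error handling trimmed;
 * the instance name "my_subsys" is illustrative, the helpers are the ones
 * exported by this file):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *	if (tr)
 *		trace_array_put(tr);
 */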
3490
3491 __printf(3, 4)
3492 int trace_array_printk_buf(struct trace_buffer *buffer,
3493                            unsigned long ip, const char *fmt, ...)
3494 {
3495         int ret;
3496         va_list ap;
3497
3498         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3499                 return 0;
3500
3501         va_start(ap, fmt);
3502         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3503         va_end(ap);
3504         return ret;
3505 }
3506
3507 __printf(2, 0)
3508 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3509 {
3510         return trace_array_vprintk(&global_trace, ip, fmt, args);
3511 }
3512 EXPORT_SYMBOL_GPL(trace_vprintk);
3513
3514 static void trace_iterator_increment(struct trace_iterator *iter)
3515 {
3516         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3517
3518         iter->idx++;
3519         if (buf_iter)
3520                 ring_buffer_iter_advance(buf_iter);
3521 }
3522
3523 static struct trace_entry *
3524 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3525                 unsigned long *lost_events)
3526 {
3527         struct ring_buffer_event *event;
3528         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3529
3530         if (buf_iter) {
3531                 event = ring_buffer_iter_peek(buf_iter, ts);
3532                 if (lost_events)
3533                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3534                                 (unsigned long)-1 : 0;
3535         } else {
3536                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3537                                          lost_events);
3538         }
3539
3540         if (event) {
3541                 iter->ent_size = ring_buffer_event_length(event);
3542                 return ring_buffer_event_data(event);
3543         }
3544         iter->ent_size = 0;
3545         return NULL;
3546 }
3547
3548 static struct trace_entry *
3549 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3550                   unsigned long *missing_events, u64 *ent_ts)
3551 {
3552         struct trace_buffer *buffer = iter->array_buffer->buffer;
3553         struct trace_entry *ent, *next = NULL;
3554         unsigned long lost_events = 0, next_lost = 0;
3555         int cpu_file = iter->cpu_file;
3556         u64 next_ts = 0, ts;
3557         int next_cpu = -1;
3558         int next_size = 0;
3559         int cpu;
3560
3561         /*
3562          * If we are in a per_cpu trace file, don't bother iterating over
3563          * all CPUs; peek at that CPU directly.
3564          */
3565         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3566                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3567                         return NULL;
3568                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3569                 if (ent_cpu)
3570                         *ent_cpu = cpu_file;
3571
3572                 return ent;
3573         }
3574
3575         for_each_tracing_cpu(cpu) {
3576
3577                 if (ring_buffer_empty_cpu(buffer, cpu))
3578                         continue;
3579
3580                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3581
3582                 /*
3583                  * Pick the entry with the smallest timestamp:
3584                  */
3585                 if (ent && (!next || ts < next_ts)) {
3586                         next = ent;
3587                         next_cpu = cpu;
3588                         next_ts = ts;
3589                         next_lost = lost_events;
3590                         next_size = iter->ent_size;
3591                 }
3592         }
3593
3594         iter->ent_size = next_size;
3595
3596         if (ent_cpu)
3597                 *ent_cpu = next_cpu;
3598
3599         if (ent_ts)
3600                 *ent_ts = next_ts;
3601
3602         if (missing_events)
3603                 *missing_events = next_lost;
3604
3605         return next;
3606 }
3607
3608 #define STATIC_FMT_BUF_SIZE     128
3609 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3610
3611 static char *trace_iter_expand_format(struct trace_iterator *iter)
3612 {
3613         char *tmp;
3614
3615         /*
3616          * iter->tr is NULL when used with tp_printk, which means this
3617          * can get called where it is not safe to call krealloc().
3618          */
3619         if (!iter->tr || iter->fmt == static_fmt_buf)
3620                 return NULL;
3621
3622         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3623                        GFP_KERNEL);
3624         if (tmp) {
3625                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3626                 iter->fmt = tmp;
3627         }
3628
3629         return tmp;
3630 }
3631
3632 /* Returns true if the string is safe to dereference from an event */
3633 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3634 {
3635         unsigned long addr = (unsigned long)str;
3636         struct trace_event *trace_event;
3637         struct trace_event_call *event;
3638
3639         /* OK if part of the event data */
3640         if ((addr >= (unsigned long)iter->ent) &&
3641             (addr < (unsigned long)iter->ent + iter->ent_size))
3642                 return true;
3643
3644         /* OK if part of the temp seq buffer */
3645         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3646             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3647                 return true;
3648
3649         /* Core rodata cannot be freed */
3650         if (is_kernel_rodata(addr))
3651                 return true;
3652
3653         if (trace_is_tracepoint_string(str))
3654                 return true;
3655
3656         /*
3657          * Now this could be a module event, referencing core module
3658          * data, which is OK.
3659          */
3660         if (!iter->ent)
3661                 return false;
3662
3663         trace_event = ftrace_find_event(iter->ent->type);
3664         if (!trace_event)
3665                 return false;
3666
3667         event = container_of(trace_event, struct trace_event_call, event);
3668         if (!event->mod)
3669                 return false;
3670
3671         /* Would rather have rodata, but this will suffice */
3672         if (within_module_core(addr, event->mod))
3673                 return true;
3674
3675         return false;
3676 }
3677
3678 static const char *show_buffer(struct trace_seq *s)
3679 {
3680         struct seq_buf *seq = &s->seq;
3681
3682         seq_buf_terminate(seq);
3683
3684         return seq->buffer;
3685 }
3686
3687 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3688
3689 static int test_can_verify_check(const char *fmt, ...)
3690 {
3691         char buf[16];
3692         va_list ap;
3693         int ret;
3694
3695         /*
3696          * The verifier depends on vsnprintf() modifying the va_list
3697          * passed to it, which happens when it is passed by reference.
3698          * Some architectures (like x86_32) pass it by value, which means
3699          * that vsnprintf() does not modify the va_list passed to it, and
3700          * the verifier would then need to understand all the values that
3701          * vsnprintf() can use. If it is passed by value, the verifier
3702          * is disabled.
3703          */
3704         va_start(ap, fmt);
3705         vsnprintf(buf, 16, "%d", ap);
3706         ret = va_arg(ap, int);
3707         va_end(ap);
3708
3709         return ret;
3710 }
3711
3712 static void test_can_verify(void)
3713 {
3714         if (!test_can_verify_check("%d %d", 0, 1)) {
3715                 pr_info("trace event string verifier disabled\n");
3716                 static_branch_inc(&trace_no_verify);
3717         }
3718 }
3719
3720 /**
3721  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3722  * @iter: The iterator that holds the seq buffer and the event being printed
3723  * @fmt: The format used to print the event
3724  * @ap: The va_list holding the data to print from @fmt.
3725  *
3726  * This writes the data into the @iter->seq buffer using the data from
3727  * @fmt and @ap. If the format has a %s, then the source of the string
3728  * is examined to make sure it is safe to print, otherwise it will
3729  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3730  * pointer.
3731  */
3732 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3733                          va_list ap)
3734 {
3735         const char *p = fmt;
3736         const char *str;
3737         int i, j;
3738
3739         if (WARN_ON_ONCE(!fmt))
3740                 return;
3741
3742         if (static_branch_unlikely(&trace_no_verify))
3743                 goto print;
3744
3745         /* Don't bother checking when doing a ftrace_dump() */
3746         if (iter->fmt == static_fmt_buf)
3747                 goto print;
3748
3749         while (*p) {
3750                 bool star = false;
3751                 int len = 0;
3752
3753                 j = 0;
3754
3755                 /* We only care about %s and variants */
3756                 for (i = 0; p[i]; i++) {
3757                         if (i + 1 >= iter->fmt_size) {
3758                                 /*
3759                                  * If we can't expand the copy buffer,
3760                                  * just print it.
3761                                  */
3762                                 if (!trace_iter_expand_format(iter))
3763                                         goto print;
3764                         }
3765
3766                         if (p[i] == '\\' && p[i+1]) {
3767                                 i++;
3768                                 continue;
3769                         }
3770                         if (p[i] == '%') {
3771                                 /* Need to test cases like %08.*s */
3772                                 for (j = 1; p[i+j]; j++) {
3773                                         if (isdigit(p[i+j]) ||
3774                                             p[i+j] == '.')
3775                                                 continue;
3776                                         if (p[i+j] == '*') {
3777                                                 star = true;
3778                                                 continue;
3779                                         }
3780                                         break;
3781                                 }
3782                                 if (p[i+j] == 's')
3783                                         break;
3784                                 star = false;
3785                         }
3786                         j = 0;
3787                 }
3788                 /* If no %s found then just print normally */
3789                 if (!p[i])
3790                         break;
3791
3792                 /* Copy up to the %s, and print that */
3793                 strncpy(iter->fmt, p, i);
3794                 iter->fmt[i] = '\0';
3795                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3796
3797                 if (star)
3798                         len = va_arg(ap, int);
3799
3800                 /* The ap now points to the string data of the %s */
3801                 str = va_arg(ap, const char *);
3802
3803                 /*
3804                  * If you hit this warning, it is likely that the
3805                  * trace event in question used %s on a string that
3806                  * was saved at the time of the event, but may not be
3807                  * around when the trace is read. Use __string(),
3808                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3809                  * instead. See samples/trace_events/trace-events-sample.h
3810                  * for reference.
3811                  */
3812                 if (WARN_ONCE(!trace_safe_str(iter, str),
3813                               "fmt: '%s' current_buffer: '%s'",
3814                               fmt, show_buffer(&iter->seq))) {
3815                         int ret;
3816
3817                         /* Try to safely read the string */
3818                         if (star) {
3819                                 if (len + 1 > iter->fmt_size)
3820                                         len = iter->fmt_size - 1;
3821                                 if (len < 0)
3822                                         len = 0;
3823                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3824                                 iter->fmt[len] = 0;
3825                                 star = false;
3826                         } else {
3827                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3828                                                                   iter->fmt_size);
3829                         }
3830                         if (ret < 0)
3831                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3832                         else
3833                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3834                                                  str, iter->fmt);
3835                         str = "[UNSAFE-MEMORY]";
3836                         strcpy(iter->fmt, "%s");
3837                 } else {
3838                         strncpy(iter->fmt, p + i, j + 1);
3839                         iter->fmt[j+1] = '\0';
3840                 }
3841                 if (star)
3842                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3843                 else
3844                         trace_seq_printf(&iter->seq, iter->fmt, str);
3845
3846                 p += i + j + 1;
3847         }
3848  print:
3849         if (*p)
3850                 trace_seq_vprintf(&iter->seq, p, ap);
3851 }
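
/*
 * Sketch of the recommended TRACE_EVENT() pattern that the warning above
 * points to: copy the string at event time with __string()/__assign_str()
 * and print it with __get_str() (the event and field names are
 * illustrative; see samples/trace_events/trace-events-sample.h for a
 * complete example):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */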
3852
3853 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3854 {
3855         const char *p, *new_fmt;
3856         char *q;
3857
3858         if (WARN_ON_ONCE(!fmt))
3859                 return fmt;
3860
3861         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3862                 return fmt;
3863
3864         p = fmt;
3865         new_fmt = q = iter->fmt;
3866         while (*p) {
3867                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3868                         if (!trace_iter_expand_format(iter))
3869                                 return fmt;
3870
3871                         q += iter->fmt - new_fmt;
3872                         new_fmt = iter->fmt;
3873                 }
3874
3875                 *q++ = *p++;
3876
3877                 /* Replace %p with %px */
3878                 if (p[-1] == '%') {
3879                         if (p[0] == '%') {
3880                                 *q++ = *p++;
3881                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3882                                 *q++ = *p++;
3883                                 *q++ = 'x';
3884                         }
3885                 }
3886         }
3887         *q = '\0';
3888
3889         return new_fmt;
3890 }
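
/*
 * Example of the rewrite done above (illustration only): with the
 * "hash-ptr" trace flag cleared, a format such as
 *
 *	"comm=%s ptr=%p count=%d"
 *
 * is copied into iter->fmt as
 *
 *	"comm=%s ptr=%px count=%d"
 *
 * while "%%p" and extended specifiers like "%pS" are left untouched.
 */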
3891
3892 #define STATIC_TEMP_BUF_SIZE    128
3893 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3894
3895 /* Find the next real entry, without updating the iterator itself */
3896 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3897                                           int *ent_cpu, u64 *ent_ts)
3898 {
3899         /* __find_next_entry will reset ent_size */
3900         int ent_size = iter->ent_size;
3901         struct trace_entry *entry;
3902
3903         /*
3904          * If called from ftrace_dump(), then the iter->temp buffer
3905          * will be the static_temp_buf and not created from kmalloc.
3906          * If the entry size is greater than the buffer, we cannot
3907          * save it. Just return NULL in that case. This is only
3908          * used to add markers when two consecutive events' time
3909          * stamps have a large delta. See trace_print_lat_context().
3910          */
3911         if (iter->temp == static_temp_buf &&
3912             STATIC_TEMP_BUF_SIZE < ent_size)
3913                 return NULL;
3914
3915         /*
3916          * __find_next_entry() may call peek_next_entry(), which may
3917          * call ring_buffer_peek(), which may make the contents of
3918          * iter->ent undefined. We need to copy iter->ent now.
3919          */
3920         if (iter->ent && iter->ent != iter->temp) {
3921                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3922                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3923                         void *temp;
3924                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3925                         if (!temp)
3926                                 return NULL;
3927                         kfree(iter->temp);
3928                         iter->temp = temp;
3929                         iter->temp_size = iter->ent_size;
3930                 }
3931                 memcpy(iter->temp, iter->ent, iter->ent_size);
3932                 iter->ent = iter->temp;
3933         }
3934         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3935         /* Put back the original ent_size */
3936         iter->ent_size = ent_size;
3937
3938         return entry;
3939 }
3940
3941 /* Find the next real entry, and increment the iterator to the next entry */
3942 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3943 {
3944         iter->ent = __find_next_entry(iter, &iter->cpu,
3945                                       &iter->lost_events, &iter->ts);
3946
3947         if (iter->ent)
3948                 trace_iterator_increment(iter);
3949
3950         return iter->ent ? iter : NULL;
3951 }
3952
3953 static void trace_consume(struct trace_iterator *iter)
3954 {
3955         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3956                             &iter->lost_events);
3957 }
3958
3959 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3960 {
3961         struct trace_iterator *iter = m->private;
3962         int i = (int)*pos;
3963         void *ent;
3964
3965         WARN_ON_ONCE(iter->leftover);
3966
3967         (*pos)++;
3968
3969         /* can't go backwards */
3970         if (iter->idx > i)
3971                 return NULL;
3972
3973         if (iter->idx < 0)
3974                 ent = trace_find_next_entry_inc(iter);
3975         else
3976                 ent = iter;
3977
3978         while (ent && iter->idx < i)
3979                 ent = trace_find_next_entry_inc(iter);
3980
3981         iter->pos = *pos;
3982
3983         return ent;
3984 }
3985
3986 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3987 {
3988         struct ring_buffer_iter *buf_iter;
3989         unsigned long entries = 0;
3990         u64 ts;
3991
3992         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3993
3994         buf_iter = trace_buffer_iter(iter, cpu);
3995         if (!buf_iter)
3996                 return;
3997
3998         ring_buffer_iter_reset(buf_iter);
3999
4000         /*
4001          * With the max latency tracers, it is possible that a reset
4002          * never took place on a CPU. This is evident when the
4003          * timestamp is before the start of the buffer.
4004          */
4005         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4006                 if (ts >= iter->array_buffer->time_start)
4007                         break;
4008                 entries++;
4009                 ring_buffer_iter_advance(buf_iter);
4010         }
4011
4012         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4013 }
4014
4015 /*
4016  * The current tracer is copied to avoid holding a global lock
4017  * all around.
4018  */
4019 static void *s_start(struct seq_file *m, loff_t *pos)
4020 {
4021         struct trace_iterator *iter = m->private;
4022         struct trace_array *tr = iter->tr;
4023         int cpu_file = iter->cpu_file;
4024         void *p = NULL;
4025         loff_t l = 0;
4026         int cpu;
4027
4028         /*
4029          * copy the tracer to avoid using a global lock all around.
4030          * iter->trace is a copy of current_trace, the pointer to the
4031          * name may be used instead of a strcmp(), as iter->trace->name
4032          * will point to the same string as current_trace->name.
4033          */
4034         mutex_lock(&trace_types_lock);
4035         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4036                 *iter->trace = *tr->current_trace;
4037         mutex_unlock(&trace_types_lock);
4038
4039 #ifdef CONFIG_TRACER_MAX_TRACE
4040         if (iter->snapshot && iter->trace->use_max_tr)
4041                 return ERR_PTR(-EBUSY);
4042 #endif
4043
4044         if (!iter->snapshot)
4045                 atomic_inc(&trace_record_taskinfo_disabled);
4046
4047         if (*pos != iter->pos) {
4048                 iter->ent = NULL;
4049                 iter->cpu = 0;
4050                 iter->idx = -1;
4051
4052                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4053                         for_each_tracing_cpu(cpu)
4054                                 tracing_iter_reset(iter, cpu);
4055                 } else
4056                         tracing_iter_reset(iter, cpu_file);
4057
4058                 iter->leftover = 0;
4059                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4060                         ;
4061
4062         } else {
4063                 /*
4064                  * If we overflowed the seq_file before, then we want
4065                  * to just reuse the trace_seq buffer again.
4066                  */
4067                 if (iter->leftover)
4068                         p = iter;
4069                 else {
4070                         l = *pos - 1;
4071                         p = s_next(m, p, &l);
4072                 }
4073         }
4074
4075         trace_event_read_lock();
4076         trace_access_lock(cpu_file);
4077         return p;
4078 }
4079
4080 static void s_stop(struct seq_file *m, void *p)
4081 {
4082         struct trace_iterator *iter = m->private;
4083
4084 #ifdef CONFIG_TRACER_MAX_TRACE
4085         if (iter->snapshot && iter->trace->use_max_tr)
4086                 return;
4087 #endif
4088
4089         if (!iter->snapshot)
4090                 atomic_dec(&trace_record_taskinfo_disabled);
4091
4092         trace_access_unlock(iter->cpu_file);
4093         trace_event_read_unlock();
4094 }
4095
4096 static void
4097 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4098                       unsigned long *entries, int cpu)
4099 {
4100         unsigned long count;
4101
4102         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4103         /*
4104          * If this buffer has skipped entries, then we hold all
4105          * entries for the trace and we need to ignore the
4106          * ones before the time stamp.
4107          */
4108         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4109                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4110                 /* total is the same as the entries */
4111                 *total = count;
4112         } else
4113                 *total = count +
4114                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4115         *entries = count;
4116 }
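
/*
 * Worked example for the two counters above (numbers are illustrative):
 * if a CPU's buffer currently holds 1000 readable entries and 200 older
 * entries were overwritten (overrun), then *entries = 1000 and
 * *total = 1200. When skipped_entries is set, the skipped entries are
 * subtracted and total equals entries, as the overrun is not added.
 */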
4117
4118 static void
4119 get_total_entries(struct array_buffer *buf,
4120                   unsigned long *total, unsigned long *entries)
4121 {
4122         unsigned long t, e;
4123         int cpu;
4124
4125         *total = 0;
4126         *entries = 0;
4127
4128         for_each_tracing_cpu(cpu) {
4129                 get_total_entries_cpu(buf, &t, &e, cpu);
4130                 *total += t;
4131                 *entries += e;
4132         }
4133 }
4134
4135 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4136 {
4137         unsigned long total, entries;
4138
4139         if (!tr)
4140                 tr = &global_trace;
4141
4142         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4143
4144         return entries;
4145 }
4146
4147 unsigned long trace_total_entries(struct trace_array *tr)
4148 {
4149         unsigned long total, entries;
4150
4151         if (!tr)
4152                 tr = &global_trace;
4153
4154         get_total_entries(&tr->array_buffer, &total, &entries);
4155
4156         return entries;
4157 }
4158
4159 static void print_lat_help_header(struct seq_file *m)
4160 {
4161         seq_puts(m, "#                    _------=> CPU#            \n"
4162                     "#                   / _-----=> irqs-off        \n"
4163                     "#                  | / _----=> need-resched    \n"
4164                     "#                  || / _---=> hardirq/softirq \n"
4165                     "#                  ||| / _--=> preempt-depth   \n"
4166                     "#                  |||| /     delay            \n"
4167                     "#  cmd     pid     ||||| time  |   caller      \n"
4168                     "#     \\   /        |||||  \\    |   /         \n");
4169 }
4170
4171 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4172 {
4173         unsigned long total;
4174         unsigned long entries;
4175
4176         get_total_entries(buf, &total, &entries);
4177         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4178                    entries, total, num_online_cpus());
4179         seq_puts(m, "#\n");
4180 }
4181
4182 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4183                                    unsigned int flags)
4184 {
4185         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4186
4187         print_event_info(buf, m);
4188
4189         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4190         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4191 }
4192
4193 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4194                                        unsigned int flags)
4195 {
4196         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4197         const char *space = "            ";
4198         int prec = tgid ? 12 : 2;
4199
4200         print_event_info(buf, m);
4201
4202         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4203         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4204         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4205         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4206         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4207         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4208         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4209 }
4210
4211 void
4212 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4213 {
4214         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4215         struct array_buffer *buf = iter->array_buffer;
4216         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4217         struct tracer *type = iter->trace;
4218         unsigned long entries;
4219         unsigned long total;
4220         const char *name = type->name;
4223
4224         get_total_entries(buf, &total, &entries);
4225
4226         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4227                    name, UTS_RELEASE);
4228         seq_puts(m, "# -----------------------------------"
4229                  "---------------------------------\n");
4230         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4231                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4232                    nsecs_to_usecs(data->saved_latency),
4233                    entries,
4234                    total,
4235                    buf->cpu,
4236 #if defined(CONFIG_PREEMPT_NONE)
4237                    "server",
4238 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4239                    "desktop",
4240 #elif defined(CONFIG_PREEMPT)
4241                    "preempt",
4242 #elif defined(CONFIG_PREEMPT_RT)
4243                    "preempt_rt",
4244 #else
4245                    "unknown",
4246 #endif
4247                    /* These are reserved for later use */
4248                    0, 0, 0, 0);
4249 #ifdef CONFIG_SMP
4250         seq_printf(m, " #P:%d)\n", num_online_cpus());
4251 #else
4252         seq_puts(m, ")\n");
4253 #endif
4254         seq_puts(m, "#    -----------------\n");
4255         seq_printf(m, "#    | task: %.16s-%d "
4256                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4257                    data->comm, data->pid,
4258                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4259                    data->policy, data->rt_priority);
4260         seq_puts(m, "#    -----------------\n");
4261
4262         if (data->critical_start) {
4263                 seq_puts(m, "#  => started at: ");
4264                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4265                 trace_print_seq(m, &iter->seq);
4266                 seq_puts(m, "\n#  => ended at:   ");
4267                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4268                 trace_print_seq(m, &iter->seq);
4269                 seq_puts(m, "\n#\n");
4270         }
4271
4272         seq_puts(m, "#\n");
4273 }
4274
4275 static void test_cpu_buff_start(struct trace_iterator *iter)
4276 {
4277         struct trace_seq *s = &iter->seq;
4278         struct trace_array *tr = iter->tr;
4279
4280         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4281                 return;
4282
4283         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4284                 return;
4285
4286         if (cpumask_available(iter->started) &&
4287             cpumask_test_cpu(iter->cpu, iter->started))
4288                 return;
4289
4290         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4291                 return;
4292
4293         if (cpumask_available(iter->started))
4294                 cpumask_set_cpu(iter->cpu, iter->started);
4295
4296         /* Don't print started cpu buffer for the first entry of the trace */
4297         if (iter->idx > 1)
4298                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4299                                 iter->cpu);
4300 }
4301
4302 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4303 {
4304         struct trace_array *tr = iter->tr;
4305         struct trace_seq *s = &iter->seq;
4306         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4307         struct trace_entry *entry;
4308         struct trace_event *event;
4309
4310         entry = iter->ent;
4311
4312         test_cpu_buff_start(iter);
4313
4314         event = ftrace_find_event(entry->type);
4315
4316         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4317                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4318                         trace_print_lat_context(iter);
4319                 else
4320                         trace_print_context(iter);
4321         }
4322
4323         if (trace_seq_has_overflowed(s))
4324                 return TRACE_TYPE_PARTIAL_LINE;
4325
4326         if (event)
4327                 return event->funcs->trace(iter, sym_flags, event);
4328
4329         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4330
4331         return trace_handle_return(s);
4332 }
4333
4334 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4335 {
4336         struct trace_array *tr = iter->tr;
4337         struct trace_seq *s = &iter->seq;
4338         struct trace_entry *entry;
4339         struct trace_event *event;
4340
4341         entry = iter->ent;
4342
4343         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4344                 trace_seq_printf(s, "%d %d %llu ",
4345                                  entry->pid, iter->cpu, iter->ts);
4346
4347         if (trace_seq_has_overflowed(s))
4348                 return TRACE_TYPE_PARTIAL_LINE;
4349
4350         event = ftrace_find_event(entry->type);
4351         if (event)
4352                 return event->funcs->raw(iter, 0, event);
4353
4354         trace_seq_printf(s, "%d ?\n", entry->type);
4355
4356         return trace_handle_return(s);
4357 }
4358
4359 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4360 {
4361         struct trace_array *tr = iter->tr;
4362         struct trace_seq *s = &iter->seq;
4363         unsigned char newline = '\n';
4364         struct trace_entry *entry;
4365         struct trace_event *event;
4366
4367         entry = iter->ent;
4368
4369         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4370                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4371                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4372                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4373                 if (trace_seq_has_overflowed(s))
4374                         return TRACE_TYPE_PARTIAL_LINE;
4375         }
4376
4377         event = ftrace_find_event(entry->type);
4378         if (event) {
4379                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4380                 if (ret != TRACE_TYPE_HANDLED)
4381                         return ret;
4382         }
4383
4384         SEQ_PUT_FIELD(s, newline);
4385
4386         return trace_handle_return(s);
4387 }
4388
4389 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4390 {
4391         struct trace_array *tr = iter->tr;
4392         struct trace_seq *s = &iter->seq;
4393         struct trace_entry *entry;
4394         struct trace_event *event;
4395
4396         entry = iter->ent;
4397
4398         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4399                 SEQ_PUT_FIELD(s, entry->pid);
4400                 SEQ_PUT_FIELD(s, iter->cpu);
4401                 SEQ_PUT_FIELD(s, iter->ts);
4402                 if (trace_seq_has_overflowed(s))
4403                         return TRACE_TYPE_PARTIAL_LINE;
4404         }
4405
4406         event = ftrace_find_event(entry->type);
4407         return event ? event->funcs->binary(iter, 0, event) :
4408                 TRACE_TYPE_HANDLED;
4409 }
4410
4411 int trace_empty(struct trace_iterator *iter)
4412 {
4413         struct ring_buffer_iter *buf_iter;
4414         int cpu;
4415
4416         /* If we are looking at one CPU buffer, only check that one */
4417         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4418                 cpu = iter->cpu_file;
4419                 buf_iter = trace_buffer_iter(iter, cpu);
4420                 if (buf_iter) {
4421                         if (!ring_buffer_iter_empty(buf_iter))
4422                                 return 0;
4423                 } else {
4424                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4425                                 return 0;
4426                 }
4427                 return 1;
4428         }
4429
4430         for_each_tracing_cpu(cpu) {
4431                 buf_iter = trace_buffer_iter(iter, cpu);
4432                 if (buf_iter) {
4433                         if (!ring_buffer_iter_empty(buf_iter))
4434                                 return 0;
4435                 } else {
4436                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4437                                 return 0;
4438                 }
4439         }
4440
4441         return 1;
4442 }
4443
4444 /*  Called with trace_event_read_lock() held. */
4445 enum print_line_t print_trace_line(struct trace_iterator *iter)
4446 {
4447         struct trace_array *tr = iter->tr;
4448         unsigned long trace_flags = tr->trace_flags;
4449         enum print_line_t ret;
4450
4451         if (iter->lost_events) {
4452                 if (iter->lost_events == (unsigned long)-1)
4453                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4454                                          iter->cpu);
4455                 else
4456                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4457                                          iter->cpu, iter->lost_events);
4458                 if (trace_seq_has_overflowed(&iter->seq))
4459                         return TRACE_TYPE_PARTIAL_LINE;
4460         }
4461
4462         if (iter->trace && iter->trace->print_line) {
4463                 ret = iter->trace->print_line(iter);
4464                 if (ret != TRACE_TYPE_UNHANDLED)
4465                         return ret;
4466         }
4467
4468         if (iter->ent->type == TRACE_BPUTS &&
4469                         trace_flags & TRACE_ITER_PRINTK &&
4470                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4471                 return trace_print_bputs_msg_only(iter);
4472
4473         if (iter->ent->type == TRACE_BPRINT &&
4474                         trace_flags & TRACE_ITER_PRINTK &&
4475                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4476                 return trace_print_bprintk_msg_only(iter);
4477
4478         if (iter->ent->type == TRACE_PRINT &&
4479                         trace_flags & TRACE_ITER_PRINTK &&
4480                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4481                 return trace_print_printk_msg_only(iter);
4482
4483         if (trace_flags & TRACE_ITER_BIN)
4484                 return print_bin_fmt(iter);
4485
4486         if (trace_flags & TRACE_ITER_HEX)
4487                 return print_hex_fmt(iter);
4488
4489         if (trace_flags & TRACE_ITER_RAW)
4490                 return print_raw_fmt(iter);
4491
4492         return print_trace_fmt(iter);
4493 }
4494
4495 void trace_latency_header(struct seq_file *m)
4496 {
4497         struct trace_iterator *iter = m->private;
4498         struct trace_array *tr = iter->tr;
4499
4500         /* print nothing if the buffers are empty */
4501         if (trace_empty(iter))
4502                 return;
4503
4504         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4505                 print_trace_header(m, iter);
4506
4507         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4508                 print_lat_help_header(m);
4509 }
4510
4511 void trace_default_header(struct seq_file *m)
4512 {
4513         struct trace_iterator *iter = m->private;
4514         struct trace_array *tr = iter->tr;
4515         unsigned long trace_flags = tr->trace_flags;
4516
4517         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4518                 return;
4519
4520         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4521                 /* print nothing if the buffers are empty */
4522                 if (trace_empty(iter))
4523                         return;
4524                 print_trace_header(m, iter);
4525                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4526                         print_lat_help_header(m);
4527         } else {
4528                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4529                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4530                                 print_func_help_header_irq(iter->array_buffer,
4531                                                            m, trace_flags);
4532                         else
4533                                 print_func_help_header(iter->array_buffer, m,
4534                                                        trace_flags);
4535                 }
4536         }
4537 }
4538
4539 static void test_ftrace_alive(struct seq_file *m)
4540 {
4541         if (!ftrace_is_dead())
4542                 return;
4543         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4544                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4545 }
4546
4547 #ifdef CONFIG_TRACER_MAX_TRACE
4548 static void show_snapshot_main_help(struct seq_file *m)
4549 {
4550         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4551                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4552                     "#                      Takes a snapshot of the main buffer.\n"
4553                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4554                     "#                      (Doesn't have to be '2'; works with any number that\n"
4555                     "#                       is not a '0' or '1')\n");
4556 }
4557
4558 static void show_snapshot_percpu_help(struct seq_file *m)
4559 {
4560         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4561 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4562         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4563                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4564 #else
4565         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4566                     "#                     Must use main snapshot file to allocate.\n");
4567 #endif
4568         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4569                     "#                      (Doesn't have to be '2'; works with any number that\n"
4570                     "#                       is not a '0' or '1')\n");
4571 }
4572
4573 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4574 {
4575         if (iter->tr->allocated_snapshot)
4576                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4577         else
4578                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4579
4580         seq_puts(m, "# Snapshot commands:\n");
4581         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4582                 show_snapshot_main_help(m);
4583         else
4584                 show_snapshot_percpu_help(m);
4585 }
4586 #else
4587 /* Should never be called */
4588 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4589 #endif
4590
4591 static int s_show(struct seq_file *m, void *v)
4592 {
4593         struct trace_iterator *iter = v;
4594         int ret;
4595
4596         if (iter->ent == NULL) {
4597                 if (iter->tr) {
4598                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4599                         seq_puts(m, "#\n");
4600                         test_ftrace_alive(m);
4601                 }
4602                 if (iter->snapshot && trace_empty(iter))
4603                         print_snapshot_help(m, iter);
4604                 else if (iter->trace && iter->trace->print_header)
4605                         iter->trace->print_header(m);
4606                 else
4607                         trace_default_header(m);
4608
4609         } else if (iter->leftover) {
4610                 /*
4611                  * If we filled the seq_file buffer earlier, we
4612                  * want to just show it now.
4613                  */
4614                 ret = trace_print_seq(m, &iter->seq);
4615
4616                 /* ret should this time be zero, but you never know */
4617                 iter->leftover = ret;
4618
4619         } else {
4620                 print_trace_line(iter);
4621                 ret = trace_print_seq(m, &iter->seq);
4622                 /*
4623                  * If we overflow the seq_file buffer, then it will
4624                  * ask us for this data again at start up.
4625                  * Use that instead.
4626                  *  ret is 0 if seq_file write succeeded.
4627                  *        -1 otherwise.
4628                  */
4629                 iter->leftover = ret;
4630         }
4631
4632         return 0;
4633 }
4634
4635 /*
4636  * Should be used after trace_array_get(), trace_types_lock
4637  * ensures that i_cdev was already initialized.
4638  */
4639 static inline int tracing_get_cpu(struct inode *inode)
4640 {
4641         if (inode->i_cdev) /* See trace_create_cpu_file() */
4642                 return (long)inode->i_cdev - 1;
4643         return RING_BUFFER_ALL_CPUS;
4644 }
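
/*
 * Sketch of the encoding undone above: trace_create_cpu_file() stores
 * "cpu + 1" in i_cdev, so a NULL i_cdev (0) means "all CPUs" and any
 * non-zero value decodes back to a CPU number, e.g.:
 *
 *	inode->i_cdev == NULL		->  RING_BUFFER_ALL_CPUS
 *	inode->i_cdev == (void *)3	->  CPU 2
 */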
4645
4646 static const struct seq_operations tracer_seq_ops = {
4647         .start          = s_start,
4648         .next           = s_next,
4649         .stop           = s_stop,
4650         .show           = s_show,
4651 };
4652
4653 static struct trace_iterator *
4654 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4655 {
4656         struct trace_array *tr = inode->i_private;
4657         struct trace_iterator *iter;
4658         int cpu;
4659
4660         if (tracing_disabled)
4661                 return ERR_PTR(-ENODEV);
4662
4663         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4664         if (!iter)
4665                 return ERR_PTR(-ENOMEM);
4666
4667         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4668                                     GFP_KERNEL);
4669         if (!iter->buffer_iter)
4670                 goto release;
4671
4672         /*
4673          * trace_find_next_entry() may need to save off iter->ent.
4674          * It will place it into the iter->temp buffer. As most
4675          * events are less than 128, allocate a buffer of that size.
4676          * If one is greater, then trace_find_next_entry() will
4677          * allocate a new buffer to adjust for the bigger iter->ent.
4678          * It's not critical if it fails to get allocated here.
4679          */
4680         iter->temp = kmalloc(128, GFP_KERNEL);
4681         if (iter->temp)
4682                 iter->temp_size = 128;
4683
4684         /*
4685          * trace_event_printf() may need to modify the given format
4686          * string to replace %p with %px so that it shows the real address
4687          * instead of a hash value. However, that is only needed for event
4688          * tracing; other tracers may not need it. Defer the allocation
4689          * until it is needed.
4690          */
4691         iter->fmt = NULL;
4692         iter->fmt_size = 0;
4693
4694         /*
4695          * We make a copy of the current tracer to avoid concurrent
4696          * changes on it while we are reading.
4697          */
4698         mutex_lock(&trace_types_lock);
4699         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4700         if (!iter->trace)
4701                 goto fail;
4702
4703         *iter->trace = *tr->current_trace;
4704
4705         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4706                 goto fail;
4707
4708         iter->tr = tr;
4709
4710 #ifdef CONFIG_TRACER_MAX_TRACE
4711         /* Currently only the top directory has a snapshot */
4712         if (tr->current_trace->print_max || snapshot)
4713                 iter->array_buffer = &tr->max_buffer;
4714         else
4715 #endif
4716                 iter->array_buffer = &tr->array_buffer;
4717         iter->snapshot = snapshot;
4718         iter->pos = -1;
4719         iter->cpu_file = tracing_get_cpu(inode);
4720         mutex_init(&iter->mutex);
4721
4722         /* Notify the tracer early; before we stop tracing. */
4723         if (iter->trace->open)
4724                 iter->trace->open(iter);
4725
4726         /* Annotate start of buffers if we had overruns */
4727         if (ring_buffer_overruns(iter->array_buffer->buffer))
4728                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4729
4730         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4731         if (trace_clocks[tr->clock_id].in_ns)
4732                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4733
4734         /*
4735          * If pause-on-trace is enabled, then stop the trace while
4736          * dumping, unless this is the "snapshot" file
4737          */
4738         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4739                 tracing_stop_tr(tr);
4740
4741         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4742                 for_each_tracing_cpu(cpu) {
4743                         iter->buffer_iter[cpu] =
4744                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4745                                                          cpu, GFP_KERNEL);
4746                 }
4747                 ring_buffer_read_prepare_sync();
4748                 for_each_tracing_cpu(cpu) {
4749                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4750                         tracing_iter_reset(iter, cpu);
4751                 }
4752         } else {
4753                 cpu = iter->cpu_file;
4754                 iter->buffer_iter[cpu] =
4755                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4756                                                  cpu, GFP_KERNEL);
4757                 ring_buffer_read_prepare_sync();
4758                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4759                 tracing_iter_reset(iter, cpu);
4760         }
4761
4762         mutex_unlock(&trace_types_lock);
4763
4764         return iter;
4765
4766  fail:
4767         mutex_unlock(&trace_types_lock);
4768         kfree(iter->trace);
4769         kfree(iter->temp);
4770         kfree(iter->buffer_iter);
4771 release:
4772         seq_release_private(inode, file);
4773         return ERR_PTR(-ENOMEM);
4774 }
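
/*
 * Rough life cycle of the "trace" file backed by this iterator (a sketch
 * of the fops wired up below, not a new interface):
 *
 *   open()    -> tracing_open() -> __tracing_open()   (iterator + buffer_iter)
 *   read()    -> seq_read()     -> s_start/s_next/s_show/s_stop
 *   release() -> tracing_release()                    (undoes the setup above)
 */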
4775
4776 int tracing_open_generic(struct inode *inode, struct file *filp)
4777 {
4778         int ret;
4779
4780         ret = tracing_check_open_get_tr(NULL);
4781         if (ret)
4782                 return ret;
4783
4784         filp->private_data = inode->i_private;
4785         return 0;
4786 }
4787
4788 bool tracing_is_disabled(void)
4789 {
4790         return (tracing_disabled) ? true : false;
4791 }
4792
4793 /*
4794  * Open and update trace_array ref count.
4795  * Must have the current trace_array passed to it.
4796  */
4797 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4798 {
4799         struct trace_array *tr = inode->i_private;
4800         int ret;
4801
4802         ret = tracing_check_open_get_tr(tr);
4803         if (ret)
4804                 return ret;
4805
4806         filp->private_data = inode->i_private;
4807
4808         return 0;
4809 }
4810
4811 static int tracing_release(struct inode *inode, struct file *file)
4812 {
4813         struct trace_array *tr = inode->i_private;
4814         struct seq_file *m = file->private_data;
4815         struct trace_iterator *iter;
4816         int cpu;
4817
4818         if (!(file->f_mode & FMODE_READ)) {
4819                 trace_array_put(tr);
4820                 return 0;
4821         }
4822
4823         /* Writes do not use seq_file */
4824         iter = m->private;
4825         mutex_lock(&trace_types_lock);
4826
4827         for_each_tracing_cpu(cpu) {
4828                 if (iter->buffer_iter[cpu])
4829                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4830         }
4831
4832         if (iter->trace && iter->trace->close)
4833                 iter->trace->close(iter);
4834
4835         if (!iter->snapshot && tr->stop_count)
4836                 /* reenable tracing if it was previously enabled */
4837                 tracing_start_tr(tr);
4838
4839         __trace_array_put(tr);
4840
4841         mutex_unlock(&trace_types_lock);
4842
4843         mutex_destroy(&iter->mutex);
4844         free_cpumask_var(iter->started);
4845         kfree(iter->fmt);
4846         kfree(iter->temp);
4847         kfree(iter->trace);
4848         kfree(iter->buffer_iter);
4849         seq_release_private(inode, file);
4850
4851         return 0;
4852 }
4853
4854 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4855 {
4856         struct trace_array *tr = inode->i_private;
4857
4858         trace_array_put(tr);
4859         return 0;
4860 }
4861
4862 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4863 {
4864         struct trace_array *tr = inode->i_private;
4865
4866         trace_array_put(tr);
4867
4868         return single_release(inode, file);
4869 }
4870
4871 static int tracing_open(struct inode *inode, struct file *file)
4872 {
4873         struct trace_array *tr = inode->i_private;
4874         struct trace_iterator *iter;
4875         int ret;
4876
4877         ret = tracing_check_open_get_tr(tr);
4878         if (ret)
4879                 return ret;
4880
4881         /* If this file was open for write, then erase contents */
4882         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4883                 int cpu = tracing_get_cpu(inode);
4884                 struct array_buffer *trace_buf = &tr->array_buffer;
4885
4886 #ifdef CONFIG_TRACER_MAX_TRACE
4887                 if (tr->current_trace->print_max)
4888                         trace_buf = &tr->max_buffer;
4889 #endif
4890
4891                 if (cpu == RING_BUFFER_ALL_CPUS)
4892                         tracing_reset_online_cpus(trace_buf);
4893                 else
4894                         tracing_reset_cpu(trace_buf, cpu);
4895         }
4896
4897         if (file->f_mode & FMODE_READ) {
4898                 iter = __tracing_open(inode, file, false);
4899                 if (IS_ERR(iter))
4900                         ret = PTR_ERR(iter);
4901                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4902                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4903         }
4904
4905         if (ret < 0)
4906                 trace_array_put(tr);
4907
4908         return ret;
4909 }
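
/*
 * Note: the O_TRUNC reset above is what makes "echo > trace" (a plain
 * shell redirection, shown only as an illustrative example) clear the
 * buffer without needing a dedicated control file.
 */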
4910
4911 /*
4912  * Some tracers are not suitable for instance buffers.
4913  * A tracer is always available for the global array (toplevel)
4914  * or if it explicitly states that it is.
4915  */
4916 static bool
4917 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4918 {
4919         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4920 }
4921
4922 /* Find the next tracer that this trace array may use */
4923 static struct tracer *
4924 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4925 {
4926         while (t && !trace_ok_for_array(t, tr))
4927                 t = t->next;
4928
4929         return t;
4930 }
4931
4932 static void *
4933 t_next(struct seq_file *m, void *v, loff_t *pos)
4934 {
4935         struct trace_array *tr = m->private;
4936         struct tracer *t = v;
4937
4938         (*pos)++;
4939
4940         if (t)
4941                 t = get_tracer_for_array(tr, t->next);
4942
4943         return t;
4944 }
4945
4946 static void *t_start(struct seq_file *m, loff_t *pos)
4947 {
4948         struct trace_array *tr = m->private;
4949         struct tracer *t;
4950         loff_t l = 0;
4951
4952         mutex_lock(&trace_types_lock);
4953
4954         t = get_tracer_for_array(tr, trace_types);
4955         for (; t && l < *pos; t = t_next(m, t, &l))
4956                 ;
4957
4958         return t;
4959 }
4960
4961 static void t_stop(struct seq_file *m, void *p)
4962 {
4963         mutex_unlock(&trace_types_lock);
4964 }
4965
4966 static int t_show(struct seq_file *m, void *v)
4967 {
4968         struct tracer *t = v;
4969
4970         if (!t)
4971                 return 0;
4972
4973         seq_puts(m, t->name);
4974         if (t->next)
4975                 seq_putc(m, ' ');
4976         else
4977                 seq_putc(m, '\n');
4978
4979         return 0;
4980 }
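
/*
 * Example of what these seq ops produce for the available_tracers file
 * (which tracers appear, and in what order, depends on the configuration;
 * the values below are only illustrative):
 *
 *   # cat available_tracers
 *   function_graph function nop
 */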
4981
4982 static const struct seq_operations show_traces_seq_ops = {
4983         .start          = t_start,
4984         .next           = t_next,
4985         .stop           = t_stop,
4986         .show           = t_show,
4987 };
4988
4989 static int show_traces_open(struct inode *inode, struct file *file)
4990 {
4991         struct trace_array *tr = inode->i_private;
4992         struct seq_file *m;
4993         int ret;
4994
4995         ret = tracing_check_open_get_tr(tr);
4996         if (ret)
4997                 return ret;
4998
4999         ret = seq_open(file, &show_traces_seq_ops);
5000         if (ret) {
5001                 trace_array_put(tr);
5002                 return ret;
5003         }
5004
5005         m = file->private_data;
5006         m->private = tr;
5007
5008         return 0;
5009 }
5010
5011 static int show_traces_release(struct inode *inode, struct file *file)
5012 {
5013         struct trace_array *tr = inode->i_private;
5014
5015         trace_array_put(tr);
5016         return seq_release(inode, file);
5017 }
5018
5019 static ssize_t
5020 tracing_write_stub(struct file *filp, const char __user *ubuf,
5021                    size_t count, loff_t *ppos)
5022 {
5023         return count;
5024 }
5025
5026 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5027 {
5028         int ret;
5029
5030         if (file->f_mode & FMODE_READ)
5031                 ret = seq_lseek(file, offset, whence);
5032         else
5033                 file->f_pos = ret = 0;
5034
5035         return ret;
5036 }
5037
5038 static const struct file_operations tracing_fops = {
5039         .open           = tracing_open,
5040         .read           = seq_read,
5041         .write          = tracing_write_stub,
5042         .llseek         = tracing_lseek,
5043         .release        = tracing_release,
5044 };
5045
5046 static const struct file_operations show_traces_fops = {
5047         .open           = show_traces_open,
5048         .read           = seq_read,
5049         .llseek         = seq_lseek,
5050         .release        = show_traces_release,
5051 };
5052
5053 static ssize_t
5054 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5055                      size_t count, loff_t *ppos)
5056 {
5057         struct trace_array *tr = file_inode(filp)->i_private;
5058         char *mask_str;
5059         int len;
5060
5061         len = snprintf(NULL, 0, "%*pb\n",
5062                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5063         mask_str = kmalloc(len, GFP_KERNEL);
5064         if (!mask_str)
5065                 return -ENOMEM;
5066
5067         len = snprintf(mask_str, len, "%*pb\n",
5068                        cpumask_pr_args(tr->tracing_cpumask));
5069         if (len >= count) {
5070                 count = -EINVAL;
5071                 goto out_err;
5072         }
5073         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5074
5075 out_err:
5076         kfree(mask_str);
5077
5078         return count;
5079 }
5080
5081 int tracing_set_cpumask(struct trace_array *tr,
5082                         cpumask_var_t tracing_cpumask_new)
5083 {
5084         int cpu;
5085
5086         if (!tr)
5087                 return -EINVAL;
5088
5089         local_irq_disable();
5090         arch_spin_lock(&tr->max_lock);
5091         for_each_tracing_cpu(cpu) {
5092                 /*
5093                  * Increase/decrease the disabled counter if we are
5094                  * about to flip a bit in the cpumask:
5095                  */
5096                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5097                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5098                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5099                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5100                 }
5101                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5102                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5103                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5104                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5105                 }
5106         }
5107         arch_spin_unlock(&tr->max_lock);
5108         local_irq_enable();
5109
5110         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5111
5112         return 0;
5113 }
5114
5115 static ssize_t
5116 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5117                       size_t count, loff_t *ppos)
5118 {
5119         struct trace_array *tr = file_inode(filp)->i_private;
5120         cpumask_var_t tracing_cpumask_new;
5121         int err;
5122
5123         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5124                 return -ENOMEM;
5125
5126         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5127         if (err)
5128                 goto err_free;
5129
5130         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5131         if (err)
5132                 goto err_free;
5133
5134         free_cpumask_var(tracing_cpumask_new);
5135
5136         return count;
5137
5138 err_free:
5139         free_cpumask_var(tracing_cpumask_new);
5140
5141         return err;
5142 }
5143
5144 static const struct file_operations tracing_cpumask_fops = {
5145         .open           = tracing_open_generic_tr,
5146         .read           = tracing_cpumask_read,
5147         .write          = tracing_cpumask_write,
5148         .release        = tracing_release_generic_tr,
5149         .llseek         = generic_file_llseek,
5150 };
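
/*
 * Usage sketch for tracing_cpumask (mount point and values are
 * illustrative; tracefs is assumed at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *   ff
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace only CPUs 0 and 1
 *
 * The mask is parsed as a hex cpumask by cpumask_parse_user() above.
 */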
5151
5152 static int tracing_trace_options_show(struct seq_file *m, void *v)
5153 {
5154         struct tracer_opt *trace_opts;
5155         struct trace_array *tr = m->private;
5156         u32 tracer_flags;
5157         int i;
5158
5159         mutex_lock(&trace_types_lock);
5160         tracer_flags = tr->current_trace->flags->val;
5161         trace_opts = tr->current_trace->flags->opts;
5162
5163         for (i = 0; trace_options[i]; i++) {
5164                 if (tr->trace_flags & (1 << i))
5165                         seq_printf(m, "%s\n", trace_options[i]);
5166                 else
5167                         seq_printf(m, "no%s\n", trace_options[i]);
5168         }
5169
5170         for (i = 0; trace_opts[i].name; i++) {
5171                 if (tracer_flags & trace_opts[i].bit)
5172                         seq_printf(m, "%s\n", trace_opts[i].name);
5173                 else
5174                         seq_printf(m, "no%s\n", trace_opts[i].name);
5175         }
5176         mutex_unlock(&trace_types_lock);
5177
5178         return 0;
5179 }
5180
5181 static int __set_tracer_option(struct trace_array *tr,
5182                                struct tracer_flags *tracer_flags,
5183                                struct tracer_opt *opts, int neg)
5184 {
5185         struct tracer *trace = tracer_flags->trace;
5186         int ret;
5187
5188         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5189         if (ret)
5190                 return ret;
5191
5192         if (neg)
5193                 tracer_flags->val &= ~opts->bit;
5194         else
5195                 tracer_flags->val |= opts->bit;
5196         return 0;
5197 }
5198
5199 /* Try to assign a tracer specific option */
5200 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5201 {
5202         struct tracer *trace = tr->current_trace;
5203         struct tracer_flags *tracer_flags = trace->flags;
5204         struct tracer_opt *opts = NULL;
5205         int i;
5206
5207         for (i = 0; tracer_flags->opts[i].name; i++) {
5208                 opts = &tracer_flags->opts[i];
5209
5210                 if (strcmp(cmp, opts->name) == 0)
5211                         return __set_tracer_option(tr, trace->flags, opts, neg);
5212         }
5213
5214         return -EINVAL;
5215 }
5216
5217 /* Some tracers require overwrite to stay enabled */
5218 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5219 {
5220         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5221                 return -1;
5222
5223         return 0;
5224 }
5225
5226 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5227 {
5228         if ((mask == TRACE_ITER_RECORD_TGID) ||
5229             (mask == TRACE_ITER_RECORD_CMD))
5230                 lockdep_assert_held(&event_mutex);
5231
5232         /* do nothing if flag is already set */
5233         if (!!(tr->trace_flags & mask) == !!enabled)
5234                 return 0;
5235
5236         /* Give the tracer a chance to approve the change */
5237         if (tr->current_trace->flag_changed)
5238                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5239                         return -EINVAL;
5240
5241         if (enabled)
5242                 tr->trace_flags |= mask;
5243         else
5244                 tr->trace_flags &= ~mask;
5245
5246         if (mask == TRACE_ITER_RECORD_CMD)
5247                 trace_event_enable_cmd_record(enabled);
5248
5249         if (mask == TRACE_ITER_RECORD_TGID) {
5250                 if (!tgid_map)
5251                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5252                                            sizeof(*tgid_map),
5253                                            GFP_KERNEL);
5254                 if (!tgid_map) {
5255                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5256                         return -ENOMEM;
5257                 }
5258
5259                 trace_event_enable_tgid_record(enabled);
5260         }
5261
5262         if (mask == TRACE_ITER_EVENT_FORK)
5263                 trace_event_follow_fork(tr, enabled);
5264
5265         if (mask == TRACE_ITER_FUNC_FORK)
5266                 ftrace_pid_follow_fork(tr, enabled);
5267
5268         if (mask == TRACE_ITER_OVERWRITE) {
5269                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5270 #ifdef CONFIG_TRACER_MAX_TRACE
5271                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5272 #endif
5273         }
5274
5275         if (mask == TRACE_ITER_PRINTK) {
5276                 trace_printk_start_stop_comm(enabled);
5277                 trace_printk_control(enabled);
5278         }
5279
5280         return 0;
5281 }
5282
5283 int trace_set_options(struct trace_array *tr, char *option)
5284 {
5285         char *cmp;
5286         int neg = 0;
5287         int ret;
5288         size_t orig_len = strlen(option);
5289         int len;
5290
5291         cmp = strstrip(option);
5292
5293         len = str_has_prefix(cmp, "no");
5294         if (len)
5295                 neg = 1;
5296
5297         cmp += len;
5298
5299         mutex_lock(&event_mutex);
5300         mutex_lock(&trace_types_lock);
5301
5302         ret = match_string(trace_options, -1, cmp);
5303         /* Not a core trace option; try the tracer-specific options */
5304         if (ret < 0)
5305                 ret = set_tracer_option(tr, cmp, neg);
5306         else
5307                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5308
5309         mutex_unlock(&trace_types_lock);
5310         mutex_unlock(&event_mutex);
5311
5312         /*
5313          * If the first trailing whitespace is replaced with '\0' by strstrip,
5314          * turn it back into a space.
5315          */
5316         if (orig_len > strlen(option))
5317                 option[strlen(option)] = ' ';
5318
5319         return ret;
5320 }
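
/*
 * Usage sketch for the trace_options file (illustrative paths, tracefs
 * assumed at /sys/kernel/tracing):
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options     # set a flag
 *   # echo nosym-offset > /sys/kernel/tracing/trace_options   # "no" prefix clears it
 *
 * Names that are not core trace options fall through to the current
 * tracer's private flags via set_tracer_option().
 */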
5321
5322 static void __init apply_trace_boot_options(void)
5323 {
5324         char *buf = trace_boot_options_buf;
5325         char *option;
5326
5327         while (true) {
5328                 option = strsep(&buf, ",");
5329
5330                 if (!option)
5331                         break;
5332
5333                 if (*option)
5334                         trace_set_options(&global_trace, option);
5335
5336                 /* Put back the comma to allow this to be called again */
5337                 if (buf)
5338                         *(buf - 1) = ',';
5339         }
5340 }
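
/*
 * The buffer walked above is filled from the kernel command line, e.g.
 * (illustrative): trace_options=sym-offset,noprint-parent
 * strsep() cuts the buffer on each ',' and the comma is put back so the
 * same buffer can be parsed again on a later call.
 */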
5341
5342 static ssize_t
5343 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5344                         size_t cnt, loff_t *ppos)
5345 {
5346         struct seq_file *m = filp->private_data;
5347         struct trace_array *tr = m->private;
5348         char buf[64];
5349         int ret;
5350
5351         if (cnt >= sizeof(buf))
5352                 return -EINVAL;
5353
5354         if (copy_from_user(buf, ubuf, cnt))
5355                 return -EFAULT;
5356
5357         buf[cnt] = 0;
5358
5359         ret = trace_set_options(tr, buf);
5360         if (ret < 0)
5361                 return ret;
5362
5363         *ppos += cnt;
5364
5365         return cnt;
5366 }
5367
5368 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5369 {
5370         struct trace_array *tr = inode->i_private;
5371         int ret;
5372
5373         ret = tracing_check_open_get_tr(tr);
5374         if (ret)
5375                 return ret;
5376
5377         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5378         if (ret < 0)
5379                 trace_array_put(tr);
5380
5381         return ret;
5382 }
5383
5384 static const struct file_operations tracing_iter_fops = {
5385         .open           = tracing_trace_options_open,
5386         .read           = seq_read,
5387         .llseek         = seq_lseek,
5388         .release        = tracing_single_release_tr,
5389         .write          = tracing_trace_options_write,
5390 };
5391
5392 static const char readme_msg[] =
5393         "tracing mini-HOWTO:\n\n"
5394         "# echo 0 > tracing_on : quick way to disable tracing\n"
5395         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5396         " Important files:\n"
5397         "  trace\t\t\t- The static contents of the buffer\n"
5398         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5399         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5400         "  current_tracer\t- function and latency tracers\n"
5401         "  available_tracers\t- list of configured tracers for current_tracer\n"
5402         "  error_log\t- error log for failed commands (that support it)\n"
5403         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5404         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5405         "  trace_clock\t\t- Change the clock used to order events\n"
5406         "       local:   Per cpu clock but may not be synced across CPUs\n"
5407         "      global:   Synced across CPUs but slows tracing down.\n"
5408         "     counter:   Not a clock, but just an increment\n"
5409         "      uptime:   Jiffy counter from time of boot\n"
5410         "        perf:   Same clock that perf events use\n"
5411 #ifdef CONFIG_X86_64
5412         "     x86-tsc:   TSC cycle counter\n"
5413 #endif
5414         "\n  timestamp_mode\t- View the mode used to timestamp events\n"
5415         "       delta:   Delta difference against a buffer-wide timestamp\n"
5416         "    absolute:   Absolute (standalone) timestamp\n"
5417         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5418         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
5419         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5420         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5421         "\t\t\t  Remove sub-buffer with rmdir\n"
5422         "  trace_options\t\t- Set format or modify how tracing happens\n"
5423         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5424         "\t\t\t  option name\n"
5425         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5426 #ifdef CONFIG_DYNAMIC_FTRACE
5427         "\n  available_filter_functions - list of functions that can be filtered on\n"
5428         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5429         "\t\t\t  functions\n"
5430         "\t     accepts: func_full_name or glob-matching-pattern\n"
5431         "\t     modules: Can select a group via module\n"
5432         "\t      Format: :mod:<module-name>\n"
5433         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5434         "\t    triggers: a command to perform when function is hit\n"
5435         "\t      Format: <function>:<trigger>[:count]\n"
5436         "\t     trigger: traceon, traceoff\n"
5437         "\t\t      enable_event:<system>:<event>\n"
5438         "\t\t      disable_event:<system>:<event>\n"
5439 #ifdef CONFIG_STACKTRACE
5440         "\t\t      stacktrace\n"
5441 #endif
5442 #ifdef CONFIG_TRACER_SNAPSHOT
5443         "\t\t      snapshot\n"
5444 #endif
5445         "\t\t      dump\n"
5446         "\t\t      cpudump\n"
5447         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5448         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5449         "\t     The first one will disable tracing every time do_fault is hit\n"
5450         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5451         "\t       The first time do_trap is hit and it disables tracing, the\n"
5452         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5453         "\t       the counter will not decrement. It only decrements when the\n"
5454         "\t       trigger did work\n"
5455         "\t     To remove a trigger without a count:\n"
5456         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5457         "\t     To remove a trigger with a count:\n"
5458         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5459         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5460         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5461         "\t    modules: Can select a group via module command :mod:\n"
5462         "\t    Does not accept triggers\n"
5463 #endif /* CONFIG_DYNAMIC_FTRACE */
5464 #ifdef CONFIG_FUNCTION_TRACER
5465         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5466         "\t\t    (function)\n"
5467         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5468         "\t\t    (function)\n"
5469 #endif
5470 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5471         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5472         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5473         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5474 #endif
5475 #ifdef CONFIG_TRACER_SNAPSHOT
5476         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5477         "\t\t\t  snapshot buffer. Read the contents for more\n"
5478         "\t\t\t  information\n"
5479 #endif
5480 #ifdef CONFIG_STACK_TRACER
5481         "  stack_trace\t\t- Shows the max stack trace when active\n"
5482         "  stack_max_size\t- Shows current max stack size that was traced\n"
5483         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5484         "\t\t\t  new trace)\n"
5485 #ifdef CONFIG_DYNAMIC_FTRACE
5486         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5487         "\t\t\t  traces\n"
5488 #endif
5489 #endif /* CONFIG_STACK_TRACER */
5490 #ifdef CONFIG_DYNAMIC_EVENTS
5491         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5492         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5493 #endif
5494 #ifdef CONFIG_KPROBE_EVENTS
5495         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5496         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5497 #endif
5498 #ifdef CONFIG_UPROBE_EVENTS
5499         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5500         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5501 #endif
5502 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5503         "\t  accepts: event-definitions (one definition per line)\n"
5504         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5505         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5506 #ifdef CONFIG_HIST_TRIGGERS
5507         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5508 #endif
5509         "\t           -:[<group>/]<event>\n"
5510 #ifdef CONFIG_KPROBE_EVENTS
5511         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5512         "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5513 #endif
5514 #ifdef CONFIG_UPROBE_EVENTS
5515         "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5516 #endif
5517         "\t     args: <name>=fetcharg[:type]\n"
5518         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5519 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5520         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5521 #else
5522         "\t           $stack<index>, $stack, $retval, $comm,\n"
5523 #endif
5524         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5525         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5526         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5527         "\t           <type>\\[<array-size>\\]\n"
5528 #ifdef CONFIG_HIST_TRIGGERS
5529         "\t    field: <stype> <name>;\n"
5530         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5531         "\t           [unsigned] char/int/long\n"
5532 #endif
5533 #endif
5534         "  events/\t\t- Directory containing all trace event subsystems:\n"
5535         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5536         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5537         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5538         "\t\t\t  events\n"
5539         "      filter\t\t- If set, only events passing filter are traced\n"
5540         "  events/<system>/<event>/\t- Directory containing control files for\n"
5541         "\t\t\t  <event>:\n"
5542         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5543         "      filter\t\t- If set, only events passing filter are traced\n"
5544         "      trigger\t\t- If set, a command to perform when event is hit\n"
5545         "\t    Format: <trigger>[:count][if <filter>]\n"
5546         "\t   trigger: traceon, traceoff\n"
5547         "\t            enable_event:<system>:<event>\n"
5548         "\t            disable_event:<system>:<event>\n"
5549 #ifdef CONFIG_HIST_TRIGGERS
5550         "\t            enable_hist:<system>:<event>\n"
5551         "\t            disable_hist:<system>:<event>\n"
5552 #endif
5553 #ifdef CONFIG_STACKTRACE
5554         "\t\t    stacktrace\n"
5555 #endif
5556 #ifdef CONFIG_TRACER_SNAPSHOT
5557         "\t\t    snapshot\n"
5558 #endif
5559 #ifdef CONFIG_HIST_TRIGGERS
5560         "\t\t    hist (see below)\n"
5561 #endif
5562         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5563         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5564         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5565         "\t                  events/block/block_unplug/trigger\n"
5566         "\t   The first disables tracing every time block_unplug is hit.\n"
5567         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5568         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5569         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5570         "\t   Like function triggers, the counter is only decremented if it\n"
5571         "\t    enabled or disabled tracing.\n"
5572         "\t   To remove a trigger without a count:\n"
5573         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5574         "\t   To remove a trigger with a count:\n"
5575         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5576         "\t   Filters can be ignored when removing a trigger.\n"
5577 #ifdef CONFIG_HIST_TRIGGERS
5578         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5579         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5580         "\t            [:values=<field1[,field2,...]>]\n"
5581         "\t            [:sort=<field1[,field2,...]>]\n"
5582         "\t            [:size=#entries]\n"
5583         "\t            [:pause][:continue][:clear]\n"
5584         "\t            [:name=histname1]\n"
5585         "\t            [:<handler>.<action>]\n"
5586         "\t            [if <filter>]\n\n"
5587         "\t    When a matching event is hit, an entry is added to a hash\n"
5588         "\t    table using the key(s) and value(s) named, and the value of a\n"
5589         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5590         "\t    correspond to fields in the event's format description.  Keys\n"
5591         "\t    can be any field, or the special string 'stacktrace'.\n"
5592         "\t    Compound keys consisting of up to two fields can be specified\n"
5593         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5594         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5595         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5596         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5597         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5598         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5599         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5600         "\t    its histogram data will be shared with other triggers of the\n"
5601         "\t    same name, and trigger hits will update this common data.\n\n"
5602         "\t    Reading the 'hist' file for the event will dump the hash\n"
5603         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5604         "\t    triggers attached to an event, there will be a table for each\n"
5605         "\t    trigger in the output.  The table displayed for a named\n"
5606         "\t    trigger will be the same as any other instance having the\n"
5607         "\t    same name.  The default format used to display a given field\n"
5608         "\t    can be modified by appending any of the following modifiers\n"
5609         "\t    to the field name, as applicable:\n\n"
5610         "\t            .hex        display a number as a hex value\n"
5611         "\t            .sym        display an address as a symbol\n"
5612         "\t            .sym-offset display an address as a symbol and offset\n"
5613         "\t            .execname   display a common_pid as a program name\n"
5614         "\t            .syscall    display a syscall id as a syscall name\n"
5615         "\t            .log2       display log2 value rather than raw number\n"
5616         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5617         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5618         "\t    trigger or to start a hist trigger but not log any events\n"
5619         "\t    until told to do so.  'continue' can be used to start or\n"
5620         "\t    restart a paused hist trigger.\n\n"
5621         "\t    The 'clear' parameter will clear the contents of a running\n"
5622         "\t    hist trigger and leave its current paused/active state\n"
5623         "\t    unchanged.\n\n"
5624         "\t    The enable_hist and disable_hist triggers can be used to\n"
5625         "\t    have one event conditionally start and stop another event's\n"
5626         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5627         "\t    the enable_event and disable_event triggers.\n\n"
5628         "\t    Hist trigger handlers and actions are executed whenever\n"
5629         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5630         "\t        <handler>.<action>\n\n"
5631         "\t    The available handlers are:\n\n"
5632         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5633         "\t        onmax(var)               - invoke if var exceeds current max\n"
5634         "\t        onchange(var)            - invoke action if var changes\n\n"
5635         "\t    The available actions are:\n\n"
5636         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5637         "\t        save(field,...)                      - save current event fields\n"
5638 #ifdef CONFIG_TRACER_SNAPSHOT
5639         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5640 #endif
5641 #ifdef CONFIG_SYNTH_EVENTS
5642         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5643         "\t  Write into this file to define/undefine new synthetic events.\n"
5644         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5645 #endif
5646 #endif
5647 ;
5648
5649 static ssize_t
5650 tracing_readme_read(struct file *filp, char __user *ubuf,
5651                        size_t cnt, loff_t *ppos)
5652 {
5653         return simple_read_from_buffer(ubuf, cnt, ppos,
5654                                         readme_msg, strlen(readme_msg));
5655 }
5656
5657 static const struct file_operations tracing_readme_fops = {
5658         .open           = tracing_open_generic,
5659         .read           = tracing_readme_read,
5660         .llseek         = generic_file_llseek,
5661 };
5662
5663 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5664 {
5665         int *ptr = v;
5666
5667         if (*pos || m->count)
5668                 ptr++;
5669
5670         (*pos)++;
5671
5672         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5673                 if (trace_find_tgid(*ptr))
5674                         return ptr;
5675         }
5676
5677         return NULL;
5678 }
5679
5680 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5681 {
5682         void *v;
5683         loff_t l = 0;
5684
5685         if (!tgid_map)
5686                 return NULL;
5687
5688         v = &tgid_map[0];
5689         while (l <= *pos) {
5690                 v = saved_tgids_next(m, v, &l);
5691                 if (!v)
5692                         return NULL;
5693         }
5694
5695         return v;
5696 }
5697
5698 static void saved_tgids_stop(struct seq_file *m, void *v)
5699 {
5700 }
5701
5702 static int saved_tgids_show(struct seq_file *m, void *v)
5703 {
5704         int pid = (int *)v - tgid_map;
5705
5706         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5707         return 0;
5708 }
5709
5710 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5711         .start          = saved_tgids_start,
5712         .stop           = saved_tgids_stop,
5713         .next           = saved_tgids_next,
5714         .show           = saved_tgids_show,
5715 };
5716
5717 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5718 {
5719         int ret;
5720
5721         ret = tracing_check_open_get_tr(NULL);
5722         if (ret)
5723                 return ret;
5724
5725         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5726 }
5727
5728
5729 static const struct file_operations tracing_saved_tgids_fops = {
5730         .open           = tracing_saved_tgids_open,
5731         .read           = seq_read,
5732         .llseek         = seq_lseek,
5733         .release        = seq_release,
5734 };
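
/*
 * Example output of the saved_tgids file (the PID/TGID values are purely
 * illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1023 1023
 *   1045 1024
 *
 * Each line is "<pid> <tgid>" for every pid with a recorded tgid mapping.
 */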
5735
5736 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5737 {
5738         unsigned int *ptr = v;
5739
5740         if (*pos || m->count)
5741                 ptr++;
5742
5743         (*pos)++;
5744
5745         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5746              ptr++) {
5747                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5748                         continue;
5749
5750                 return ptr;
5751         }
5752
5753         return NULL;
5754 }
5755
5756 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5757 {
5758         void *v;
5759         loff_t l = 0;
5760
5761         preempt_disable();
5762         arch_spin_lock(&trace_cmdline_lock);
5763
5764         v = &savedcmd->map_cmdline_to_pid[0];
5765         while (l <= *pos) {
5766                 v = saved_cmdlines_next(m, v, &l);
5767                 if (!v)
5768                         return NULL;
5769         }
5770
5771         return v;
5772 }
5773
5774 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5775 {
5776         arch_spin_unlock(&trace_cmdline_lock);
5777         preempt_enable();
5778 }
5779
5780 static int saved_cmdlines_show(struct seq_file *m, void *v)
5781 {
5782         char buf[TASK_COMM_LEN];
5783         unsigned int *pid = v;
5784
5785         __trace_find_cmdline(*pid, buf);
5786         seq_printf(m, "%d %s\n", *pid, buf);
5787         return 0;
5788 }
5789
5790 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5791         .start          = saved_cmdlines_start,
5792         .next           = saved_cmdlines_next,
5793         .stop           = saved_cmdlines_stop,
5794         .show           = saved_cmdlines_show,
5795 };
5796
5797 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5798 {
5799         int ret;
5800
5801         ret = tracing_check_open_get_tr(NULL);
5802         if (ret)
5803                 return ret;
5804
5805         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5806 }
5807
5808 static const struct file_operations tracing_saved_cmdlines_fops = {
5809         .open           = tracing_saved_cmdlines_open,
5810         .read           = seq_read,
5811         .llseek         = seq_lseek,
5812         .release        = seq_release,
5813 };
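
/*
 * Example output of the saved_cmdlines file (values are illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1023 systemd
 *   2097 bash
 *
 * Each line is "<pid> <comm>" taken from the comm cache that is filled
 * while tracing is active.
 */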
5814
5815 static ssize_t
5816 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5817                                  size_t cnt, loff_t *ppos)
5818 {
5819         char buf[64];
5820         int r;
5821
5822         arch_spin_lock(&trace_cmdline_lock);
5823         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5824         arch_spin_unlock(&trace_cmdline_lock);
5825
5826         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5827 }
5828
5829 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5830 {
5831         kfree(s->saved_cmdlines);
5832         kfree(s->map_cmdline_to_pid);
5833         kfree(s);
5834 }
5835
5836 static int tracing_resize_saved_cmdlines(unsigned int val)
5837 {
5838         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5839
5840         s = kmalloc(sizeof(*s), GFP_KERNEL);
5841         if (!s)
5842                 return -ENOMEM;
5843
5844         if (allocate_cmdlines_buffer(val, s) < 0) {
5845                 kfree(s);
5846                 return -ENOMEM;
5847         }
5848
5849         arch_spin_lock(&trace_cmdline_lock);
5850         savedcmd_temp = savedcmd;
5851         savedcmd = s;
5852         arch_spin_unlock(&trace_cmdline_lock);
5853         free_saved_cmdlines_buffer(savedcmd_temp);
5854
5855         return 0;
5856 }
5857
5858 static ssize_t
5859 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5860                                   size_t cnt, loff_t *ppos)
5861 {
5862         unsigned long val;
5863         int ret;
5864
5865         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5866         if (ret)
5867                 return ret;
5868
5869         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5870         if (!val || val > PID_MAX_DEFAULT)
5871                 return -EINVAL;
5872
5873         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5874         if (ret < 0)
5875                 return ret;
5876
5877         *ppos += cnt;
5878
5879         return cnt;
5880 }
5881
5882 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5883         .open           = tracing_open_generic,
5884         .read           = tracing_saved_cmdlines_size_read,
5885         .write          = tracing_saved_cmdlines_size_write,
5886 };
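
/*
 * Usage sketch for saved_cmdlines_size (the value read back is
 * illustrative; tracefs assumed at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Writes between 1 and PID_MAX_DEFAULT reallocate the comm cache via
 * tracing_resize_saved_cmdlines() above.
 */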
5887
5888 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5889 static union trace_eval_map_item *
5890 update_eval_map(union trace_eval_map_item *ptr)
5891 {
5892         if (!ptr->map.eval_string) {
5893                 if (ptr->tail.next) {
5894                         ptr = ptr->tail.next;
5895                         /* Set ptr to the next real item (skip head) */
5896                         ptr++;
5897                 } else
5898                         return NULL;
5899         }
5900         return ptr;
5901 }
5902
5903 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5904 {
5905         union trace_eval_map_item *ptr = v;
5906
5907         /*
5908          * Paranoid! If ptr points to end, we don't want to increment past it.
5909          * This really should never happen.
5910          */
5911         (*pos)++;
5912         ptr = update_eval_map(ptr);
5913         if (WARN_ON_ONCE(!ptr))
5914                 return NULL;
5915
5916         ptr++;
5917         ptr = update_eval_map(ptr);
5918
5919         return ptr;
5920 }
5921
5922 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5923 {
5924         union trace_eval_map_item *v;
5925         loff_t l = 0;
5926
5927         mutex_lock(&trace_eval_mutex);
5928
5929         v = trace_eval_maps;
5930         if (v)
5931                 v++;
5932
5933         while (v && l < *pos) {
5934                 v = eval_map_next(m, v, &l);
5935         }
5936
5937         return v;
5938 }
5939
5940 static void eval_map_stop(struct seq_file *m, void *v)
5941 {
5942         mutex_unlock(&trace_eval_mutex);
5943 }
5944
5945 static int eval_map_show(struct seq_file *m, void *v)
5946 {
5947         union trace_eval_map_item *ptr = v;
5948
5949         seq_printf(m, "%s %ld (%s)\n",
5950                    ptr->map.eval_string, ptr->map.eval_value,
5951                    ptr->map.system);
5952
5953         return 0;
5954 }
5955
5956 static const struct seq_operations tracing_eval_map_seq_ops = {
5957         .start          = eval_map_start,
5958         .next           = eval_map_next,
5959         .stop           = eval_map_stop,
5960         .show           = eval_map_show,
5961 };
5962
5963 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5964 {
5965         int ret;
5966
5967         ret = tracing_check_open_get_tr(NULL);
5968         if (ret)
5969                 return ret;
5970
5971         return seq_open(filp, &tracing_eval_map_seq_ops);
5972 }
5973
5974 static const struct file_operations tracing_eval_map_fops = {
5975         .open           = tracing_eval_map_open,
5976         .read           = seq_read,
5977         .llseek         = seq_lseek,
5978         .release        = seq_release,
5979 };
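
/*
 * Example line from the eval_map file, per the "%s %ld (%s)" format used
 * by eval_map_show() above (the name, value and system are illustrative):
 *
 *   ZONE_NORMAL 2 (vmscan)
 *
 * i.e. "<eval string> <value> (<event system>)".
 */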
5980
5981 static inline union trace_eval_map_item *
5982 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5983 {
5984         /* Return tail of array given the head */
5985         return ptr + ptr->head.length + 1;
5986 }
5987
5988 static void
5989 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5990                            int len)
5991 {
5992         struct trace_eval_map **stop;
5993         struct trace_eval_map **map;
5994         union trace_eval_map_item *map_array;
5995         union trace_eval_map_item *ptr;
5996
5997         stop = start + len;
5998
5999         /*
6000          * The trace_eval_maps contains the map plus a head and tail item,
6001          * where the head holds the module and length of array, and the
6002          * tail holds a pointer to the next list.
6003          */
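        /*
         * Layout sketch of one such allocation (len + 2 items):
         *
         *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
         *
         * The tail item is zeroed by the memset() at the end of this
         * function; its ->tail.next is filled in later if another module
         * appends its own array.
         */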
6004         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6005         if (!map_array) {
6006                 pr_warn("Unable to allocate trace eval mapping\n");
6007                 return;
6008         }
6009
6010         mutex_lock(&trace_eval_mutex);
6011
6012         if (!trace_eval_maps)
6013                 trace_eval_maps = map_array;
6014         else {
6015                 ptr = trace_eval_maps;
6016                 for (;;) {
6017                         ptr = trace_eval_jmp_to_tail(ptr);
6018                         if (!ptr->tail.next)
6019                                 break;
6020                         ptr = ptr->tail.next;
6021
6022                 }
6023                 ptr->tail.next = map_array;
6024         }
6025         map_array->head.mod = mod;
6026         map_array->head.length = len;
6027         map_array++;
6028
6029         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6030                 map_array->map = **map;
6031                 map_array++;
6032         }
6033         memset(map_array, 0, sizeof(*map_array));
6034
6035         mutex_unlock(&trace_eval_mutex);
6036 }
6037
6038 static void trace_create_eval_file(struct dentry *d_tracer)
6039 {
6040         trace_create_file("eval_map", 0444, d_tracer,
6041                           NULL, &tracing_eval_map_fops);
6042 }
6043
6044 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6045 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6046 static inline void trace_insert_eval_map_file(struct module *mod,
6047                               struct trace_eval_map **start, int len) { }
6048 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6049
6050 static void trace_insert_eval_map(struct module *mod,
6051                                   struct trace_eval_map **start, int len)
6052 {
6053         struct trace_eval_map **map;
6054
6055         if (len <= 0)
6056                 return;
6057
6058         map = start;
6059
6060         trace_event_eval_update(map, len);
6061
6062         trace_insert_eval_map_file(mod, start, len);
6063 }
6064
6065 static ssize_t
6066 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6067                        size_t cnt, loff_t *ppos)
6068 {
6069         struct trace_array *tr = filp->private_data;
6070         char buf[MAX_TRACER_SIZE+2];
6071         int r;
6072
6073         mutex_lock(&trace_types_lock);
6074         r = sprintf(buf, "%s\n", tr->current_trace->name);
6075         mutex_unlock(&trace_types_lock);
6076
6077         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6078 }
6079
6080 int tracer_init(struct tracer *t, struct trace_array *tr)
6081 {
6082         tracing_reset_online_cpus(&tr->array_buffer);
6083         return t->init(tr);
6084 }
6085
6086 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6087 {
6088         int cpu;
6089
6090         for_each_tracing_cpu(cpu)
6091                 per_cpu_ptr(buf->data, cpu)->entries = val;
6092 }
6093
6094 #ifdef CONFIG_TRACER_MAX_TRACE
6095 /* resize @trace_buf's buffer to the number of entries in @size_buf */
6096 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6097                                         struct array_buffer *size_buf, int cpu_id)
6098 {
6099         int cpu, ret = 0;
6100
6101         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6102                 for_each_tracing_cpu(cpu) {
6103                         ret = ring_buffer_resize(trace_buf->buffer,
6104                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6105                         if (ret < 0)
6106                                 break;
6107                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6108                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6109                 }
6110         } else {
6111                 ret = ring_buffer_resize(trace_buf->buffer,
6112                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6113                 if (ret == 0)
6114                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6115                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6116         }
6117
6118         return ret;
6119 }
6120 #endif /* CONFIG_TRACER_MAX_TRACE */
6121
6122 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6123                                         unsigned long size, int cpu)
6124 {
6125         int ret;
6126
6127         /*
6128          * If kernel or user changes the size of the ring buffer
6129          * we use the size that was given, and we can forget about
6130          * expanding it later.
6131          */
6132         ring_buffer_expanded = true;
6133
6134         /* May be called before buffers are initialized */
6135         if (!tr->array_buffer.buffer)
6136                 return 0;
6137
6138         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6139         if (ret < 0)
6140                 return ret;
6141
6142 #ifdef CONFIG_TRACER_MAX_TRACE
6143         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6144             !tr->current_trace->use_max_tr)
6145                 goto out;
6146
6147         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6148         if (ret < 0) {
6149                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6150                                                      &tr->array_buffer, cpu);
6151                 if (r < 0) {
6152                         /*
6153                          * AARGH! We are left with a max buffer of a
6154                          * different size!
6155                          * The max buffer is our "snapshot" buffer.
6156                          * When a tracer needs a snapshot (one of the
6157                          * latency tracers), it swaps the max buffer
6158                          * with the saved snapshot. We succeeded in
6159                          * updating the size of the main buffer, but failed
6160                          * to update the size of the max buffer. Then, when
6161                          * we tried to reset the main buffer to its original
6162                          * size, that failed too. This is very unlikely to
6163                          * happen, but if it does, warn and kill all
6164                          * tracing.
6165                          */
6166                         WARN_ON(1);
6167                         tracing_disabled = 1;
6168                 }
6169                 return ret;
6170         }
6171
6172         if (cpu == RING_BUFFER_ALL_CPUS)
6173                 set_buffer_entries(&tr->max_buffer, size);
6174         else
6175                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6176
6177  out:
6178 #endif /* CONFIG_TRACER_MAX_TRACE */
6179
6180         if (cpu == RING_BUFFER_ALL_CPUS)
6181                 set_buffer_entries(&tr->array_buffer, size);
6182         else
6183                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6184
6185         return ret;
6186 }
6187
6188 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6189                                   unsigned long size, int cpu_id)
6190 {
6191         int ret;
6192
6193         mutex_lock(&trace_types_lock);
6194
6195         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6196                 /* make sure this cpu is enabled in the mask */
6197                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6198                         ret = -EINVAL;
6199                         goto out;
6200                 }
6201         }
6202
6203         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6204         if (ret < 0)
6205                 ret = -ENOMEM;
6206
6207 out:
6208         mutex_unlock(&trace_types_lock);
6209
6210         return ret;
6211 }
6212
6213
6214 /**
6215  * tracing_update_buffers - used by tracing facility to expand ring buffers
6216  *
6217  * To save memory when tracing is never used on a system that has it
6218  * configured in, the ring buffers are set to a minimum size. But once
6219  * a user starts to use the tracing facility, they need to grow
6220  * to their default size.
6221  *
6222  * This function is to be called when a tracer is about to be used.
6223  */
6224 int tracing_update_buffers(void)
6225 {
6226         int ret = 0;
6227
6228         mutex_lock(&trace_types_lock);
6229         if (!ring_buffer_expanded)
6230                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6231                                                 RING_BUFFER_ALL_CPUS);
6232         mutex_unlock(&trace_types_lock);
6233
6234         return ret;
6235 }
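
/*
 * A usage sketch for the expansion described above (it assumes the
 * conventional tracefs mount point and file names, which are not
 * defined in this file): before any tracer or event is enabled,
 * buffer_size_kb reports the minimal boot-time size together with the
 * size the buffer will expand to, using the "%lu (expanded: %lu)"
 * format from tracing_entries_read() further below:
 *
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   <boot-time size> (expanded: <trace_buf_size in KB>)
 *
 * Once __tracing_resize_ring_buffer() has run (either through this
 * function or through tracing_set_tracer() above), only the actual
 * per-CPU size is printed.
 */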
6236
6237 struct trace_option_dentry;
6238
6239 static void
6240 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6241
6242 /*
6243  * Used to clear out the tracer before deletion of an instance.
6244  * Must have trace_types_lock held.
6245  */
6246 static void tracing_set_nop(struct trace_array *tr)
6247 {
6248         if (tr->current_trace == &nop_trace)
6249                 return;
6250
6251         tr->current_trace->enabled--;
6252
6253         if (tr->current_trace->reset)
6254                 tr->current_trace->reset(tr);
6255
6256         tr->current_trace = &nop_trace;
6257 }
6258
6259 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6260 {
6261         /* Only enable if the directory has been created already. */
6262         if (!tr->dir)
6263                 return;
6264
6265         create_trace_option_files(tr, t);
6266 }
6267
6268 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6269 {
6270         struct tracer *t;
6271 #ifdef CONFIG_TRACER_MAX_TRACE
6272         bool had_max_tr;
6273 #endif
6274         int ret = 0;
6275
6276         mutex_lock(&trace_types_lock);
6277
6278         if (!ring_buffer_expanded) {
6279                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6280                                                 RING_BUFFER_ALL_CPUS);
6281                 if (ret < 0)
6282                         goto out;
6283                 ret = 0;
6284         }
6285
6286         for (t = trace_types; t; t = t->next) {
6287                 if (strcmp(t->name, buf) == 0)
6288                         break;
6289         }
6290         if (!t) {
6291                 ret = -EINVAL;
6292                 goto out;
6293         }
6294         if (t == tr->current_trace)
6295                 goto out;
6296
6297 #ifdef CONFIG_TRACER_SNAPSHOT
6298         if (t->use_max_tr) {
6299                 arch_spin_lock(&tr->max_lock);
6300                 if (tr->cond_snapshot)
6301                         ret = -EBUSY;
6302                 arch_spin_unlock(&tr->max_lock);
6303                 if (ret)
6304                         goto out;
6305         }
6306 #endif
6307         /* Some tracers won't work when set on the kernel command line */
6308         if (system_state < SYSTEM_RUNNING && t->noboot) {
6309                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6310                         t->name);
6311                 goto out;
6312         }
6313
6314         /* Some tracers are only allowed for the top level buffer */
6315         if (!trace_ok_for_array(t, tr)) {
6316                 ret = -EINVAL;
6317                 goto out;
6318         }
6319
6320         /* If trace pipe files are being read, we can't change the tracer */
6321         if (tr->trace_ref) {
6322                 ret = -EBUSY;
6323                 goto out;
6324         }
6325
6326         trace_branch_disable();
6327
6328         tr->current_trace->enabled--;
6329
6330         if (tr->current_trace->reset)
6331                 tr->current_trace->reset(tr);
6332
6333         /* Current trace needs to be nop_trace before synchronize_rcu */
6334         tr->current_trace = &nop_trace;
6335
6336 #ifdef CONFIG_TRACER_MAX_TRACE
6337         had_max_tr = tr->allocated_snapshot;
6338
6339         if (had_max_tr && !t->use_max_tr) {
6340                 /*
6341                  * We need to make sure that update_max_tr sees that
6342                  * current_trace changed to nop_trace to keep it from
6343                  * swapping the buffers after we resize it.
6344                  * update_max_tr is called with interrupts disabled,
6345                  * so a synchronize_rcu() is sufficient.
6346                  */
6347                 synchronize_rcu();
6348                 free_snapshot(tr);
6349         }
6350 #endif
6351
6352 #ifdef CONFIG_TRACER_MAX_TRACE
6353         if (t->use_max_tr && !had_max_tr) {
6354                 ret = tracing_alloc_snapshot_instance(tr);
6355                 if (ret < 0)
6356                         goto out;
6357         }
6358 #endif
6359
6360         if (t->init) {
6361                 ret = tracer_init(t, tr);
6362                 if (ret)
6363                         goto out;
6364         }
6365
6366         tr->current_trace = t;
6367         tr->current_trace->enabled++;
6368         trace_branch_enable(tr);
6369  out:
6370         mutex_unlock(&trace_types_lock);
6371
6372         return ret;
6373 }
6374
6375 static ssize_t
6376 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6377                         size_t cnt, loff_t *ppos)
6378 {
6379         struct trace_array *tr = filp->private_data;
6380         char buf[MAX_TRACER_SIZE+1];
6381         int i;
6382         size_t ret;
6383         int err;
6384
6385         ret = cnt;
6386
6387         if (cnt > MAX_TRACER_SIZE)
6388                 cnt = MAX_TRACER_SIZE;
6389
6390         if (copy_from_user(buf, ubuf, cnt))
6391                 return -EFAULT;
6392
6393         buf[cnt] = 0;
6394
6395         /* strip trailing whitespace. */
6396         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6397                 buf[i] = 0;
6398
6399         err = tracing_set_tracer(tr, buf);
6400         if (err)
6401                 return err;
6402
6403         *ppos += ret;
6404
6405         return ret;
6406 }
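
/*
 * A usage sketch for the write handler above (the current_tracer file
 * name and the mount point are the conventional ones and are assumed
 * here, not defined in this file):
 *
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * The trailing newline added by echo is stripped before
 * tracing_set_tracer() is called.  Unknown tracer names fail with
 * -EINVAL, and switching tracers fails with -EBUSY while trace_pipe
 * readers hold a reference on the trace array.
 */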
6407
6408 static ssize_t
6409 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6410                    size_t cnt, loff_t *ppos)
6411 {
6412         char buf[64];
6413         int r;
6414
6415         r = snprintf(buf, sizeof(buf), "%ld\n",
6416                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6417         if (r > sizeof(buf))
6418                 r = sizeof(buf);
6419         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6420 }
6421
6422 static ssize_t
6423 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6424                     size_t cnt, loff_t *ppos)
6425 {
6426         unsigned long val;
6427         int ret;
6428
6429         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6430         if (ret)
6431                 return ret;
6432
6433         *ptr = val * 1000;
6434
6435         return cnt;
6436 }
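
/*
 * Worked example for the conversion above: the files backed by these
 * helpers (tracing_thresh and, conventionally, tracing_max_latency)
 * are read and written in microseconds, while the kernel stores
 * nanoseconds.  Writing "100" stores 100 * 1000 = 100000 ns, and
 * tracing_nsecs_read() converts back with nsecs_to_usecs() on read.
 */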
6437
6438 static ssize_t
6439 tracing_thresh_read(struct file *filp, char __user *ubuf,
6440                     size_t cnt, loff_t *ppos)
6441 {
6442         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6443 }
6444
6445 static ssize_t
6446 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6447                      size_t cnt, loff_t *ppos)
6448 {
6449         struct trace_array *tr = filp->private_data;
6450         int ret;
6451
6452         mutex_lock(&trace_types_lock);
6453         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6454         if (ret < 0)
6455                 goto out;
6456
6457         if (tr->current_trace->update_thresh) {
6458                 ret = tr->current_trace->update_thresh(tr);
6459                 if (ret < 0)
6460                         goto out;
6461         }
6462
6463         ret = cnt;
6464 out:
6465         mutex_unlock(&trace_types_lock);
6466
6467         return ret;
6468 }
6469
6470 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6471
6472 static ssize_t
6473 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6474                      size_t cnt, loff_t *ppos)
6475 {
6476         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6477 }
6478
6479 static ssize_t
6480 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6481                       size_t cnt, loff_t *ppos)
6482 {
6483         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6484 }
6485
6486 #endif
6487
6488 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6489 {
6490         struct trace_array *tr = inode->i_private;
6491         struct trace_iterator *iter;
6492         int ret;
6493
6494         ret = tracing_check_open_get_tr(tr);
6495         if (ret)
6496                 return ret;
6497
6498         mutex_lock(&trace_types_lock);
6499
6500         /* create a buffer to store the information to pass to userspace */
6501         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6502         if (!iter) {
6503                 ret = -ENOMEM;
6504                 __trace_array_put(tr);
6505                 goto out;
6506         }
6507
6508         trace_seq_init(&iter->seq);
6509         iter->trace = tr->current_trace;
6510
6511         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6512                 ret = -ENOMEM;
6513                 goto fail;
6514         }
6515
6516         /* trace pipe does not show start of buffer */
6517         cpumask_setall(iter->started);
6518
6519         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6520                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6521
6522         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6523         if (trace_clocks[tr->clock_id].in_ns)
6524                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6525
6526         iter->tr = tr;
6527         iter->array_buffer = &tr->array_buffer;
6528         iter->cpu_file = tracing_get_cpu(inode);
6529         mutex_init(&iter->mutex);
6530         filp->private_data = iter;
6531
6532         if (iter->trace->pipe_open)
6533                 iter->trace->pipe_open(iter);
6534
6535         nonseekable_open(inode, filp);
6536
6537         tr->trace_ref++;
6538 out:
6539         mutex_unlock(&trace_types_lock);
6540         return ret;
6541
6542 fail:
6543         kfree(iter);
6544         __trace_array_put(tr);
6545         mutex_unlock(&trace_types_lock);
6546         return ret;
6547 }
6548
6549 static int tracing_release_pipe(struct inode *inode, struct file *file)
6550 {
6551         struct trace_iterator *iter = file->private_data;
6552         struct trace_array *tr = inode->i_private;
6553
6554         mutex_lock(&trace_types_lock);
6555
6556         tr->trace_ref--;
6557
6558         if (iter->trace->pipe_close)
6559                 iter->trace->pipe_close(iter);
6560
6561         mutex_unlock(&trace_types_lock);
6562
6563         free_cpumask_var(iter->started);
6564         mutex_destroy(&iter->mutex);
6565         kfree(iter);
6566
6567         trace_array_put(tr);
6568
6569         return 0;
6570 }
6571
6572 static __poll_t
6573 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6574 {
6575         struct trace_array *tr = iter->tr;
6576
6577         /* Iterators are static; they should be either filled or empty */
6578         if (trace_buffer_iter(iter, iter->cpu_file))
6579                 return EPOLLIN | EPOLLRDNORM;
6580
6581         if (tr->trace_flags & TRACE_ITER_BLOCK)
6582                 /*
6583                  * Always select as readable when in blocking mode
6584                  */
6585                 return EPOLLIN | EPOLLRDNORM;
6586         else
6587                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6588                                              filp, poll_table);
6589 }
6590
6591 static __poll_t
6592 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6593 {
6594         struct trace_iterator *iter = filp->private_data;
6595
6596         return trace_poll(iter, filp, poll_table);
6597 }
6598
6599 /* Must be called with iter->mutex held. */
6600 static int tracing_wait_pipe(struct file *filp)
6601 {
6602         struct trace_iterator *iter = filp->private_data;
6603         int ret;
6604
6605         while (trace_empty(iter)) {
6606
6607                 if ((filp->f_flags & O_NONBLOCK)) {
6608                         return -EAGAIN;
6609                 }
6610
6611                 /*
6612                  * We only stop blocking once we have read something and tracing
6613                  * has been disabled. If tracing is disabled but nothing has been
6614                  * read yet, keep blocking. This allows a user to cat this file
6615                  * and then enable tracing. But once something has been read, we
6616                  * give an EOF when tracing is disabled again.
6617                  *
6618                  * iter->pos will be 0 if we haven't read anything.
6619                  */
6620                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6621                         break;
6622
6623                 mutex_unlock(&iter->mutex);
6624
6625                 ret = wait_on_pipe(iter, 0);
6626
6627                 mutex_lock(&iter->mutex);
6628
6629                 if (ret)
6630                         return ret;
6631         }
6632
6633         return 1;
6634 }
6635
6636 /*
6637  * Consumer reader.
6638  */
6639 static ssize_t
6640 tracing_read_pipe(struct file *filp, char __user *ubuf,
6641                   size_t cnt, loff_t *ppos)
6642 {
6643         struct trace_iterator *iter = filp->private_data;
6644         ssize_t sret;
6645
6646         /*
6647          * Avoid more than one consumer on a single file descriptor.
6648          * This is just a matter of trace coherency; the ring buffer itself
6649          * is protected.
6650          */
6651         mutex_lock(&iter->mutex);
6652
6653         /* return any leftover data */
6654         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6655         if (sret != -EBUSY)
6656                 goto out;
6657
6658         trace_seq_init(&iter->seq);
6659
6660         if (iter->trace->read) {
6661                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6662                 if (sret)
6663                         goto out;
6664         }
6665
6666 waitagain:
6667         sret = tracing_wait_pipe(filp);
6668         if (sret <= 0)
6669                 goto out;
6670
6671         /* stop when tracing is finished */
6672         if (trace_empty(iter)) {
6673                 sret = 0;
6674                 goto out;
6675         }
6676
6677         if (cnt >= PAGE_SIZE)
6678                 cnt = PAGE_SIZE - 1;
6679
6680         /* reset all but tr, trace, and overruns */
6681         memset(&iter->seq, 0,
6682                sizeof(struct trace_iterator) -
6683                offsetof(struct trace_iterator, seq));
6684         cpumask_clear(iter->started);
6685         trace_seq_init(&iter->seq);
6686         iter->pos = -1;
6687
6688         trace_event_read_lock();
6689         trace_access_lock(iter->cpu_file);
6690         while (trace_find_next_entry_inc(iter) != NULL) {
6691                 enum print_line_t ret;
6692                 int save_len = iter->seq.seq.len;
6693
6694                 ret = print_trace_line(iter);
6695                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6696                         /* don't print partial lines */
6697                         iter->seq.seq.len = save_len;
6698                         break;
6699                 }
6700                 if (ret != TRACE_TYPE_NO_CONSUME)
6701                         trace_consume(iter);
6702
6703                 if (trace_seq_used(&iter->seq) >= cnt)
6704                         break;
6705
6706                 /*
6707                  * Setting the full flag means we reached the trace_seq buffer
6708                  * size, and we should have left via the partial-line condition
6709                  * above. One of the trace_seq_* functions is not used properly.
6710                  */
6711                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6712                           iter->ent->type);
6713         }
6714         trace_access_unlock(iter->cpu_file);
6715         trace_event_read_unlock();
6716
6717         /* Now copy what we have to the user */
6718         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6719         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6720                 trace_seq_init(&iter->seq);
6721
6722         /*
6723          * If there was nothing to send to user, in spite of consuming trace
6724          * entries, go back to wait for more entries.
6725          */
6726         if (sret == -EBUSY)
6727                 goto waitagain;
6728
6729 out:
6730         mutex_unlock(&iter->mutex);
6731
6732         return sret;
6733 }
6734
6735 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6736                                      unsigned int idx)
6737 {
6738         __free_page(spd->pages[idx]);
6739 }
6740
6741 static size_t
6742 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6743 {
6744         size_t count;
6745         int save_len;
6746         int ret;
6747
6748         /* Seq buffer is page-sized, exactly what we need. */
6749         for (;;) {
6750                 save_len = iter->seq.seq.len;
6751                 ret = print_trace_line(iter);
6752
6753                 if (trace_seq_has_overflowed(&iter->seq)) {
6754                         iter->seq.seq.len = save_len;
6755                         break;
6756                 }
6757
6758                 /*
6759                  * This should not be hit, because it should only
6760                  * be set if the iter->seq overflowed. But check it
6761                  * anyway to be safe.
6762                  */
6763                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6764                         iter->seq.seq.len = save_len;
6765                         break;
6766                 }
6767
6768                 count = trace_seq_used(&iter->seq) - save_len;
6769                 if (rem < count) {
6770                         rem = 0;
6771                         iter->seq.seq.len = save_len;
6772                         break;
6773                 }
6774
6775                 if (ret != TRACE_TYPE_NO_CONSUME)
6776                         trace_consume(iter);
6777                 rem -= count;
6778                 if (!trace_find_next_entry_inc(iter))   {
6779                         rem = 0;
6780                         iter->ent = NULL;
6781                         break;
6782                 }
6783         }
6784
6785         return rem;
6786 }
6787
6788 static ssize_t tracing_splice_read_pipe(struct file *filp,
6789                                         loff_t *ppos,
6790                                         struct pipe_inode_info *pipe,
6791                                         size_t len,
6792                                         unsigned int flags)
6793 {
6794         struct page *pages_def[PIPE_DEF_BUFFERS];
6795         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6796         struct trace_iterator *iter = filp->private_data;
6797         struct splice_pipe_desc spd = {
6798                 .pages          = pages_def,
6799                 .partial        = partial_def,
6800                 .nr_pages       = 0, /* This gets updated below. */
6801                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6802                 .ops            = &default_pipe_buf_ops,
6803                 .spd_release    = tracing_spd_release_pipe,
6804         };
6805         ssize_t ret;
6806         size_t rem;
6807         unsigned int i;
6808
6809         if (splice_grow_spd(pipe, &spd))
6810                 return -ENOMEM;
6811
6812         mutex_lock(&iter->mutex);
6813
6814         if (iter->trace->splice_read) {
6815                 ret = iter->trace->splice_read(iter, filp,
6816                                                ppos, pipe, len, flags);
6817                 if (ret)
6818                         goto out_err;
6819         }
6820
6821         ret = tracing_wait_pipe(filp);
6822         if (ret <= 0)
6823                 goto out_err;
6824
6825         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6826                 ret = -EFAULT;
6827                 goto out_err;
6828         }
6829
6830         trace_event_read_lock();
6831         trace_access_lock(iter->cpu_file);
6832
6833         /* Fill as many pages as possible. */
6834         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6835                 spd.pages[i] = alloc_page(GFP_KERNEL);
6836                 if (!spd.pages[i])
6837                         break;
6838
6839                 rem = tracing_fill_pipe_page(rem, iter);
6840
6841                 /* Copy the data into the page, so we can start over. */
6842                 ret = trace_seq_to_buffer(&iter->seq,
6843                                           page_address(spd.pages[i]),
6844                                           trace_seq_used(&iter->seq));
6845                 if (ret < 0) {
6846                         __free_page(spd.pages[i]);
6847                         break;
6848                 }
6849                 spd.partial[i].offset = 0;
6850                 spd.partial[i].len = trace_seq_used(&iter->seq);
6851
6852                 trace_seq_init(&iter->seq);
6853         }
6854
6855         trace_access_unlock(iter->cpu_file);
6856         trace_event_read_unlock();
6857         mutex_unlock(&iter->mutex);
6858
6859         spd.nr_pages = i;
6860
6861         if (i)
6862                 ret = splice_to_pipe(pipe, &spd);
6863         else
6864                 ret = 0;
6865 out:
6866         splice_shrink_spd(&spd);
6867         return ret;
6868
6869 out_err:
6870         mutex_unlock(&iter->mutex);
6871         goto out;
6872 }
6873
6874 static ssize_t
6875 tracing_entries_read(struct file *filp, char __user *ubuf,
6876                      size_t cnt, loff_t *ppos)
6877 {
6878         struct inode *inode = file_inode(filp);
6879         struct trace_array *tr = inode->i_private;
6880         int cpu = tracing_get_cpu(inode);
6881         char buf[64];
6882         int r = 0;
6883         ssize_t ret;
6884
6885         mutex_lock(&trace_types_lock);
6886
6887         if (cpu == RING_BUFFER_ALL_CPUS) {
6888                 int cpu, buf_size_same;
6889                 unsigned long size;
6890
6891                 size = 0;
6892                 buf_size_same = 1;
6893                 /* check if all cpu buffer sizes are the same */
6894                 for_each_tracing_cpu(cpu) {
6895                         /* fill in the size from first enabled cpu */
6896                         if (size == 0)
6897                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6898                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6899                                 buf_size_same = 0;
6900                                 break;
6901                         }
6902                 }
6903
6904                 if (buf_size_same) {
6905                         if (!ring_buffer_expanded)
6906                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6907                                             size >> 10,
6908                                             trace_buf_size >> 10);
6909                         else
6910                                 r = sprintf(buf, "%lu\n", size >> 10);
6911                 } else
6912                         r = sprintf(buf, "X\n");
6913         } else
6914                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6915
6916         mutex_unlock(&trace_types_lock);
6917
6918         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6919         return ret;
6920 }
6921
6922 static ssize_t
6923 tracing_entries_write(struct file *filp, const char __user *ubuf,
6924                       size_t cnt, loff_t *ppos)
6925 {
6926         struct inode *inode = file_inode(filp);
6927         struct trace_array *tr = inode->i_private;
6928         unsigned long val;
6929         int ret;
6930
6931         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6932         if (ret)
6933                 return ret;
6934
6935         /* must have at least 1 entry */
6936         if (!val)
6937                 return -EINVAL;
6938
6939         /* value is in KB */
6940         val <<= 10;
6941         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6942         if (ret < 0)
6943                 return ret;
6944
6945         *ppos += cnt;
6946
6947         return cnt;
6948 }
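
/*
 * A usage sketch for the handler above (buffer_size_kb is the
 * conventional file name and is assumed here):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * The value is in kilobytes ("val <<= 10" converts it to bytes), so
 * this resizes each selected per-CPU ring buffer to 4 MB.  Writing to
 * a per-CPU instance of the file resizes only that CPU's buffer,
 * since tracing_get_cpu() takes the CPU from the inode.
 */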
6949
6950 static ssize_t
6951 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6952                                 size_t cnt, loff_t *ppos)
6953 {
6954         struct trace_array *tr = filp->private_data;
6955         char buf[64];
6956         int r, cpu;
6957         unsigned long size = 0, expanded_size = 0;
6958
6959         mutex_lock(&trace_types_lock);
6960         for_each_tracing_cpu(cpu) {
6961                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6962                 if (!ring_buffer_expanded)
6963                         expanded_size += trace_buf_size >> 10;
6964         }
6965         if (ring_buffer_expanded)
6966                 r = sprintf(buf, "%lu\n", size);
6967         else
6968                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6969         mutex_unlock(&trace_types_lock);
6970
6971         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6972 }
6973
6974 static ssize_t
6975 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6976                           size_t cnt, loff_t *ppos)
6977 {
6978         /*
6979          * There is no need to read what the user has written; this function
6980          * exists just to make sure that there is no error when "echo" is used.
6981          */
6982
6983         *ppos += cnt;
6984
6985         return cnt;
6986 }
6987
6988 static int
6989 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6990 {
6991         struct trace_array *tr = inode->i_private;
6992
6993         /* disable tracing? */
6994         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6995                 tracer_tracing_off(tr);
6996         /* resize the ring buffer to 0 */
6997         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6998
6999         trace_array_put(tr);
7000
7001         return 0;
7002 }
7003
7004 static ssize_t
7005 tracing_mark_write(struct file *filp, const char __user *ubuf,
7006                                         size_t cnt, loff_t *fpos)
7007 {
7008         struct trace_array *tr = filp->private_data;
7009         struct ring_buffer_event *event;
7010         enum event_trigger_type tt = ETT_NONE;
7011         struct trace_buffer *buffer;
7012         struct print_entry *entry;
7013         ssize_t written;
7014         int size;
7015         int len;
7016
7017 /* Used in tracing_mark_raw_write() as well */
7018 #define FAULTED_STR "<faulted>"
7019 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7020
7021         if (tracing_disabled)
7022                 return -EINVAL;
7023
7024         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7025                 return -EINVAL;
7026
7027         if (cnt > TRACE_BUF_SIZE)
7028                 cnt = TRACE_BUF_SIZE;
7029
7030         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7031
7032         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7033
7034         /* If less than "<faulted>", then make sure we can still add that */
7035         if (cnt < FAULTED_SIZE)
7036                 size += FAULTED_SIZE - cnt;
7037
7038         buffer = tr->array_buffer.buffer;
7039         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7040                                             tracing_gen_ctx());
7041         if (unlikely(!event))
7042                 /* Ring buffer disabled, return as if not open for write */
7043                 return -EBADF;
7044
7045         entry = ring_buffer_event_data(event);
7046         entry->ip = _THIS_IP_;
7047
7048         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7049         if (len) {
7050                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7051                 cnt = FAULTED_SIZE;
7052                 written = -EFAULT;
7053         } else
7054                 written = cnt;
7055
7056         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7057                 /* do not add \n before testing triggers, but add \0 */
7058                 entry->buf[cnt] = '\0';
7059                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7060         }
7061
7062         if (entry->buf[cnt - 1] != '\n') {
7063                 entry->buf[cnt] = '\n';
7064                 entry->buf[cnt + 1] = '\0';
7065         } else
7066                 entry->buf[cnt] = '\0';
7067
7068         if (static_branch_unlikely(&trace_marker_exports_enabled))
7069                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7070         __buffer_unlock_commit(buffer, event);
7071
7072         if (tt)
7073                 event_triggers_post_call(tr->trace_marker_file, tt);
7074
7075         if (written > 0)
7076                 *fpos += written;
7077
7078         return written;
7079 }
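
/*
 * A usage sketch for the handler above (trace_marker is the
 * conventional file name and is assumed here):
 *
 *   # echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace as a print entry.  If the user
 * buffer faults while being copied, the literal "<faulted>" string is
 * recorded instead and -EFAULT is returned; writes larger than
 * TRACE_BUF_SIZE are silently truncated.
 */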
7080
7081 /* Limit it for now to 3K (including tag) */
7082 #define RAW_DATA_MAX_SIZE (1024*3)
7083
7084 static ssize_t
7085 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7086                                         size_t cnt, loff_t *fpos)
7087 {
7088         struct trace_array *tr = filp->private_data;
7089         struct ring_buffer_event *event;
7090         struct trace_buffer *buffer;
7091         struct raw_data_entry *entry;
7092         ssize_t written;
7093         int size;
7094         int len;
7095
7096 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7097
7098         if (tracing_disabled)
7099                 return -EINVAL;
7100
7101         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7102                 return -EINVAL;
7103
7104         /* The marker must at least have a tag id */
7105         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7106                 return -EINVAL;
7107
7108         if (cnt > TRACE_BUF_SIZE)
7109                 cnt = TRACE_BUF_SIZE;
7110
7111         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7112
7113         size = sizeof(*entry) + cnt;
7114         if (cnt < FAULT_SIZE_ID)
7115                 size += FAULT_SIZE_ID - cnt;
7116
7117         buffer = tr->array_buffer.buffer;
7118         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7119                                             tracing_gen_ctx());
7120         if (!event)
7121                 /* Ring buffer disabled, return as if not open for write */
7122                 return -EBADF;
7123
7124         entry = ring_buffer_event_data(event);
7125
7126         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7127         if (len) {
7128                 entry->id = -1;
7129                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7130                 written = -EFAULT;
7131         } else
7132                 written = cnt;
7133
7134         __buffer_unlock_commit(buffer, event);
7135
7136         if (written > 0)
7137                 *fpos += written;
7138
7139         return written;
7140 }
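
/*
 * The raw marker expects a binary payload that starts with an int tag
 * id followed by arbitrary data.  A minimal userspace sketch (the
 * trace_marker_raw path and the field values are illustrative
 * assumptions):
 *
 *	struct {
 *		int id;
 *		char data[8];
 *	} raw = { .id = 42, .data = "payload" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &raw, sizeof(raw));
 *
 * Writes shorter than sizeof(int) or larger than RAW_DATA_MAX_SIZE are
 * rejected with -EINVAL.
 */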
7141
7142 static int tracing_clock_show(struct seq_file *m, void *v)
7143 {
7144         struct trace_array *tr = m->private;
7145         int i;
7146
7147         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7148                 seq_printf(m,
7149                         "%s%s%s%s", i ? " " : "",
7150                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7151                         i == tr->clock_id ? "]" : "");
7152         seq_putc(m, '\n');
7153
7154         return 0;
7155 }
7156
7157 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7158 {
7159         int i;
7160
7161         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7162                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7163                         break;
7164         }
7165         if (i == ARRAY_SIZE(trace_clocks))
7166                 return -EINVAL;
7167
7168         mutex_lock(&trace_types_lock);
7169
7170         tr->clock_id = i;
7171
7172         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7173
7174         /*
7175          * New clock may not be consistent with the previous clock.
7176          * Reset the buffer so that it doesn't have incomparable timestamps.
7177          */
7178         tracing_reset_online_cpus(&tr->array_buffer);
7179
7180 #ifdef CONFIG_TRACER_MAX_TRACE
7181         if (tr->max_buffer.buffer)
7182                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7183         tracing_reset_online_cpus(&tr->max_buffer);
7184 #endif
7185
7186         mutex_unlock(&trace_types_lock);
7187
7188         return 0;
7189 }
7190
7191 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7192                                    size_t cnt, loff_t *fpos)
7193 {
7194         struct seq_file *m = filp->private_data;
7195         struct trace_array *tr = m->private;
7196         char buf[64];
7197         const char *clockstr;
7198         int ret;
7199
7200         if (cnt >= sizeof(buf))
7201                 return -EINVAL;
7202
7203         if (copy_from_user(buf, ubuf, cnt))
7204                 return -EFAULT;
7205
7206         buf[cnt] = 0;
7207
7208         clockstr = strstrip(buf);
7209
7210         ret = tracing_set_clock(tr, clockstr);
7211         if (ret)
7212                 return ret;
7213
7214         *fpos += cnt;
7215
7216         return cnt;
7217 }
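
/*
 * A usage sketch for the clock interface above (trace_clock is the
 * conventional file name and the listing is illustrative):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter ...
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * The currently selected clock is printed in brackets by
 * tracing_clock_show(), and switching clocks resets the buffers so
 * that timestamps from different clocks are never mixed.
 */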
7218
7219 static int tracing_clock_open(struct inode *inode, struct file *file)
7220 {
7221         struct trace_array *tr = inode->i_private;
7222         int ret;
7223
7224         ret = tracing_check_open_get_tr(tr);
7225         if (ret)
7226                 return ret;
7227
7228         ret = single_open(file, tracing_clock_show, inode->i_private);
7229         if (ret < 0)
7230                 trace_array_put(tr);
7231
7232         return ret;
7233 }
7234
7235 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7236 {
7237         struct trace_array *tr = m->private;
7238
7239         mutex_lock(&trace_types_lock);
7240
7241         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7242                 seq_puts(m, "delta [absolute]\n");
7243         else
7244                 seq_puts(m, "[delta] absolute\n");
7245
7246         mutex_unlock(&trace_types_lock);
7247
7248         return 0;
7249 }
7250
7251 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7252 {
7253         struct trace_array *tr = inode->i_private;
7254         int ret;
7255
7256         ret = tracing_check_open_get_tr(tr);
7257         if (ret)
7258                 return ret;
7259
7260         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7261         if (ret < 0)
7262                 trace_array_put(tr);
7263
7264         return ret;
7265 }
7266
7267 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7268 {
7269         if (rbe == this_cpu_read(trace_buffered_event))
7270                 return ring_buffer_time_stamp(buffer);
7271
7272         return ring_buffer_event_time_stamp(buffer, rbe);
7273 }
7274
7275 /*
7276  * Set or disable using the per CPU trace_buffered_event when possible.
7277  */
7278 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7279 {
7280         int ret = 0;
7281
7282         mutex_lock(&trace_types_lock);
7283
7284         if (set && tr->no_filter_buffering_ref++)
7285                 goto out;
7286
7287         if (!set) {
7288                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7289                         ret = -EINVAL;
7290                         goto out;
7291                 }
7292
7293                 --tr->no_filter_buffering_ref;
7294         }
7295  out:
7296         mutex_unlock(&trace_types_lock);
7297
7298         return ret;
7299 }
7300
7301 struct ftrace_buffer_info {
7302         struct trace_iterator   iter;
7303         void                    *spare;
7304         unsigned int            spare_cpu;
7305         unsigned int            read;
7306 };
7307
7308 #ifdef CONFIG_TRACER_SNAPSHOT
7309 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7310 {
7311         struct trace_array *tr = inode->i_private;
7312         struct trace_iterator *iter;
7313         struct seq_file *m;
7314         int ret;
7315
7316         ret = tracing_check_open_get_tr(tr);
7317         if (ret)
7318                 return ret;
7319
7320         if (file->f_mode & FMODE_READ) {
7321                 iter = __tracing_open(inode, file, true);
7322                 if (IS_ERR(iter))
7323                         ret = PTR_ERR(iter);
7324         } else {
7325                 /* Writes still need the seq_file to hold the private data */
7326                 ret = -ENOMEM;
7327                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7328                 if (!m)
7329                         goto out;
7330                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7331                 if (!iter) {
7332                         kfree(m);
7333                         goto out;
7334                 }
7335                 ret = 0;
7336
7337                 iter->tr = tr;
7338                 iter->array_buffer = &tr->max_buffer;
7339                 iter->cpu_file = tracing_get_cpu(inode);
7340                 m->private = iter;
7341                 file->private_data = m;
7342         }
7343 out:
7344         if (ret < 0)
7345                 trace_array_put(tr);
7346
7347         return ret;
7348 }
7349
7350 static ssize_t
7351 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7352                        loff_t *ppos)
7353 {
7354         struct seq_file *m = filp->private_data;
7355         struct trace_iterator *iter = m->private;
7356         struct trace_array *tr = iter->tr;
7357         unsigned long val;
7358         int ret;
7359
7360         ret = tracing_update_buffers();
7361         if (ret < 0)
7362                 return ret;
7363
7364         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7365         if (ret)
7366                 return ret;
7367
7368         mutex_lock(&trace_types_lock);
7369
7370         if (tr->current_trace->use_max_tr) {
7371                 ret = -EBUSY;
7372                 goto out;
7373         }
7374
7375         arch_spin_lock(&tr->max_lock);
7376         if (tr->cond_snapshot)
7377                 ret = -EBUSY;
7378         arch_spin_unlock(&tr->max_lock);
7379         if (ret)
7380                 goto out;
7381
7382         switch (val) {
7383         case 0:
7384                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7385                         ret = -EINVAL;
7386                         break;
7387                 }
7388                 if (tr->allocated_snapshot)
7389                         free_snapshot(tr);
7390                 break;
7391         case 1:
7392 /* Only allow per-cpu swap if the ring buffer supports it */
7393 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7394                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7395                         ret = -EINVAL;
7396                         break;
7397                 }
7398 #endif
7399                 if (tr->allocated_snapshot)
7400                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7401                                         &tr->array_buffer, iter->cpu_file);
7402                 else
7403                         ret = tracing_alloc_snapshot_instance(tr);
7404                 if (ret < 0)
7405                         break;
7406                 local_irq_disable();
7407                 /* Now, we're going to swap */
7408                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7409                         update_max_tr(tr, current, smp_processor_id(), NULL);
7410                 else
7411                         update_max_tr_single(tr, current, iter->cpu_file);
7412                 local_irq_enable();
7413                 break;
7414         default:
7415                 if (tr->allocated_snapshot) {
7416                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7417                                 tracing_reset_online_cpus(&tr->max_buffer);
7418                         else
7419                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7420                 }
7421                 break;
7422         }
7423
7424         if (ret >= 0) {
7425                 *ppos += cnt;
7426                 ret = cnt;
7427         }
7428 out:
7429         mutex_unlock(&trace_types_lock);
7430         return ret;
7431 }
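
/*
 * Summary of the snapshot file semantics implemented by the write
 * handler above (the "snapshot" file name is the conventional one):
 *
 *   echo 0 > snapshot  - free the snapshot buffer (all-CPUs file only)
 *   echo 1 > snapshot  - allocate the snapshot buffer if needed and
 *                        swap it with the live buffer
 *   echo 2 > snapshot  - clear the snapshot buffer without freeing it
 *                        (any value greater than 1 behaves the same)
 */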
7432
7433 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7434 {
7435         struct seq_file *m = file->private_data;
7436         int ret;
7437
7438         ret = tracing_release(inode, file);
7439
7440         if (file->f_mode & FMODE_READ)
7441                 return ret;
7442
7443         /* If write only, the seq_file is just a stub */
7444         if (m)
7445                 kfree(m->private);
7446         kfree(m);
7447
7448         return 0;
7449 }
7450
7451 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7452 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7453                                     size_t count, loff_t *ppos);
7454 static int tracing_buffers_release(struct inode *inode, struct file *file);
7455 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7456                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7457
7458 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7459 {
7460         struct ftrace_buffer_info *info;
7461         int ret;
7462
7463         /* The following checks for tracefs lockdown */
7464         ret = tracing_buffers_open(inode, filp);
7465         if (ret < 0)
7466                 return ret;
7467
7468         info = filp->private_data;
7469
7470         if (info->iter.trace->use_max_tr) {
7471                 tracing_buffers_release(inode, filp);
7472                 return -EBUSY;
7473         }
7474
7475         info->iter.snapshot = true;
7476         info->iter.array_buffer = &info->iter.tr->max_buffer;
7477
7478         return ret;
7479 }
7480
7481 #endif /* CONFIG_TRACER_SNAPSHOT */
7482
7483
7484 static const struct file_operations tracing_thresh_fops = {
7485         .open           = tracing_open_generic,
7486         .read           = tracing_thresh_read,
7487         .write          = tracing_thresh_write,
7488         .llseek         = generic_file_llseek,
7489 };
7490
7491 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7492 static const struct file_operations tracing_max_lat_fops = {
7493         .open           = tracing_open_generic,
7494         .read           = tracing_max_lat_read,
7495         .write          = tracing_max_lat_write,
7496         .llseek         = generic_file_llseek,
7497 };
7498 #endif
7499
7500 static const struct file_operations set_tracer_fops = {
7501         .open           = tracing_open_generic,
7502         .read           = tracing_set_trace_read,
7503         .write          = tracing_set_trace_write,
7504         .llseek         = generic_file_llseek,
7505 };
7506
7507 static const struct file_operations tracing_pipe_fops = {
7508         .open           = tracing_open_pipe,
7509         .poll           = tracing_poll_pipe,
7510         .read           = tracing_read_pipe,
7511         .splice_read    = tracing_splice_read_pipe,
7512         .release        = tracing_release_pipe,
7513         .llseek         = no_llseek,
7514 };
7515
7516 static const struct file_operations tracing_entries_fops = {
7517         .open           = tracing_open_generic_tr,
7518         .read           = tracing_entries_read,
7519         .write          = tracing_entries_write,
7520         .llseek         = generic_file_llseek,
7521         .release        = tracing_release_generic_tr,
7522 };
7523
7524 static const struct file_operations tracing_total_entries_fops = {
7525         .open           = tracing_open_generic_tr,
7526         .read           = tracing_total_entries_read,
7527         .llseek         = generic_file_llseek,
7528         .release        = tracing_release_generic_tr,
7529 };
7530
7531 static const struct file_operations tracing_free_buffer_fops = {
7532         .open           = tracing_open_generic_tr,
7533         .write          = tracing_free_buffer_write,
7534         .release        = tracing_free_buffer_release,
7535 };
7536
7537 static const struct file_operations tracing_mark_fops = {
7538         .open           = tracing_open_generic_tr,
7539         .write          = tracing_mark_write,
7540         .llseek         = generic_file_llseek,
7541         .release        = tracing_release_generic_tr,
7542 };
7543
7544 static const struct file_operations tracing_mark_raw_fops = {
7545         .open           = tracing_open_generic_tr,
7546         .write          = tracing_mark_raw_write,
7547         .llseek         = generic_file_llseek,
7548         .release        = tracing_release_generic_tr,
7549 };
7550
7551 static const struct file_operations trace_clock_fops = {
7552         .open           = tracing_clock_open,
7553         .read           = seq_read,
7554         .llseek         = seq_lseek,
7555         .release        = tracing_single_release_tr,
7556         .write          = tracing_clock_write,
7557 };
7558
7559 static const struct file_operations trace_time_stamp_mode_fops = {
7560         .open           = tracing_time_stamp_mode_open,
7561         .read           = seq_read,
7562         .llseek         = seq_lseek,
7563         .release        = tracing_single_release_tr,
7564 };
7565
7566 #ifdef CONFIG_TRACER_SNAPSHOT
7567 static const struct file_operations snapshot_fops = {
7568         .open           = tracing_snapshot_open,
7569         .read           = seq_read,
7570         .write          = tracing_snapshot_write,
7571         .llseek         = tracing_lseek,
7572         .release        = tracing_snapshot_release,
7573 };
7574
7575 static const struct file_operations snapshot_raw_fops = {
7576         .open           = snapshot_raw_open,
7577         .read           = tracing_buffers_read,
7578         .release        = tracing_buffers_release,
7579         .splice_read    = tracing_buffers_splice_read,
7580         .llseek         = no_llseek,
7581 };
7582
7583 #endif /* CONFIG_TRACER_SNAPSHOT */
7584
7585 /*
7586  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7587  * @filp: The active open file structure
7588  * @ubuf: The userspace provided buffer holding the value to write
7589  * @cnt: The number of bytes to write
7590  * @ppos: The current "file" position
7591  *
7592  * This function implements the write interface for a struct trace_min_max_param.
7593  * The filp->private_data must point to a trace_min_max_param structure that
7594  * defines where to write the value, the min and the max acceptable values,
7595  * and a lock to protect the write.
7596  */
7597 static ssize_t
7598 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7599 {
7600         struct trace_min_max_param *param = filp->private_data;
7601         u64 val;
7602         int err;
7603
7604         if (!param)
7605                 return -EFAULT;
7606
7607         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7608         if (err)
7609                 return err;
7610
7611         if (param->lock)
7612                 mutex_lock(param->lock);
7613
7614         if (param->min && val < *param->min)
7615                 err = -EINVAL;
7616
7617         if (param->max && val > *param->max)
7618                 err = -EINVAL;
7619
7620         if (!err)
7621                 *param->val = val;
7622
7623         if (param->lock)
7624                 mutex_unlock(param->lock);
7625
7626         if (err)
7627                 return err;
7628
7629         return cnt;
7630 }
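
/*
 * A hypothetical sketch of how a trace_min_max_param could be wired up
 * to these handlers (all names below are made up for illustration;
 * the real users live elsewhere in the tracing code):
 *
 *	static DEFINE_MUTEX(my_mutex);
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", 0644, parent, &my_param,
 *			  &trace_min_max_fops);
 *
 * Values outside [*min, *max] are rejected with -EINVAL.
 */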
7631
7632 /*
7633  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7634  * @filp: The active open file structure
7635  * @ubuf: The userspace provided buffer to read value into
7636  * @cnt: The maximum number of bytes to read
7637  * @ppos: The current "file" position
7638  *
7639  * This function implements the read interface for a struct trace_min_max_param.
7640  * The filp->private_data must point to a trace_min_max_param struct with valid
7641  * data.
7642  */
7643 static ssize_t
7644 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7645 {
7646         struct trace_min_max_param *param = filp->private_data;
7647         char buf[U64_STR_SIZE];
7648         int len;
7649         u64 val;
7650
7651         if (!param)
7652                 return -EFAULT;
7653
7654         val = *param->val;
7655
7656         if (cnt > sizeof(buf))
7657                 cnt = sizeof(buf);
7658
7659         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7660
7661         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7662 }
7663
7664 const struct file_operations trace_min_max_fops = {
7665         .open           = tracing_open_generic,
7666         .read           = trace_min_max_read,
7667         .write          = trace_min_max_write,
7668 };
7669
7670 #define TRACING_LOG_ERRS_MAX    8
7671 #define TRACING_LOG_LOC_MAX     128
7672
7673 #define CMD_PREFIX "  Command: "
7674
7675 struct err_info {
7676         const char      **errs; /* ptr to loc-specific array of err strings */
7677         u8              type;   /* index into errs -> specific err string */
7678         u8              pos;    /* caret position in cmd; MAX_FILTER_STR_VAL = 256 */
7679         u64             ts;
7680 };
7681
7682 struct tracing_log_err {
7683         struct list_head        list;
7684         struct err_info         info;
7685         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7686         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7687 };
7688
7689 static DEFINE_MUTEX(tracing_err_log_lock);
7690
7691 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7692 {
7693         struct tracing_log_err *err;
7694
7695         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7696                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7697                 if (!err)
7698                         err = ERR_PTR(-ENOMEM);
7699                 tr->n_err_log_entries++;
7700
7701                 return err;
7702         }
7703
7704         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7705         list_del(&err->list);
7706
7707         return err;
7708 }
7709
7710 /**
7711  * err_pos - find the position of a string within a command for error careting
7712  * @cmd: The tracing command that caused the error
7713  * @str: The string to position the caret at within @cmd
7714  *
7715  * Finds the position of the first occurrence of @str within @cmd.  The
7716  * return value can be passed to tracing_log_err() for caret placement
7717  * within @cmd.
7718  *
7719  * Returns the index within @cmd of the first occurrence of @str or 0
7720  * if @str was not found.
7721  */
7722 unsigned int err_pos(char *cmd, const char *str)
7723 {
7724         char *found;
7725
7726         if (WARN_ON(!strlen(cmd)))
7727                 return 0;
7728
7729         found = strstr(cmd, str);
7730         if (found)
7731                 return found - cmd;
7732
7733         return 0;
7734 }
7735
7736 /**
7737  * tracing_log_err - write an error to the tracing error log
7738  * @tr: The associated trace array for the error (NULL for top level array)
7739  * @loc: A string describing where the error occurred
7740  * @cmd: The tracing command that caused the error
7741  * @errs: The array of loc-specific static error strings
7742  * @type: The index into errs[], which produces the specific static err string
7743  * @pos: The position the caret should be placed in the cmd
7744  *
7745  * Writes an error into tracing/error_log of the form:
7746  *
7747  * <loc>: error: <text>
7748  *   Command: <cmd>
7749  *              ^
7750  *
7751  * tracing/error_log is a small log file containing the last
7752  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7753  * unless there has been a tracing error, and the error log can be
7754  * cleared and have its memory freed by writing the empty string in
7755  * truncation mode to it i.e. echo > tracing/error_log.
7756  *
7757  * NOTE: the @errs array along with the @type param are used to
7758  * produce a static error string - this string is not copied and saved
7759  * when the error is logged - only a pointer to it is saved.  See
7760  * existing callers for examples of how static strings are typically
7761  * defined for use with tracing_log_err().
7762  */
7763 void tracing_log_err(struct trace_array *tr,
7764                      const char *loc, const char *cmd,
7765                      const char **errs, u8 type, u8 pos)
7766 {
7767         struct tracing_log_err *err;
7768
7769         if (!tr)
7770                 tr = &global_trace;
7771
7772         mutex_lock(&tracing_err_log_lock);
7773         err = get_tracing_log_err(tr);
7774         if (PTR_ERR(err) == -ENOMEM) {
7775                 mutex_unlock(&tracing_err_log_lock);
7776                 return;
7777         }
7778
7779         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7780         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7781
7782         err->info.errs = errs;
7783         err->info.type = type;
7784         err->info.pos = pos;
7785         err->info.ts = local_clock();
7786
7787         list_add_tail(&err->list, &tr->err_log);
7788         mutex_unlock(&tracing_err_log_lock);
7789 }
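
/*
 * An illustrative call site (the command string and error table are
 * made up; see existing callers such as the histogram code for real
 * usage):
 *
 *	static const char *my_errs[] = { "Unknown field" };
 *
 *	tracing_log_err(tr, "my_subsystem", cmd, my_errs, 0,
 *			err_pos(cmd, "bogus_field"));
 *
 * with cmd == "keys=bogus_field" would append an entry like
 *
 *	[  123.456789] my_subsystem: error: Unknown field
 *	  Command: keys=bogus_field
 *	                ^
 *
 * to tracing/error_log, with the caret placed at @pos within the
 * command.
 */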
7790
7791 static void clear_tracing_err_log(struct trace_array *tr)
7792 {
7793         struct tracing_log_err *err, *next;
7794
7795         mutex_lock(&tracing_err_log_lock);
7796         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7797                 list_del(&err->list);
7798                 kfree(err);
7799         }
7800
7801         tr->n_err_log_entries = 0;
7802         mutex_unlock(&tracing_err_log_lock);
7803 }
7804
7805 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7806 {
7807         struct trace_array *tr = m->private;
7808
7809         mutex_lock(&tracing_err_log_lock);
7810
7811         return seq_list_start(&tr->err_log, *pos);
7812 }
7813
7814 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7815 {
7816         struct trace_array *tr = m->private;
7817
7818         return seq_list_next(v, &tr->err_log, pos);
7819 }
7820
7821 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7822 {
7823         mutex_unlock(&tracing_err_log_lock);
7824 }
7825
7826 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7827 {
7828         u8 i;
7829
7830         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7831                 seq_putc(m, ' ');
7832         for (i = 0; i < pos; i++)
7833                 seq_putc(m, ' ');
7834         seq_puts(m, "^\n");
7835 }
7836
7837 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7838 {
7839         struct tracing_log_err *err = v;
7840
7841         if (err) {
7842                 const char *err_text = err->info.errs[err->info.type];
7843                 u64 sec = err->info.ts;
7844                 u32 nsec;
7845
7846                 nsec = do_div(sec, NSEC_PER_SEC);
7847                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7848                            err->loc, err_text);
7849                 seq_printf(m, "%s", err->cmd);
7850                 tracing_err_log_show_pos(m, err->info.pos);
7851         }
7852
7853         return 0;
7854 }
7855
7856 static const struct seq_operations tracing_err_log_seq_ops = {
7857         .start  = tracing_err_log_seq_start,
7858         .next   = tracing_err_log_seq_next,
7859         .stop   = tracing_err_log_seq_stop,
7860         .show   = tracing_err_log_seq_show
7861 };
7862
7863 static int tracing_err_log_open(struct inode *inode, struct file *file)
7864 {
7865         struct trace_array *tr = inode->i_private;
7866         int ret = 0;
7867
7868         ret = tracing_check_open_get_tr(tr);
7869         if (ret)
7870                 return ret;
7871
7872         /* If this file was opened for write, then erase contents */
7873         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7874                 clear_tracing_err_log(tr);
7875
7876         if (file->f_mode & FMODE_READ) {
7877                 ret = seq_open(file, &tracing_err_log_seq_ops);
7878                 if (!ret) {
7879                         struct seq_file *m = file->private_data;
7880                         m->private = tr;
7881                 } else {
7882                         trace_array_put(tr);
7883                 }
7884         }
7885         return ret;
7886 }
7887
7888 static ssize_t tracing_err_log_write(struct file *file,
7889                                      const char __user *buffer,
7890                                      size_t count, loff_t *ppos)
7891 {
7892         return count;
7893 }
7894
7895 static int tracing_err_log_release(struct inode *inode, struct file *file)
7896 {
7897         struct trace_array *tr = inode->i_private;
7898
7899         trace_array_put(tr);
7900
7901         if (file->f_mode & FMODE_READ)
7902                 seq_release(inode, file);
7903
7904         return 0;
7905 }
7906
7907 static const struct file_operations tracing_err_log_fops = {
7908         .open           = tracing_err_log_open,
7909         .write          = tracing_err_log_write,
7910         .read           = seq_read,
7911         .llseek         = seq_lseek,
7912         .release        = tracing_err_log_release,
7913 };
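
/*
 * Illustrative user-space interaction with the error_log file backed by
 * these fops (the sample error and the mount point /sys/kernel/tracing
 * are assumptions, not output copied from a real system):
 *
 *   # cat /sys/kernel/tracing/error_log
 *   [  123.456789] hist:sched:sched_switch: error: Couldn't find field
 *     Command: keys=prev_pidx
 *                   ^
 *   # echo > /sys/kernel/tracing/error_log		(clears the log)
 */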
7914
7915 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7916 {
7917         struct trace_array *tr = inode->i_private;
7918         struct ftrace_buffer_info *info;
7919         int ret;
7920
7921         ret = tracing_check_open_get_tr(tr);
7922         if (ret)
7923                 return ret;
7924
7925         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7926         if (!info) {
7927                 trace_array_put(tr);
7928                 return -ENOMEM;
7929         }
7930
7931         mutex_lock(&trace_types_lock);
7932
7933         info->iter.tr           = tr;
7934         info->iter.cpu_file     = tracing_get_cpu(inode);
7935         info->iter.trace        = tr->current_trace;
7936         info->iter.array_buffer = &tr->array_buffer;
7937         info->spare             = NULL;
7938         /* Force reading ring buffer for first read */
7939         info->read              = (unsigned int)-1;
7940
7941         filp->private_data = info;
7942
7943         tr->trace_ref++;
7944
7945         mutex_unlock(&trace_types_lock);
7946
7947         ret = nonseekable_open(inode, filp);
7948         if (ret < 0)
7949                 trace_array_put(tr);
7950
7951         return ret;
7952 }
7953
7954 static __poll_t
7955 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7956 {
7957         struct ftrace_buffer_info *info = filp->private_data;
7958         struct trace_iterator *iter = &info->iter;
7959
7960         return trace_poll(iter, filp, poll_table);
7961 }
7962
7963 static ssize_t
7964 tracing_buffers_read(struct file *filp, char __user *ubuf,
7965                      size_t count, loff_t *ppos)
7966 {
7967         struct ftrace_buffer_info *info = filp->private_data;
7968         struct trace_iterator *iter = &info->iter;
7969         ssize_t ret = 0;
7970         ssize_t size;
7971
7972         if (!count)
7973                 return 0;
7974
7975 #ifdef CONFIG_TRACER_MAX_TRACE
7976         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7977                 return -EBUSY;
7978 #endif
7979
7980         if (!info->spare) {
7981                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7982                                                           iter->cpu_file);
7983                 if (IS_ERR(info->spare)) {
7984                         ret = PTR_ERR(info->spare);
7985                         info->spare = NULL;
7986                 } else {
7987                         info->spare_cpu = iter->cpu_file;
7988                 }
7989         }
7990         if (!info->spare)
7991                 return ret;
7992
7993         /* Do we have previous read data to read? */
7994         if (info->read < PAGE_SIZE)
7995                 goto read;
7996
7997  again:
7998         trace_access_lock(iter->cpu_file);
7999         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8000                                     &info->spare,
8001                                     count,
8002                                     iter->cpu_file, 0);
8003         trace_access_unlock(iter->cpu_file);
8004
8005         if (ret < 0) {
8006                 if (trace_empty(iter)) {
8007                         if ((filp->f_flags & O_NONBLOCK))
8008                                 return -EAGAIN;
8009
8010                         ret = wait_on_pipe(iter, 0);
8011                         if (ret)
8012                                 return ret;
8013
8014                         goto again;
8015                 }
8016                 return 0;
8017         }
8018
8019         info->read = 0;
8020  read:
8021         size = PAGE_SIZE - info->read;
8022         if (size > count)
8023                 size = count;
8024
8025         ret = copy_to_user(ubuf, info->spare + info->read, size);
8026         if (ret == size)
8027                 return -EFAULT;
8028
8029         size -= ret;
8030
8031         *ppos += size;
8032         info->read += size;
8033
8034         return size;
8035 }
8036
8037 static int tracing_buffers_release(struct inode *inode, struct file *file)
8038 {
8039         struct ftrace_buffer_info *info = file->private_data;
8040         struct trace_iterator *iter = &info->iter;
8041
8042         mutex_lock(&trace_types_lock);
8043
8044         iter->tr->trace_ref--;
8045
8046         __trace_array_put(iter->tr);
8047
8048         if (info->spare)
8049                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8050                                            info->spare_cpu, info->spare);
8051         kvfree(info);
8052
8053         mutex_unlock(&trace_types_lock);
8054
8055         return 0;
8056 }
8057
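/*
 * A buffer_ref pins one ring-buffer page that has been handed out to a
 * pipe.  Each pipe_buffer referencing the page takes a reference, and the
 * page is returned to the ring buffer when the last reference is dropped
 * (see buffer_ref_release() below).
 */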
8058 struct buffer_ref {
8059         struct trace_buffer     *buffer;
8060         void                    *page;
8061         int                     cpu;
8062         refcount_t              refcount;
8063 };
8064
8065 static void buffer_ref_release(struct buffer_ref *ref)
8066 {
8067         if (!refcount_dec_and_test(&ref->refcount))
8068                 return;
8069         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8070         kfree(ref);
8071 }
8072
8073 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8074                                     struct pipe_buffer *buf)
8075 {
8076         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8077
8078         buffer_ref_release(ref);
8079         buf->private = 0;
8080 }
8081
8082 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8083                                 struct pipe_buffer *buf)
8084 {
8085         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8086
8087         if (refcount_read(&ref->refcount) > INT_MAX/2)
8088                 return false;
8089
8090         refcount_inc(&ref->refcount);
8091         return true;
8092 }
8093
8094 /* Pipe buffer operations for ring-buffer pages handed out to a pipe. */
8095 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8096         .release                = buffer_pipe_buf_release,
8097         .get                    = buffer_pipe_buf_get,
8098 };
8099
8100 /*
8101  * Callback from splice_to_pipe(); releases any pages left in the spd
8102  * in case we errored out while filling the pipe.
8103  */
8104 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8105 {
8106         struct buffer_ref *ref =
8107                 (struct buffer_ref *)spd->partial[i].private;
8108
8109         buffer_ref_release(ref);
8110         spd->partial[i].private = 0;
8111 }
8112
8113 static ssize_t
8114 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8115                             struct pipe_inode_info *pipe, size_t len,
8116                             unsigned int flags)
8117 {
8118         struct ftrace_buffer_info *info = file->private_data;
8119         struct trace_iterator *iter = &info->iter;
8120         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8121         struct page *pages_def[PIPE_DEF_BUFFERS];
8122         struct splice_pipe_desc spd = {
8123                 .pages          = pages_def,
8124                 .partial        = partial_def,
8125                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8126                 .ops            = &buffer_pipe_buf_ops,
8127                 .spd_release    = buffer_spd_release,
8128         };
8129         struct buffer_ref *ref;
8130         int entries, i;
8131         ssize_t ret = 0;
8132
8133 #ifdef CONFIG_TRACER_MAX_TRACE
8134         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8135                 return -EBUSY;
8136 #endif
8137
8138         if (*ppos & (PAGE_SIZE - 1))
8139                 return -EINVAL;
8140
8141         if (len & (PAGE_SIZE - 1)) {
8142                 if (len < PAGE_SIZE)
8143                         return -EINVAL;
8144                 len &= PAGE_MASK;
8145         }
8146
8147         if (splice_grow_spd(pipe, &spd))
8148                 return -ENOMEM;
8149
8150  again:
8151         trace_access_lock(iter->cpu_file);
8152         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8153
8154         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8155                 struct page *page;
8156                 int r;
8157
8158                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8159                 if (!ref) {
8160                         ret = -ENOMEM;
8161                         break;
8162                 }
8163
8164                 refcount_set(&ref->refcount, 1);
8165                 ref->buffer = iter->array_buffer->buffer;
8166                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8167                 if (IS_ERR(ref->page)) {
8168                         ret = PTR_ERR(ref->page);
8169                         ref->page = NULL;
8170                         kfree(ref);
8171                         break;
8172                 }
8173                 ref->cpu = iter->cpu_file;
8174
8175                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8176                                           len, iter->cpu_file, 1);
8177                 if (r < 0) {
8178                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8179                                                    ref->page);
8180                         kfree(ref);
8181                         break;
8182                 }
8183
8184                 page = virt_to_page(ref->page);
8185
8186                 spd.pages[i] = page;
8187                 spd.partial[i].len = PAGE_SIZE;
8188                 spd.partial[i].offset = 0;
8189                 spd.partial[i].private = (unsigned long)ref;
8190                 spd.nr_pages++;
8191                 *ppos += PAGE_SIZE;
8192
8193                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8194         }
8195
8196         trace_access_unlock(iter->cpu_file);
8197         spd.nr_pages = i;
8198
8199         /* did we read anything? */
8200         if (!spd.nr_pages) {
8201                 if (ret)
8202                         goto out;
8203
8204                 ret = -EAGAIN;
8205                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8206                         goto out;
8207
8208                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8209                 if (ret)
8210                         goto out;
8211
8212                 goto again;
8213         }
8214
8215         ret = splice_to_pipe(pipe, &spd);
8216 out:
8217         splice_shrink_spd(&spd);
8218
8219         return ret;
8220 }
8221
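/*
 * These fops back the per_cpu/cpuN/trace_pipe_raw files (created in
 * tracing_init_tracefs_percpu() below), which hand out raw ring-buffer
 * pages - e.g. to splice()-based readers such as trace-cmd.
 */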
8222 static const struct file_operations tracing_buffers_fops = {
8223         .open           = tracing_buffers_open,
8224         .read           = tracing_buffers_read,
8225         .poll           = tracing_buffers_poll,
8226         .release        = tracing_buffers_release,
8227         .splice_read    = tracing_buffers_splice_read,
8228         .llseek         = no_llseek,
8229 };
8230
8231 static ssize_t
8232 tracing_stats_read(struct file *filp, char __user *ubuf,
8233                    size_t count, loff_t *ppos)
8234 {
8235         struct inode *inode = file_inode(filp);
8236         struct trace_array *tr = inode->i_private;
8237         struct array_buffer *trace_buf = &tr->array_buffer;
8238         int cpu = tracing_get_cpu(inode);
8239         struct trace_seq *s;
8240         unsigned long cnt;
8241         unsigned long long t;
8242         unsigned long usec_rem;
8243
8244         s = kmalloc(sizeof(*s), GFP_KERNEL);
8245         if (!s)
8246                 return -ENOMEM;
8247
8248         trace_seq_init(s);
8249
8250         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8251         trace_seq_printf(s, "entries: %ld\n", cnt);
8252
8253         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8254         trace_seq_printf(s, "overrun: %ld\n", cnt);
8255
8256         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8257         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8258
8259         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8260         trace_seq_printf(s, "bytes: %ld\n", cnt);
8261
8262         if (trace_clocks[tr->clock_id].in_ns) {
8263                 /* local or global for trace_clock */
8264                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8265                 usec_rem = do_div(t, USEC_PER_SEC);
8266                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8267                                                                 t, usec_rem);
8268
8269                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8270                 usec_rem = do_div(t, USEC_PER_SEC);
8271                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8272         } else {
8273                 /* counter or tsc mode for trace_clock */
8274                 trace_seq_printf(s, "oldest event ts: %llu\n",
8275                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8276
8277                 trace_seq_printf(s, "now ts: %llu\n",
8278                                 ring_buffer_time_stamp(trace_buf->buffer));
8279         }
8280
8281         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8282         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8283
8284         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8285         trace_seq_printf(s, "read events: %ld\n", cnt);
8286
8287         count = simple_read_from_buffer(ubuf, count, ppos,
8288                                         s->buffer, trace_seq_used(s));
8289
8290         kfree(s);
8291
8292         return count;
8293 }
8294
8295 static const struct file_operations tracing_stats_fops = {
8296         .open           = tracing_open_generic_tr,
8297         .read           = tracing_stats_read,
8298         .llseek         = generic_file_llseek,
8299         .release        = tracing_release_generic_tr,
8300 };
8301
8302 #ifdef CONFIG_DYNAMIC_FTRACE
8303
8304 static ssize_t
8305 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8306                   size_t cnt, loff_t *ppos)
8307 {
8308         ssize_t ret;
8309         char *buf;
8310         int r;
8311
8312         /* 256 should be plenty to hold the amount needed */
8313         buf = kmalloc(256, GFP_KERNEL);
8314         if (!buf)
8315                 return -ENOMEM;
8316
8317         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8318                       ftrace_update_tot_cnt,
8319                       ftrace_number_of_pages,
8320                       ftrace_number_of_groups);
8321
8322         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8323         kfree(buf);
8324         return ret;
8325 }
8326
8327 static const struct file_operations tracing_dyn_info_fops = {
8328         .open           = tracing_open_generic,
8329         .read           = tracing_read_dyn_info,
8330         .llseek         = generic_file_llseek,
8331 };
8332 #endif /* CONFIG_DYNAMIC_FTRACE */
8333
8334 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8335 static void
8336 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8337                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8338                 void *data)
8339 {
8340         tracing_snapshot_instance(tr);
8341 }
8342
8343 static void
8344 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8345                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8346                       void *data)
8347 {
8348         struct ftrace_func_mapper *mapper = data;
8349         long *count = NULL;
8350
8351         if (mapper)
8352                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8353
8354         if (count) {
8355
8356                 if (*count <= 0)
8357                         return;
8358
8359                 (*count)--;
8360         }
8361
8362         tracing_snapshot_instance(tr);
8363 }
8364
8365 static int
8366 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8367                       struct ftrace_probe_ops *ops, void *data)
8368 {
8369         struct ftrace_func_mapper *mapper = data;
8370         long *count = NULL;
8371
8372         seq_printf(m, "%ps:", (void *)ip);
8373
8374         seq_puts(m, "snapshot");
8375
8376         if (mapper)
8377                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8378
8379         if (count)
8380                 seq_printf(m, ":count=%ld\n", *count);
8381         else
8382                 seq_puts(m, ":unlimited\n");
8383
8384         return 0;
8385 }
8386
8387 static int
8388 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8389                      unsigned long ip, void *init_data, void **data)
8390 {
8391         struct ftrace_func_mapper *mapper = *data;
8392
8393         if (!mapper) {
8394                 mapper = allocate_ftrace_func_mapper();
8395                 if (!mapper)
8396                         return -ENOMEM;
8397                 *data = mapper;
8398         }
8399
8400         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8401 }
8402
8403 static void
8404 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8405                      unsigned long ip, void *data)
8406 {
8407         struct ftrace_func_mapper *mapper = data;
8408
8409         if (!ip) {
8410                 if (!mapper)
8411                         return;
8412                 free_ftrace_func_mapper(mapper, NULL);
8413                 return;
8414         }
8415
8416         ftrace_func_mapper_remove_ip(mapper, ip);
8417 }
8418
8419 static struct ftrace_probe_ops snapshot_probe_ops = {
8420         .func                   = ftrace_snapshot,
8421         .print                  = ftrace_snapshot_print,
8422 };
8423
8424 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8425         .func                   = ftrace_count_snapshot,
8426         .print                  = ftrace_snapshot_print,
8427         .init                   = ftrace_snapshot_init,
8428         .free                   = ftrace_snapshot_free,
8429 };
8430
8431 static int
8432 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8433                                char *glob, char *cmd, char *param, int enable)
8434 {
8435         struct ftrace_probe_ops *ops;
8436         void *count = (void *)-1;
8437         char *number;
8438         int ret;
8439
8440         if (!tr)
8441                 return -ENODEV;
8442
8443         /* hash funcs only work with set_ftrace_filter */
8444         if (!enable)
8445                 return -EINVAL;
8446
8447         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8448
8449         if (glob[0] == '!')
8450                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8451
8452         if (!param)
8453                 goto out_reg;
8454
8455         number = strsep(&param, ":");
8456
8457         if (!strlen(number))
8458                 goto out_reg;
8459
8460         /*
8461          * We use the callback data field (which is a pointer)
8462          * as our counter.
8463          */
8464         ret = kstrtoul(number, 0, (unsigned long *)&count);
8465         if (ret)
8466                 return ret;
8467
8468  out_reg:
8469         ret = tracing_alloc_snapshot_instance(tr);
8470         if (ret < 0)
8471                 goto out;
8472
8473         ret = register_ftrace_function_probe(glob, tr, ops, count);
8474
8475  out:
8476         return ret < 0 ? ret : 0;
8477 }
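
/*
 * Illustrative shell usage of the "snapshot" command registered below,
 * written to the set_ftrace_filter file (the function name is chosen only
 * as an example):
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter	(snapshot on every hit)
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter	(at most 5 snapshots)
 *   # echo '!schedule:snapshot' > set_ftrace_filter	(remove the probe)
 */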
8478
8479 static struct ftrace_func_command ftrace_snapshot_cmd = {
8480         .name                   = "snapshot",
8481         .func                   = ftrace_trace_snapshot_callback,
8482 };
8483
8484 static __init int register_snapshot_cmd(void)
8485 {
8486         return register_ftrace_command(&ftrace_snapshot_cmd);
8487 }
8488 #else
8489 static inline __init int register_snapshot_cmd(void) { return 0; }
8490 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8491
8492 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8493 {
8494         if (WARN_ON(!tr->dir))
8495                 return ERR_PTR(-ENODEV);
8496
8497         /* Top directory uses NULL as the parent */
8498         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8499                 return NULL;
8500
8501         /* All sub buffers have a descriptor */
8502         return tr->dir;
8503 }
8504
8505 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8506 {
8507         struct dentry *d_tracer;
8508
8509         if (tr->percpu_dir)
8510                 return tr->percpu_dir;
8511
8512         d_tracer = tracing_get_dentry(tr);
8513         if (IS_ERR(d_tracer))
8514                 return NULL;
8515
8516         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8517
8518         MEM_FAIL(!tr->percpu_dir,
8519                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8520
8521         return tr->percpu_dir;
8522 }
8523
8524 static struct dentry *
8525 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8526                       void *data, long cpu, const struct file_operations *fops)
8527 {
8528         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8529
8530         if (ret) /* See tracing_get_cpu() */
8531                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8532         return ret;
8533 }
8534
8535 static void
8536 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8537 {
8538         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8539         struct dentry *d_cpu;
8540         char cpu_dir[30]; /* 30 characters should be more than enough */
8541
8542         if (!d_percpu)
8543                 return;
8544
8545         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8546         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8547         if (!d_cpu) {
8548                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8549                 return;
8550         }
8551
8552         /* per cpu trace_pipe */
8553         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8554                                 tr, cpu, &tracing_pipe_fops);
8555
8556         /* per cpu trace */
8557         trace_create_cpu_file("trace", 0644, d_cpu,
8558                                 tr, cpu, &tracing_fops);
8559
8560         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8561                                 tr, cpu, &tracing_buffers_fops);
8562
8563         trace_create_cpu_file("stats", 0444, d_cpu,
8564                                 tr, cpu, &tracing_stats_fops);
8565
8566         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8567                                 tr, cpu, &tracing_entries_fops);
8568
8569 #ifdef CONFIG_TRACER_SNAPSHOT
8570         trace_create_cpu_file("snapshot", 0644, d_cpu,
8571                                 tr, cpu, &snapshot_fops);
8572
8573         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8574                                 tr, cpu, &snapshot_raw_fops);
8575 #endif
8576 }
8577
8578 #ifdef CONFIG_FTRACE_SELFTEST
8579 /* Let selftest have access to static functions in this file */
8580 #include "trace_selftest.c"
8581 #endif
8582
8583 static ssize_t
8584 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8585                         loff_t *ppos)
8586 {
8587         struct trace_option_dentry *topt = filp->private_data;
8588         char *buf;
8589
8590         if (topt->flags->val & topt->opt->bit)
8591                 buf = "1\n";
8592         else
8593                 buf = "0\n";
8594
8595         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8596 }
8597
8598 static ssize_t
8599 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8600                          loff_t *ppos)
8601 {
8602         struct trace_option_dentry *topt = filp->private_data;
8603         unsigned long val;
8604         int ret;
8605
8606         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8607         if (ret)
8608                 return ret;
8609
8610         if (val != 0 && val != 1)
8611                 return -EINVAL;
8612
8613         if (!!(topt->flags->val & topt->opt->bit) != val) {
8614                 mutex_lock(&trace_types_lock);
8615                 ret = __set_tracer_option(topt->tr, topt->flags,
8616                                           topt->opt, !val);
8617                 mutex_unlock(&trace_types_lock);
8618                 if (ret)
8619                         return ret;
8620         }
8621
8622         *ppos += cnt;
8623
8624         return cnt;
8625 }
8626
8627
8628 static const struct file_operations trace_options_fops = {
8629         .open = tracing_open_generic,
8630         .read = trace_options_read,
8631         .write = trace_options_write,
8632         .llseek = generic_file_llseek,
8633 };
8634
8635 /*
8636  * In order to pass in both the trace_array descriptor and the index of
8637  * the flag that the trace option file represents, the trace_array
8638  * has a character array of trace_flags_index[], which holds the index
8639  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8640  * The address of this character array is passed to the flag option file
8641  * read/write callbacks.
8642  *
8643  * In order to extract both the index and the trace_array descriptor,
8644  * get_tr_index() uses the following algorithm.
8645  *
8646  *   idx = *ptr;
8647  *
8648  * The pointer points at the index entry whose value equals its own
8649  * position in the array (remember index[1] == 1), so dereferencing
8650  * it yields the index.
8651  *
8652  * Then, to get the trace_array descriptor, subtract that index from
8653  * the pointer to get back to the start of the index array:
8654  *   ptr - idx == &index[0]
8655  *
8656  * Then a simple container_of() from that pointer gets us to the
8657  * trace_array descriptor.
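 *
 * For example (values purely illustrative): if data points at
 * tr->trace_flags_index[3], then *data == 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address
 * gives back tr.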
8658  */
8659 static void get_tr_index(void *data, struct trace_array **ptr,
8660                          unsigned int *pindex)
8661 {
8662         *pindex = *(unsigned char *)data;
8663
8664         *ptr = container_of(data - *pindex, struct trace_array,
8665                             trace_flags_index);
8666 }
8667
8668 static ssize_t
8669 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8670                         loff_t *ppos)
8671 {
8672         void *tr_index = filp->private_data;
8673         struct trace_array *tr;
8674         unsigned int index;
8675         char *buf;
8676
8677         get_tr_index(tr_index, &tr, &index);
8678
8679         if (tr->trace_flags & (1 << index))
8680                 buf = "1\n";
8681         else
8682                 buf = "0\n";
8683
8684         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8685 }
8686
8687 static ssize_t
8688 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8689                          loff_t *ppos)
8690 {
8691         void *tr_index = filp->private_data;
8692         struct trace_array *tr;
8693         unsigned int index;
8694         unsigned long val;
8695         int ret;
8696
8697         get_tr_index(tr_index, &tr, &index);
8698
8699         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8700         if (ret)
8701                 return ret;
8702
8703         if (val != 0 && val != 1)
8704                 return -EINVAL;
8705
8706         mutex_lock(&event_mutex);
8707         mutex_lock(&trace_types_lock);
8708         ret = set_tracer_flag(tr, 1 << index, val);
8709         mutex_unlock(&trace_types_lock);
8710         mutex_unlock(&event_mutex);
8711
8712         if (ret < 0)
8713                 return ret;
8714
8715         *ppos += cnt;
8716
8717         return cnt;
8718 }
8719
8720 static const struct file_operations trace_options_core_fops = {
8721         .open = tracing_open_generic,
8722         .read = trace_options_core_read,
8723         .write = trace_options_core_write,
8724         .llseek = generic_file_llseek,
8725 };
8726
8727 struct dentry *trace_create_file(const char *name,
8728                                  umode_t mode,
8729                                  struct dentry *parent,
8730                                  void *data,
8731                                  const struct file_operations *fops)
8732 {
8733         struct dentry *ret;
8734
8735         ret = tracefs_create_file(name, mode, parent, data, fops);
8736         if (!ret)
8737                 pr_warn("Could not create tracefs '%s' entry\n", name);
8738
8739         return ret;
8740 }
8741
8742
8743 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8744 {
8745         struct dentry *d_tracer;
8746
8747         if (tr->options)
8748                 return tr->options;
8749
8750         d_tracer = tracing_get_dentry(tr);
8751         if (IS_ERR(d_tracer))
8752                 return NULL;
8753
8754         tr->options = tracefs_create_dir("options", d_tracer);
8755         if (!tr->options) {
8756                 pr_warn("Could not create tracefs directory 'options'\n");
8757                 return NULL;
8758         }
8759
8760         return tr->options;
8761 }
8762
8763 static void
8764 create_trace_option_file(struct trace_array *tr,
8765                          struct trace_option_dentry *topt,
8766                          struct tracer_flags *flags,
8767                          struct tracer_opt *opt)
8768 {
8769         struct dentry *t_options;
8770
8771         t_options = trace_options_init_dentry(tr);
8772         if (!t_options)
8773                 return;
8774
8775         topt->flags = flags;
8776         topt->opt = opt;
8777         topt->tr = tr;
8778
8779         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8780                                     &trace_options_fops);
8781
8782 }
8783
8784 static void
8785 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8786 {
8787         struct trace_option_dentry *topts;
8788         struct trace_options *tr_topts;
8789         struct tracer_flags *flags;
8790         struct tracer_opt *opts;
8791         int cnt;
8792         int i;
8793
8794         if (!tracer)
8795                 return;
8796
8797         flags = tracer->flags;
8798
8799         if (!flags || !flags->opts)
8800                 return;
8801
8802         /*
8803          * If this is an instance, only create flags for tracers
8804          * the instance may have.
8805          */
8806         if (!trace_ok_for_array(tracer, tr))
8807                 return;
8808
8809         for (i = 0; i < tr->nr_topts; i++) {
8810                 /* Make sure there are no duplicate flags. */
8811                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8812                         return;
8813         }
8814
8815         opts = flags->opts;
8816
8817         for (cnt = 0; opts[cnt].name; cnt++)
8818                 ;
8819
8820         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8821         if (!topts)
8822                 return;
8823
8824         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8825                             GFP_KERNEL);
8826         if (!tr_topts) {
8827                 kfree(topts);
8828                 return;
8829         }
8830
8831         tr->topts = tr_topts;
8832         tr->topts[tr->nr_topts].tracer = tracer;
8833         tr->topts[tr->nr_topts].topts = topts;
8834         tr->nr_topts++;
8835
8836         for (cnt = 0; opts[cnt].name; cnt++) {
8837                 create_trace_option_file(tr, &topts[cnt], flags,
8838                                          &opts[cnt]);
8839                 MEM_FAIL(topts[cnt].entry == NULL,
8840                           "Failed to create trace option: %s",
8841                           opts[cnt].name);
8842         }
8843 }
8844
8845 static struct dentry *
8846 create_trace_option_core_file(struct trace_array *tr,
8847                               const char *option, long index)
8848 {
8849         struct dentry *t_options;
8850
8851         t_options = trace_options_init_dentry(tr);
8852         if (!t_options)
8853                 return NULL;
8854
8855         return trace_create_file(option, 0644, t_options,
8856                                  (void *)&tr->trace_flags_index[index],
8857                                  &trace_options_core_fops);
8858 }
8859
8860 static void create_trace_options_dir(struct trace_array *tr)
8861 {
8862         struct dentry *t_options;
8863         bool top_level = tr == &global_trace;
8864         int i;
8865
8866         t_options = trace_options_init_dentry(tr);
8867         if (!t_options)
8868                 return;
8869
8870         for (i = 0; trace_options[i]; i++) {
8871                 if (top_level ||
8872                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8873                         create_trace_option_core_file(tr, trace_options[i], i);
8874         }
8875 }
8876
8877 static ssize_t
8878 rb_simple_read(struct file *filp, char __user *ubuf,
8879                size_t cnt, loff_t *ppos)
8880 {
8881         struct trace_array *tr = filp->private_data;
8882         char buf[64];
8883         int r;
8884
8885         r = tracer_tracing_is_on(tr);
8886         r = sprintf(buf, "%d\n", r);
8887
8888         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8889 }
8890
8891 static ssize_t
8892 rb_simple_write(struct file *filp, const char __user *ubuf,
8893                 size_t cnt, loff_t *ppos)
8894 {
8895         struct trace_array *tr = filp->private_data;
8896         struct trace_buffer *buffer = tr->array_buffer.buffer;
8897         unsigned long val;
8898         int ret;
8899
8900         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8901         if (ret)
8902                 return ret;
8903
8904         if (buffer) {
8905                 mutex_lock(&trace_types_lock);
8906                 if (!!val == tracer_tracing_is_on(tr)) {
8907                         val = 0; /* do nothing */
8908                 } else if (val) {
8909                         tracer_tracing_on(tr);
8910                         if (tr->current_trace->start)
8911                                 tr->current_trace->start(tr);
8912                 } else {
8913                         tracer_tracing_off(tr);
8914                         if (tr->current_trace->stop)
8915                                 tr->current_trace->stop(tr);
8916                 }
8917                 mutex_unlock(&trace_types_lock);
8918         }
8919
8920         (*ppos)++;
8921
8922         return cnt;
8923 }
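
/*
 * These fops back the "tracing_on" file created in init_tracer_tracefs()
 * below.  Illustrative usage, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on	(stop writing to the ring buffer)
 *   # echo 1 > /sys/kernel/tracing/tracing_on	(start writing again)
 *   # cat /sys/kernel/tracing/tracing_on	(show the current state)
 */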
8924
8925 static const struct file_operations rb_simple_fops = {
8926         .open           = tracing_open_generic_tr,
8927         .read           = rb_simple_read,
8928         .write          = rb_simple_write,
8929         .release        = tracing_release_generic_tr,
8930         .llseek         = default_llseek,
8931 };
8932
8933 static ssize_t
8934 buffer_percent_read(struct file *filp, char __user *ubuf,
8935                     size_t cnt, loff_t *ppos)
8936 {
8937         struct trace_array *tr = filp->private_data;
8938         char buf[64];
8939         int r;
8940
8941         r = tr->buffer_percent;
8942         r = sprintf(buf, "%d\n", r);
8943
8944         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8945 }
8946
8947 static ssize_t
8948 buffer_percent_write(struct file *filp, const char __user *ubuf,
8949                      size_t cnt, loff_t *ppos)
8950 {
8951         struct trace_array *tr = filp->private_data;
8952         unsigned long val;
8953         int ret;
8954
8955         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8956         if (ret)
8957                 return ret;
8958
8959         if (val > 100)
8960                 return -EINVAL;
8961
8962         if (!val)
8963                 val = 1;
8964
8965         tr->buffer_percent = val;
8966
8967         (*ppos)++;
8968
8969         return cnt;
8970 }
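
/*
 * "buffer_percent" is the watermark for how full the ring buffer must be
 * before a blocked reader (e.g. wait_on_pipe() in the splice path above)
 * is woken up.  Illustrative usage (mount point assumed as elsewhere):
 *
 *   # echo 50 > /sys/kernel/tracing/buffer_percent
 */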
8971
8972 static const struct file_operations buffer_percent_fops = {
8973         .open           = tracing_open_generic_tr,
8974         .read           = buffer_percent_read,
8975         .write          = buffer_percent_write,
8976         .release        = tracing_release_generic_tr,
8977         .llseek         = default_llseek,
8978 };
8979
8980 static struct dentry *trace_instance_dir;
8981
8982 static void
8983 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8984
8985 static int
8986 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8987 {
8988         enum ring_buffer_flags rb_flags;
8989
8990         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8991
8992         buf->tr = tr;
8993
8994         buf->buffer = ring_buffer_alloc(size, rb_flags);
8995         if (!buf->buffer)
8996                 return -ENOMEM;
8997
8998         buf->data = alloc_percpu(struct trace_array_cpu);
8999         if (!buf->data) {
9000                 ring_buffer_free(buf->buffer);
9001                 buf->buffer = NULL;
9002                 return -ENOMEM;
9003         }
9004
9005         /* Allocate the first page for all buffers */
9006         set_buffer_entries(&tr->array_buffer,
9007                            ring_buffer_size(tr->array_buffer.buffer, 0));
9008
9009         return 0;
9010 }
9011
9012 static int allocate_trace_buffers(struct trace_array *tr, int size)
9013 {
9014         int ret;
9015
9016         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9017         if (ret)
9018                 return ret;
9019
9020 #ifdef CONFIG_TRACER_MAX_TRACE
9021         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9022                                     allocate_snapshot ? size : 1);
9023         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9024                 ring_buffer_free(tr->array_buffer.buffer);
9025                 tr->array_buffer.buffer = NULL;
9026                 free_percpu(tr->array_buffer.data);
9027                 tr->array_buffer.data = NULL;
9028                 return -ENOMEM;
9029         }
9030         tr->allocated_snapshot = allocate_snapshot;
9031
9032         /*
9033          * Only the top level trace array gets its snapshot allocated
9034          * from the kernel command line.
9035          */
9036         allocate_snapshot = false;
9037 #endif
9038
9039         return 0;
9040 }
9041
9042 static void free_trace_buffer(struct array_buffer *buf)
9043 {
9044         if (buf->buffer) {
9045                 ring_buffer_free(buf->buffer);
9046                 buf->buffer = NULL;
9047                 free_percpu(buf->data);
9048                 buf->data = NULL;
9049         }
9050 }
9051
9052 static void free_trace_buffers(struct trace_array *tr)
9053 {
9054         if (!tr)
9055                 return;
9056
9057         free_trace_buffer(&tr->array_buffer);
9058
9059 #ifdef CONFIG_TRACER_MAX_TRACE
9060         free_trace_buffer(&tr->max_buffer);
9061 #endif
9062 }
9063
9064 static void init_trace_flags_index(struct trace_array *tr)
9065 {
9066         int i;
9067
9068         /* Used by the trace options files */
9069         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9070                 tr->trace_flags_index[i] = i;
9071 }
9072
9073 static void __update_tracer_options(struct trace_array *tr)
9074 {
9075         struct tracer *t;
9076
9077         for (t = trace_types; t; t = t->next)
9078                 add_tracer_options(tr, t);
9079 }
9080
9081 static void update_tracer_options(struct trace_array *tr)
9082 {
9083         mutex_lock(&trace_types_lock);
9084         __update_tracer_options(tr);
9085         mutex_unlock(&trace_types_lock);
9086 }
9087
9088 /* Must have trace_types_lock held */
9089 struct trace_array *trace_array_find(const char *instance)
9090 {
9091         struct trace_array *tr, *found = NULL;
9092
9093         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9094                 if (tr->name && strcmp(tr->name, instance) == 0) {
9095                         found = tr;
9096                         break;
9097                 }
9098         }
9099
9100         return found;
9101 }
9102
9103 struct trace_array *trace_array_find_get(const char *instance)
9104 {
9105         struct trace_array *tr;
9106
9107         mutex_lock(&trace_types_lock);
9108         tr = trace_array_find(instance);
9109         if (tr)
9110                 tr->ref++;
9111         mutex_unlock(&trace_types_lock);
9112
9113         return tr;
9114 }
9115
9116 static int trace_array_create_dir(struct trace_array *tr)
9117 {
9118         int ret;
9119
9120         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9121         if (!tr->dir)
9122                 return -EINVAL;
9123
9124         ret = event_trace_add_tracer(tr->dir, tr);
9125         if (ret)
9126                 tracefs_remove(tr->dir);
9127
9128         init_tracer_tracefs(tr, tr->dir);
9129         __update_tracer_options(tr);
9130
9131         return ret;
9132 }
9133
9134 static struct trace_array *trace_array_create(const char *name)
9135 {
9136         struct trace_array *tr;
9137         int ret;
9138
9139         ret = -ENOMEM;
9140         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9141         if (!tr)
9142                 return ERR_PTR(ret);
9143
9144         tr->name = kstrdup(name, GFP_KERNEL);
9145         if (!tr->name)
9146                 goto out_free_tr;
9147
9148         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9149                 goto out_free_tr;
9150
9151         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9152
9153         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9154
9155         raw_spin_lock_init(&tr->start_lock);
9156
9157         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9158
9159         tr->current_trace = &nop_trace;
9160
9161         INIT_LIST_HEAD(&tr->systems);
9162         INIT_LIST_HEAD(&tr->events);
9163         INIT_LIST_HEAD(&tr->hist_vars);
9164         INIT_LIST_HEAD(&tr->err_log);
9165
9166         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9167                 goto out_free_tr;
9168
9169         if (ftrace_allocate_ftrace_ops(tr) < 0)
9170                 goto out_free_tr;
9171
9172         ftrace_init_trace_array(tr);
9173
9174         init_trace_flags_index(tr);
9175
9176         if (trace_instance_dir) {
9177                 ret = trace_array_create_dir(tr);
9178                 if (ret)
9179                         goto out_free_tr;
9180         } else
9181                 __trace_early_add_events(tr);
9182
9183         list_add(&tr->list, &ftrace_trace_arrays);
9184
9185         tr->ref++;
9186
9187         return tr;
9188
9189  out_free_tr:
9190         ftrace_free_ftrace_ops(tr);
9191         free_trace_buffers(tr);
9192         free_cpumask_var(tr->tracing_cpumask);
9193         kfree(tr->name);
9194         kfree(tr);
9195
9196         return ERR_PTR(ret);
9197 }
9198
9199 static int instance_mkdir(const char *name)
9200 {
9201         struct trace_array *tr;
9202         int ret;
9203
9204         mutex_lock(&event_mutex);
9205         mutex_lock(&trace_types_lock);
9206
9207         ret = -EEXIST;
9208         if (trace_array_find(name))
9209                 goto out_unlock;
9210
9211         tr = trace_array_create(name);
9212
9213         ret = PTR_ERR_OR_ZERO(tr);
9214
9215 out_unlock:
9216         mutex_unlock(&trace_types_lock);
9217         mutex_unlock(&event_mutex);
9218         return ret;
9219 }
9220
9221 /**
9222  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9223  * @name: The name of the trace array to be looked up/created.
9224  *
9225  * Returns a pointer to the trace array with the given name, or
9226  * NULL if it cannot be created.
9227  *
9228  * NOTE: This function increments the reference counter associated with the
9229  * trace array returned. This makes sure it cannot be freed while in use.
9230  * Use trace_array_put() once the trace array is no longer needed.
9231  * If the trace_array is to be freed, trace_array_destroy() needs to
9232  * be called after the trace_array_put(), or simply let user space delete
9233  * it from the tracefs instances directory. But until the
9234  * trace_array_put() is called, user space cannot delete it.
9235  *
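 * A hypothetical in-kernel user (instance name made up) might do:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		...			(use the instance)
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);
 *	}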
9236  */
9237 struct trace_array *trace_array_get_by_name(const char *name)
9238 {
9239         struct trace_array *tr;
9240
9241         mutex_lock(&event_mutex);
9242         mutex_lock(&trace_types_lock);
9243
9244         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9245                 if (tr->name && strcmp(tr->name, name) == 0)
9246                         goto out_unlock;
9247         }
9248
9249         tr = trace_array_create(name);
9250
9251         if (IS_ERR(tr))
9252                 tr = NULL;
9253 out_unlock:
9254         if (tr)
9255                 tr->ref++;
9256
9257         mutex_unlock(&trace_types_lock);
9258         mutex_unlock(&event_mutex);
9259         return tr;
9260 }
9261 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9262
9263 static int __remove_instance(struct trace_array *tr)
9264 {
9265         int i;
9266
9267         /* Reference counter for a newly created trace array = 1. */
9268         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9269                 return -EBUSY;
9270
9271         list_del(&tr->list);
9272
9273         /* Disable all the flags that were enabled coming in */
9274         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9275                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9276                         set_tracer_flag(tr, 1 << i, 0);
9277         }
9278
9279         tracing_set_nop(tr);
9280         clear_ftrace_function_probes(tr);
9281         event_trace_del_tracer(tr);
9282         ftrace_clear_pids(tr);
9283         ftrace_destroy_function_files(tr);
9284         tracefs_remove(tr->dir);
9285         free_percpu(tr->last_func_repeats);
9286         free_trace_buffers(tr);
9287
9288         for (i = 0; i < tr->nr_topts; i++) {
9289                 kfree(tr->topts[i].topts);
9290         }
9291         kfree(tr->topts);
9292
9293         free_cpumask_var(tr->tracing_cpumask);
9294         kfree(tr->name);
9295         kfree(tr);
9296
9297         return 0;
9298 }
9299
9300 int trace_array_destroy(struct trace_array *this_tr)
9301 {
9302         struct trace_array *tr;
9303         int ret;
9304
9305         if (!this_tr)
9306                 return -EINVAL;
9307
9308         mutex_lock(&event_mutex);
9309         mutex_lock(&trace_types_lock);
9310
9311         ret = -ENODEV;
9312
9313         /* Make sure the trace array exists before destroying it. */
9314         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9315                 if (tr == this_tr) {
9316                         ret = __remove_instance(tr);
9317                         break;
9318                 }
9319         }
9320
9321         mutex_unlock(&trace_types_lock);
9322         mutex_unlock(&event_mutex);
9323
9324         return ret;
9325 }
9326 EXPORT_SYMBOL_GPL(trace_array_destroy);
9327
9328 static int instance_rmdir(const char *name)
9329 {
9330         struct trace_array *tr;
9331         int ret;
9332
9333         mutex_lock(&event_mutex);
9334         mutex_lock(&trace_types_lock);
9335
9336         ret = -ENODEV;
9337         tr = trace_array_find(name);
9338         if (tr)
9339                 ret = __remove_instance(tr);
9340
9341         mutex_unlock(&trace_types_lock);
9342         mutex_unlock(&event_mutex);
9343
9344         return ret;
9345 }
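
/*
 * instance_mkdir() and instance_rmdir() back the tracefs "instances"
 * directory created below.  Illustrative user-space usage (mount point
 * assumed to be /sys/kernel/tracing):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo	(create trace array "foo")
 *   # rmdir /sys/kernel/tracing/instances/foo	(remove it, if not in use)
 */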
9346
9347 static __init void create_trace_instances(struct dentry *d_tracer)
9348 {
9349         struct trace_array *tr;
9350
9351         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9352                                                          instance_mkdir,
9353                                                          instance_rmdir);
9354         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9355                 return;
9356
9357         mutex_lock(&event_mutex);
9358         mutex_lock(&trace_types_lock);
9359
9360         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9361                 if (!tr->name)
9362                         continue;
9363                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9364                              "Failed to create instance directory\n"))
9365                         break;
9366         }
9367
9368         mutex_unlock(&trace_types_lock);
9369         mutex_unlock(&event_mutex);
9370 }
9371
9372 static void
9373 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9374 {
9375         struct trace_event_file *file;
9376         int cpu;
9377
9378         trace_create_file("available_tracers", 0444, d_tracer,
9379                         tr, &show_traces_fops);
9380
9381         trace_create_file("current_tracer", 0644, d_tracer,
9382                         tr, &set_tracer_fops);
9383
9384         trace_create_file("tracing_cpumask", 0644, d_tracer,
9385                           tr, &tracing_cpumask_fops);
9386
9387         trace_create_file("trace_options", 0644, d_tracer,
9388                           tr, &tracing_iter_fops);
9389
9390         trace_create_file("trace", 0644, d_tracer,
9391                           tr, &tracing_fops);
9392
9393         trace_create_file("trace_pipe", 0444, d_tracer,
9394                           tr, &tracing_pipe_fops);
9395
9396         trace_create_file("buffer_size_kb", 0644, d_tracer,
9397                           tr, &tracing_entries_fops);
9398
9399         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9400                           tr, &tracing_total_entries_fops);
9401
9402         trace_create_file("free_buffer", 0200, d_tracer,
9403                           tr, &tracing_free_buffer_fops);
9404
9405         trace_create_file("trace_marker", 0220, d_tracer,
9406                           tr, &tracing_mark_fops);
9407
9408         file = __find_event_file(tr, "ftrace", "print");
9409         if (file && file->dir)
9410                 trace_create_file("trigger", 0644, file->dir, file,
9411                                   &event_trigger_fops);
9412         tr->trace_marker_file = file;
9413
9414         trace_create_file("trace_marker_raw", 0220, d_tracer,
9415                           tr, &tracing_mark_raw_fops);
9416
9417         trace_create_file("trace_clock", 0644, d_tracer, tr,
9418                           &trace_clock_fops);
9419
9420         trace_create_file("tracing_on", 0644, d_tracer,
9421                           tr, &rb_simple_fops);
9422
9423         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9424                           &trace_time_stamp_mode_fops);
9425
9426         tr->buffer_percent = 50;
9427
9428         trace_create_file("buffer_percent", 0444, d_tracer,
9429                         tr, &buffer_percent_fops);
9430
9431         create_trace_options_dir(tr);
9432
9433 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9434         trace_create_maxlat_file(tr, d_tracer);
9435 #endif
9436
9437         if (ftrace_create_function_files(tr, d_tracer))
9438                 MEM_FAIL(1, "Could not allocate function filter files");
9439
9440 #ifdef CONFIG_TRACER_SNAPSHOT
9441         trace_create_file("snapshot", 0644, d_tracer,
9442                           tr, &snapshot_fops);
9443 #endif
9444
9445         trace_create_file("error_log", 0644, d_tracer,
9446                           tr, &tracing_err_log_fops);
9447
9448         for_each_tracing_cpu(cpu)
9449                 tracing_init_tracefs_percpu(tr, cpu);
9450
9451         ftrace_init_tracefs(tr, d_tracer);
9452 }
9453
9454 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9455 {
9456         struct vfsmount *mnt;
9457         struct file_system_type *type;
9458
9459         /*
9460          * To maintain backward compatibility for tools that mount
9461          * debugfs to get to the tracing facility, tracefs is automatically
9462          * mounted to the debugfs/tracing directory.
9463          */
9464         type = get_fs_type("tracefs");
9465         if (!type)
9466                 return NULL;
9467         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9468         put_filesystem(type);
9469         if (IS_ERR(mnt))
9470                 return NULL;
9471         mntget(mnt);
9472
9473         return mnt;
9474 }
9475
9476 /**
9477  * tracing_init_dentry - initialize top level trace array
9478  *
9479  * This is called when creating files or directories in the tracing
9480  * directory. It is called via fs_initcall() by any of the boot up code
9481  * and returns 0 if the top level tracing directory is set up (else -errno).
9482  */
9483 int tracing_init_dentry(void)
9484 {
9485         struct trace_array *tr = &global_trace;
9486
9487         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9488                 pr_warn("Tracing disabled due to lockdown\n");
9489                 return -EPERM;
9490         }
9491
9492         /* The top level trace array uses NULL as parent */
9493         if (tr->dir)
9494                 return 0;
9495
9496         if (WARN_ON(!tracefs_initialized()))
9497                 return -ENODEV;
9498
9499         /*
9500          * As there may still be users that expect the tracing
9501          * files to exist in debugfs/tracing, we must automount
9502          * the tracefs file system there, so older tools still
9503          * work with the newer kernel.
9504          */
9505         tr->dir = debugfs_create_automount("tracing", NULL,
9506                                            trace_automount, NULL);
9507
9508         return 0;
9509 }
9510
9511 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9512 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9513
9514 static struct workqueue_struct *eval_map_wq __initdata;
9515 static struct work_struct eval_map_work __initdata;
9516
9517 static void __init eval_map_work_func(struct work_struct *work)
9518 {
9519         int len;
9520
9521         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9522         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9523 }
9524
9525 static int __init trace_eval_init(void)
9526 {
9527         INIT_WORK(&eval_map_work, eval_map_work_func);
9528
9529         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9530         if (!eval_map_wq) {
9531                 pr_err("Unable to allocate eval_map_wq\n");
9532                 /* Fall back to doing the work synchronously here */
9533                 eval_map_work_func(&eval_map_work);
9534                 return -ENOMEM;
9535         }
9536
9537         queue_work(eval_map_wq, &eval_map_work);
9538         return 0;
9539 }
9540
9541 static int __init trace_eval_sync(void)
9542 {
9543         /* Make sure the eval map updates are finished */
9544         if (eval_map_wq)
9545                 destroy_workqueue(eval_map_wq);
9546         return 0;
9547 }
9548
9549 late_initcall_sync(trace_eval_sync);
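
/*
 * The eval maps inserted above originate from TRACE_DEFINE_ENUM() (and
 * TRACE_DEFINE_SIZEOF()) uses in trace event headers; they let the "print
 * fmt" strings exported to userspace carry the numeric value of an enum
 * that a user-space parser could not resolve on its own.  A hedged, minimal
 * sketch of such a header with hypothetical names (the usual
 * TRACE_INCLUDE_FILE / CREATE_TRACE_POINTS boilerplate of a real header is
 * omitted):
 */
#include <linux/tracepoint.h>

enum demo_state { DEMO_IDLE, DEMO_BUSY };

/* Record the enum values so trace_insert_eval_map() can resolve them */
TRACE_DEFINE_ENUM(DEMO_IDLE);
TRACE_DEFINE_ENUM(DEMO_BUSY);

TRACE_EVENT(demo_state_change,
        TP_PROTO(int state),
        TP_ARGS(state),
        TP_STRUCT__entry(
                __field(int, state)
        ),
        TP_fast_assign(
                __entry->state = state;
        ),
        TP_printk("state=%s",
                  __print_symbolic(__entry->state,
                                   { DEMO_IDLE, "idle" },
                                   { DEMO_BUSY, "busy" }))
);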
9550
9551
9552 #ifdef CONFIG_MODULES
9553 static void trace_module_add_evals(struct module *mod)
9554 {
9555         if (!mod->num_trace_evals)
9556                 return;
9557
9558         /*
9559          * Modules with bad taint do not have events created, so do
9560          * not bother with their eval maps either.
9561          */
9562         if (trace_module_has_bad_taint(mod))
9563                 return;
9564
9565         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9566 }
9567
9568 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9569 static void trace_module_remove_evals(struct module *mod)
9570 {
9571         union trace_eval_map_item *map;
9572         union trace_eval_map_item **last = &trace_eval_maps;
9573
9574         if (!mod->num_trace_evals)
9575                 return;
9576
9577         mutex_lock(&trace_eval_mutex);
9578
9579         map = trace_eval_maps;
9580
9581         while (map) {
9582                 if (map->head.mod == mod)
9583                         break;
9584                 map = trace_eval_jmp_to_tail(map);
9585                 last = &map->tail.next;
9586                 map = map->tail.next;
9587         }
9588         if (!map)
9589                 goto out;
9590
9591         *last = trace_eval_jmp_to_tail(map)->tail.next;
9592         kfree(map);
9593  out:
9594         mutex_unlock(&trace_eval_mutex);
9595 }
9596 #else
9597 static inline void trace_module_remove_evals(struct module *mod) { }
9598 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
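
/*
 * trace_module_remove_evals() above uses the classic pointer-to-pointer
 * unlink idiom: "last" always holds the address of the link that points at
 * the current node, so removal is a single store and the list head needs no
 * special case.  A standalone sketch of the same idiom on a plain singly
 * linked list (hypothetical struct, without the head/tail union that the
 * eval map list uses):
 */
#include <stdlib.h>

struct demo_node {
        struct demo_node *next;
        int key;
};

static void demo_remove(struct demo_node **head, int key)
{
        struct demo_node **last = head;
        struct demo_node *n = *head;

        while (n) {
                if (n->key == key)
                        break;
                last = &n->next;
                n = n->next;
        }
        if (!n)
                return;

        *last = n->next;        /* unlink works for head and middle alike */
        free(n);
}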
9599
9600 static int trace_module_notify(struct notifier_block *self,
9601                                unsigned long val, void *data)
9602 {
9603         struct module *mod = data;
9604
9605         switch (val) {
9606         case MODULE_STATE_COMING:
9607                 trace_module_add_evals(mod);
9608                 break;
9609         case MODULE_STATE_GOING:
9610                 trace_module_remove_evals(mod);
9611                 break;
9612         }
9613
9614         return NOTIFY_OK;
9615 }
9616
9617 static struct notifier_block trace_module_nb = {
9618         .notifier_call = trace_module_notify,
9619         .priority = 0,
9620 };
9621 #endif /* CONFIG_MODULES */
9622
9623 static __init int tracer_init_tracefs(void)
9624 {
9625         int ret;
9626
9627         trace_access_lock_init();
9628
9629         ret = tracing_init_dentry();
9630         if (ret)
9631                 return 0;
9632
9633         event_trace_init();
9634
9635         init_tracer_tracefs(&global_trace, NULL);
9636         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9637
9638         trace_create_file("tracing_thresh", 0644, NULL,
9639                         &global_trace, &tracing_thresh_fops);
9640
9641         trace_create_file("README", 0444, NULL,
9642                         NULL, &tracing_readme_fops);
9643
9644         trace_create_file("saved_cmdlines", 0444, NULL,
9645                         NULL, &tracing_saved_cmdlines_fops);
9646
9647         trace_create_file("saved_cmdlines_size", 0644, NULL,
9648                           NULL, &tracing_saved_cmdlines_size_fops);
9649
9650         trace_create_file("saved_tgids", 0444, NULL,
9651                         NULL, &tracing_saved_tgids_fops);
9652
9653         trace_eval_init();
9654
9655         trace_create_eval_file(NULL);
9656
9657 #ifdef CONFIG_MODULES
9658         register_module_notifier(&trace_module_nb);
9659 #endif
9660
9661 #ifdef CONFIG_DYNAMIC_FTRACE
9662         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9663                         NULL, &tracing_dyn_info_fops);
9664 #endif
9665
9666         create_trace_instances(NULL);
9667
9668         update_tracer_options(&global_trace);
9669
9670         return 0;
9671 }
9672
9673 fs_initcall(tracer_init_tracefs);
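
/*
 * create_trace_instances() above is what backs the "instances" directory:
 * a mkdir there allocates a new trace_array and populates it with the same
 * per-instance files that init_tracer_tracefs() creates for the global
 * array, and rmdir tears it down again.  A hedged userspace sketch with a
 * hypothetical instance name, assuming tracefs at /sys/kernel/tracing:
 */
#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

static void demo_trace_instance(void)
{
        const char *inst = "/sys/kernel/tracing/instances/demo";

        if (mkdir(inst, 0755) && errno != EEXIST) {
                perror("mkdir");
                return;
        }

        /* The new instance has its own trace, tracing_on, events/, ... */

        if (rmdir(inst))        /* fails with EBUSY if the instance is in use */
                perror("rmdir");
}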
9674
9675 static int trace_panic_handler(struct notifier_block *this,
9676                                unsigned long event, void *unused)
9677 {
9678         if (ftrace_dump_on_oops)
9679                 ftrace_dump(ftrace_dump_on_oops);
9680         return NOTIFY_OK;
9681 }
9682
9683 static struct notifier_block trace_panic_notifier = {
9684         .notifier_call  = trace_panic_handler,
9685         .next           = NULL,
9686         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9687 };
9688
9689 static int trace_die_handler(struct notifier_block *self,
9690                              unsigned long val,
9691                              void *data)
9692 {
9693         switch (val) {
9694         case DIE_OOPS:
9695                 if (ftrace_dump_on_oops)
9696                         ftrace_dump(ftrace_dump_on_oops);
9697                 break;
9698         default:
9699                 break;
9700         }
9701         return NOTIFY_OK;
9702 }
9703
9704 static struct notifier_block trace_die_notifier = {
9705         .notifier_call = trace_die_handler,
9706         .priority = 200
9707 };
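
/*
 * Both notifiers above only act when ftrace_dump_on_oops is set, either
 * with the "ftrace_dump_on_oops[=orig_cpu]" kernel parameter or at run time
 * via the kernel.ftrace_dump_on_oops sysctl.  A hedged userspace sketch
 * that enables it at run time:
 */
#include <stdio.h>

static int enable_ftrace_dump_on_oops(void)
{
        FILE *f = fopen("/proc/sys/kernel/ftrace_dump_on_oops", "w");

        if (!f)
                return -1;
        /* 1 dumps all CPUs (DUMP_ALL); 2 only the oopsing CPU (DUMP_ORIG) */
        fputs("1\n", f);
        fclose(f);
        return 0;
}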
9708
9709 /*
9710  * printk is capped at 1024 bytes, and we really don't need it that big:
9711  * nothing should be printing 1000 characters in a single line anyway.
9712  */
9713 #define TRACE_MAX_PRINT         1000
9714
9715 /*
9716  * Define here KERN_TRACE so that we have one place to modify
9717  * it if we decide to change what log level the ftrace dump
9718  * should be at.
9719  */
9720 #define KERN_TRACE              KERN_EMERG
9721
9722 void
9723 trace_printk_seq(struct trace_seq *s)
9724 {
9725         /* Probably should print a warning here. */
9726         if (s->seq.len >= TRACE_MAX_PRINT)
9727                 s->seq.len = TRACE_MAX_PRINT;
9728
9729         /*
9730          * More paranoid code. Although the buffer size is set to
9731          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9732          * an extra layer of protection.
9733          */
9734         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9735                 s->seq.len = s->seq.size - 1;
9736
9737         /* Should already be NUL-terminated, but we are paranoid. */
9738         s->buffer[s->seq.len] = 0;
9739
9740         printk(KERN_TRACE "%s", s->buffer);
9741
9742         trace_seq_init(s);
9743 }
9744
9745 void trace_init_global_iter(struct trace_iterator *iter)
9746 {
9747         iter->tr = &global_trace;
9748         iter->trace = iter->tr->current_trace;
9749         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9750         iter->array_buffer = &global_trace.array_buffer;
9751
9752         if (iter->trace && iter->trace->open)
9753                 iter->trace->open(iter);
9754
9755         /* Annotate start of buffers if we had overruns */
9756         if (ring_buffer_overruns(iter->array_buffer->buffer))
9757                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9758
9759         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9760         if (trace_clocks[iter->tr->clock_id].in_ns)
9761                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9762 }
9763
9764 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9765 {
9766         /* use static because iter can be a bit big for the stack */
9767         static struct trace_iterator iter;
9768         static atomic_t dump_running;
9769         struct trace_array *tr = &global_trace;
9770         unsigned int old_userobj;
9771         unsigned long flags;
9772         int cnt = 0, cpu;
9773
9774         /* Only allow one dump user at a time. */
9775         if (atomic_inc_return(&dump_running) != 1) {
9776                 atomic_dec(&dump_running);
9777                 return;
9778         }
9779
9780         /*
9781          * Always turn off tracing when we dump.
9782          * We don't need to show trace output of what happens
9783          * between multiple crashes.
9784          *
9785          * If the user does a sysrq-z, then they can re-enable
9786          * tracing with echo 1 > tracing_on.
9787          */
9788         tracing_off();
9789
9790         local_irq_save(flags);
9791         printk_nmi_direct_enter();
9792
9793         /* Simulate the iterator */
9794         trace_init_global_iter(&iter);
9795         /* Cannot use kmalloc for iter.temp and iter.fmt */
9796         iter.temp = static_temp_buf;
9797         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9798         iter.fmt = static_fmt_buf;
9799         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9800
9801         for_each_tracing_cpu(cpu) {
9802                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9803         }
9804
9805         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9806
9807         /* don't look at user memory in panic mode */
9808         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9809
9810         switch (oops_dump_mode) {
9811         case DUMP_ALL:
9812                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9813                 break;
9814         case DUMP_ORIG:
9815                 iter.cpu_file = raw_smp_processor_id();
9816                 break;
9817         case DUMP_NONE:
9818                 goto out_enable;
9819         default:
9820                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9821                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9822         }
9823
9824         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9825
9826         /* Did function tracer already get disabled? */
9827         if (ftrace_is_dead()) {
9828                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9829                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9830         }
9831
9832         /*
9833          * We need to stop all tracing on all CPUs to read
9834          * the next buffer. This is a bit expensive, but is
9835          * not done often. We read everything we can,
9836          * and then release the locks again.
9837          */
9838
9839         while (!trace_empty(&iter)) {
9840
9841                 if (!cnt)
9842                         printk(KERN_TRACE "---------------------------------\n");
9843
9844                 cnt++;
9845
9846                 trace_iterator_reset(&iter);
9847                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9848
9849                 if (trace_find_next_entry_inc(&iter) != NULL) {
9850                         int ret;
9851
9852                         ret = print_trace_line(&iter);
9853                         if (ret != TRACE_TYPE_NO_CONSUME)
9854                                 trace_consume(&iter);
9855                 }
9856                 touch_nmi_watchdog();
9857
9858                 trace_printk_seq(&iter.seq);
9859         }
9860
9861         if (!cnt)
9862                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9863         else
9864                 printk(KERN_TRACE "---------------------------------\n");
9865
9866  out_enable:
9867         tr->trace_flags |= old_userobj;
9868
9869         for_each_tracing_cpu(cpu) {
9870                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9871         }
9872         atomic_dec(&dump_running);
9873         printk_nmi_direct_exit();
9874         local_irq_restore(flags);
9875 }
9876 EXPORT_SYMBOL_GPL(ftrace_dump);
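
/*
 * Besides the oops/panic notifiers and sysrq-z, ftrace_dump() is exported
 * (GPL), so a module can dump the buffers from its own fatal-error path.
 * As noted above, tracing stays off afterwards until it is re-enabled
 * through tracing_on.  A minimal sketch with a hypothetical caller:
 */
#include <linux/ftrace.h>
#include <linux/kernel.h>

static void demo_fatal_error(int err)
{
        pr_emerg("demo: fatal error %d, dumping ftrace buffers\n", err);
        ftrace_dump(DUMP_ALL);  /* DUMP_ORIG would dump only this CPU */
}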
9877
9878 #define WRITE_BUFSIZE  4096
9879
9880 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9881                                 size_t count, loff_t *ppos,
9882                                 int (*createfn)(const char *))
9883 {
9884         char *kbuf, *buf, *tmp;
9885         int ret = 0;
9886         size_t done = 0;
9887         size_t size;
9888
9889         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9890         if (!kbuf)
9891                 return -ENOMEM;
9892
9893         while (done < count) {
9894                 size = count - done;
9895
9896                 if (size >= WRITE_BUFSIZE)
9897                         size = WRITE_BUFSIZE - 1;
9898
9899                 if (copy_from_user(kbuf, buffer + done, size)) {
9900                         ret = -EFAULT;
9901                         goto out;
9902                 }
9903                 kbuf[size] = '\0';
9904                 buf = kbuf;
9905                 do {
9906                         tmp = strchr(buf, '\n');
9907                         if (tmp) {
9908                                 *tmp = '\0';
9909                                 size = tmp - buf + 1;
9910                         } else {
9911                                 size = strlen(buf);
9912                                 if (done + size < count) {
9913                                         if (buf != kbuf)
9914                                                 break;
9915                                         /* A line can be at most WRITE_BUFSIZE - 2 bytes ('\n' + '\0') */
9916                                         pr_warn("Line length is too long: Should be less than %d\n",
9917                                                 WRITE_BUFSIZE - 2);
9918                                         ret = -EINVAL;
9919                                         goto out;
9920                                 }
9921                         }
9922                         done += size;
9923
9924                         /* Remove comments */
9925                         tmp = strchr(buf, '#');
9926
9927                         if (tmp)
9928                                 *tmp = '\0';
9929
9930                         ret = createfn(buf);
9931                         if (ret)
9932                                 goto out;
9933                         buf += size;
9934
9935                 } while (done < count);
9936         }
9937         ret = done;
9938
9939 out:
9940         kfree(kbuf);
9941
9942         return ret;
9943 }
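
/*
 * trace_parse_run_command() is the write helper behind dynamic-event files
 * such as kprobe_events: the user buffer is copied in WRITE_BUFSIZE chunks,
 * split on newlines, '#' comments are stripped, and each resulting command
 * is handed to createfn().  A hedged userspace sketch that leans on exactly
 * that parsing, using a hypothetical probe name (assumes kprobe events are
 * available and do_sys_openat2 is a probeable symbol on this kernel):
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int demo_toggle_kprobe(void)
{
        const char *add = "p:demo_open do_sys_openat2  # trailing comment is stripped\n";
        const char *del = "-:demo_open\n";
        int fd = open("/sys/kernel/tracing/kprobe_events", O_WRONLY | O_APPEND);

        if (fd < 0)
                return -1;

        if (write(fd, add, strlen(add)) < 0)            /* install the probe */
                perror("add probe");
        else if (write(fd, del, strlen(del)) < 0)       /* and remove it again */
                perror("del probe");

        close(fd);
        return 0;
}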
9944
9945 __init static int tracer_alloc_buffers(void)
9946 {
9947         int ring_buf_size;
9948         int ret = -ENOMEM;
9949
9950
9951         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9952                 pr_warn("Tracing disabled due to lockdown\n");
9953                 return -EPERM;
9954         }
9955
9956         /*
9957          * Make sure we don't accidentally add more trace options
9958          * than we have bits for.
9959          */
9960         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9961
9962         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9963                 goto out;
9964
9965         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9966                 goto out_free_buffer_mask;
9967
9968         /* Only allocate trace_printk buffers if a trace_printk exists */
9969         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9970                 /* Must be called before global_trace.buffer is allocated */
9971                 trace_printk_init_buffers();
9972
9973         /* To save memory, keep the ring buffer size to its minimum */
9974         if (ring_buffer_expanded)
9975                 ring_buf_size = trace_buf_size;
9976         else
9977                 ring_buf_size = 1;
9978
9979         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9980         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9981
9982         raw_spin_lock_init(&global_trace.start_lock);
9983
9984         /*
9985          * The prepare callback allocates some memory for the ring buffer. We
9986          * don't free the buffer if the CPU goes down. If we were to free
9987          * the buffer, then the user would lose any trace that was in the
9988          * buffer. The memory will be removed once the "instance" is removed.
9989          */
9990         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9991                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9992                                       NULL);
9993         if (ret < 0)
9994                 goto out_free_cpumask;
9995         /* Used for event triggers */
9996         ret = -ENOMEM;
9997         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9998         if (!temp_buffer)
9999                 goto out_rm_hp_state;
10000
10001         if (trace_create_savedcmd() < 0)
10002                 goto out_free_temp_buffer;
10003
10004         /* TODO: make the number of buffers hot pluggable with CPUS */
10005         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10006                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10007                 goto out_free_savedcmd;
10008         }
10009
10010         if (global_trace.buffer_disabled)
10011                 tracing_off();
10012
10013         if (trace_boot_clock) {
10014                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10015                 if (ret < 0)
10016                         pr_warn("Trace clock %s not defined, going back to default\n",
10017                                 trace_boot_clock);
10018         }
10019
10020         /*
10021          * register_tracer() might reference current_trace, so it
10022          * needs to be set before we register anything. This is
10023          * just a bootstrap of current_trace anyway.
10024          */
10025         global_trace.current_trace = &nop_trace;
10026
10027         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10028
10029         ftrace_init_global_array_ops(&global_trace);
10030
10031         init_trace_flags_index(&global_trace);
10032
10033         register_tracer(&nop_trace);
10034
10035         /* Function tracing may start here (via kernel command line) */
10036         init_function_trace();
10037
10038         /* All seems OK, enable tracing */
10039         tracing_disabled = 0;
10040
10041         atomic_notifier_chain_register(&panic_notifier_list,
10042                                        &trace_panic_notifier);
10043
10044         register_die_notifier(&trace_die_notifier);
10045
10046         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10047
10048         INIT_LIST_HEAD(&global_trace.systems);
10049         INIT_LIST_HEAD(&global_trace.events);
10050         INIT_LIST_HEAD(&global_trace.hist_vars);
10051         INIT_LIST_HEAD(&global_trace.err_log);
10052         list_add(&global_trace.list, &ftrace_trace_arrays);
10053
10054         apply_trace_boot_options();
10055
10056         register_snapshot_cmd();
10057
10058         test_can_verify();
10059
10060         return 0;
10061
10062 out_free_savedcmd:
10063         free_saved_cmdlines_buffer(savedcmd);
10064 out_free_temp_buffer:
10065         ring_buffer_free(temp_buffer);
10066 out_rm_hp_state:
10067         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10068 out_free_cpumask:
10069         free_cpumask_var(global_trace.tracing_cpumask);
10070 out_free_buffer_mask:
10071         free_cpumask_var(tracing_buffer_mask);
10072 out:
10073         return ret;
10074 }
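
/*
 * The error handling above is the usual kernel unwind ladder: resources are
 * released in the reverse order of acquisition, and each failure jumps to
 * the label that frees exactly what has been set up so far.  A stripped-down
 * sketch of the idiom with hypothetical resources:
 */
#include <stdlib.h>

struct demo_res { int dummy; };

static struct demo_res *demo_acquire(void)
{
        return malloc(sizeof(struct demo_res));
}

static int demo_setup(struct demo_res **a, struct demo_res **b, struct demo_res **c)
{
        *a = demo_acquire();
        if (!*a)
                goto out;
        *b = demo_acquire();
        if (!*b)
                goto out_free_a;
        *c = demo_acquire();
        if (!*c)
                goto out_free_b;
        return 0;

out_free_b:
        free(*b);
out_free_a:
        free(*a);
out:
        return -1;
}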
10075
10076 void __init early_trace_init(void)
10077 {
10078         if (tracepoint_printk) {
10079                 tracepoint_print_iter =
10080                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10081                 if (MEM_FAIL(!tracepoint_print_iter,
10082                              "Failed to allocate trace iterator\n"))
10083                         tracepoint_printk = 0;
10084                 else
10085                         static_key_enable(&tracepoint_printk_key.key);
10086         }
10087         tracer_alloc_buffers();
10088 }
10089
10090 void __init trace_init(void)
10091 {
10092         trace_event_init();
10093 }
10094
10095 __init static void clear_boot_tracer(void)
10096 {
10097         /*
10098          * The default bootup tracer name lives in an init section that
10099          * is freed after boot. This function runs at late_initcall time;
10100          * if the boot tracer was never registered by then, clear the
10101          * pointer to prevent a later registration from accessing the
10102          * buffer that is about to be freed.
10103          */
10104         if (!default_bootup_tracer)
10105                 return;
10106
10107         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10108                default_bootup_tracer);
10109         default_bootup_tracer = NULL;
10110 }
10111
10112 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10113 __init static void tracing_set_default_clock(void)
10114 {
10115         /* sched_clock_stable() is determined in late_initcall */
10116         if (!trace_boot_clock && !sched_clock_stable()) {
10117                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10118                         pr_warn("Can not set tracing clock due to lockdown\n");
10119                         return;
10120                 }
10121
10122                 printk(KERN_WARNING
10123                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10124                        "If you want to keep using the local clock, then add:\n"
10125                        "  \"trace_clock=local\"\n"
10126                        "on the kernel command line\n");
10127                 tracing_set_clock(&global_trace, "global");
10128         }
10129 }
10130 #else
10131 static inline void tracing_set_default_clock(void) { }
10132 #endif
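
/*
 * The same switch is available at run time through the trace_clock file,
 * which lists every registered clock and marks the active one in brackets
 * (e.g. "[local] global counter ...").  A hedged userspace sketch, assuming
 * tracefs at /sys/kernel/tracing, that picks the "global" clock just as
 * tracing_set_default_clock() does above:
 */
#include <stdio.h>

static int demo_use_global_clock(void)
{
        char clocks[256];
        FILE *f = fopen("/sys/kernel/tracing/trace_clock", "r+");

        if (!f)
                return -1;

        if (fgets(clocks, sizeof(clocks), f))
                printf("available clocks: %s", clocks);

        rewind(f);                      /* required before switching to writing */
        fputs("global\n", f);           /* same effect as boot-time trace_clock=global */
        fclose(f);
        return 0;
}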
10133
10134 __init static int late_trace_init(void)
10135 {
10136         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10137                 static_key_disable(&tracepoint_printk_key.key);
10138                 tracepoint_printk = 0;
10139         }
10140
10141         tracing_set_default_clock();
10142         clear_boot_tracer();
10143         return 0;
10144 }
10145
10146 late_initcall_sync(late_trace_init);