Merge tag 'arm-soc-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring-buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }     /* single, zero-initialized entry */
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
/* Leading element of a saved eval-map array. */
144 struct trace_eval_map_head {
145         struct module                   *mod;           /* owning module, if not built in */
146         unsigned long                   length;         /* count of the saved maps */
147 };
148
149 union trace_eval_map_item;
150
/* Trailing element of a saved eval-map array. */
151 struct trace_eval_map_tail {
152         /*
153          * NOTE(review): this comment used to say that "end" is first,
154          * but "next" is the first member as declared here. "end" points
155          * to NULL as it must be different than "mod" or "eval_string".
            * TODO confirm which member of the other union arms it overlays.
            */
156         union trace_eval_map_item       *next;  /* next array of saved items */
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
/* One array slot: a regular map entry, or the head/tail bookkeeping item. */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;    /* a saved map entry */
171         struct trace_eval_map_head      head;   /* used by the first array element */
172         struct trace_eval_map_tail      tail;   /* used by the last array element */
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are entering export into the list but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
/* Enable the static keys @export needs, then insert it into @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
373
/*
 * Remove @export from @list and drop the static keys it was holding.
 *
 * The keys are only decremented when the export was actually found on
 * the list. Otherwise an attempt to remove an export that was never
 * added would decrement keys that ftrace_exports_enable() never
 * incremented for it, leaving the counts unbalanced.
 *
 * Returns 0 on success, or -1 if @export is not on the list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* @v carries pid + 1; undo the offset before printing */
	unsigned long stored = (unsigned long)v;

	seq_printf(m, "%lu\n", stored - 1);

	return 0;
}
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all or nothing
698          * operation. Always create a new array when adding new pids by
699          * the user. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If the dump on oops happens, it will be much appreciated
821  * to not have to wait for all that output. Anyway this can be
822  * boot time and run time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * serialize the access of the ring buffer
838  *
839  * ring buffer serializes readers, but it is low level protection.
840  * The validity of the events (which are returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not reader page) in ring buffer, and this page will be rewritten
847  *      by events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to system.
850  *
851  * These primitives allow multi process access to different cpu ring buffer
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multi read-only access are also serialized.
856  */
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
/* Non-SMP variant: nothing to do, access_lock is statically defined. */
static inline void trace_access_lock_init(void)
{
	/* intentionally empty */
}
915
916 #endif
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
/* Stub used when CONFIG_STACKTRACE is not set: records nothing. */
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
	/* intentionally empty */
}
/*
 * Stub used when CONFIG_STACKTRACE is not set: records nothing.
 *
 * @trace_ctx is an unsigned int, as in the CONFIG_STACKTRACE prototype
 * of this function, so both configurations share one signature.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff), that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races of where it gets disabled but we still do a record.
975          * As the check is in the fast path of the tracers, it is more
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Same as tracing_snapshot(), except that the snapshot only happens
 * when the cond_snapshot.update() implementation, given @cond_data,
 * returns true. A true return means update() used @cond_data to decide
 * that the snapshot should be taken and, presumably, saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	/* Thin exported wrapper around the static implementation. */
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                         struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer. instead, resize it because
1239          * The max_tr ring buffer has some state (e.g. ring->clock) and
1240          * we want preserve it.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * when it isn't already. Use this only where it is safe to sleep, as
 * the allocation may sleep.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer,
 * so the live trace can be captured on some condition while tracing
 * continues. Nothing is snapshotted when the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
/* Snapshot support is compiled out: only warn (once) that it was requested. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Snapshot support is compiled out: @tr and @cond_data are ignored, warn once. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Snapshot support is compiled out: nothing to allocate or snapshot. */
void tracing_snapshot_alloc(void)
{
	/* Called only for the warning it emits. */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/* Snapshot support is compiled out: nothing to disable, report 0. */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
/* Convert nanoseconds to whole microseconds (integer division by 1000). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	unsigned long usecs = nsecs / 1000;

	return usecs;
}
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534         TRACE_FLAGS
1535         NULL
1536 };
1537
1538 static struct {
1539         u64 (*func)(void);
1540         const char *name;
1541         int in_ns;              /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543         { trace_clock_local,            "local",        1 },
1544         { trace_clock_global,           "global",       1 },
1545         { trace_clock_counter,          "counter",      0 },
1546         { trace_clock_jiffies,          "uptime",       0 },
1547         { trace_clock,                  "perf",         1 },
1548         { ktime_get_mono_fast_ns,       "mono",         1 },
1549         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550         { ktime_get_boot_fast_ns,       "boot",         1 },
1551         ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598         size_t cnt, loff_t *ppos)
1599 {
1600         char ch;
1601         size_t read = 0;
1602         ssize_t ret;
1603
1604         if (!*ppos)
1605                 trace_parser_clear(parser);
1606
1607         ret = get_user(ch, ubuf++);
1608         if (ret)
1609                 goto out;
1610
1611         read++;
1612         cnt--;
1613
1614         /*
1615          * The parser is not finished with the last write,
1616          * continue reading the user input without skipping spaces.
1617          */
1618         if (!parser->cont) {
1619                 /* skip white space */
1620                 while (cnt && isspace(ch)) {
1621                         ret = get_user(ch, ubuf++);
1622                         if (ret)
1623                                 goto out;
1624                         read++;
1625                         cnt--;
1626                 }
1627
1628                 parser->idx = 0;
1629
1630                 /* only spaces were written */
1631                 if (isspace(ch) || !ch) {
1632                         *ppos += read;
1633                         ret = read;
1634                         goto out;
1635                 }
1636         }
1637
1638         /* read the non-space input */
1639         while (cnt && !isspace(ch) && ch) {
1640                 if (parser->idx < parser->size - 1)
1641                         parser->buffer[parser->idx++] = ch;
1642                 else {
1643                         ret = -EINVAL;
1644                         goto out;
1645                 }
1646                 ret = get_user(ch, ubuf++);
1647                 if (ret)
1648                         goto out;
1649                 read++;
1650                 cnt--;
1651         }
1652
1653         /* We either got finished input or we have to wait for another call. */
1654         if (isspace(ch) || !ch) {
1655                 parser->buffer[parser->idx] = 0;
1656                 parser->cont = false;
1657         } else if (parser->idx < parser->size - 1) {
1658                 parser->cont = true;
1659                 parser->buffer[parser->idx++] = ch;
1660                 /* Make sure the parsed string always terminates with '\0'. */
1661                 parser->buffer[parser->idx] = 0;
1662         } else {
1663                 ret = -EINVAL;
1664                 goto out;
1665         }
1666
1667         *ppos += read;
1668         ret = read;
1669
1670 out:
1671         return ret;
1672 }
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
1691 unsigned long __read_mostly     tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752
/*
 * Without LATENCY_FS_NOTIFY only the file itself is created. @tr is
 * parenthesised so that any pointer expression can be passed safely.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
			  &(tr)->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this tasks comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static, they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * Set to true by init_trace_selftests(). While false,
 * run_tracer_selftest() queues the test instead of running it.
 */
static bool selftests_can_run;

/* One tracer whose selftest has been postponed */
struct trace_selftests {
        struct list_head                list;   /* entry in postponed_selftests */
        struct tracer                   *type;  /* tracer still to be tested */
};

/* Selftests queued by save_selftest(), drained by init_trace_selftests() */
static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled, so
2014                  * lets make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
/*
 * Variant used when CONFIG_FTRACE_STARTUP_TEST is not set: there is no
 * selftest to run, so every tracer is reported as passing (returns 0).
 */
static inline int run_tracer_selftest(struct tracer *type)
{
        return 0;
}
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051 static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /*allocate a dummy tracer_flags*/
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* disable other selftests, since this will break it. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203
/* Number of comm slots allocated by trace_create_savedcmd() */
#define SAVED_CMDLINES_DEFAULT 128
/* Marks a map entry that refers to no slot / no pid */
#define NO_CMDLINE_MAP UINT_MAX
/* Taken by trace_save_cmdline() (trylock only) and trace_find_cmdline() */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* Cache of recently seen task comms, looked up by pid */
struct saved_cmdlines_buffer {
        /* indexed by pid & (PID_MAX_DEFAULT - 1): slot number or NO_CMDLINE_MAP */
        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
        /* indexed by slot: the full pid whose comm the slot holds */
        unsigned *map_cmdline_to_pid;
        /* number of slots in map_cmdline_to_pid and saved_cmdlines */
        unsigned cmdline_num;
        /* slot handed out most recently; the next one is (idx + 1) % cmdline_num */
        int cmdline_idx;
        /* cmdline_num entries of TASK_COMM_LEN bytes each */
        char *saved_cmdlines;
};
/* The one cache instance, allocated by trace_create_savedcmd() */
static struct saved_cmdlines_buffer *savedcmd;
2215
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227                                     struct saved_cmdlines_buffer *s)
2228 {
2229         s->map_cmdline_to_pid = kmalloc_array(val,
2230                                               sizeof(*s->map_cmdline_to_pid),
2231                                               GFP_KERNEL);
2232         if (!s->map_cmdline_to_pid)
2233                 return -ENOMEM;
2234
2235         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236         if (!s->saved_cmdlines) {
2237                 kfree(s->map_cmdline_to_pid);
2238                 return -ENOMEM;
2239         }
2240
2241         s->cmdline_idx = 0;
2242         s->cmdline_num = val;
2243         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244                sizeof(s->map_pid_to_cmdline));
2245         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246                val * sizeof(*s->map_cmdline_to_pid));
2247
2248         return 0;
2249 }
2250
2251 static int trace_create_savedcmd(void)
2252 {
2253         int ret;
2254
2255         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256         if (!savedcmd)
2257                 return -ENOMEM;
2258
2259         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260         if (ret < 0) {
2261                 kfree(savedcmd);
2262                 savedcmd = NULL;
2263                 return -ENOMEM;
2264         }
2265
2266         return 0;
2267 }
2268
/*
 * Return the stop count of the global trace array: non-zero while
 * tracing_stop() calls have not yet been balanced by tracing_start().
 */
int is_tracing_stopped(void)
{
        return global_trace.stop_count;
}
2273
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282         struct trace_buffer *buffer;
2283         unsigned long flags;
2284
2285         if (tracing_disabled)
2286                 return;
2287
2288         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289         if (--global_trace.stop_count) {
2290                 if (global_trace.stop_count < 0) {
2291                         /* Someone screwed up their debugging */
2292                         WARN_ON_ONCE(1);
2293                         global_trace.stop_count = 0;
2294                 }
2295                 goto out;
2296         }
2297
2298         /* Prevent the buffers from switching */
2299         arch_spin_lock(&global_trace.max_lock);
2300
2301         buffer = global_trace.array_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306         buffer = global_trace.max_buffer.buffer;
2307         if (buffer)
2308                 ring_buffer_record_enable(buffer);
2309 #endif
2310
2311         arch_spin_unlock(&global_trace.max_lock);
2312
2313  out:
2314         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319         struct trace_buffer *buffer;
2320         unsigned long flags;
2321
2322         if (tracing_disabled)
2323                 return;
2324
2325         /* If global, we need to also start the max tracer */
2326         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                 return tracing_start();
2328
2329         raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331         if (--tr->stop_count) {
2332                 if (tr->stop_count < 0) {
2333                         /* Someone screwed up their debugging */
2334                         WARN_ON_ONCE(1);
2335                         tr->stop_count = 0;
2336                 }
2337                 goto out;
2338         }
2339
2340         buffer = tr->array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344  out:
2345         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356         struct trace_buffer *buffer;
2357         unsigned long flags;
2358
2359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360         if (global_trace.stop_count++)
2361                 goto out;
2362
2363         /* Prevent the buffers from switching */
2364         arch_spin_lock(&global_trace.max_lock);
2365
2366         buffer = global_trace.array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371         buffer = global_trace.max_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374 #endif
2375
2376         arch_spin_unlock(&global_trace.max_lock);
2377
2378  out:
2379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384         struct trace_buffer *buffer;
2385         unsigned long flags;
2386
2387         /* If global, we need to also stop the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_stop();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392         if (tr->stop_count++)
2393                 goto out;
2394
2395         buffer = tr->array_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_disable(buffer);
2398
2399  out:
2400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405         unsigned tpid, idx;
2406
2407         /* treat recording of idle task as a success */
2408         if (!tsk->pid)
2409                 return 1;
2410
2411         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413         /*
2414          * It's not the end of the world if we don't get
2415          * the lock, but we also don't want to spin
2416          * nor do we want to disable interrupts,
2417          * so if we miss here, then better luck next time.
2418          */
2419         if (!arch_spin_trylock(&trace_cmdline_lock))
2420                 return 0;
2421
2422         idx = savedcmd->map_pid_to_cmdline[tpid];
2423         if (idx == NO_CMDLINE_MAP) {
2424                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                 savedcmd->cmdline_idx = idx;
2428         }
2429
2430         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441         int tpid;
2442
2443         if (!pid) {
2444                 strcpy(comm, "<idle>");
2445                 return;
2446         }
2447
2448         if (WARN_ON_ONCE(pid < 0)) {
2449                 strcpy(comm, "<XXX>");
2450                 return;
2451         }
2452
2453         tpid = pid & (PID_MAX_DEFAULT - 1);
2454         map = savedcmd->map_pid_to_cmdline[tpid];
2455         if (map != NO_CMDLINE_MAP) {
2456                 tpid = savedcmd->map_cmdline_to_pid[map];
2457                 if (tpid == pid) {
2458                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                         return;
2460                 }
2461         }
2462         strcpy(comm, "<...>");
2463 }
2464
/*
 * Copy the saved comm for @pid into @comm (see __trace_find_cmdline()
 * for the placeholder strings). The lookup runs with preemption
 * disabled and trace_cmdline_lock held.
 */
void trace_find_cmdline(int pid, char comm[])
{
        preempt_disable();
        arch_spin_lock(&trace_cmdline_lock);

        __trace_find_cmdline(pid, comm);

        arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
}
2475
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478         /*
2479          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480          * if we observe a non-NULL tgid_map then we also observe the correct
2481          * tgid_map_max.
2482          */
2483         int *map = smp_load_acquire(&tgid_map);
2484
2485         if (unlikely(!map || pid > tgid_map_max))
2486                 return NULL;
2487
2488         return &map[pid];
2489 }
2490
/* Return the tgid recorded for @pid, or 0 when there is no map slot for it */
int trace_find_tgid(int pid)
{
        int *slot = trace_find_tgid_ptr(pid);

        if (!slot)
                return 0;

        return *slot;
}
2497
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500         int *ptr;
2501
2502         /* treat recording of idle task as a success */
2503         if (!tsk->pid)
2504                 return 1;
2505
2506         ptr = trace_find_tgid_ptr(tsk->pid);
2507         if (!ptr)
2508                 return 0;
2509
2510         *ptr = tsk->tgid;
2511         return 1;
2512 }
2513
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                 return true;
2518         if (!__this_cpu_read(trace_taskinfo_save))
2519                 return true;
2520         return false;
2521 }
2522
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532         bool done;
2533
2534         if (tracing_record_taskinfo_skip(flags))
2535                 return;
2536
2537         /*
2538          * Record as much task information as possible. If some fail, continue
2539          * to try to record the others.
2540          */
2541         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544         /* If recording any information failed, retry again soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                           struct task_struct *next, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576         /* If recording any information failed, retry again soon. */
2577         if (!done)
2578                 return;
2579
2580         __this_cpu_write(trace_taskinfo_save, false);
2581 }
2582
/* Helpers to record a specific task information */

/* Record only the comm of @task (TRACE_RECORD_CMDLINE) */
void tracing_record_cmdline(struct task_struct *task)
{
        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2588
/* Record only the tgid of @task (TRACE_RECORD_TGID) */
void tracing_record_tgid(struct task_struct *task)
{
        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2593
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601         return trace_seq_has_overflowed(s) ?
2602                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608         unsigned int trace_flags = irqs_status;
2609         unsigned int pc;
2610
2611         pc = preempt_count();
2612
2613         if (pc & NMI_MASK)
2614                 trace_flags |= TRACE_FLAG_NMI;
2615         if (pc & HARDIRQ_MASK)
2616                 trace_flags |= TRACE_FLAG_HARDIRQ;
2617         if (in_serving_softirq())
2618                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620         if (tif_need_resched())
2621                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622         if (test_preempt_need_resched())
2623                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624         return (trace_flags << 16) | (pc & 0xff);
2625 }
2626
/*
 * Non-static wrapper around __trace_buffer_lock_reserve(): all four
 * arguments are passed through unchanged and its result is returned.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
                          int type,
                          unsigned long len,
                          unsigned int trace_ctx)
{
        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2635
/* Per-CPU temp event page; set by trace_buffered_event_enable(), NULL when unused */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU use count; trace_event_buffer_lock_reserve() only takes the page when its increment yields 1 */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet undone by a disable */
static int trace_buffered_event_ref;
2639
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656         struct ring_buffer_event *event;
2657         struct page *page;
2658         int cpu;
2659
2660         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662         if (trace_buffered_event_ref++)
2663                 return;
2664
2665         for_each_tracing_cpu(cpu) {
2666                 page = alloc_pages_node(cpu_to_node(cpu),
2667                                         GFP_KERNEL | __GFP_NORETRY, 0);
2668                 if (!page)
2669                         goto failed;
2670
2671                 event = page_address(page);
2672                 memset(event, 0, sizeof(*event));
2673
2674                 per_cpu(trace_buffered_event, cpu) = event;
2675
2676                 preempt_disable();
2677                 if (cpu == smp_processor_id() &&
2678                     __this_cpu_read(trace_buffered_event) !=
2679                     per_cpu(trace_buffered_event, cpu))
2680                         WARN_ON_ONCE(1);
2681                 preempt_enable();
2682         }
2683
2684         return;
2685  failed:
2686         trace_buffered_event_disable();
2687 }
2688
/*
 * Cross-CPU callback used by trace_buffered_event_disable(): lowers this
 * CPU's trace_buffered_event_cnt again, undoing the increment done by
 * disable_trace_buffered_event(). @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
        /* Probably not needed, but do it anyway */
        smp_rmb();
        this_cpu_dec(trace_buffered_event_cnt);
}
2695
/*
 * Cross-CPU callback used by trace_buffered_event_disable(): raises this
 * CPU's trace_buffered_event_cnt. While it is raised, the increment in
 * trace_event_buffer_lock_reserve() no longer yields 1, so the per cpu
 * buffer is not handed out. @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
        this_cpu_inc(trace_buffered_event_cnt);
}
2700
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711         int cpu;
2712
2713         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                 return;
2717
2718         if (--trace_buffered_event_ref)
2719                 return;
2720
2721         preempt_disable();
2722         /* For each CPU, set the buffer as used. */
2723         smp_call_function_many(tracing_buffer_mask,
2724                                disable_trace_buffered_event, NULL, 1);
2725         preempt_enable();
2726
2727         /* Wait for all current users to finish */
2728         synchronize_rcu();
2729
2730         for_each_tracing_cpu(cpu) {
2731                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                 per_cpu(trace_buffered_event, cpu) = NULL;
2733         }
2734         /*
2735          * Make sure trace_buffered_event is NULL before clearing
2736          * trace_buffered_event_cnt.
2737          */
2738         smp_wmb();
2739
2740         preempt_disable();
2741         /* Do the work on each cpu */
2742         smp_call_function_many(tracing_buffer_mask,
2743                                enable_trace_buffered_event, NULL, 1);
2744         preempt_enable();
2745 }
2746
2747 static struct trace_buffer *temp_buffer;
2748
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                           struct trace_event_file *trace_file,
2752                           int type, unsigned long len,
2753                           unsigned int trace_ctx)
2754 {
2755         struct ring_buffer_event *entry;
2756         struct trace_array *tr = trace_file->tr;
2757         int val;
2758
2759         *current_rb = tr->array_buffer.buffer;
2760
2761         if (!tr->no_filter_buffering_ref &&
2762             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763             (entry = this_cpu_read(trace_buffered_event))) {
2764                 /*
2765                  * Filtering is on, so try to use the per cpu buffer first.
2766                  * This buffer will simulate a ring_buffer_event,
2767                  * where the type_len is zero and the array[0] will
2768                  * hold the full length.
2769                  * (see include/linux/ring-buffer.h for details on
2770                  *  how the ring_buffer_event is structured).
2771                  *
2772                  * Using a temp buffer during filtering and copying it
2773                  * on a matched filter is quicker than writing directly
2774                  * into the ring buffer and then discarding it when
2775                  * it doesn't match. That is because the discard
2776                  * requires several atomic operations to get right.
2777                  * Copying on match and doing nothing on a failed match
2778                  * is still quicker than no copy on match, but having
2779                  * to discard out of the ring buffer on a failed match.
2780                  */
2781                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                 /*
2786                  * Preemption is disabled, but interrupts and NMIs
2787                  * can still come in now. If that happens after
2788                  * the above increment, then it will have to go
2789                  * back to the old method of allocating the event
2790                  * on the ring buffer, and if the filter fails, it
2791                  * will have to call ring_buffer_discard_commit()
2792                  * to remove it.
2793                  *
2794                  * Need to also check the unlikely case that the
2795                  * length is bigger than the temp buffer size.
2796                  * If that happens, then the reserve is pretty much
2797                  * guaranteed to fail, as the ring buffer currently
2798                  * only allows events less than a page. But that may
2799                  * change in the future, so let the ring buffer reserve
2800                  * handle the failure in that case.
2801                  */
2802                 if (val == 1 && likely(len <= max_len)) {
2803                         trace_event_setup(entry, type, trace_ctx);
2804                         entry->array[0] = len;
2805                         return entry;
2806                 }
2807                 this_cpu_dec(trace_buffered_event_cnt);
2808         }
2809
2810         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                             trace_ctx);
2812         /*
2813          * If tracing is off, but we have triggers enabled
2814          * we still need to look at the event data. Use the temp_buffer
2815          * to store the trace event for the trigger to use. It's recursive
2816          * safe and will not be recorded anywhere.
2817          */
2818         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                 *current_rb = temp_buffer;
2820                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                     trace_ctx);
2822         }
2823         return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832         struct trace_event_call *event_call;
2833         struct trace_event_file *file;
2834         struct trace_event *event;
2835         unsigned long flags;
2836         struct trace_iterator *iter = tracepoint_print_iter;
2837
2838         /* We should never get here if iter is NULL */
2839         if (WARN_ON_ONCE(!iter))
2840                 return;
2841
2842         event_call = fbuffer->trace_file->event_call;
2843         if (!event_call || !event_call->event.funcs ||
2844             !event_call->event.funcs->trace)
2845                 return;
2846
2847         file = fbuffer->trace_file;
2848         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850              !filter_match_preds(file->filter, fbuffer->entry)))
2851                 return;
2852
2853         event = &fbuffer->trace_file->event_call->event;
2854
2855         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856         trace_seq_init(&iter->seq);
2857         iter->ent = fbuffer->entry;
2858         event_call->event.funcs->trace(iter, 0, event);
2859         trace_seq_putc(&iter->seq, 0);
2860         printk("%s", iter->seq.buffer);
2861
2862         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                              void *buffer, size_t *lenp,
2867                              loff_t *ppos)
2868 {
2869         int save_tracepoint_printk;
2870         int ret;
2871
2872         mutex_lock(&tracepoint_printk_mutex);
2873         save_tracepoint_printk = tracepoint_printk;
2874
2875         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877         /*
2878          * This will force exiting early, as tracepoint_printk
2879          * is always zero when tracepoint_printk_iter is not allocated
2880          */
2881         if (!tracepoint_print_iter)
2882                 tracepoint_printk = 0;
2883
2884         if (save_tracepoint_printk == tracepoint_printk)
2885                 goto out;
2886
2887         if (tracepoint_printk)
2888                 static_key_enable(&tracepoint_printk_key.key);
2889         else
2890                 static_key_disable(&tracepoint_printk_key.key);
2891
2892  out:
2893         mutex_unlock(&tracepoint_printk_mutex);
2894
2895         return ret;
2896 }
2897
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900         if (static_key_false(&tracepoint_printk_key.key))
2901                 output_printk(fbuffer);
2902
2903         if (static_branch_unlikely(&trace_event_exports_enabled))
2904                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2905         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2906                                     fbuffer->event, fbuffer->entry,
2907                                     fbuffer->trace_ctx, fbuffer->regs);
2908 }
2909 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2910
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
#define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack
 * traces through ftrace_trace_stack() and ftrace_trace_userstack().
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	int skip;

	__buffer_unlock_commit(buffer, event);

	/*
	 * With no regs to unwind from, skip the helper frames listed
	 * above. blktrace, the wakeup tracer and mmiotrace can reach
	 * this point as well; losing a function or two there is
	 * acceptable, they are not that meaningful.
	 */
	if (regs)
		skip = 0;
	else
		skip = STACK_SKIP;

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2937
/*
 * Commit @event to @buffer without recording any stack trace; the
 * stack-less counterpart of trace_buffer_unlock_commit_regs().
 */
void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
					struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2947
2948 void
2949 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2950                parent_ip, unsigned int trace_ctx)
2951 {
2952         struct trace_event_call *call = &event_function;
2953         struct trace_buffer *buffer = tr->array_buffer.buffer;
2954         struct ring_buffer_event *event;
2955         struct ftrace_entry *entry;
2956
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2958                                             trace_ctx);
2959         if (!event)
2960                 return;
2961         entry   = ring_buffer_event_data(event);
2962         entry->ip                       = ip;
2963         entry->parent_ip                = parent_ip;
2964
2965         if (!call_filter_check_discard(call, entry, buffer, event)) {
2966                 if (static_branch_unlikely(&trace_function_exports_enabled))
2967                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2968                 __buffer_unlock_commit(buffer, event);
2969         }
2970 }
2971
2972 #ifdef CONFIG_STACKTRACE
2973
2974 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2975 #define FTRACE_KSTACK_NESTING   4
2976
2977 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2978
2979 struct ftrace_stack {
2980         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2981 };
2982
2983
2984 struct ftrace_stacks {
2985         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2986 };
2987
2988 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2989 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2990
2991 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2992                                  unsigned int trace_ctx,
2993                                  int skip, struct pt_regs *regs)
2994 {
2995         struct trace_event_call *call = &event_kernel_stack;
2996         struct ring_buffer_event *event;
2997         unsigned int size, nr_entries;
2998         struct ftrace_stack *fstack;
2999         struct stack_entry *entry;
3000         int stackidx;
3001
3002         /*
3003          * Add one, for this function and the call to save_stack_trace()
3004          * If regs is set, then these functions will not be in the way.
3005          */
3006 #ifndef CONFIG_UNWINDER_ORC
3007         if (!regs)
3008                 skip++;
3009 #endif
3010
3011         preempt_disable_notrace();
3012
3013         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3014
3015         /* This should never happen. If it does, yell once and skip */
3016         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3017                 goto out;
3018
3019         /*
3020          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3021          * interrupt will either see the value pre increment or post
3022          * increment. If the interrupt happens pre increment it will have
3023          * restored the counter when it returns.  We just need a barrier to
3024          * keep gcc from moving things around.
3025          */
3026         barrier();
3027
3028         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3029         size = ARRAY_SIZE(fstack->calls);
3030
3031         if (regs) {
3032                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3033                                                    size, skip);
3034         } else {
3035                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3036         }
3037
3038         size = nr_entries * sizeof(unsigned long);
3039         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3040                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3041                                     trace_ctx);
3042         if (!event)
3043                 goto out;
3044         entry = ring_buffer_event_data(event);
3045
3046         memcpy(&entry->caller, fstack->calls, size);
3047         entry->size = nr_entries;
3048
3049         if (!call_filter_check_discard(call, entry, buffer, event))
3050                 __buffer_unlock_commit(buffer, event);
3051
3052  out:
3053         /* Again, don't let gcc optimize things here */
3054         barrier();
3055         __this_cpu_dec(ftrace_stack_reserve);
3056         preempt_enable_notrace();
3057
3058 }
3059
3060 static inline void ftrace_trace_stack(struct trace_array *tr,
3061                                       struct trace_buffer *buffer,
3062                                       unsigned int trace_ctx,
3063                                       int skip, struct pt_regs *regs)
3064 {
3065         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3066                 return;
3067
3068         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3069 }
3070
3071 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3072                    int skip)
3073 {
3074         struct trace_buffer *buffer = tr->array_buffer.buffer;
3075
3076         if (rcu_is_watching()) {
3077                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3078                 return;
3079         }
3080
3081         /*
3082          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3083          * but if the above rcu_is_watching() failed, then the NMI
3084          * triggered someplace critical, and rcu_irq_enter() should
3085          * not be called from NMI.
3086          */
3087         if (unlikely(in_nmi()))
3088                 return;
3089
3090         rcu_irq_enter_irqson();
3091         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3092         rcu_irq_exit_irqson();
3093 }
3094
3095 /**
3096  * trace_dump_stack - record a stack back trace in the trace buffer
3097  * @skip: Number of functions to skip (helper handlers)
3098  */
3099 void trace_dump_stack(int skip)
3100 {
3101         if (tracing_disabled || tracing_selftest_running)
3102                 return;
3103
3104 #ifndef CONFIG_UNWINDER_ORC
3105         /* Skip 1 to skip this function. */
3106         skip++;
3107 #endif
3108         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3109                              tracing_gen_ctx(), skip, NULL);
3110 }
3111 EXPORT_SYMBOL_GPL(trace_dump_stack);
3112
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Non-zero while this CPU is already recording a user stack trace */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user space stack as a TRACE_USER_STACK
 * event on @buffer, if @tr has TRACE_ITER_USERSTACKTRACE set.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	preempt_disable();

	/*
	 * Saving the user stack may itself trigger other kernel events;
	 * the per-CPU counter stops those from recursing back in here.
	 */
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), trace_ctx);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			memset(&entry->caller, 0, sizeof(entry->caller));

			stack_trace_save_user(entry->caller,
					      FTRACE_STACK_ENTRIES);
			if (!call_filter_check_discard(call, entry, buffer,
						       event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}

	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user stack trace support configured: nothing to record. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3169
3170 #endif /* CONFIG_STACKTRACE */
3171
3172 static inline void
3173 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3174                           unsigned long long delta)
3175 {
3176         entry->bottom_delta_ts = delta & U32_MAX;
3177         entry->top_delta_ts = (delta >> 32);
3178 }
3179
3180 void trace_last_func_repeats(struct trace_array *tr,
3181                              struct trace_func_repeats *last_info,
3182                              unsigned int trace_ctx)
3183 {
3184         struct trace_buffer *buffer = tr->array_buffer.buffer;
3185         struct func_repeats_entry *entry;
3186         struct ring_buffer_event *event;
3187         u64 delta;
3188
3189         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3190                                             sizeof(*entry), trace_ctx);
3191         if (!event)
3192                 return;
3193
3194         delta = ring_buffer_event_time_stamp(buffer, event) -
3195                 last_info->ts_last_call;
3196
3197         entry = ring_buffer_event_data(event);
3198         entry->ip = last_info->ip;
3199         entry->parent_ip = last_info->parent_ip;
3200         entry->count = last_info->count;
3201         func_repeats_set_delta_ts(entry, delta);
3202
3203         __buffer_unlock_commit(buffer, event);
3204 }
3205
3206 /* created for use with alloc_percpu */
3207 struct trace_buffer_struct {
3208         int nesting;
3209         char buffer[4][TRACE_BUF_SIZE];
3210 };
3211
3212 static struct trace_buffer_struct *trace_percpu_buffer;
3213
3214 /*
3215  * This allows for lockless recording.  If we're nested too deeply, then
3216  * this returns NULL.
3217  */
3218 static char *get_trace_buf(void)
3219 {
3220         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3221
3222         if (!buffer || buffer->nesting >= 4)
3223                 return NULL;
3224
3225         buffer->nesting++;
3226
3227         /* Interrupts must see nesting incremented before we use the buffer */
3228         barrier();
3229         return &buffer->buffer[buffer->nesting - 1][0];
3230 }
3231
3232 static void put_trace_buf(void)
3233 {
3234         /* Don't let the decrement of nesting leak before this */
3235         barrier();
3236         this_cpu_dec(trace_percpu_buffer->nesting);
3237 }
3238
3239 static int alloc_percpu_trace_buffer(void)
3240 {
3241         struct trace_buffer_struct *buffers;
3242
3243         if (trace_percpu_buffer)
3244                 return 0;
3245
3246         buffers = alloc_percpu(struct trace_buffer_struct);
3247         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3248                 return -ENOMEM;
3249
3250         trace_percpu_buffer = buffers;
3251         return 0;
3252 }
3253
3254 static int buffers_allocated;
3255
3256 void trace_printk_init_buffers(void)
3257 {
3258         if (buffers_allocated)
3259                 return;
3260
3261         if (alloc_percpu_trace_buffer())
3262                 return;
3263
3264         /* trace_printk() is for debug use only. Don't use it in production. */
3265
3266         pr_warn("\n");
3267         pr_warn("**********************************************************\n");
3268         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3269         pr_warn("**                                                      **\n");
3270         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3271         pr_warn("**                                                      **\n");
3272         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3273         pr_warn("** unsafe for production use.                           **\n");
3274         pr_warn("**                                                      **\n");
3275         pr_warn("** If you see this message and you are not debugging    **\n");
3276         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3277         pr_warn("**                                                      **\n");
3278         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3279         pr_warn("**********************************************************\n");
3280
3281         /* Expand the buffers to set size */
3282         tracing_update_buffers();
3283
3284         buffers_allocated = 1;
3285
3286         /*
3287          * trace_printk_init_buffers() can be called by modules.
3288          * If that happens, then we need to start cmdline recording
3289          * directly here. If the global_trace.buffer is already
3290          * allocated here, then this was called by module code.
3291          */
3292         if (global_trace.array_buffer.buffer)
3293                 tracing_start_cmdline_record();
3294 }
3295 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3296
3297 void trace_printk_start_comm(void)
3298 {
3299         /* Start tracing comms if trace printk is set */
3300         if (!buffers_allocated)
3301                 return;
3302         tracing_start_cmdline_record();
3303 }
3304
3305 static void trace_printk_start_stop_comm(int enabled)
3306 {
3307         if (!buffers_allocated)
3308                 return;
3309
3310         if (enabled)
3311                 tracing_start_cmdline_record();
3312         else
3313                 tracing_stop_cmdline_record();
3314 }
3315
3316 /**
3317  * trace_vbprintk - write binary msg to tracing buffer
3318  * @ip:    The address of the caller
3319  * @fmt:   The string format to write to the buffer
3320  * @args:  Arguments for @fmt
3321  */
3322 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3323 {
3324         struct trace_event_call *call = &event_bprint;
3325         struct ring_buffer_event *event;
3326         struct trace_buffer *buffer;
3327         struct trace_array *tr = &global_trace;
3328         struct bprint_entry *entry;
3329         unsigned int trace_ctx;
3330         char *tbuffer;
3331         int len = 0, size;
3332
3333         if (unlikely(tracing_selftest_running || tracing_disabled))
3334                 return 0;
3335
3336         /* Don't pollute graph traces with trace_vprintk internals */
3337         pause_graph_tracing();
3338
3339         trace_ctx = tracing_gen_ctx();
3340         preempt_disable_notrace();
3341
3342         tbuffer = get_trace_buf();
3343         if (!tbuffer) {
3344                 len = 0;
3345                 goto out_nobuffer;
3346         }
3347
3348         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3349
3350         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3351                 goto out_put;
3352
3353         size = sizeof(*entry) + sizeof(u32) * len;
3354         buffer = tr->array_buffer.buffer;
3355         ring_buffer_nest_start(buffer);
3356         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3357                                             trace_ctx);
3358         if (!event)
3359                 goto out;
3360         entry = ring_buffer_event_data(event);
3361         entry->ip                       = ip;
3362         entry->fmt                      = fmt;
3363
3364         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3365         if (!call_filter_check_discard(call, entry, buffer, event)) {
3366                 __buffer_unlock_commit(buffer, event);
3367                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3368         }
3369
3370 out:
3371         ring_buffer_nest_end(buffer);
3372 out_put:
3373         put_trace_buf();
3374
3375 out_nobuffer:
3376         preempt_enable_notrace();
3377         unpause_graph_tracing();
3378
3379         return len;
3380 }
3381 EXPORT_SYMBOL_GPL(trace_vbprintk);
3382
3383 __printf(3, 0)
3384 static int
3385 __trace_array_vprintk(struct trace_buffer *buffer,
3386                       unsigned long ip, const char *fmt, va_list args)
3387 {
3388         struct trace_event_call *call = &event_print;
3389         struct ring_buffer_event *event;
3390         int len = 0, size;
3391         struct print_entry *entry;
3392         unsigned int trace_ctx;
3393         char *tbuffer;
3394
3395         if (tracing_disabled || tracing_selftest_running)
3396                 return 0;
3397
3398         /* Don't pollute graph traces with trace_vprintk internals */
3399         pause_graph_tracing();
3400
3401         trace_ctx = tracing_gen_ctx();
3402         preempt_disable_notrace();
3403
3404
3405         tbuffer = get_trace_buf();
3406         if (!tbuffer) {
3407                 len = 0;
3408                 goto out_nobuffer;
3409         }
3410
3411         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412
3413         size = sizeof(*entry) + len + 1;
3414         ring_buffer_nest_start(buffer);
3415         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416                                             trace_ctx);
3417         if (!event)
3418                 goto out;
3419         entry = ring_buffer_event_data(event);
3420         entry->ip = ip;
3421
3422         memcpy(&entry->buf, tbuffer, len + 1);
3423         if (!call_filter_check_discard(call, entry, buffer, event)) {
3424                 __buffer_unlock_commit(buffer, event);
3425                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3426         }
3427
3428 out:
3429         ring_buffer_nest_end(buffer);
3430         put_trace_buf();
3431
3432 out_nobuffer:
3433         preempt_enable_notrace();
3434         unpause_graph_tracing();
3435
3436         return len;
3437 }
3438
3439 __printf(3, 0)
3440 int trace_array_vprintk(struct trace_array *tr,
3441                         unsigned long ip, const char *fmt, va_list args)
3442 {
3443         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3444 }
3445
3446 /**
3447  * trace_array_printk - Print a message to a specific instance
3448  * @tr: The instance trace_array descriptor
3449  * @ip: The instruction pointer that this is called from.
3450  * @fmt: The format to print (printf format)
3451  *
3452  * If a subsystem sets up its own instance, they have the right to
3453  * printk strings into their tracing instance buffer using this
3454  * function. Note, this function will not write into the top level
3455  * buffer (use trace_printk() for that), as writing into the top level
3456  * buffer should only have events that can be individually disabled.
3457  * trace_printk() is only used for debugging a kernel, and should not
3458  * be ever incorporated in normal use.
3459  *
3460  * trace_array_printk() can be used, as it will not add noise to the
3461  * top level tracing buffer.
3462  *
3463  * Note, trace_array_init_printk() must be called on @tr before this
3464  * can be used.
3465  */
3466 __printf(3, 0)
3467 int trace_array_printk(struct trace_array *tr,
3468                        unsigned long ip, const char *fmt, ...)
3469 {
3470         int ret;
3471         va_list ap;
3472
3473         if (!tr)
3474                 return -ENOENT;
3475
3476         /* This is only allowed for created instances */
3477         if (tr == &global_trace)
3478                 return 0;
3479
3480         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3481                 return 0;
3482
3483         va_start(ap, fmt);
3484         ret = trace_array_vprintk(tr, ip, fmt, ap);
3485         va_end(ap);
3486         return ret;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_array_printk);
3489
3490 /**
3491  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3492  * @tr: The trace array to initialize the buffers for
3493  *
3494  * As trace_array_printk() only writes into instances, they are OK to
3495  * have in the kernel (unlike trace_printk()). This needs to be called
3496  * before trace_array_printk() can be used on a trace_array.
3497  */
3498 int trace_array_init_printk(struct trace_array *tr)
3499 {
3500         if (!tr)
3501                 return -ENOENT;
3502
3503         /* This is only allowed for created instances */
3504         if (tr == &global_trace)
3505                 return -EINVAL;
3506
3507         return alloc_percpu_trace_buffer();
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3510
3511 __printf(3, 4)
3512 int trace_array_printk_buf(struct trace_buffer *buffer,
3513                            unsigned long ip, const char *fmt, ...)
3514 {
3515         int ret;
3516         va_list ap;
3517
3518         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3519                 return 0;
3520
3521         va_start(ap, fmt);
3522         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3523         va_end(ap);
3524         return ret;
3525 }
3526
3527 __printf(2, 0)
3528 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3529 {
3530         return trace_array_vprintk(&global_trace, ip, fmt, args);
3531 }
3532 EXPORT_SYMBOL_GPL(trace_vprintk);
3533
3534 static void trace_iterator_increment(struct trace_iterator *iter)
3535 {
3536         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3537
3538         iter->idx++;
3539         if (buf_iter)
3540                 ring_buffer_iter_advance(buf_iter);
3541 }
3542
3543 static struct trace_entry *
3544 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3545                 unsigned long *lost_events)
3546 {
3547         struct ring_buffer_event *event;
3548         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3549
3550         if (buf_iter) {
3551                 event = ring_buffer_iter_peek(buf_iter, ts);
3552                 if (lost_events)
3553                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3554                                 (unsigned long)-1 : 0;
3555         } else {
3556                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3557                                          lost_events);
3558         }
3559
3560         if (event) {
3561                 iter->ent_size = ring_buffer_event_length(event);
3562                 return ring_buffer_event_data(event);
3563         }
3564         iter->ent_size = 0;
3565         return NULL;
3566 }
3567
3568 static struct trace_entry *
3569 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3570                   unsigned long *missing_events, u64 *ent_ts)
3571 {
3572         struct trace_buffer *buffer = iter->array_buffer->buffer;
3573         struct trace_entry *ent, *next = NULL;
3574         unsigned long lost_events = 0, next_lost = 0;
3575         int cpu_file = iter->cpu_file;
3576         u64 next_ts = 0, ts;
3577         int next_cpu = -1;
3578         int next_size = 0;
3579         int cpu;
3580
3581         /*
3582          * If we are in a per_cpu trace file, don't bother by iterating over
3583          * all cpu and peek directly.
3584          */
3585         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3586                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3587                         return NULL;
3588                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3589                 if (ent_cpu)
3590                         *ent_cpu = cpu_file;
3591
3592                 return ent;
3593         }
3594
3595         for_each_tracing_cpu(cpu) {
3596
3597                 if (ring_buffer_empty_cpu(buffer, cpu))
3598                         continue;
3599
3600                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3601
3602                 /*
3603                  * Pick the entry with the smallest timestamp:
3604                  */
3605                 if (ent && (!next || ts < next_ts)) {
3606                         next = ent;
3607                         next_cpu = cpu;
3608                         next_ts = ts;
3609                         next_lost = lost_events;
3610                         next_size = iter->ent_size;
3611                 }
3612         }
3613
3614         iter->ent_size = next_size;
3615
3616         if (ent_cpu)
3617                 *ent_cpu = next_cpu;
3618
3619         if (ent_ts)
3620                 *ent_ts = next_ts;
3621
3622         if (missing_events)
3623                 *missing_events = next_lost;
3624
3625         return next;
3626 }
3627
/*
 * Fixed-size format scratch buffer. An iterator whose ->fmt points here
 * is never grown by trace_iter_expand_format(), and trace_check_vprintf()
 * skips its string checks for it (the ftrace_dump() case).
 * STATIC_FMT_BUF_SIZE is also the step by which a reallocated iter->fmt
 * grows.
 */
#define STATIC_FMT_BUF_SIZE     128
static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3630
3631 static char *trace_iter_expand_format(struct trace_iterator *iter)
3632 {
3633         char *tmp;
3634
3635         /*
3636          * iter->tr is NULL when used with tp_printk, which makes
3637          * this get called where it is not safe to call krealloc().
3638          */
3639         if (!iter->tr || iter->fmt == static_fmt_buf)
3640                 return NULL;
3641
3642         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3643                        GFP_KERNEL);
3644         if (tmp) {
3645                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3646                 iter->fmt = tmp;
3647         }
3648
3649         return tmp;
3650 }
3651
3652 /* Returns true if the string is safe to dereference from an event */
3653 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3654 {
3655         unsigned long addr = (unsigned long)str;
3656         struct trace_event *trace_event;
3657         struct trace_event_call *event;
3658
3659         /* OK if part of the event data */
3660         if ((addr >= (unsigned long)iter->ent) &&
3661             (addr < (unsigned long)iter->ent + iter->ent_size))
3662                 return true;
3663
3664         /* OK if part of the temp seq buffer */
3665         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3666             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3667                 return true;
3668
3669         /* Core rodata can not be freed */
3670         if (is_kernel_rodata(addr))
3671                 return true;
3672
3673         if (trace_is_tracepoint_string(str))
3674                 return true;
3675
3676         /*
3677          * Now this could be a module event, referencing core module
3678          * data, which is OK.
3679          */
3680         if (!iter->ent)
3681                 return false;
3682
3683         trace_event = ftrace_find_event(iter->ent->type);
3684         if (!trace_event)
3685                 return false;
3686
3687         event = container_of(trace_event, struct trace_event_call, event);
3688         if (!event->mod)
3689                 return false;
3690
3691         /* Would rather have rodata, but this will suffice */
3692         if (within_module_core(addr, event->mod))
3693                 return true;
3694
3695         return false;
3696 }
3697
3698 static const char *show_buffer(struct trace_seq *s)
3699 {
3700         struct seq_buf *seq = &s->seq;
3701
3702         seq_buf_terminate(seq);
3703
3704         return seq->buffer;
3705 }
3706
/*
 * Incremented by test_can_verify() when its va_list probe fails.
 * While set, trace_check_vprintf() prints events without checking
 * their %s arguments.
 */
static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3708
/*
 * Probe whether vsnprintf() advances the va_list of its caller.
 *
 * One "%d" is formatted through vsnprintf(), then the next int is
 * fetched from the same va_list and returned. If vsnprintf() advanced
 * the list, that is the second variadic argument; otherwise it is the
 * first one again.
 */
static int test_can_verify_check(const char *fmt, ...)
{
        char scratch[16];
        va_list args;
        int next;

        /*
         * The verifier depends on vsnprintf() modifying the va_list
         * passed to it, which requires the va_list to be passed by
         * reference. Some architectures (like x86_32) pass it by value,
         * which means that vsnprintf() does not modify the va_list
         * passed to it, and the verifier would then need to be able to
         * understand all the values that vsnprintf can use. If it is
         * passed by value, then the verifier is disabled.
         */
        va_start(args, fmt);
        vsnprintf(scratch, sizeof(scratch), "%d", args);
        next = va_arg(args, int);
        va_end(args);

        return next;
}
3731
3732 static void test_can_verify(void)
3733 {
3734         if (!test_can_verify_check("%d %d", 0, 1)) {
3735                 pr_info("trace event string verifier disabled\n");
3736                 static_branch_inc(&trace_no_verify);
3737         }
3738 }
3739
3740 /**
3741  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3742  * @iter: The iterator that holds the seq buffer and the event being printed
3743  * @fmt: The format used to print the event
3744  * @ap: The va_list holding the data to print from @fmt.
3745  *
3746  * This writes the data into the @iter->seq buffer using the data from
3747  * @fmt and @ap. If the format has a %s, then the source of the string
3748  * is examined to make sure it is safe to print, otherwise it will
3749  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3750  * pointer.
3751  */
3752 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3753                          va_list ap)
3754 {
3755         const char *p = fmt;
3756         const char *str;
3757         int i, j;
3758
3759         if (WARN_ON_ONCE(!fmt))
3760                 return;
3761
3762         if (static_branch_unlikely(&trace_no_verify))
3763                 goto print;
3764
3765         /* Don't bother checking when doing a ftrace_dump() */
3766         if (iter->fmt == static_fmt_buf)
3767                 goto print;
3768
3769         while (*p) {
3770                 bool star = false;
3771                 int len = 0;
3772
3773                 j = 0;
3774
3775                 /* We only care about %s and variants */
3776                 for (i = 0; p[i]; i++) {
3777                         if (i + 1 >= iter->fmt_size) {
3778                                 /*
3779                                  * If we can't expand the copy buffer,
3780                                  * just print it.
3781                                  */
3782                                 if (!trace_iter_expand_format(iter))
3783                                         goto print;
3784                         }
3785
3786                         if (p[i] == '\\' && p[i+1]) {
3787                                 i++;
3788                                 continue;
3789                         }
3790                         if (p[i] == '%') {
3791                                 /* Need to test cases like %08.*s */
3792                                 for (j = 1; p[i+j]; j++) {
3793                                         if (isdigit(p[i+j]) ||
3794                                             p[i+j] == '.')
3795                                                 continue;
3796                                         if (p[i+j] == '*') {
3797                                                 star = true;
3798                                                 continue;
3799                                         }
3800                                         break;
3801                                 }
3802                                 if (p[i+j] == 's')
3803                                         break;
3804                                 star = false;
3805                         }
3806                         j = 0;
3807                 }
3808                 /* If no %s found then just print normally */
3809                 if (!p[i])
3810                         break;
3811
3812                 /* Copy up to the %s, and print that */
3813                 strncpy(iter->fmt, p, i);
3814                 iter->fmt[i] = '\0';
3815                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3816
3817                 if (star)
3818                         len = va_arg(ap, int);
3819
3820                 /* The ap now points to the string data of the %s */
3821                 str = va_arg(ap, const char *);
3822
3823                 /*
3824                  * If you hit this warning, it is likely that the
3825                  * trace event in question used %s on a string that
3826                  * was saved at the time of the event, but may not be
3827                  * around when the trace is read. Use __string(),
3828                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3829                  * instead. See samples/trace_events/trace-events-sample.h
3830                  * for reference.
3831                  */
3832                 if (WARN_ONCE(!trace_safe_str(iter, str),
3833                               "fmt: '%s' current_buffer: '%s'",
3834                               fmt, show_buffer(&iter->seq))) {
3835                         int ret;
3836
3837                         /* Try to safely read the string */
3838                         if (star) {
3839                                 if (len + 1 > iter->fmt_size)
3840                                         len = iter->fmt_size - 1;
3841                                 if (len < 0)
3842                                         len = 0;
3843                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3844                                 iter->fmt[len] = 0;
3845                                 star = false;
3846                         } else {
3847                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3848                                                                   iter->fmt_size);
3849                         }
3850                         if (ret < 0)
3851                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3852                         else
3853                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3854                                                  str, iter->fmt);
3855                         str = "[UNSAFE-MEMORY]";
3856                         strcpy(iter->fmt, "%s");
3857                 } else {
3858                         strncpy(iter->fmt, p + i, j + 1);
3859                         iter->fmt[j+1] = '\0';
3860                 }
3861                 if (star)
3862                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3863                 else
3864                         trace_seq_printf(&iter->seq, iter->fmt, str);
3865
3866                 p += i + j + 1;
3867         }
3868  print:
3869         if (*p)
3870                 trace_seq_vprintf(&iter->seq, p, ap);
3871 }
3872
3873 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3874 {
3875         const char *p, *new_fmt;
3876         char *q;
3877
3878         if (WARN_ON_ONCE(!fmt))
3879                 return fmt;
3880
3881         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3882                 return fmt;
3883
3884         p = fmt;
3885         new_fmt = q = iter->fmt;
3886         while (*p) {
3887                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3888                         if (!trace_iter_expand_format(iter))
3889                                 return fmt;
3890
3891                         q += iter->fmt - new_fmt;
3892                         new_fmt = iter->fmt;
3893                 }
3894
3895                 *q++ = *p++;
3896
3897                 /* Replace %p with %px */
3898                 if (p[-1] == '%') {
3899                         if (p[0] == '%') {
3900                                 *q++ = *p++;
3901                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3902                                 *q++ = *p++;
3903                                 *q++ = 'x';
3904                         }
3905                 }
3906         }
3907         *q = '\0';
3908
3909         return new_fmt;
3910 }
3911
/*
 * Fixed-size scratch area that trace_find_next_entry() expects to find
 * in iter->temp when it is called from ftrace_dump(). It is not created
 * by kmalloc(), so it is never replaced by a larger buffer.
 */
#define STATIC_TEMP_BUF_SIZE    128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3914
3915 /* Find the next real entry, without updating the iterator itself */
3916 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3917                                           int *ent_cpu, u64 *ent_ts)
3918 {
3919         /* __find_next_entry will reset ent_size */
3920         int ent_size = iter->ent_size;
3921         struct trace_entry *entry;
3922
3923         /*
3924          * If called from ftrace_dump(), then the iter->temp buffer
3925          * will be the static_temp_buf and not created from kmalloc.
3926          * If the entry size is greater than the buffer, we can
3927          * not save it. Just return NULL in that case. This is only
3928          * used to add markers when two consecutive events' time
3929          * stamps have a large delta. See trace_print_lat_context()
3930          */
3931         if (iter->temp == static_temp_buf &&
3932             STATIC_TEMP_BUF_SIZE < ent_size)
3933                 return NULL;
3934
3935         /*
3936          * The __find_next_entry() may call peek_next_entry(), which may
3937          * call ring_buffer_peek() that may make the contents of iter->ent
3938          * undefined. Need to copy iter->ent now.
3939          */
3940         if (iter->ent && iter->ent != iter->temp) {
3941                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3942                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3943                         void *temp;
3944                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3945                         if (!temp)
3946                                 return NULL;
3947                         kfree(iter->temp);
3948                         iter->temp = temp;
3949                         iter->temp_size = iter->ent_size;
3950                 }
3951                 memcpy(iter->temp, iter->ent, iter->ent_size);
3952                 iter->ent = iter->temp;
3953         }
3954         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3955         /* Put back the original ent_size */
3956         iter->ent_size = ent_size;
3957
3958         return entry;
3959 }
3960
3961 /* Find the next real entry, and increment the iterator to the next entry */
3962 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3963 {
3964         iter->ent = __find_next_entry(iter, &iter->cpu,
3965                                       &iter->lost_events, &iter->ts);
3966
3967         if (iter->ent)
3968                 trace_iterator_increment(iter);
3969
3970         return iter->ent ? iter : NULL;
3971 }
3972
3973 static void trace_consume(struct trace_iterator *iter)
3974 {
3975         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3976                             &iter->lost_events);
3977 }
3978
3979 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3980 {
3981         struct trace_iterator *iter = m->private;
3982         int i = (int)*pos;
3983         void *ent;
3984
3985         WARN_ON_ONCE(iter->leftover);
3986
3987         (*pos)++;
3988
3989         /* can't go backwards */
3990         if (iter->idx > i)
3991                 return NULL;
3992
3993         if (iter->idx < 0)
3994                 ent = trace_find_next_entry_inc(iter);
3995         else
3996                 ent = iter;
3997
3998         while (ent && iter->idx < i)
3999                 ent = trace_find_next_entry_inc(iter);
4000
4001         iter->pos = *pos;
4002
4003         return ent;
4004 }
4005
4006 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4007 {
4008         struct ring_buffer_iter *buf_iter;
4009         unsigned long entries = 0;
4010         u64 ts;
4011
4012         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4013
4014         buf_iter = trace_buffer_iter(iter, cpu);
4015         if (!buf_iter)
4016                 return;
4017
4018         ring_buffer_iter_reset(buf_iter);
4019
4020         /*
4021          * We could have the case with the max latency tracers
4022          * that a reset never took place on a cpu. This is evident
4023          * by the timestamp being before the start of the buffer.
4024          */
4025         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4026                 if (ts >= iter->array_buffer->time_start)
4027                         break;
4028                 entries++;
4029                 ring_buffer_iter_advance(buf_iter);
4030         }
4031
4032         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4033 }
4034
/*
 * Position the iterator at *pos and take the read-side locks
 * (presumably the seq_file start callback; the registration is not
 * visible in this part of the file).
 *
 * The current tracer is copied into iter->trace to avoid global
 * locking all around.
 *
 * Returns the iterator when an entry is available at *pos, NULL when
 * there is none, or ERR_PTR(-EBUSY) for a snapshot iterator while the
 * tracer uses the max buffer. The -EBUSY return happens before any
 * lock is taken; s_stop() checks the same condition and skips the
 * unlocks.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
        struct trace_iterator *iter = m->private;
        struct trace_array *tr = iter->tr;
        int cpu_file = iter->cpu_file;
        void *p = NULL;
        loff_t l = 0;
        int cpu;

        /*
         * copy the tracer to avoid using a global lock all around.
         * iter->trace is a copy of current_trace, the pointer to the
         * name may be used instead of a strcmp(), as iter->trace->name
         * will point to the same string as current_trace->name.
         */
        mutex_lock(&trace_types_lock);
        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
                *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
        if (iter->snapshot && iter->trace->use_max_tr)
                return ERR_PTR(-EBUSY);
#endif

        if (*pos != iter->pos) {
                /* Not where the last read stopped: restart from the beginning */
                iter->ent = NULL;
                iter->cpu = 0;
                iter->idx = -1;

                if (cpu_file == RING_BUFFER_ALL_CPUS) {
                        for_each_tracing_cpu(cpu)
                                tracing_iter_reset(iter, cpu);
                } else
                        tracing_iter_reset(iter, cpu_file);

                iter->leftover = 0;
                /* Walk forward entry by entry until *pos is reached */
                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
                        ;

        } else {
                /*
                 * If we overflowed the seq_file before, then we want
                 * to just reuse the trace_seq buffer again.
                 */
                if (iter->leftover)
                        p = iter;
                else {
                        /* s_next() increments l, so start one short of *pos */
                        l = *pos - 1;
                        p = s_next(m, p, &l);
                }
        }

        /* Released again in s_stop() */
        trace_event_read_lock();
        trace_access_lock(cpu_file);
        return p;
}
4096
/*
 * Drop the locks taken at the end of s_start(), in reverse order.
 */
static void s_stop(struct seq_file *m, void *p)
{
        struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
        /* s_start() returned -EBUSY for this case without taking the locks */
        if (iter->snapshot && iter->trace->use_max_tr)
                return;
#endif

        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
}
4109
4110 static void
4111 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4112                       unsigned long *entries, int cpu)
4113 {
4114         unsigned long count;
4115
4116         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4117         /*
4118          * If this buffer has skipped entries, then we hold all
4119          * entries for the trace and we need to ignore the
4120          * ones before the time stamp.
4121          */
4122         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4123                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4124                 /* total is the same as the entries */
4125                 *total = count;
4126         } else
4127                 *total = count +
4128                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4129         *entries = count;
4130 }
4131
4132 static void
4133 get_total_entries(struct array_buffer *buf,
4134                   unsigned long *total, unsigned long *entries)
4135 {
4136         unsigned long t, e;
4137         int cpu;
4138
4139         *total = 0;
4140         *entries = 0;
4141
4142         for_each_tracing_cpu(cpu) {
4143                 get_total_entries_cpu(buf, &t, &e, cpu);
4144                 *total += t;
4145                 *entries += e;
4146         }
4147 }
4148
4149 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4150 {
4151         unsigned long total, entries;
4152
4153         if (!tr)
4154                 tr = &global_trace;
4155
4156         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4157
4158         return entries;
4159 }
4160
4161 unsigned long trace_total_entries(struct trace_array *tr)
4162 {
4163         unsigned long total, entries;
4164
4165         if (!tr)
4166                 tr = &global_trace;
4167
4168         get_total_entries(&tr->array_buffer, &total, &entries);
4169
4170         return entries;
4171 }
4172
/*
 * Emit the column legend of the latency format. The legend is one
 * string written with a single seq_puts() call; the column positions
 * are part of the text and must be kept as they are.
 */
static void print_lat_help_header(struct seq_file *m)
{
        seq_puts(m, "#                    _------=> CPU#            \n"
                    "#                   / _-----=> irqs-off        \n"
                    "#                  | / _----=> need-resched    \n"
                    "#                  || / _---=> hardirq/softirq \n"
                    "#                  ||| / _--=> preempt-depth   \n"
                    "#                  |||| /     delay            \n"
                    "#  cmd     pid     ||||| time  |   caller      \n"
                    "#     \\   /        |||||  \\    |   /         \n");
}
4184
/*
 * Print the "entries-in-buffer/entries-written" summary line of @buf
 * together with the number of online CPUs.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
        unsigned long written, in_buffer;

        get_total_entries(buf, &written, &in_buffer);

        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
                   in_buffer, written, num_online_cpus());
        seq_puts(m, "#\n");
}
4195
4196 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4197                                    unsigned int flags)
4198 {
4199         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4200
4201         print_event_info(buf, m);
4202
4203         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4204         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4205 }
4206
4207 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4208                                        unsigned int flags)
4209 {
4210         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211         const char *space = "            ";
4212         int prec = tgid ? 12 : 2;
4213
4214         print_event_info(buf, m);
4215
4216         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4217         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4218         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4219         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4220         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4221         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4223 }
4224
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229         struct array_buffer *buf = iter->array_buffer;
4230         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231         struct tracer *type = iter->trace;
4232         unsigned long entries;
4233         unsigned long total;
4234         const char *name = "preemption";
4235
4236         name = type->name;
4237
4238         get_total_entries(buf, &total, &entries);
4239
4240         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241                    name, UTS_RELEASE);
4242         seq_puts(m, "# -----------------------------------"
4243                  "---------------------------------\n");
4244         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246                    nsecs_to_usecs(data->saved_latency),
4247                    entries,
4248                    total,
4249                    buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251                    "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253                    "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255                    "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257                    "preempt_rt",
4258 #else
4259                    "unknown",
4260 #endif
4261                    /* These are reserved for later use */
4262                    0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264         seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266         seq_puts(m, ")\n");
4267 #endif
4268         seq_puts(m, "#    -----------------\n");
4269         seq_printf(m, "#    | task: %.16s-%d "
4270                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271                    data->comm, data->pid,
4272                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273                    data->policy, data->rt_priority);
4274         seq_puts(m, "#    -----------------\n");
4275
4276         if (data->critical_start) {
4277                 seq_puts(m, "#  => started at: ");
4278                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279                 trace_print_seq(m, &iter->seq);
4280                 seq_puts(m, "\n#  => ended at:   ");
4281                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282                 trace_print_seq(m, &iter->seq);
4283                 seq_puts(m, "\n#\n");
4284         }
4285
4286         seq_puts(m, "#\n");
4287 }
4288
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291         struct trace_seq *s = &iter->seq;
4292         struct trace_array *tr = iter->tr;
4293
4294         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295                 return;
4296
4297         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298                 return;
4299
4300         if (cpumask_available(iter->started) &&
4301             cpumask_test_cpu(iter->cpu, iter->started))
4302                 return;
4303
4304         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305                 return;
4306
4307         if (cpumask_available(iter->started))
4308                 cpumask_set_cpu(iter->cpu, iter->started);
4309
4310         /* Don't print started cpu buffer for the first entry of the trace */
4311         if (iter->idx > 1)
4312                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313                                 iter->cpu);
4314 }
4315
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318         struct trace_array *tr = iter->tr;
4319         struct trace_seq *s = &iter->seq;
4320         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321         struct trace_entry *entry;
4322         struct trace_event *event;
4323
4324         entry = iter->ent;
4325
4326         test_cpu_buff_start(iter);
4327
4328         event = ftrace_find_event(entry->type);
4329
4330         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332                         trace_print_lat_context(iter);
4333                 else
4334                         trace_print_context(iter);
4335         }
4336
4337         if (trace_seq_has_overflowed(s))
4338                 return TRACE_TYPE_PARTIAL_LINE;
4339
4340         if (event)
4341                 return event->funcs->trace(iter, sym_flags, event);
4342
4343         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344
4345         return trace_handle_return(s);
4346 }
4347
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350         struct trace_array *tr = iter->tr;
4351         struct trace_seq *s = &iter->seq;
4352         struct trace_entry *entry;
4353         struct trace_event *event;
4354
4355         entry = iter->ent;
4356
4357         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358                 trace_seq_printf(s, "%d %d %llu ",
4359                                  entry->pid, iter->cpu, iter->ts);
4360
4361         if (trace_seq_has_overflowed(s))
4362                 return TRACE_TYPE_PARTIAL_LINE;
4363
4364         event = ftrace_find_event(entry->type);
4365         if (event)
4366                 return event->funcs->raw(iter, 0, event);
4367
4368         trace_seq_printf(s, "%d ?\n", entry->type);
4369
4370         return trace_handle_return(s);
4371 }
4372
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375         struct trace_array *tr = iter->tr;
4376         struct trace_seq *s = &iter->seq;
4377         unsigned char newline = '\n';
4378         struct trace_entry *entry;
4379         struct trace_event *event;
4380
4381         entry = iter->ent;
4382
4383         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4385                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4387                 if (trace_seq_has_overflowed(s))
4388                         return TRACE_TYPE_PARTIAL_LINE;
4389         }
4390
4391         event = ftrace_find_event(entry->type);
4392         if (event) {
4393                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394                 if (ret != TRACE_TYPE_HANDLED)
4395                         return ret;
4396         }
4397
4398         SEQ_PUT_FIELD(s, newline);
4399
4400         return trace_handle_return(s);
4401 }
4402
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405         struct trace_array *tr = iter->tr;
4406         struct trace_seq *s = &iter->seq;
4407         struct trace_entry *entry;
4408         struct trace_event *event;
4409
4410         entry = iter->ent;
4411
4412         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413                 SEQ_PUT_FIELD(s, entry->pid);
4414                 SEQ_PUT_FIELD(s, iter->cpu);
4415                 SEQ_PUT_FIELD(s, iter->ts);
4416                 if (trace_seq_has_overflowed(s))
4417                         return TRACE_TYPE_PARTIAL_LINE;
4418         }
4419
4420         event = ftrace_find_event(entry->type);
4421         return event ? event->funcs->binary(iter, 0, event) :
4422                 TRACE_TYPE_HANDLED;
4423 }
4424
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427         struct ring_buffer_iter *buf_iter;
4428         int cpu;
4429
4430         /* If we are looking at one CPU buffer, only check that one */
4431         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432                 cpu = iter->cpu_file;
4433                 buf_iter = trace_buffer_iter(iter, cpu);
4434                 if (buf_iter) {
4435                         if (!ring_buffer_iter_empty(buf_iter))
4436                                 return 0;
4437                 } else {
4438                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439                                 return 0;
4440                 }
4441                 return 1;
4442         }
4443
4444         for_each_tracing_cpu(cpu) {
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453         }
4454
4455         return 1;
4456 }
4457
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461         struct trace_array *tr = iter->tr;
4462         unsigned long trace_flags = tr->trace_flags;
4463         enum print_line_t ret;
4464
4465         if (iter->lost_events) {
4466                 if (iter->lost_events == (unsigned long)-1)
4467                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468                                          iter->cpu);
4469                 else
4470                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471                                          iter->cpu, iter->lost_events);
4472                 if (trace_seq_has_overflowed(&iter->seq))
4473                         return TRACE_TYPE_PARTIAL_LINE;
4474         }
4475
4476         if (iter->trace && iter->trace->print_line) {
4477                 ret = iter->trace->print_line(iter);
4478                 if (ret != TRACE_TYPE_UNHANDLED)
4479                         return ret;
4480         }
4481
4482         if (iter->ent->type == TRACE_BPUTS &&
4483                         trace_flags & TRACE_ITER_PRINTK &&
4484                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485                 return trace_print_bputs_msg_only(iter);
4486
4487         if (iter->ent->type == TRACE_BPRINT &&
4488                         trace_flags & TRACE_ITER_PRINTK &&
4489                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490                 return trace_print_bprintk_msg_only(iter);
4491
4492         if (iter->ent->type == TRACE_PRINT &&
4493                         trace_flags & TRACE_ITER_PRINTK &&
4494                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495                 return trace_print_printk_msg_only(iter);
4496
4497         if (trace_flags & TRACE_ITER_BIN)
4498                 return print_bin_fmt(iter);
4499
4500         if (trace_flags & TRACE_ITER_HEX)
4501                 return print_hex_fmt(iter);
4502
4503         if (trace_flags & TRACE_ITER_RAW)
4504                 return print_raw_fmt(iter);
4505
4506         return print_trace_fmt(iter);
4507 }
4508
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511         struct trace_iterator *iter = m->private;
4512         struct trace_array *tr = iter->tr;
4513
4514         /* print nothing if the buffers are empty */
4515         if (trace_empty(iter))
4516                 return;
4517
4518         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519                 print_trace_header(m, iter);
4520
4521         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522                 print_lat_help_header(m);
4523 }
4524
4525 void trace_default_header(struct seq_file *m)
4526 {
4527         struct trace_iterator *iter = m->private;
4528         struct trace_array *tr = iter->tr;
4529         unsigned long trace_flags = tr->trace_flags;
4530
4531         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532                 return;
4533
4534         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535                 /* print nothing if the buffers are empty */
4536                 if (trace_empty(iter))
4537                         return;
4538                 print_trace_header(m, iter);
4539                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4540                         print_lat_help_header(m);
4541         } else {
4542                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4544                                 print_func_help_header_irq(iter->array_buffer,
4545                                                            m, trace_flags);
4546                         else
4547                                 print_func_help_header(iter->array_buffer, m,
4548                                                        trace_flags);
4549                 }
4550         }
4551 }
4552
/* Emit a warning banner into @m when ftrace_is_dead() reports true. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4560
4561 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage text for the main (all CPUs) snapshot file into @m. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4571
/*
 * Print the usage text for a per_cpu snapshot file into @m. The text
 * for "echo 1" depends on CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4586
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589         if (iter->tr->allocated_snapshot)
4590                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591         else
4592                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593
4594         seq_puts(m, "# Snapshot commands:\n");
4595         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596                 show_snapshot_main_help(m);
4597         else
4598                 show_snapshot_percpu_help(m);
4599 }
4600 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4603 #endif
4604
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607         struct trace_iterator *iter = v;
4608         int ret;
4609
4610         if (iter->ent == NULL) {
4611                 if (iter->tr) {
4612                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613                         seq_puts(m, "#\n");
4614                         test_ftrace_alive(m);
4615                 }
4616                 if (iter->snapshot && trace_empty(iter))
4617                         print_snapshot_help(m, iter);
4618                 else if (iter->trace && iter->trace->print_header)
4619                         iter->trace->print_header(m);
4620                 else
4621                         trace_default_header(m);
4622
4623         } else if (iter->leftover) {
4624                 /*
4625                  * If we filled the seq_file buffer earlier, we
4626                  * want to just show it now.
4627                  */
4628                 ret = trace_print_seq(m, &iter->seq);
4629
4630                 /* ret should this time be zero, but you never know */
4631                 iter->leftover = ret;
4632
4633         } else {
4634                 print_trace_line(iter);
4635                 ret = trace_print_seq(m, &iter->seq);
4636                 /*
4637                  * If we overflow the seq_file buffer, then it will
4638                  * ask us for this data again at start up.
4639                  * Use that instead.
4640                  *  ret is 0 if seq_file write succeeded.
4641                  *        -1 otherwise.
4642                  */
4643                 iter->leftover = ret;
4644         }
4645
4646         return 0;
4647 }
4648
4649 /*
4650  * Should be used after trace_array_get(), trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655         if (inode->i_cdev) /* See trace_create_cpu_file() */
4656                 return (long)inode->i_cdev - 1;
4657         return RING_BUFFER_ALL_CPUS;
4658 }
4659
4660 static const struct seq_operations tracer_seq_ops = {
4661         .start          = s_start,
4662         .next           = s_next,
4663         .stop           = s_stop,
4664         .show           = s_show,
4665 };
4666
4667 static struct trace_iterator *
4668 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4669 {
4670         struct trace_array *tr = inode->i_private;
4671         struct trace_iterator *iter;
4672         int cpu;
4673
4674         if (tracing_disabled)
4675                 return ERR_PTR(-ENODEV);
4676
4677         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4678         if (!iter)
4679                 return ERR_PTR(-ENOMEM);
4680
4681         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4682                                     GFP_KERNEL);
4683         if (!iter->buffer_iter)
4684                 goto release;
4685
4686         /*
4687          * trace_find_next_entry() may need to save off iter->ent.
4688          * It will place it into the iter->temp buffer. As most
4689          * events are less than 128, allocate a buffer of that size.
4690          * If one is greater, then trace_find_next_entry() will
4691          * allocate a new buffer to adjust for the bigger iter->ent.
4692          * It's not critical if it fails to get allocated here.
4693          */
4694         iter->temp = kmalloc(128, GFP_KERNEL);
4695         if (iter->temp)
4696                 iter->temp_size = 128;
4697
4698         /*
4699          * trace_event_printf() may need to modify given format
4700          * string to replace %p with %px so that it shows real address
4701          * instead of hash value. However, that is only for the event
4702          * tracing, other tracer may not need. Defer the allocation
4703          * until it is needed.
4704          */
4705         iter->fmt = NULL;
4706         iter->fmt_size = 0;
4707
4708         /*
4709          * We make a copy of the current tracer to avoid concurrent
4710          * changes on it while we are reading.
4711          */
4712         mutex_lock(&trace_types_lock);
4713         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4714         if (!iter->trace)
4715                 goto fail;
4716
4717         *iter->trace = *tr->current_trace;
4718
4719         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4720                 goto fail;
4721
4722         iter->tr = tr;
4723
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725         /* Currently only the top directory has a snapshot */
4726         if (tr->current_trace->print_max || snapshot)
4727                 iter->array_buffer = &tr->max_buffer;
4728         else
4729 #endif
4730                 iter->array_buffer = &tr->array_buffer;
4731         iter->snapshot = snapshot;
4732         iter->pos = -1;
4733         iter->cpu_file = tracing_get_cpu(inode);
4734         mutex_init(&iter->mutex);
4735
4736         /* Notify the tracer early; before we stop tracing. */
4737         if (iter->trace->open)
4738                 iter->trace->open(iter);
4739
4740         /* Annotate start of buffers if we had overruns */
4741         if (ring_buffer_overruns(iter->array_buffer->buffer))
4742                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4743
4744         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4745         if (trace_clocks[tr->clock_id].in_ns)
4746                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4747
4748         /*
4749          * If pause-on-trace is enabled, then stop the trace while
4750          * dumping, unless this is the "snapshot" file
4751          */
4752         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4753                 tracing_stop_tr(tr);
4754
4755         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4756                 for_each_tracing_cpu(cpu) {
4757                         iter->buffer_iter[cpu] =
4758                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4759                                                          cpu, GFP_KERNEL);
4760                 }
4761                 ring_buffer_read_prepare_sync();
4762                 for_each_tracing_cpu(cpu) {
4763                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4764                         tracing_iter_reset(iter, cpu);
4765                 }
4766         } else {
4767                 cpu = iter->cpu_file;
4768                 iter->buffer_iter[cpu] =
4769                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4770                                                  cpu, GFP_KERNEL);
4771                 ring_buffer_read_prepare_sync();
4772                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4773                 tracing_iter_reset(iter, cpu);
4774         }
4775
4776         mutex_unlock(&trace_types_lock);
4777
4778         return iter;
4779
4780  fail:
4781         mutex_unlock(&trace_types_lock);
4782         kfree(iter->trace);
4783         kfree(iter->temp);
4784         kfree(iter->buffer_iter);
4785 release:
4786         seq_release_private(inode, file);
4787         return ERR_PTR(-ENOMEM);
4788 }
4789
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792         int ret;
4793
4794         ret = tracing_check_open_get_tr(NULL);
4795         if (ret)
4796                 return ret;
4797
4798         filp->private_data = inode->i_private;
4799         return 0;
4800 }
4801
4802 bool tracing_is_disabled(void)
4803 {
4804         return (tracing_disabled) ? true: false;
4805 }
4806
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813         struct trace_array *tr = inode->i_private;
4814         int ret;
4815
4816         ret = tracing_check_open_get_tr(tr);
4817         if (ret)
4818                 return ret;
4819
4820         filp->private_data = inode->i_private;
4821
4822         return 0;
4823 }
4824
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct seq_file *m = file->private_data;
4829         struct trace_iterator *iter;
4830         int cpu;
4831
4832         if (!(file->f_mode & FMODE_READ)) {
4833                 trace_array_put(tr);
4834                 return 0;
4835         }
4836
4837         /* Writes do not use seq_file */
4838         iter = m->private;
4839         mutex_lock(&trace_types_lock);
4840
4841         for_each_tracing_cpu(cpu) {
4842                 if (iter->buffer_iter[cpu])
4843                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844         }
4845
4846         if (iter->trace && iter->trace->close)
4847                 iter->trace->close(iter);
4848
4849         if (!iter->snapshot && tr->stop_count)
4850                 /* reenable tracing if it was previously enabled */
4851                 tracing_start_tr(tr);
4852
4853         __trace_array_put(tr);
4854
4855         mutex_unlock(&trace_types_lock);
4856
4857         mutex_destroy(&iter->mutex);
4858         free_cpumask_var(iter->started);
4859         kfree(iter->fmt);
4860         kfree(iter->temp);
4861         kfree(iter->trace);
4862         kfree(iter->buffer_iter);
4863         seq_release_private(inode, file);
4864
4865         return 0;
4866 }
4867
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871
4872         trace_array_put(tr);
4873         return 0;
4874 }
4875
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878         struct trace_array *tr = inode->i_private;
4879
4880         trace_array_put(tr);
4881
4882         return single_release(inode, file);
4883 }
4884
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887         struct trace_array *tr = inode->i_private;
4888         struct trace_iterator *iter;
4889         int ret;
4890
4891         ret = tracing_check_open_get_tr(tr);
4892         if (ret)
4893                 return ret;
4894
4895         /* If this file was open for write, then erase contents */
4896         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897                 int cpu = tracing_get_cpu(inode);
4898                 struct array_buffer *trace_buf = &tr->array_buffer;
4899
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901                 if (tr->current_trace->print_max)
4902                         trace_buf = &tr->max_buffer;
4903 #endif
4904
4905                 if (cpu == RING_BUFFER_ALL_CPUS)
4906                         tracing_reset_online_cpus(trace_buf);
4907                 else
4908                         tracing_reset_cpu(trace_buf, cpu);
4909         }
4910
4911         if (file->f_mode & FMODE_READ) {
4912                 iter = __tracing_open(inode, file, false);
4913                 if (IS_ERR(iter))
4914                         ret = PTR_ERR(iter);
4915                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917         }
4918
4919         if (ret < 0)
4920                 trace_array_put(tr);
4921
4922         return ret;
4923 }
4924
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940         while (t && !trace_ok_for_array(t, tr))
4941                 t = t->next;
4942
4943         return t;
4944 }
4945
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949         struct trace_array *tr = m->private;
4950         struct tracer *t = v;
4951
4952         (*pos)++;
4953
4954         if (t)
4955                 t = get_tracer_for_array(tr, t->next);
4956
4957         return t;
4958 }
4959
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962         struct trace_array *tr = m->private;
4963         struct tracer *t;
4964         loff_t l = 0;
4965
4966         mutex_lock(&trace_types_lock);
4967
4968         t = get_tracer_for_array(tr, trace_types);
4969         for (; t && l < *pos; t = t_next(m, t, &l))
4970                         ;
4971
4972         return t;
4973 }
4974
4975 static void t_stop(struct seq_file *m, void *p)
4976 {
4977         mutex_unlock(&trace_types_lock);
4978 }
4979
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982         struct tracer *t = v;
4983
4984         if (!t)
4985                 return 0;
4986
4987         seq_puts(m, t->name);
4988         if (t->next)
4989                 seq_putc(m, ' ');
4990         else
4991                 seq_putc(m, '\n');
4992
4993         return 0;
4994 }
4995
4996 static const struct seq_operations show_traces_seq_ops = {
4997         .start          = t_start,
4998         .next           = t_next,
4999         .stop           = t_stop,
5000         .show           = t_show,
5001 };
5002
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005         struct trace_array *tr = inode->i_private;
5006         struct seq_file *m;
5007         int ret;
5008
5009         ret = tracing_check_open_get_tr(tr);
5010         if (ret)
5011                 return ret;
5012
5013         ret = seq_open(file, &show_traces_seq_ops);
5014         if (ret) {
5015                 trace_array_put(tr);
5016                 return ret;
5017         }
5018
5019         m = file->private_data;
5020         m->private = tr;
5021
5022         return 0;
5023 }
5024
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027         struct trace_array *tr = inode->i_private;
5028
5029         trace_array_put(tr);
5030         return seq_release(inode, file);
5031 }
5032
5033 static ssize_t
5034 tracing_write_stub(struct file *filp, const char __user *ubuf,
5035                    size_t count, loff_t *ppos)
5036 {
5037         return count;
5038 }
5039
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042         int ret;
5043
5044         if (file->f_mode & FMODE_READ)
5045                 ret = seq_lseek(file, offset, whence);
5046         else
5047                 file->f_pos = ret = 0;
5048
5049         return ret;
5050 }
5051
5052 static const struct file_operations tracing_fops = {
5053         .open           = tracing_open,
5054         .read           = seq_read,
5055         .write          = tracing_write_stub,
5056         .llseek         = tracing_lseek,
5057         .release        = tracing_release,
5058 };
5059
5060 static const struct file_operations show_traces_fops = {
5061         .open           = show_traces_open,
5062         .read           = seq_read,
5063         .llseek         = seq_lseek,
5064         .release        = show_traces_release,
5065 };
5066
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069                      size_t count, loff_t *ppos)
5070 {
5071         struct trace_array *tr = file_inode(filp)->i_private;
5072         char *mask_str;
5073         int len;
5074
5075         len = snprintf(NULL, 0, "%*pb\n",
5076                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077         mask_str = kmalloc(len, GFP_KERNEL);
5078         if (!mask_str)
5079                 return -ENOMEM;
5080
5081         len = snprintf(mask_str, len, "%*pb\n",
5082                        cpumask_pr_args(tr->tracing_cpumask));
5083         if (len >= count) {
5084                 count = -EINVAL;
5085                 goto out_err;
5086         }
5087         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088
5089 out_err:
5090         kfree(mask_str);
5091
5092         return count;
5093 }
5094
5095 int tracing_set_cpumask(struct trace_array *tr,
5096                         cpumask_var_t tracing_cpumask_new)
5097 {
5098         int cpu;
5099
5100         if (!tr)
5101                 return -EINVAL;
5102
5103         local_irq_disable();
5104         arch_spin_lock(&tr->max_lock);
5105         for_each_tracing_cpu(cpu) {
5106                 /*
5107                  * Increase/decrease the disabled counter if we are
5108                  * about to flip a bit in the cpumask:
5109                  */
5110                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114                 }
5115                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119                 }
5120         }
5121         arch_spin_unlock(&tr->max_lock);
5122         local_irq_enable();
5123
5124         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125
5126         return 0;
5127 }
5128
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131                       size_t count, loff_t *ppos)
5132 {
5133         struct trace_array *tr = file_inode(filp)->i_private;
5134         cpumask_var_t tracing_cpumask_new;
5135         int err;
5136
5137         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138                 return -ENOMEM;
5139
5140         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141         if (err)
5142                 goto err_free;
5143
5144         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145         if (err)
5146                 goto err_free;
5147
5148         free_cpumask_var(tracing_cpumask_new);
5149
5150         return count;
5151
5152 err_free:
5153         free_cpumask_var(tracing_cpumask_new);
5154
5155         return err;
5156 }
5157
5158 static const struct file_operations tracing_cpumask_fops = {
5159         .open           = tracing_open_generic_tr,
5160         .read           = tracing_cpumask_read,
5161         .write          = tracing_cpumask_write,
5162         .release        = tracing_release_generic_tr,
5163         .llseek         = generic_file_llseek,
5164 };
5165
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168         struct tracer_opt *trace_opts;
5169         struct trace_array *tr = m->private;
5170         u32 tracer_flags;
5171         int i;
5172
5173         mutex_lock(&trace_types_lock);
5174         tracer_flags = tr->current_trace->flags->val;
5175         trace_opts = tr->current_trace->flags->opts;
5176
5177         for (i = 0; trace_options[i]; i++) {
5178                 if (tr->trace_flags & (1 << i))
5179                         seq_printf(m, "%s\n", trace_options[i]);
5180                 else
5181                         seq_printf(m, "no%s\n", trace_options[i]);
5182         }
5183
5184         for (i = 0; trace_opts[i].name; i++) {
5185                 if (tracer_flags & trace_opts[i].bit)
5186                         seq_printf(m, "%s\n", trace_opts[i].name);
5187                 else
5188                         seq_printf(m, "no%s\n", trace_opts[i].name);
5189         }
5190         mutex_unlock(&trace_types_lock);
5191
5192         return 0;
5193 }
5194
5195 static int __set_tracer_option(struct trace_array *tr,
5196                                struct tracer_flags *tracer_flags,
5197                                struct tracer_opt *opts, int neg)
5198 {
5199         struct tracer *trace = tracer_flags->trace;
5200         int ret;
5201
5202         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203         if (ret)
5204                 return ret;
5205
5206         if (neg)
5207                 tracer_flags->val &= ~opts->bit;
5208         else
5209                 tracer_flags->val |= opts->bit;
5210         return 0;
5211 }
5212
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216         struct tracer *trace = tr->current_trace;
5217         struct tracer_flags *tracer_flags = trace->flags;
5218         struct tracer_opt *opts = NULL;
5219         int i;
5220
5221         for (i = 0; tracer_flags->opts[i].name; i++) {
5222                 opts = &tracer_flags->opts[i];
5223
5224                 if (strcmp(cmp, opts->name) == 0)
5225                         return __set_tracer_option(tr, trace->flags, opts, neg);
5226         }
5227
5228         return -EINVAL;
5229 }
5230
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235                 return -1;
5236
5237         return 0;
5238 }
5239
5240 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5241 {
5242         int *map;
5243
5244         if ((mask == TRACE_ITER_RECORD_TGID) ||
5245             (mask == TRACE_ITER_RECORD_CMD))
5246                 lockdep_assert_held(&event_mutex);
5247
5248         /* do nothing if flag is already set */
5249         if (!!(tr->trace_flags & mask) == !!enabled)
5250                 return 0;
5251
5252         /* Give the tracer a chance to approve the change */
5253         if (tr->current_trace->flag_changed)
5254                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5255                         return -EINVAL;
5256
5257         if (enabled)
5258                 tr->trace_flags |= mask;
5259         else
5260                 tr->trace_flags &= ~mask;
5261
5262         if (mask == TRACE_ITER_RECORD_CMD)
5263                 trace_event_enable_cmd_record(enabled);
5264
5265         if (mask == TRACE_ITER_RECORD_TGID) {
5266                 if (!tgid_map) {
5267                         tgid_map_max = pid_max;
5268                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5269                                        GFP_KERNEL);
5270
5271                         /*
5272                          * Pairs with smp_load_acquire() in
5273                          * trace_find_tgid_ptr() to ensure that if it observes
5274                          * the tgid_map we just allocated then it also observes
5275                          * the corresponding tgid_map_max value.
5276                          */
5277                         smp_store_release(&tgid_map, map);
5278                 }
5279                 if (!tgid_map) {
5280                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5281                         return -ENOMEM;
5282                 }
5283
5284                 trace_event_enable_tgid_record(enabled);
5285         }
5286
5287         if (mask == TRACE_ITER_EVENT_FORK)
5288                 trace_event_follow_fork(tr, enabled);
5289
5290         if (mask == TRACE_ITER_FUNC_FORK)
5291                 ftrace_pid_follow_fork(tr, enabled);
5292
5293         if (mask == TRACE_ITER_OVERWRITE) {
5294                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5297 #endif
5298         }
5299
5300         if (mask == TRACE_ITER_PRINTK) {
5301                 trace_printk_start_stop_comm(enabled);
5302                 trace_printk_control(enabled);
5303         }
5304
5305         return 0;
5306 }
5307
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310         char *cmp;
5311         int neg = 0;
5312         int ret;
5313         size_t orig_len = strlen(option);
5314         int len;
5315
5316         cmp = strstrip(option);
5317
5318         len = str_has_prefix(cmp, "no");
5319         if (len)
5320                 neg = 1;
5321
5322         cmp += len;
5323
5324         mutex_lock(&event_mutex);
5325         mutex_lock(&trace_types_lock);
5326
5327         ret = match_string(trace_options, -1, cmp);
5328         /* If no option could be set, test the specific tracer options */
5329         if (ret < 0)
5330                 ret = set_tracer_option(tr, cmp, neg);
5331         else
5332                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5333
5334         mutex_unlock(&trace_types_lock);
5335         mutex_unlock(&event_mutex);
5336
5337         /*
5338          * If the first trailing whitespace is replaced with '\0' by strstrip,
5339          * turn it back into a space.
5340          */
5341         if (orig_len > strlen(option))
5342                 option[strlen(option)] = ' ';
5343
5344         return ret;
5345 }
5346
5347 static void __init apply_trace_boot_options(void)
5348 {
5349         char *buf = trace_boot_options_buf;
5350         char *option;
5351
5352         while (true) {
5353                 option = strsep(&buf, ",");
5354
5355                 if (!option)
5356                         break;
5357
5358                 if (*option)
5359                         trace_set_options(&global_trace, option);
5360
5361                 /* Put back the comma to allow this to be called again */
5362                 if (buf)
5363                         *(buf - 1) = ',';
5364         }
5365 }
5366
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369                         size_t cnt, loff_t *ppos)
5370 {
5371         struct seq_file *m = filp->private_data;
5372         struct trace_array *tr = m->private;
5373         char buf[64];
5374         int ret;
5375
5376         if (cnt >= sizeof(buf))
5377                 return -EINVAL;
5378
5379         if (copy_from_user(buf, ubuf, cnt))
5380                 return -EFAULT;
5381
5382         buf[cnt] = 0;
5383
5384         ret = trace_set_options(tr, buf);
5385         if (ret < 0)
5386                 return ret;
5387
5388         *ppos += cnt;
5389
5390         return cnt;
5391 }
5392
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395         struct trace_array *tr = inode->i_private;
5396         int ret;
5397
5398         ret = tracing_check_open_get_tr(tr);
5399         if (ret)
5400                 return ret;
5401
5402         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403         if (ret < 0)
5404                 trace_array_put(tr);
5405
5406         return ret;
5407 }
5408
5409 static const struct file_operations tracing_iter_fops = {
5410         .open           = tracing_trace_options_open,
5411         .read           = seq_read,
5412         .llseek         = seq_lseek,
5413         .release        = tracing_single_release_tr,
5414         .write          = tracing_trace_options_write,
5415 };
5416
/*
 * Help text handed out verbatim by tracing_readme_read().
 *
 * The message is assembled from adjacent string literals; sections that
 * describe optional features are only compiled in when the matching
 * CONFIG_* symbol is defined.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t-change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
#endif
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever\n"
	"\t    a histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
#endif
#endif
;
5673
5674 static ssize_t
5675 tracing_readme_read(struct file *filp, char __user *ubuf,
5676                        size_t cnt, loff_t *ppos)
5677 {
5678         return simple_read_from_buffer(ubuf, cnt, ppos,
5679                                         readme_msg, strlen(readme_msg));
5680 }
5681
5682 static const struct file_operations tracing_readme_fops = {
5683         .open           = tracing_open_generic,
5684         .read           = tracing_readme_read,
5685         .llseek         = generic_file_llseek,
5686 };
5687
5688 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5689 {
5690         int pid = ++(*pos);
5691
5692         return trace_find_tgid_ptr(pid);
5693 }
5694
5695 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5696 {
5697         int pid = *pos;
5698
5699         return trace_find_tgid_ptr(pid);
5700 }
5701
/* seq_file ->stop: saved_tgids_start() acquires nothing, so nothing to undo */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
	/* intentionally empty */
}
5705
5706 static int saved_tgids_show(struct seq_file *m, void *v)
5707 {
5708         int *entry = (int *)v;
5709         int pid = entry - tgid_map;
5710         int tgid = *entry;
5711
5712         if (tgid == 0)
5713                 return SEQ_SKIP;
5714
5715         seq_printf(m, "%d %d\n", pid, tgid);
5716         return 0;
5717 }
5718
5719 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5720         .start          = saved_tgids_start,
5721         .stop           = saved_tgids_stop,
5722         .next           = saved_tgids_next,
5723         .show           = saved_tgids_show,
5724 };
5725
5726 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5727 {
5728         int ret;
5729
5730         ret = tracing_check_open_get_tr(NULL);
5731         if (ret)
5732                 return ret;
5733
5734         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5735 }
5736
5737
5738 static const struct file_operations tracing_saved_tgids_fops = {
5739         .open           = tracing_saved_tgids_open,
5740         .read           = seq_read,
5741         .llseek         = seq_lseek,
5742         .release        = seq_release,
5743 };
5744
5745 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5746 {
5747         unsigned int *ptr = v;
5748
5749         if (*pos || m->count)
5750                 ptr++;
5751
5752         (*pos)++;
5753
5754         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5755              ptr++) {
5756                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5757                         continue;
5758
5759                 return ptr;
5760         }
5761
5762         return NULL;
5763 }
5764
5765 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5766 {
5767         void *v;
5768         loff_t l = 0;
5769
5770         preempt_disable();
5771         arch_spin_lock(&trace_cmdline_lock);
5772
5773         v = &savedcmd->map_cmdline_to_pid[0];
5774         while (l <= *pos) {
5775                 v = saved_cmdlines_next(m, v, &l);
5776                 if (!v)
5777                         return NULL;
5778         }
5779
5780         return v;
5781 }
5782
5783 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5784 {
5785         arch_spin_unlock(&trace_cmdline_lock);
5786         preempt_enable();
5787 }
5788
5789 static int saved_cmdlines_show(struct seq_file *m, void *v)
5790 {
5791         char buf[TASK_COMM_LEN];
5792         unsigned int *pid = v;
5793
5794         __trace_find_cmdline(*pid, buf);
5795         seq_printf(m, "%d %s\n", *pid, buf);
5796         return 0;
5797 }
5798
5799 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5800         .start          = saved_cmdlines_start,
5801         .next           = saved_cmdlines_next,
5802         .stop           = saved_cmdlines_stop,
5803         .show           = saved_cmdlines_show,
5804 };
5805
5806 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5807 {
5808         int ret;
5809
5810         ret = tracing_check_open_get_tr(NULL);
5811         if (ret)
5812                 return ret;
5813
5814         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5815 }
5816
5817 static const struct file_operations tracing_saved_cmdlines_fops = {
5818         .open           = tracing_saved_cmdlines_open,
5819         .read           = seq_read,
5820         .llseek         = seq_lseek,
5821         .release        = seq_release,
5822 };
5823
5824 static ssize_t
5825 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5826                                  size_t cnt, loff_t *ppos)
5827 {
5828         char buf[64];
5829         int r;
5830
5831         arch_spin_lock(&trace_cmdline_lock);
5832         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5833         arch_spin_unlock(&trace_cmdline_lock);
5834
5835         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5836 }
5837
5838 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5839 {
5840         kfree(s->saved_cmdlines);
5841         kfree(s->map_cmdline_to_pid);
5842         kfree(s);
5843 }
5844
5845 static int tracing_resize_saved_cmdlines(unsigned int val)
5846 {
5847         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5848
5849         s = kmalloc(sizeof(*s), GFP_KERNEL);
5850         if (!s)
5851                 return -ENOMEM;
5852
5853         if (allocate_cmdlines_buffer(val, s) < 0) {
5854                 kfree(s);
5855                 return -ENOMEM;
5856         }
5857
5858         arch_spin_lock(&trace_cmdline_lock);
5859         savedcmd_temp = savedcmd;
5860         savedcmd = s;
5861         arch_spin_unlock(&trace_cmdline_lock);
5862         free_saved_cmdlines_buffer(savedcmd_temp);
5863
5864         return 0;
5865 }
5866
5867 static ssize_t
5868 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5869                                   size_t cnt, loff_t *ppos)
5870 {
5871         unsigned long val;
5872         int ret;
5873
5874         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5875         if (ret)
5876                 return ret;
5877
5878         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5879         if (!val || val > PID_MAX_DEFAULT)
5880                 return -EINVAL;
5881
5882         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5883         if (ret < 0)
5884                 return ret;
5885
5886         *ppos += cnt;
5887
5888         return cnt;
5889 }
5890
5891 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5892         .open           = tracing_open_generic,
5893         .read           = tracing_saved_cmdlines_size_read,
5894         .write          = tracing_saved_cmdlines_size_write,
5895 };
5896
5897 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5898 static union trace_eval_map_item *
5899 update_eval_map(union trace_eval_map_item *ptr)
5900 {
5901         if (!ptr->map.eval_string) {
5902                 if (ptr->tail.next) {
5903                         ptr = ptr->tail.next;
5904                         /* Set ptr to the next real item (skip head) */
5905                         ptr++;
5906                 } else
5907                         return NULL;
5908         }
5909         return ptr;
5910 }
5911
5912 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5913 {
5914         union trace_eval_map_item *ptr = v;
5915
5916         /*
5917          * Paranoid! If ptr points to end, we don't want to increment past it.
5918          * This really should never happen.
5919          */
5920         (*pos)++;
5921         ptr = update_eval_map(ptr);
5922         if (WARN_ON_ONCE(!ptr))
5923                 return NULL;
5924
5925         ptr++;
5926         ptr = update_eval_map(ptr);
5927
5928         return ptr;
5929 }
5930
5931 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5932 {
5933         union trace_eval_map_item *v;
5934         loff_t l = 0;
5935
5936         mutex_lock(&trace_eval_mutex);
5937
5938         v = trace_eval_maps;
5939         if (v)
5940                 v++;
5941
5942         while (v && l < *pos) {
5943                 v = eval_map_next(m, v, &l);
5944         }
5945
5946         return v;
5947 }
5948
5949 static void eval_map_stop(struct seq_file *m, void *v)
5950 {
5951         mutex_unlock(&trace_eval_mutex);
5952 }
5953
5954 static int eval_map_show(struct seq_file *m, void *v)
5955 {
5956         union trace_eval_map_item *ptr = v;
5957
5958         seq_printf(m, "%s %ld (%s)\n",
5959                    ptr->map.eval_string, ptr->map.eval_value,
5960                    ptr->map.system);
5961
5962         return 0;
5963 }
5964
5965 static const struct seq_operations tracing_eval_map_seq_ops = {
5966         .start          = eval_map_start,
5967         .next           = eval_map_next,
5968         .stop           = eval_map_stop,
5969         .show           = eval_map_show,
5970 };
5971
5972 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5973 {
5974         int ret;
5975
5976         ret = tracing_check_open_get_tr(NULL);
5977         if (ret)
5978                 return ret;
5979
5980         return seq_open(filp, &tracing_eval_map_seq_ops);
5981 }
5982
5983 static const struct file_operations tracing_eval_map_fops = {
5984         .open           = tracing_eval_map_open,
5985         .read           = seq_read,
5986         .llseek         = seq_lseek,
5987         .release        = seq_release,
5988 };
5989
5990 static inline union trace_eval_map_item *
5991 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5992 {
5993         /* Return tail of array given the head */
5994         return ptr + ptr->head.length + 1;
5995 }
5996
5997 static void
5998 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5999                            int len)
6000 {
6001         struct trace_eval_map **stop;
6002         struct trace_eval_map **map;
6003         union trace_eval_map_item *map_array;
6004         union trace_eval_map_item *ptr;
6005
6006         stop = start + len;
6007
6008         /*
6009          * The trace_eval_maps contains the map plus a head and tail item,
6010          * where the head holds the module and length of array, and the
6011          * tail holds a pointer to the next list.
6012          */
6013         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6014         if (!map_array) {
6015                 pr_warn("Unable to allocate trace eval mapping\n");
6016                 return;
6017         }
6018
6019         mutex_lock(&trace_eval_mutex);
6020
6021         if (!trace_eval_maps)
6022                 trace_eval_maps = map_array;
6023         else {
6024                 ptr = trace_eval_maps;
6025                 for (;;) {
6026                         ptr = trace_eval_jmp_to_tail(ptr);
6027                         if (!ptr->tail.next)
6028                                 break;
6029                         ptr = ptr->tail.next;
6030
6031                 }
6032                 ptr->tail.next = map_array;
6033         }
6034         map_array->head.mod = mod;
6035         map_array->head.length = len;
6036         map_array++;
6037
6038         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6039                 map_array->map = **map;
6040                 map_array++;
6041         }
6042         memset(map_array, 0, sizeof(*map_array));
6043
6044         mutex_unlock(&trace_eval_mutex);
6045 }
6046
6047 static void trace_create_eval_file(struct dentry *d_tracer)
6048 {
6049         trace_create_file("eval_map", 0444, d_tracer,
6050                           NULL, &tracing_eval_map_fops);
6051 }
6052
6053 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No "eval_map" file without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}
/* Nothing to record without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
}
6057 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6058
/*
 * Register @len eval maps starting at @start: pass them to
 * trace_event_eval_update() and then to trace_insert_eval_map_file().
 * Does nothing when @len is zero or negative.
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);
	trace_insert_eval_map_file(mod, start, len);
}
6073
6074 static ssize_t
6075 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6076                        size_t cnt, loff_t *ppos)
6077 {
6078         struct trace_array *tr = filp->private_data;
6079         char buf[MAX_TRACER_SIZE+2];
6080         int r;
6081
6082         mutex_lock(&trace_types_lock);
6083         r = sprintf(buf, "%s\n", tr->current_trace->name);
6084         mutex_unlock(&trace_types_lock);
6085
6086         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6087 }
6088
6089 int tracer_init(struct tracer *t, struct trace_array *tr)
6090 {
6091         tracing_reset_online_cpus(&tr->array_buffer);
6092         return t->init(tr);
6093 }
6094
6095 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6096 {
6097         int cpu;
6098
6099         for_each_tracing_cpu(cpu)
6100                 per_cpu_ptr(buf->data, cpu)->entries = val;
6101 }
6102
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize @trace_buf's ring buffer to the per-CPU entries recorded in
 * @size_buf, either for the single CPU @cpu_id or, when @cpu_id is
 * RING_BUFFER_ALL_CPUS, for every tracing CPU.  The entries recorded in
 * @trace_buf are updated for each CPU that was resized.
 *
 * Returns the result of the last ring_buffer_resize() call (0 if no CPU
 * was visited); the all-CPU walk stops at the first negative result.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int err = 0;
	int c;

	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		err = ring_buffer_resize(trace_buf->buffer,
				per_cpu_ptr(size_buf->data, cpu_id)->entries,
				cpu_id);
		/* Only a clean resize updates the bookkeeping. */
		if (err == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
		return err;
	}

	for_each_tracing_cpu(c) {
		err = ring_buffer_resize(trace_buf->buffer,
				per_cpu_ptr(size_buf->data, c)->entries, c);
		if (err < 0)
			return err;
		per_cpu_ptr(trace_buf->data, c)->entries =
			per_cpu_ptr(size_buf->data, c)->entries;
	}

	return err;
}
#endif /* CONFIG_TRACER_MAX_TRACE */
6130
6131 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6132                                         unsigned long size, int cpu)
6133 {
6134         int ret;
6135
6136         /*
6137          * If kernel or user changes the size of the ring buffer
6138          * we use the size that was given, and we can forget about
6139          * expanding it later.
6140          */
6141         ring_buffer_expanded = true;
6142
6143         /* May be called before buffers are initialized */
6144         if (!tr->array_buffer.buffer)
6145                 return 0;
6146
6147         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6148         if (ret < 0)
6149                 return ret;
6150
6151 #ifdef CONFIG_TRACER_MAX_TRACE
6152         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6153             !tr->current_trace->use_max_tr)
6154                 goto out;
6155
6156         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6157         if (ret < 0) {
6158                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6159                                                      &tr->array_buffer, cpu);
6160                 if (r < 0) {
6161                         /*
6162                          * AARGH! We are left with different
6163                          * size max buffer!!!!
6164                          * The max buffer is our "snapshot" buffer.
6165                          * When a tracer needs a snapshot (one of the
6166                          * latency tracers), it swaps the max buffer
6167                          * with the saved snap shot. We succeeded to
6168                          * update the size of the main buffer, but failed to
6169                          * update the size of the max buffer. But when we tried
6170                          * to reset the main buffer to the original size, we
6171                          * failed there too. This is very unlikely to
6172                          * happen, but if it does, warn and kill all
6173                          * tracing.
6174                          */
6175                         WARN_ON(1);
6176                         tracing_disabled = 1;
6177                 }
6178                 return ret;
6179         }
6180
6181         if (cpu == RING_BUFFER_ALL_CPUS)
6182                 set_buffer_entries(&tr->max_buffer, size);
6183         else
6184                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6185
6186  out:
6187 #endif /* CONFIG_TRACER_MAX_TRACE */
6188
6189         if (cpu == RING_BUFFER_ALL_CPUS)
6190                 set_buffer_entries(&tr->array_buffer, size);
6191         else
6192                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6193
6194         return ret;
6195 }
6196
6197 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6198                                   unsigned long size, int cpu_id)
6199 {
6200         int ret;
6201
6202         mutex_lock(&trace_types_lock);
6203
6204         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6205                 /* make sure, this cpu is enabled in the mask */
6206                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6207                         ret = -EINVAL;
6208                         goto out;
6209                 }
6210         }
6211
6212         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6213         if (ret < 0)
6214                 ret = -ENOMEM;
6215
6216 out:
6217         mutex_unlock(&trace_types_lock);
6218
6219         return ret;
6220 }
6221
6222
6223 /**
6224  * tracing_update_buffers - used by tracing facility to expand ring buffers
6225  *
6226  * To save on memory when the tracing is never used on a system with it
6227  * configured in. The ring buffers are set to a minimum size. But once
6228  * a user starts to use the tracing facility, then they need to grow
6229  * to their default size.
6230  *
6231  * This function is to be called when a tracer is about to be used.
6232  */
6233 int tracing_update_buffers(void)
6234 {
6235         int ret = 0;
6236
6237         mutex_lock(&trace_types_lock);
6238         if (!ring_buffer_expanded)
6239                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6240                                                 RING_BUFFER_ALL_CPUS);
6241         mutex_unlock(&trace_types_lock);
6242
6243         return ret;
6244 }
6245
6246 struct trace_option_dentry;
6247
6248 static void
6249 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6250
6251 /*
6252  * Used to clear out the tracer before deletion of an instance.
6253  * Must have trace_types_lock held.
6254  */
6255 static void tracing_set_nop(struct trace_array *tr)
6256 {
6257         if (tr->current_trace == &nop_trace)
6258                 return;
6259         
6260         tr->current_trace->enabled--;
6261
6262         if (tr->current_trace->reset)
6263                 tr->current_trace->reset(tr);
6264
6265         tr->current_trace = &nop_trace;
6266 }
6267
6268 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6269 {
6270         /* Only enable if the directory has been created already. */
6271         if (!tr->dir)
6272                 return;
6273
6274         create_trace_option_files(tr, t);
6275 }
6276
/*
 * Make the tracer named @buf the current tracer of @tr.
 *
 * Everything runs under trace_types_lock.  If the ring buffer has not been
 * expanded yet it is first resized to trace_buf_size on all CPUs.
 *
 * Returns 0 on success, and also when the named tracer is already current
 * or when a "noboot" tracer is requested before the system is running (the
 * request is ignored with a warning).  Otherwise returns:
 *   a negative resize result if the initial buffer expansion fails,
 *   -EINVAL if no registered tracer has that name or the tracer is not
 *           allowed for @tr,
 *   -EBUSY  if a conditional snapshot is in use and the new tracer uses
 *           the max buffer, or if @tr->trace_ref is non-zero,
 *   or the error from allocating the snapshot or from the tracer's init.
 *
 * Once the old tracer has been torn down, a failure leaves @tr with
 * nop_trace as its current tracer.
 */
6277 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6278 {
6279         struct tracer *t;
6280 #ifdef CONFIG_TRACER_MAX_TRACE
6281         bool had_max_tr;
6282 #endif
6283         int ret = 0;
6284
6285         mutex_lock(&trace_types_lock);
6286
6287         if (!ring_buffer_expanded) {
6288                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6289                                                 RING_BUFFER_ALL_CPUS);
6290                 if (ret < 0)
6291                         goto out;
6292                 ret = 0;
6293         }
6294
/* Look the tracer up by name in the list of registered tracers. */
6295         for (t = trace_types; t; t = t->next) {
6296                 if (strcmp(t->name, buf) == 0)
6297                         break;
6298         }
6299         if (!t) {
6300                 ret = -EINVAL;
6301                 goto out;
6302         }
/* Nothing to do if it is already the current tracer (ret is 0 here). */
6303         if (t == tr->current_trace)
6304                 goto out;
6305
6306 #ifdef CONFIG_TRACER_SNAPSHOT
6307         if (t->use_max_tr) {
6308                 arch_spin_lock(&tr->max_lock);
6309                 if (tr->cond_snapshot)
6310                         ret = -EBUSY;
6311                 arch_spin_unlock(&tr->max_lock);
6312                 if (ret)
6313                         goto out;
6314         }
6315 #endif
6316         /* Some tracers won't work on kernel command line */
6317         if (system_state < SYSTEM_RUNNING && t->noboot) {
6318                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6319                         t->name);
6320                 goto out;
6321         }
6322
6323         /* Some tracers are only allowed for the top level buffer */
6324         if (!trace_ok_for_array(t, tr)) {
6325                 ret = -EINVAL;
6326                 goto out;
6327         }
6328
6329         /* If trace pipe files are being read, we can't change the tracer */
6330         if (tr->trace_ref) {
6331                 ret = -EBUSY;
6332                 goto out;
6333         }
6334
6335         trace_branch_disable();
6336
/* Tear down the old tracer; its reset callback is optional. */
6337         tr->current_trace->enabled--;
6338
6339         if (tr->current_trace->reset)
6340                 tr->current_trace->reset(tr);
6341
6342         /* Current trace needs to be nop_trace before synchronize_rcu */
6343         tr->current_trace = &nop_trace;
6344
6345 #ifdef CONFIG_TRACER_MAX_TRACE
6346         had_max_tr = tr->allocated_snapshot;
6347
6348         if (had_max_tr && !t->use_max_tr) {
6349                 /*
6350                  * We need to make sure that the update_max_tr sees that
6351                  * current_trace changed to nop_trace to keep it from
6352                  * swapping the buffers after we resize it.
6353                  * The update_max_tr is called from interrupts disabled
6354                  * so a synchronize_rcu() is sufficient.
6355                  */
6356                 synchronize_rcu();
6357                 free_snapshot(tr);
6358         }
6359 #endif
6360
6361 #ifdef CONFIG_TRACER_MAX_TRACE
/* The new tracer needs a snapshot buffer that is not allocated yet. */
6362         if (t->use_max_tr && !had_max_tr) {
6363                 ret = tracing_alloc_snapshot_instance(tr);
6364                 if (ret < 0)
6365                         goto out;
6366         }
6367 #endif
6368
6369         if (t->init) {
6370                 ret = tracer_init(t, tr);
6371                 if (ret)
6372                         goto out;
6373         }
6374
6375         tr->current_trace = t;
6376         tr->current_trace->enabled++;
6377         trace_branch_enable(tr);
6378  out:
6379         mutex_unlock(&trace_types_lock);
6380
6381         return ret;
6382 }
6383
6384 static ssize_t
6385 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6386                         size_t cnt, loff_t *ppos)
6387 {
6388         struct trace_array *tr = filp->private_data;
6389         char buf[MAX_TRACER_SIZE+1];
6390         int i;
6391         size_t ret;
6392         int err;
6393
6394         ret = cnt;
6395
6396         if (cnt > MAX_TRACER_SIZE)
6397                 cnt = MAX_TRACER_SIZE;
6398
6399         if (copy_from_user(buf, ubuf, cnt))
6400                 return -EFAULT;
6401
6402         buf[cnt] = 0;
6403
6404         /* strip ending whitespace. */
6405         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6406                 buf[i] = 0;
6407
6408         err = tracing_set_tracer(tr, buf);
6409         if (err)
6410                 return err;
6411
6412         *ppos += ret;
6413
6414         return ret;
6415 }
6416
6417 static ssize_t
6418 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6419                    size_t cnt, loff_t *ppos)
6420 {
6421         char buf[64];
6422         int r;
6423
6424         r = snprintf(buf, sizeof(buf), "%ld\n",
6425                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6426         if (r > sizeof(buf))
6427                 r = sizeof(buf);
6428         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6429 }
6430
6431 static ssize_t
6432 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6433                     size_t cnt, loff_t *ppos)
6434 {
6435         unsigned long val;
6436         int ret;
6437
6438         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6439         if (ret)
6440                 return ret;
6441
6442         *ptr = val * 1000;
6443
6444         return cnt;
6445 }
6446
6447 static ssize_t
6448 tracing_thresh_read(struct file *filp, char __user *ubuf,
6449                     size_t cnt, loff_t *ppos)
6450 {
6451         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6452 }
6453
6454 static ssize_t
6455 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6456                      size_t cnt, loff_t *ppos)
6457 {
6458         struct trace_array *tr = filp->private_data;
6459         int ret;
6460
6461         mutex_lock(&trace_types_lock);
6462         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6463         if (ret < 0)
6464                 goto out;
6465
6466         if (tr->current_trace->update_thresh) {
6467                 ret = tr->current_trace->update_thresh(tr);
6468                 if (ret < 0)
6469                         goto out;
6470         }
6471
6472         ret = cnt;
6473 out:
6474         mutex_unlock(&trace_types_lock);
6475
6476         return ret;
6477 }
6478
6479 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6480
6481 static ssize_t
6482 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6483                      size_t cnt, loff_t *ppos)
6484 {
6485         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6486 }
6487
6488 static ssize_t
6489 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6490                       size_t cnt, loff_t *ppos)
6491 {
6492         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6493 }
6494
6495 #endif
6496
6497 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6498 {
6499         struct trace_array *tr = inode->i_private;
6500         struct trace_iterator *iter;
6501         int ret;
6502
6503         ret = tracing_check_open_get_tr(tr);
6504         if (ret)
6505                 return ret;
6506
6507         mutex_lock(&trace_types_lock);
6508
6509         /* create a buffer to store the information to pass to userspace */
6510         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6511         if (!iter) {
6512                 ret = -ENOMEM;
6513                 __trace_array_put(tr);
6514                 goto out;
6515         }
6516
6517         trace_seq_init(&iter->seq);
6518         iter->trace = tr->current_trace;
6519
6520         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6521                 ret = -ENOMEM;
6522                 goto fail;
6523         }
6524
6525         /* trace pipe does not show start of buffer */
6526         cpumask_setall(iter->started);
6527
6528         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6529                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6530
6531         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6532         if (trace_clocks[tr->clock_id].in_ns)
6533                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6534
6535         iter->tr = tr;
6536         iter->array_buffer = &tr->array_buffer;
6537         iter->cpu_file = tracing_get_cpu(inode);
6538         mutex_init(&iter->mutex);
6539         filp->private_data = iter;
6540
6541         if (iter->trace->pipe_open)
6542                 iter->trace->pipe_open(iter);
6543
6544         nonseekable_open(inode, filp);
6545
6546         tr->trace_ref++;
6547 out:
6548         mutex_unlock(&trace_types_lock);
6549         return ret;
6550
6551 fail:
6552         kfree(iter);
6553         __trace_array_put(tr);
6554         mutex_unlock(&trace_types_lock);
6555         return ret;
6556 }
6557
6558 static int tracing_release_pipe(struct inode *inode, struct file *file)
6559 {
6560         struct trace_iterator *iter = file->private_data;
6561         struct trace_array *tr = inode->i_private;
6562
6563         mutex_lock(&trace_types_lock);
6564
6565         tr->trace_ref--;
6566
6567         if (iter->trace->pipe_close)
6568                 iter->trace->pipe_close(iter);
6569
6570         mutex_unlock(&trace_types_lock);
6571
6572         free_cpumask_var(iter->started);
6573         mutex_destroy(&iter->mutex);
6574         kfree(iter);
6575
6576         trace_array_put(tr);
6577
6578         return 0;
6579 }
6580
6581 static __poll_t
6582 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6583 {
6584         struct trace_array *tr = iter->tr;
6585
6586         /* Iterators are static, they should be filled or empty */
6587         if (trace_buffer_iter(iter, iter->cpu_file))
6588                 return EPOLLIN | EPOLLRDNORM;
6589
6590         if (tr->trace_flags & TRACE_ITER_BLOCK)
6591                 /*
6592                  * Always select as readable when in blocking mode
6593                  */
6594                 return EPOLLIN | EPOLLRDNORM;
6595         else
6596                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6597                                              filp, poll_table);
6598 }
6599
6600 static __poll_t
6601 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6602 {
6603         struct trace_iterator *iter = filp->private_data;
6604
6605         return trace_poll(iter, filp, poll_table);
6606 }
6607
6608 /* Must be called with iter->mutex held. */
6609 static int tracing_wait_pipe(struct file *filp)
6610 {
6611         struct trace_iterator *iter = filp->private_data;
6612         int ret;
6613
6614         while (trace_empty(iter)) {
6615
6616                 if ((filp->f_flags & O_NONBLOCK)) {
6617                         return -EAGAIN;
6618                 }
6619
6620                 /*
6621                  * We block until we read something and tracing is disabled.
6622                  * We still block if tracing is disabled, but we have never
6623                  * read anything. This allows a user to cat this file, and
6624                  * then enable tracing. But after we have read something,
6625                  * we give an EOF when tracing is again disabled.
6626                  *
6627                  * iter->pos will be 0 if we haven't read anything.
6628                  */
6629                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6630                         break;
6631
6632                 mutex_unlock(&iter->mutex);
6633
6634                 ret = wait_on_pipe(iter, 0);
6635
6636                 mutex_lock(&iter->mutex);
6637
6638                 if (ret)
6639                         return ret;
6640         }
6641
6642         return 1;
6643 }
6644
6645 /*
6646  * Consumer reader.
 *
 * Read handler for the pipe file.  Data left over in iter->seq from an
 * earlier call is returned first.  Otherwise the tracer's own read
 * callback, if it has one, gets the first chance; any non-zero result from
 * it is returned.  Failing that, the function waits for entries, formats
 * them into iter->seq and copies the result to @ubuf.  Entries are consumed
 * as they are printed unless print_trace_line() returns
 * TRACE_TYPE_NO_CONSUME.
 *
 * Returns what trace_seq_to_user() reports for the copy, 0 when the buffer
 * is empty after waiting, or the non-positive result of
 * tracing_wait_pipe().
6647  */
6648 static ssize_t
6649 tracing_read_pipe(struct file *filp, char __user *ubuf,
6650                   size_t cnt, loff_t *ppos)
6651 {
6652         struct trace_iterator *iter = filp->private_data;
6653         ssize_t sret;
6654
6655         /*
6656          * Avoid more than one consumer on a single file descriptor
6657          * This is just a matter of traces coherency, the ring buffer itself
6658          * is protected.
6659          */
6660         mutex_lock(&iter->mutex);
6661
6662         /* return any leftover data */
6663         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6664         if (sret != -EBUSY)
6665                 goto out;
6666
6667         trace_seq_init(&iter->seq);
6668
6669         if (iter->trace->read) {
6670                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6671                 if (sret)
6672                         goto out;
6673         }
6674
6675 waitagain:
6676         sret = tracing_wait_pipe(filp);
6677         if (sret <= 0)
6678                 goto out;
6679
6680         /* stop when tracing is finished */
6681         if (trace_empty(iter)) {
6682                 sret = 0;
6683                 goto out;
6684         }
6685
/* Limit a single read to just under one page. */
6686         if (cnt >= PAGE_SIZE)
6687                 cnt = PAGE_SIZE - 1;
6688
6689         /* reset all but tr, trace, and overruns */
6690         memset(&iter->seq, 0,
6691                sizeof(struct trace_iterator) -
6692                offsetof(struct trace_iterator, seq));
6693         cpumask_clear(iter->started);
6694         trace_seq_init(&iter->seq);
6695         iter->pos = -1;
6696
6697         trace_event_read_lock();
6698         trace_access_lock(iter->cpu_file);
6699         while (trace_find_next_entry_inc(iter) != NULL) {
6700                 enum print_line_t ret;
6701                 int save_len = iter->seq.seq.len;
6702
6703                 ret = print_trace_line(iter);
6704                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6705                         /* don't print partial lines */
6706                         iter->seq.seq.len = save_len;
6707                         break;
6708                 }
6709                 if (ret != TRACE_TYPE_NO_CONSUME)
6710                         trace_consume(iter);
6711
/* Enough has been formatted to satisfy this read. */
6712                 if (trace_seq_used(&iter->seq) >= cnt)
6713                         break;
6714
6715                 /*
6716                  * Setting the full flag means we reached the trace_seq buffer
6717                  * size and we should leave by partial output condition above.
6718                  * One of the trace_seq_* functions is not used properly.
6719                  */
6720                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6721                           iter->ent->type);
6722         }
6723         trace_access_unlock(iter->cpu_file);
6724         trace_event_read_unlock();
6725
6726         /* Now copy what we have to the user */
6727         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
/* Everything in the seq has been read out: start the next read clean. */
6728         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6729                 trace_seq_init(&iter->seq);
6730
6731         /*
6732          * If there was nothing to send to user, in spite of consuming trace
6733          * entries, go back to wait for more entries.
6734          */
6735         if (sret == -EBUSY)
6736                 goto waitagain;
6737
6738 out:
6739         mutex_unlock(&iter->mutex);
6740
6741         return sret;
6742 }
6743
6744 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6745                                      unsigned int idx)
6746 {
6747         __free_page(spd->pages[idx]);
6748 }
6749
6750 static size_t
6751 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6752 {
6753         size_t count;
6754         int save_len;
6755         int ret;
6756
6757         /* Seq buffer is page-sized, exactly what we need. */
6758         for (;;) {
6759                 save_len = iter->seq.seq.len;
6760                 ret = print_trace_line(iter);
6761
6762                 if (trace_seq_has_overflowed(&iter->seq)) {
6763                         iter->seq.seq.len = save_len;
6764                         break;
6765                 }
6766
6767                 /*
6768                  * This should not be hit, because it should only
6769                  * be set if the iter->seq overflowed. But check it
6770                  * anyway to be safe.
6771                  */
6772                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6773                         iter->seq.seq.len = save_len;
6774                         break;
6775                 }
6776
6777                 count = trace_seq_used(&iter->seq) - save_len;
6778                 if (rem < count) {
6779                         rem = 0;
6780                         iter->seq.seq.len = save_len;
6781                         break;
6782                 }
6783
6784                 if (ret != TRACE_TYPE_NO_CONSUME)
6785                         trace_consume(iter);
6786                 rem -= count;
6787                 if (!trace_find_next_entry_inc(iter))   {
6788                         rem = 0;
6789                         iter->ent = NULL;
6790                         break;
6791                 }
6792         }
6793
6794         return rem;
6795 }
6796
/*
 * Splice-read handler for the pipe file.
 *
 * The tracer's own splice_read callback, if it has one, gets the first
 * chance; any non-zero result from it is returned.  Otherwise the function
 * waits for entries, then fills up to spd.nr_pages_max freshly allocated
 * pages (or until @len bytes are accounted for) with formatted trace
 * lines, and hands the filled pages to splice_to_pipe().
 *
 * Returns the splice_to_pipe() result, 0 if no page could be filled,
 * -ENOMEM if the splice descriptor cannot be grown, -EFAULT if no entry is
 * available after waiting, or the non-positive tracing_wait_pipe() result.
 */
6797 static ssize_t tracing_splice_read_pipe(struct file *filp,
6798                                         loff_t *ppos,
6799                                         struct pipe_inode_info *pipe,
6800                                         size_t len,
6801                                         unsigned int flags)
6802 {
6803         struct page *pages_def[PIPE_DEF_BUFFERS];
6804         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6805         struct trace_iterator *iter = filp->private_data;
6806         struct splice_pipe_desc spd = {
6807                 .pages          = pages_def,
6808                 .partial        = partial_def,
6809                 .nr_pages       = 0, /* This gets updated below. */
6810                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6811                 .ops            = &default_pipe_buf_ops,
6812                 .spd_release    = tracing_spd_release_pipe,
6813         };
6814         ssize_t ret;
6815         size_t rem;
6816         unsigned int i;
6817
6818         if (splice_grow_spd(pipe, &spd))
6819                 return -ENOMEM;
6820
6821         mutex_lock(&iter->mutex);
6822
6823         if (iter->trace->splice_read) {
6824                 ret = iter->trace->splice_read(iter, filp,
6825                                                ppos, pipe, len, flags);
6826                 if (ret)
6827                         goto out_err;
6828         }
6829
6830         ret = tracing_wait_pipe(filp);
6831         if (ret <= 0)
6832                 goto out_err;
6833
/* Make sure there is a current entry to start printing from. */
6834         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6835                 ret = -EFAULT;
6836                 goto out_err;
6837         }
6838
6839         trace_event_read_lock();
6840         trace_access_lock(iter->cpu_file);
6841
6842         /* Fill as many pages as possible. */
6843         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6844                 spd.pages[i] = alloc_page(GFP_KERNEL);
6845                 if (!spd.pages[i])
6846                         break;
6847
6848                 rem = tracing_fill_pipe_page(rem, iter);
6849
6850                 /* Copy the data into the page, so we can start over. */
6851                 ret = trace_seq_to_buffer(&iter->seq,
6852                                           page_address(spd.pages[i]),
6853                                           trace_seq_used(&iter->seq));
6854                 if (ret < 0) {
/* i is not advanced on this path, so the freed page is never counted. */
6855                         __free_page(spd.pages[i]);
6856                         break;
6857                 }
6858                 spd.partial[i].offset = 0;
6859                 spd.partial[i].len = trace_seq_used(&iter->seq);
6860
6861                 trace_seq_init(&iter->seq);
6862         }
6863
6864         trace_access_unlock(iter->cpu_file);
6865         trace_event_read_unlock();
6866         mutex_unlock(&iter->mutex);
6867
/* i is the number of pages that were completely set up above. */
6868         spd.nr_pages = i;
6869
6870         if (i)
6871                 ret = splice_to_pipe(pipe, &spd);
6872         else
6873                 ret = 0;
6874 out:
6875         splice_shrink_spd(&spd);
6876         return ret;
6877
6878 out_err:
6879         mutex_unlock(&iter->mutex);
6880         goto out;
6881 }
6882
6883 static ssize_t
6884 tracing_entries_read(struct file *filp, char __user *ubuf,
6885                      size_t cnt, loff_t *ppos)
6886 {
6887         struct inode *inode = file_inode(filp);
6888         struct trace_array *tr = inode->i_private;
6889         int cpu = tracing_get_cpu(inode);
6890         char buf[64];
6891         int r = 0;
6892         ssize_t ret;
6893
6894         mutex_lock(&trace_types_lock);
6895
6896         if (cpu == RING_BUFFER_ALL_CPUS) {
6897                 int cpu, buf_size_same;
6898                 unsigned long size;
6899
6900                 size = 0;
6901                 buf_size_same = 1;
6902                 /* check if all cpu sizes are same */
6903                 for_each_tracing_cpu(cpu) {
6904                         /* fill in the size from first enabled cpu */
6905                         if (size == 0)
6906                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6907                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6908                                 buf_size_same = 0;
6909                                 break;
6910                         }
6911                 }
6912
6913                 if (buf_size_same) {
6914                         if (!ring_buffer_expanded)
6915                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6916                                             size >> 10,
6917                                             trace_buf_size >> 10);
6918                         else
6919                                 r = sprintf(buf, "%lu\n", size >> 10);
6920                 } else
6921                         r = sprintf(buf, "X\n");
6922         } else
6923                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6924
6925         mutex_unlock(&trace_types_lock);
6926
6927         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6928         return ret;
6929 }
6930
6931 static ssize_t
6932 tracing_entries_write(struct file *filp, const char __user *ubuf,
6933                       size_t cnt, loff_t *ppos)
6934 {
6935         struct inode *inode = file_inode(filp);
6936         struct trace_array *tr = inode->i_private;
6937         unsigned long val;
6938         int ret;
6939
6940         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6941         if (ret)
6942                 return ret;
6943
6944         /* must have at least 1 entry */
6945         if (!val)
6946                 return -EINVAL;
6947
6948         /* value is in KB */
6949         val <<= 10;
6950         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6951         if (ret < 0)
6952                 return ret;
6953
6954         *ppos += cnt;
6955
6956         return cnt;
6957 }
6958
6959 static ssize_t
6960 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6961                                 size_t cnt, loff_t *ppos)
6962 {
6963         struct trace_array *tr = filp->private_data;
6964         char buf[64];
6965         int r, cpu;
6966         unsigned long size = 0, expanded_size = 0;
6967
6968         mutex_lock(&trace_types_lock);
6969         for_each_tracing_cpu(cpu) {
6970                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6971                 if (!ring_buffer_expanded)
6972                         expanded_size += trace_buf_size >> 10;
6973         }
6974         if (ring_buffer_expanded)
6975                 r = sprintf(buf, "%lu\n", size);
6976         else
6977                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6978         mutex_unlock(&trace_types_lock);
6979
6980         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6981 }
6982
6983 static ssize_t
6984 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6985                           size_t cnt, loff_t *ppos)
6986 {
6987         /*
6988          * There is no need to read what the user has written, this function
6989          * is just to make sure that there is no error when "echo" is used
6990          */
6991
6992         *ppos += cnt;
6993
6994         return cnt;
6995 }
6996
6997 static int
6998 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6999 {
7000         struct trace_array *tr = inode->i_private;
7001
7002         /* disable tracing ? */
7003         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7004                 tracer_tracing_off(tr);
7005         /* resize the ring buffer to 0 */
7006         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7007
7008         trace_array_put(tr);
7009
7010         return 0;
7011 }
7012
7013 static ssize_t
7014 tracing_mark_write(struct file *filp, const char __user *ubuf,
7015                                         size_t cnt, loff_t *fpos)
7016 {
7017         struct trace_array *tr = filp->private_data;
7018         struct ring_buffer_event *event;
7019         enum event_trigger_type tt = ETT_NONE;
7020         struct trace_buffer *buffer;
7021         struct print_entry *entry;
7022         ssize_t written;
7023         int size;
7024         int len;
7025
7026 /* Used in tracing_mark_raw_write() as well */
7027 #define FAULTED_STR "<faulted>"
7028 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7029
7030         if (tracing_disabled)
7031                 return -EINVAL;
7032
7033         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7034                 return -EINVAL;
7035
7036         if (cnt > TRACE_BUF_SIZE)
7037                 cnt = TRACE_BUF_SIZE;
7038
7039         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7040
7041         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7042
7043         /* If less than "<faulted>", then make sure we can still add that */
7044         if (cnt < FAULTED_SIZE)
7045                 size += FAULTED_SIZE - cnt;
7046
7047         buffer = tr->array_buffer.buffer;
7048         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7049                                             tracing_gen_ctx());
7050         if (unlikely(!event))
7051                 /* Ring buffer disabled, return as if not open for write */
7052                 return -EBADF;
7053
7054         entry = ring_buffer_event_data(event);
7055         entry->ip = _THIS_IP_;
7056
7057         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7058         if (len) {
7059                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7060                 cnt = FAULTED_SIZE;
7061                 written = -EFAULT;
7062         } else
7063                 written = cnt;
7064
7065         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7066                 /* do not add \n before testing triggers, but add \0 */
7067                 entry->buf[cnt] = '\0';
7068                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7069         }
7070
7071         if (entry->buf[cnt - 1] != '\n') {
7072                 entry->buf[cnt] = '\n';
7073                 entry->buf[cnt + 1] = '\0';
7074         } else
7075                 entry->buf[cnt] = '\0';
7076
7077         if (static_branch_unlikely(&trace_marker_exports_enabled))
7078                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7079         __buffer_unlock_commit(buffer, event);
7080
7081         if (tt)
7082                 event_triggers_post_call(tr->trace_marker_file, tt);
7083
7084         if (written > 0)
7085                 *fpos += written;
7086
7087         return written;
7088 }
7089
7090 /* Limit it for now to 3K (including tag) */
7091 #define RAW_DATA_MAX_SIZE (1024*3)
7092
7093 static ssize_t
7094 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7095                                         size_t cnt, loff_t *fpos)
7096 {
7097         struct trace_array *tr = filp->private_data;
7098         struct ring_buffer_event *event;
7099         struct trace_buffer *buffer;
7100         struct raw_data_entry *entry;
7101         ssize_t written;
7102         int size;
7103         int len;
7104
7105 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7106
7107         if (tracing_disabled)
7108                 return -EINVAL;
7109
7110         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7111                 return -EINVAL;
7112
7113         /* The marker must at least have a tag id */
7114         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7115                 return -EINVAL;
7116
7117         if (cnt > TRACE_BUF_SIZE)
7118                 cnt = TRACE_BUF_SIZE;
7119
7120         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7121
7122         size = sizeof(*entry) + cnt;
7123         if (cnt < FAULT_SIZE_ID)
7124                 size += FAULT_SIZE_ID - cnt;
7125
7126         buffer = tr->array_buffer.buffer;
7127         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7128                                             tracing_gen_ctx());
7129         if (!event)
7130                 /* Ring buffer disabled, return as if not open for write */
7131                 return -EBADF;
7132
7133         entry = ring_buffer_event_data(event);
7134
7135         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7136         if (len) {
7137                 entry->id = -1;
7138                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7139                 written = -EFAULT;
7140         } else
7141                 written = cnt;
7142
7143         __buffer_unlock_commit(buffer, event);
7144
7145         if (written > 0)
7146                 *fpos += written;
7147
7148         return written;
7149 }
7150
7151 static int tracing_clock_show(struct seq_file *m, void *v)
7152 {
7153         struct trace_array *tr = m->private;
7154         int i;
7155
7156         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7157                 seq_printf(m,
7158                         "%s%s%s%s", i ? " " : "",
7159                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7160                         i == tr->clock_id ? "]" : "");
7161         seq_putc(m, '\n');
7162
7163         return 0;
7164 }
7165
7166 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7167 {
7168         int i;
7169
7170         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7171                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7172                         break;
7173         }
7174         if (i == ARRAY_SIZE(trace_clocks))
7175                 return -EINVAL;
7176
7177         mutex_lock(&trace_types_lock);
7178
7179         tr->clock_id = i;
7180
7181         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7182
7183         /*
7184          * New clock may not be consistent with the previous clock.
7185          * Reset the buffer so that it doesn't have incomparable timestamps.
7186          */
7187         tracing_reset_online_cpus(&tr->array_buffer);
7188
7189 #ifdef CONFIG_TRACER_MAX_TRACE
7190         if (tr->max_buffer.buffer)
7191                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7192         tracing_reset_online_cpus(&tr->max_buffer);
7193 #endif
7194
7195         mutex_unlock(&trace_types_lock);
7196
7197         return 0;
7198 }
7199
7200 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7201                                    size_t cnt, loff_t *fpos)
7202 {
7203         struct seq_file *m = filp->private_data;
7204         struct trace_array *tr = m->private;
7205         char buf[64];
7206         const char *clockstr;
7207         int ret;
7208
7209         if (cnt >= sizeof(buf))
7210                 return -EINVAL;
7211
7212         if (copy_from_user(buf, ubuf, cnt))
7213                 return -EFAULT;
7214
7215         buf[cnt] = 0;
7216
7217         clockstr = strstrip(buf);
7218
7219         ret = tracing_set_clock(tr, clockstr);
7220         if (ret)
7221                 return ret;
7222
7223         *fpos += cnt;
7224
7225         return cnt;
7226 }
7227
7228 static int tracing_clock_open(struct inode *inode, struct file *file)
7229 {
7230         struct trace_array *tr = inode->i_private;
7231         int ret;
7232
7233         ret = tracing_check_open_get_tr(tr);
7234         if (ret)
7235                 return ret;
7236
7237         ret = single_open(file, tracing_clock_show, inode->i_private);
7238         if (ret < 0)
7239                 trace_array_put(tr);
7240
7241         return ret;
7242 }
7243
7244 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7245 {
7246         struct trace_array *tr = m->private;
7247
7248         mutex_lock(&trace_types_lock);
7249
7250         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7251                 seq_puts(m, "delta [absolute]\n");
7252         else
7253                 seq_puts(m, "[delta] absolute\n");
7254
7255         mutex_unlock(&trace_types_lock);
7256
7257         return 0;
7258 }
7259
7260 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7261 {
7262         struct trace_array *tr = inode->i_private;
7263         int ret;
7264
7265         ret = tracing_check_open_get_tr(tr);
7266         if (ret)
7267                 return ret;
7268
7269         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7270         if (ret < 0)
7271                 trace_array_put(tr);
7272
7273         return ret;
7274 }
7275
7276 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7277 {
7278         if (rbe == this_cpu_read(trace_buffered_event))
7279                 return ring_buffer_time_stamp(buffer);
7280
7281         return ring_buffer_event_time_stamp(buffer, rbe);
7282 }
7283
7284 /*
7285  * Set or disable using the per CPU trace_buffer_event when possible.
7286  */
7287 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7288 {
7289         int ret = 0;
7290
7291         mutex_lock(&trace_types_lock);
7292
7293         if (set && tr->no_filter_buffering_ref++)
7294                 goto out;
7295
7296         if (!set) {
7297                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7298                         ret = -EINVAL;
7299                         goto out;
7300                 }
7301
7302                 --tr->no_filter_buffering_ref;
7303         }
7304  out:
7305         mutex_unlock(&trace_types_lock);
7306
7307         return ret;
7308 }
7309
7310 struct ftrace_buffer_info {
7311         struct trace_iterator   iter;
7312         void                    *spare;
7313         unsigned int            spare_cpu;
7314         unsigned int            read;
7315 };
7316
7317 #ifdef CONFIG_TRACER_SNAPSHOT
7318 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7319 {
7320         struct trace_array *tr = inode->i_private;
7321         struct trace_iterator *iter;
7322         struct seq_file *m;
7323         int ret;
7324
7325         ret = tracing_check_open_get_tr(tr);
7326         if (ret)
7327                 return ret;
7328
7329         if (file->f_mode & FMODE_READ) {
7330                 iter = __tracing_open(inode, file, true);
7331                 if (IS_ERR(iter))
7332                         ret = PTR_ERR(iter);
7333         } else {
7334                 /* Writes still need the seq_file to hold the private data */
7335                 ret = -ENOMEM;
7336                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7337                 if (!m)
7338                         goto out;
7339                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7340                 if (!iter) {
7341                         kfree(m);
7342                         goto out;
7343                 }
7344                 ret = 0;
7345
7346                 iter->tr = tr;
7347                 iter->array_buffer = &tr->max_buffer;
7348                 iter->cpu_file = tracing_get_cpu(inode);
7349                 m->private = iter;
7350                 file->private_data = m;
7351         }
7352 out:
7353         if (ret < 0)
7354                 trace_array_put(tr);
7355
7356         return ret;
7357 }
7358
7359 static ssize_t
7360 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7361                        loff_t *ppos)
7362 {
7363         struct seq_file *m = filp->private_data;
7364         struct trace_iterator *iter = m->private;
7365         struct trace_array *tr = iter->tr;
7366         unsigned long val;
7367         int ret;
7368
7369         ret = tracing_update_buffers();
7370         if (ret < 0)
7371                 return ret;
7372
7373         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7374         if (ret)
7375                 return ret;
7376
7377         mutex_lock(&trace_types_lock);
7378
7379         if (tr->current_trace->use_max_tr) {
7380                 ret = -EBUSY;
7381                 goto out;
7382         }
7383
7384         arch_spin_lock(&tr->max_lock);
7385         if (tr->cond_snapshot)
7386                 ret = -EBUSY;
7387         arch_spin_unlock(&tr->max_lock);
7388         if (ret)
7389                 goto out;
7390
7391         switch (val) {
7392         case 0:
7393                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7394                         ret = -EINVAL;
7395                         break;
7396                 }
7397                 if (tr->allocated_snapshot)
7398                         free_snapshot(tr);
7399                 break;
7400         case 1:
7401 /* Only allow per-cpu swap if the ring buffer supports it */
7402 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7403                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7404                         ret = -EINVAL;
7405                         break;
7406                 }
7407 #endif
7408                 if (tr->allocated_snapshot)
7409                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7410                                         &tr->array_buffer, iter->cpu_file);
7411                 else
7412                         ret = tracing_alloc_snapshot_instance(tr);
7413                 if (ret < 0)
7414                         break;
7415                 local_irq_disable();
7416                 /* Now, we're going to swap */
7417                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7418                         update_max_tr(tr, current, smp_processor_id(), NULL);
7419                 else
7420                         update_max_tr_single(tr, current, iter->cpu_file);
7421                 local_irq_enable();
7422                 break;
7423         default:
7424                 if (tr->allocated_snapshot) {
7425                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7426                                 tracing_reset_online_cpus(&tr->max_buffer);
7427                         else
7428                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7429                 }
7430                 break;
7431         }
7432
7433         if (ret >= 0) {
7434                 *ppos += cnt;
7435                 ret = cnt;
7436         }
7437 out:
7438         mutex_unlock(&trace_types_lock);
7439         return ret;
7440 }
7441
7442 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7443 {
7444         struct seq_file *m = file->private_data;
7445         int ret;
7446
7447         ret = tracing_release(inode, file);
7448
7449         if (file->f_mode & FMODE_READ)
7450                 return ret;
7451
7452         /* If write only, the seq_file is just a stub */
7453         if (m)
7454                 kfree(m->private);
7455         kfree(m);
7456
7457         return 0;
7458 }
7459
7460 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7461 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7462                                     size_t count, loff_t *ppos);
7463 static int tracing_buffers_release(struct inode *inode, struct file *file);
7464 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7465                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7466
7467 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7468 {
7469         struct ftrace_buffer_info *info;
7470         int ret;
7471
7472         /* The following checks for tracefs lockdown */
7473         ret = tracing_buffers_open(inode, filp);
7474         if (ret < 0)
7475                 return ret;
7476
7477         info = filp->private_data;
7478
7479         if (info->iter.trace->use_max_tr) {
7480                 tracing_buffers_release(inode, filp);
7481                 return -EBUSY;
7482         }
7483
7484         info->iter.snapshot = true;
7485         info->iter.array_buffer = &info->iter.tr->max_buffer;
7486
7487         return ret;
7488 }
7489
7490 #endif /* CONFIG_TRACER_SNAPSHOT */
7491
7492
7493 static const struct file_operations tracing_thresh_fops = {
7494         .open           = tracing_open_generic,
7495         .read           = tracing_thresh_read,
7496         .write          = tracing_thresh_write,
7497         .llseek         = generic_file_llseek,
7498 };
7499
7500 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7501 static const struct file_operations tracing_max_lat_fops = {
7502         .open           = tracing_open_generic,
7503         .read           = tracing_max_lat_read,
7504         .write          = tracing_max_lat_write,
7505         .llseek         = generic_file_llseek,
7506 };
7507 #endif
7508
7509 static const struct file_operations set_tracer_fops = {
7510         .open           = tracing_open_generic,
7511         .read           = tracing_set_trace_read,
7512         .write          = tracing_set_trace_write,
7513         .llseek         = generic_file_llseek,
7514 };
7515
7516 static const struct file_operations tracing_pipe_fops = {
7517         .open           = tracing_open_pipe,
7518         .poll           = tracing_poll_pipe,
7519         .read           = tracing_read_pipe,
7520         .splice_read    = tracing_splice_read_pipe,
7521         .release        = tracing_release_pipe,
7522         .llseek         = no_llseek,
7523 };
7524
7525 static const struct file_operations tracing_entries_fops = {
7526         .open           = tracing_open_generic_tr,
7527         .read           = tracing_entries_read,
7528         .write          = tracing_entries_write,
7529         .llseek         = generic_file_llseek,
7530         .release        = tracing_release_generic_tr,
7531 };
7532
7533 static const struct file_operations tracing_total_entries_fops = {
7534         .open           = tracing_open_generic_tr,
7535         .read           = tracing_total_entries_read,
7536         .llseek         = generic_file_llseek,
7537         .release        = tracing_release_generic_tr,
7538 };
7539
7540 static const struct file_operations tracing_free_buffer_fops = {
7541         .open           = tracing_open_generic_tr,
7542         .write          = tracing_free_buffer_write,
7543         .release        = tracing_free_buffer_release,
7544 };
7545
7546 static const struct file_operations tracing_mark_fops = {
7547         .open           = tracing_open_generic_tr,
7548         .write          = tracing_mark_write,
7549         .llseek         = generic_file_llseek,
7550         .release        = tracing_release_generic_tr,
7551 };
7552
7553 static const struct file_operations tracing_mark_raw_fops = {
7554         .open           = tracing_open_generic_tr,
7555         .write          = tracing_mark_raw_write,
7556         .llseek         = generic_file_llseek,
7557         .release        = tracing_release_generic_tr,
7558 };
7559
7560 static const struct file_operations trace_clock_fops = {
7561         .open           = tracing_clock_open,
7562         .read           = seq_read,
7563         .llseek         = seq_lseek,
7564         .release        = tracing_single_release_tr,
7565         .write          = tracing_clock_write,
7566 };
7567
7568 static const struct file_operations trace_time_stamp_mode_fops = {
7569         .open           = tracing_time_stamp_mode_open,
7570         .read           = seq_read,
7571         .llseek         = seq_lseek,
7572         .release        = tracing_single_release_tr,
7573 };
7574
7575 #ifdef CONFIG_TRACER_SNAPSHOT
7576 static const struct file_operations snapshot_fops = {
7577         .open           = tracing_snapshot_open,
7578         .read           = seq_read,
7579         .write          = tracing_snapshot_write,
7580         .llseek         = tracing_lseek,
7581         .release        = tracing_snapshot_release,
7582 };
7583
7584 static const struct file_operations snapshot_raw_fops = {
7585         .open           = snapshot_raw_open,
7586         .read           = tracing_buffers_read,
7587         .release        = tracing_buffers_release,
7588         .splice_read    = tracing_buffers_splice_read,
7589         .llseek         = no_llseek,
7590 };
7591
7592 #endif /* CONFIG_TRACER_SNAPSHOT */
7593
7594 /*
7595  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7596  * @filp: The active open file structure
7597  * @ubuf: The userspace provided buffer to read value into
7598  * @cnt: The maximum number of bytes to read
7599  * @ppos: The current "file" position
7600  *
7601  * This function implements the write interface for a struct trace_min_max_param.
7602  * The filp->private_data must point to a trace_min_max_param structure that
7603  * defines where to write the value, the min and the max acceptable values,
7604  * and a lock to protect the write.
7605  */
7606 static ssize_t
7607 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7608 {
7609         struct trace_min_max_param *param = filp->private_data;
7610         u64 val;
7611         int err;
7612
7613         if (!param)
7614                 return -EFAULT;
7615
7616         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7617         if (err)
7618                 return err;
7619
7620         if (param->lock)
7621                 mutex_lock(param->lock);
7622
7623         if (param->min && val < *param->min)
7624                 err = -EINVAL;
7625
7626         if (param->max && val > *param->max)
7627                 err = -EINVAL;
7628
7629         if (!err)
7630                 *param->val = val;
7631
7632         if (param->lock)
7633                 mutex_unlock(param->lock);
7634
7635         if (err)
7636                 return err;
7637
7638         return cnt;
7639 }
7640
7641 /*
7642  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7643  * @filp: The active open file structure
7644  * @ubuf: The userspace provided buffer to read value into
7645  * @cnt: The maximum number of bytes to read
7646  * @ppos: The current "file" position
7647  *
7648  * This function implements the read interface for a struct trace_min_max_param.
7649  * The filp->private_data must point to a trace_min_max_param struct with valid
7650  * data.
7651  */
7652 static ssize_t
7653 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7654 {
7655         struct trace_min_max_param *param = filp->private_data;
7656         char buf[U64_STR_SIZE];
7657         int len;
7658         u64 val;
7659
7660         if (!param)
7661                 return -EFAULT;
7662
7663         val = *param->val;
7664
7665         if (cnt > sizeof(buf))
7666                 cnt = sizeof(buf);
7667
7668         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7669
7670         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7671 }
7672
7673 const struct file_operations trace_min_max_fops = {
7674         .open           = tracing_open_generic,
7675         .read           = trace_min_max_read,
7676         .write          = trace_min_max_write,
7677 };
7678
7679 #define TRACING_LOG_ERRS_MAX    8
7680 #define TRACING_LOG_LOC_MAX     128
7681
7682 #define CMD_PREFIX "  Command: "
7683
7684 struct err_info {
7685         const char      **errs; /* ptr to loc-specific array of err strings */
7686         u8              type;   /* index into errs -> specific err string */
7687         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7688         u64             ts;
7689 };
7690
7691 struct tracing_log_err {
7692         struct list_head        list;
7693         struct err_info         info;
7694         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7695         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7696 };
7697
7698 static DEFINE_MUTEX(tracing_err_log_lock);
7699
7700 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7701 {
7702         struct tracing_log_err *err;
7703
7704         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7705                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7706                 if (!err)
7707                         err = ERR_PTR(-ENOMEM);
7708                 tr->n_err_log_entries++;
7709
7710                 return err;
7711         }
7712
7713         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7714         list_del(&err->list);
7715
7716         return err;
7717 }
7718
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str inside @cmd.  The result is meant
 * to be handed to tracing_log_err() as the caret position.  An empty @cmd
 * triggers a warning.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	if (WARN_ON(cmd[0] == '\0'))
		return 0;

	match = strstr(cmd, str);
	if (!match)
		return 0;

	return match - cmd;
}
7744
7745 /**
7746  * tracing_log_err - write an error to the tracing error log
7747  * @tr: The associated trace array for the error (NULL for top level array)
7748  * @loc: A string describing where the error occurred
7749  * @cmd: The tracing command that caused the error
7750  * @errs: The array of loc-specific static error strings
7751  * @type: The index into errs[], which produces the specific static err string
7752  * @pos: The position the caret should be placed in the cmd
7753  *
7754  * Writes an error into tracing/error_log of the form:
7755  *
7756  * <loc>: error: <text>
7757  *   Command: <cmd>
7758  *              ^
7759  *
7760  * tracing/error_log is a small log file containing the last
7761  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7762  * unless there has been a tracing error, and the error log can be
7763  * cleared and have its memory freed by writing the empty string in
7764  * truncation mode to it i.e. echo > tracing/error_log.
7765  *
7766  * NOTE: the @errs array along with the @type param are used to
7767  * produce a static error string - this string is not copied and saved
7768  * when the error is logged - only a pointer to it is saved.  See
7769  * existing callers for examples of how static strings are typically
7770  * defined for use with tracing_log_err().
7771  */
7772 void tracing_log_err(struct trace_array *tr,
7773                      const char *loc, const char *cmd,
7774                      const char **errs, u8 type, u8 pos)
7775 {
7776         struct tracing_log_err *err;
7777
7778         if (!tr)
7779                 tr = &global_trace;
7780
7781         mutex_lock(&tracing_err_log_lock);
7782         err = get_tracing_log_err(tr);
7783         if (PTR_ERR(err) == -ENOMEM) {
7784                 mutex_unlock(&tracing_err_log_lock);
7785                 return;
7786         }
7787
7788         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7789         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7790
7791         err->info.errs = errs;
7792         err->info.type = type;
7793         err->info.pos = pos;
7794         err->info.ts = local_clock();
7795
7796         list_add_tail(&err->list, &tr->err_log);
7797         mutex_unlock(&tracing_err_log_lock);
7798 }
7799
7800 static void clear_tracing_err_log(struct trace_array *tr)
7801 {
7802         struct tracing_log_err *err, *next;
7803
7804         mutex_lock(&tracing_err_log_lock);
7805         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7806                 list_del(&err->list);
7807                 kfree(err);
7808         }
7809
7810         tr->n_err_log_entries = 0;
7811         mutex_unlock(&tracing_err_log_lock);
7812 }
7813
7814 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7815 {
7816         struct trace_array *tr = m->private;
7817
7818         mutex_lock(&tracing_err_log_lock);
7819
7820         return seq_list_start(&tr->err_log, *pos);
7821 }
7822
7823 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7824 {
7825         struct trace_array *tr = m->private;
7826
7827         return seq_list_next(v, &tr->err_log, pos);
7828 }
7829
7830 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7831 {
7832         mutex_unlock(&tracing_err_log_lock);
7833 }
7834
7835 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7836 {
7837         u8 i;
7838
7839         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7840                 seq_putc(m, ' ');
7841         for (i = 0; i < pos; i++)
7842                 seq_putc(m, ' ');
7843         seq_puts(m, "^\n");
7844 }
7845
7846 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7847 {
7848         struct tracing_log_err *err = v;
7849
7850         if (err) {
7851                 const char *err_text = err->info.errs[err->info.type];
7852                 u64 sec = err->info.ts;
7853                 u32 nsec;
7854
7855                 nsec = do_div(sec, NSEC_PER_SEC);
7856                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7857                            err->loc, err_text);
7858                 seq_printf(m, "%s", err->cmd);
7859                 tracing_err_log_show_pos(m, err->info.pos);
7860         }
7861
7862         return 0;
7863 }
7864
7865 static const struct seq_operations tracing_err_log_seq_ops = {
7866         .start  = tracing_err_log_seq_start,
7867         .next   = tracing_err_log_seq_next,
7868         .stop   = tracing_err_log_seq_stop,
7869         .show   = tracing_err_log_seq_show
7870 };
7871
7872 static int tracing_err_log_open(struct inode *inode, struct file *file)
7873 {
7874         struct trace_array *tr = inode->i_private;
7875         int ret = 0;
7876
7877         ret = tracing_check_open_get_tr(tr);
7878         if (ret)
7879                 return ret;
7880
7881         /* If this file was opened for write, then erase contents */
7882         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7883                 clear_tracing_err_log(tr);
7884
7885         if (file->f_mode & FMODE_READ) {
7886                 ret = seq_open(file, &tracing_err_log_seq_ops);
7887                 if (!ret) {
7888                         struct seq_file *m = file->private_data;
7889                         m->private = tr;
7890                 } else {
7891                         trace_array_put(tr);
7892                 }
7893         }
7894         return ret;
7895 }
7896
7897 static ssize_t tracing_err_log_write(struct file *file,
7898                                      const char __user *buffer,
7899                                      size_t count, loff_t *ppos)
7900 {
7901         return count;
7902 }
7903
7904 static int tracing_err_log_release(struct inode *inode, struct file *file)
7905 {
7906         struct trace_array *tr = inode->i_private;
7907
7908         trace_array_put(tr);
7909
7910         if (file->f_mode & FMODE_READ)
7911                 seq_release(inode, file);
7912
7913         return 0;
7914 }
7915
7916 static const struct file_operations tracing_err_log_fops = {
7917         .open           = tracing_err_log_open,
7918         .write          = tracing_err_log_write,
7919         .read           = seq_read,
7920         .llseek         = seq_lseek,
7921         .release        = tracing_err_log_release,
7922 };
7923
7924 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7925 {
7926         struct trace_array *tr = inode->i_private;
7927         struct ftrace_buffer_info *info;
7928         int ret;
7929
7930         ret = tracing_check_open_get_tr(tr);
7931         if (ret)
7932                 return ret;
7933
7934         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7935         if (!info) {
7936                 trace_array_put(tr);
7937                 return -ENOMEM;
7938         }
7939
7940         mutex_lock(&trace_types_lock);
7941
7942         info->iter.tr           = tr;
7943         info->iter.cpu_file     = tracing_get_cpu(inode);
7944         info->iter.trace        = tr->current_trace;
7945         info->iter.array_buffer = &tr->array_buffer;
7946         info->spare             = NULL;
7947         /* Force reading ring buffer for first read */
7948         info->read              = (unsigned int)-1;
7949
7950         filp->private_data = info;
7951
7952         tr->trace_ref++;
7953
7954         mutex_unlock(&trace_types_lock);
7955
7956         ret = nonseekable_open(inode, filp);
7957         if (ret < 0)
7958                 trace_array_put(tr);
7959
7960         return ret;
7961 }
7962
7963 static __poll_t
7964 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7965 {
7966         struct ftrace_buffer_info *info = filp->private_data;
7967         struct trace_iterator *iter = &info->iter;
7968
7969         return trace_poll(iter, filp, poll_table);
7970 }
7971
7972 static ssize_t
7973 tracing_buffers_read(struct file *filp, char __user *ubuf,
7974                      size_t count, loff_t *ppos)
7975 {
7976         struct ftrace_buffer_info *info = filp->private_data;
7977         struct trace_iterator *iter = &info->iter;
7978         ssize_t ret = 0;
7979         ssize_t size;
7980
7981         if (!count)
7982                 return 0;
7983
7984 #ifdef CONFIG_TRACER_MAX_TRACE
7985         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7986                 return -EBUSY;
7987 #endif
7988
7989         if (!info->spare) {
7990                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7991                                                           iter->cpu_file);
7992                 if (IS_ERR(info->spare)) {
7993                         ret = PTR_ERR(info->spare);
7994                         info->spare = NULL;
7995                 } else {
7996                         info->spare_cpu = iter->cpu_file;
7997                 }
7998         }
7999         if (!info->spare)
8000                 return ret;
8001
8002         /* Do we have previous read data to read? */
8003         if (info->read < PAGE_SIZE)
8004                 goto read;
8005
8006  again:
8007         trace_access_lock(iter->cpu_file);
8008         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8009                                     &info->spare,
8010                                     count,
8011                                     iter->cpu_file, 0);
8012         trace_access_unlock(iter->cpu_file);
8013
8014         if (ret < 0) {
8015                 if (trace_empty(iter)) {
8016                         if ((filp->f_flags & O_NONBLOCK))
8017                                 return -EAGAIN;
8018
8019                         ret = wait_on_pipe(iter, 0);
8020                         if (ret)
8021                                 return ret;
8022
8023                         goto again;
8024                 }
8025                 return 0;
8026         }
8027
8028         info->read = 0;
8029  read:
8030         size = PAGE_SIZE - info->read;
8031         if (size > count)
8032                 size = count;
8033
8034         ret = copy_to_user(ubuf, info->spare + info->read, size);
8035         if (ret == size)
8036                 return -EFAULT;
8037
8038         size -= ret;
8039
8040         *ppos += size;
8041         info->read += size;
8042
8043         return size;
8044 }
8045
8046 static int tracing_buffers_release(struct inode *inode, struct file *file)
8047 {
8048         struct ftrace_buffer_info *info = file->private_data;
8049         struct trace_iterator *iter = &info->iter;
8050
8051         mutex_lock(&trace_types_lock);
8052
8053         iter->tr->trace_ref--;
8054
8055         __trace_array_put(iter->tr);
8056
8057         if (info->spare)
8058                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8059                                            info->spare_cpu, info->spare);
8060         kvfree(info);
8061
8062         mutex_unlock(&trace_types_lock);
8063
8064         return 0;
8065 }
8066
8067 struct buffer_ref {
8068         struct trace_buffer     *buffer;
8069         void                    *page;
8070         int                     cpu;
8071         refcount_t              refcount;
8072 };
8073
8074 static void buffer_ref_release(struct buffer_ref *ref)
8075 {
8076         if (!refcount_dec_and_test(&ref->refcount))
8077                 return;
8078         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8079         kfree(ref);
8080 }
8081
8082 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8083                                     struct pipe_buffer *buf)
8084 {
8085         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8086
8087         buffer_ref_release(ref);
8088         buf->private = 0;
8089 }
8090
8091 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8092                                 struct pipe_buffer *buf)
8093 {
8094         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8095
8096         if (refcount_read(&ref->refcount) > INT_MAX/2)
8097                 return false;
8098
8099         refcount_inc(&ref->refcount);
8100         return true;
8101 }
8102
8103 /* Pipe buffer operations for a buffer. */
8104 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8105         .release                = buffer_pipe_buf_release,
8106         .get                    = buffer_pipe_buf_get,
8107 };
8108
8109 /*
8110  * Callback from splice_to_pipe(), if we need to release some pages
8111  * at the end of the spd in case we error'ed out in filling the pipe.
8112  */
8113 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8114 {
8115         struct buffer_ref *ref =
8116                 (struct buffer_ref *)spd->partial[i].private;
8117
8118         buffer_ref_release(ref);
8119         spd->partial[i].private = 0;
8120 }
8121
8122 static ssize_t
8123 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8124                             struct pipe_inode_info *pipe, size_t len,
8125                             unsigned int flags)
8126 {
8127         struct ftrace_buffer_info *info = file->private_data;
8128         struct trace_iterator *iter = &info->iter;
8129         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8130         struct page *pages_def[PIPE_DEF_BUFFERS];
8131         struct splice_pipe_desc spd = {
8132                 .pages          = pages_def,
8133                 .partial        = partial_def,
8134                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8135                 .ops            = &buffer_pipe_buf_ops,
8136                 .spd_release    = buffer_spd_release,
8137         };
8138         struct buffer_ref *ref;
8139         int entries, i;
8140         ssize_t ret = 0;
8141
8142 #ifdef CONFIG_TRACER_MAX_TRACE
8143         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8144                 return -EBUSY;
8145 #endif
8146
8147         if (*ppos & (PAGE_SIZE - 1))
8148                 return -EINVAL;
8149
8150         if (len & (PAGE_SIZE - 1)) {
8151                 if (len < PAGE_SIZE)
8152                         return -EINVAL;
8153                 len &= PAGE_MASK;
8154         }
8155
8156         if (splice_grow_spd(pipe, &spd))
8157                 return -ENOMEM;
8158
8159  again:
8160         trace_access_lock(iter->cpu_file);
8161         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8162
8163         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8164                 struct page *page;
8165                 int r;
8166
8167                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8168                 if (!ref) {
8169                         ret = -ENOMEM;
8170                         break;
8171                 }
8172
8173                 refcount_set(&ref->refcount, 1);
8174                 ref->buffer = iter->array_buffer->buffer;
8175                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8176                 if (IS_ERR(ref->page)) {
8177                         ret = PTR_ERR(ref->page);
8178                         ref->page = NULL;
8179                         kfree(ref);
8180                         break;
8181                 }
8182                 ref->cpu = iter->cpu_file;
8183
8184                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8185                                           len, iter->cpu_file, 1);
8186                 if (r < 0) {
8187                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8188                                                    ref->page);
8189                         kfree(ref);
8190                         break;
8191                 }
8192
8193                 page = virt_to_page(ref->page);
8194
8195                 spd.pages[i] = page;
8196                 spd.partial[i].len = PAGE_SIZE;
8197                 spd.partial[i].offset = 0;
8198                 spd.partial[i].private = (unsigned long)ref;
8199                 spd.nr_pages++;
8200                 *ppos += PAGE_SIZE;
8201
8202                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8203         }
8204
8205         trace_access_unlock(iter->cpu_file);
8206         spd.nr_pages = i;
8207
8208         /* did we read anything? */
8209         if (!spd.nr_pages) {
8210                 if (ret)
8211                         goto out;
8212
8213                 ret = -EAGAIN;
8214                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8215                         goto out;
8216
8217                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8218                 if (ret)
8219                         goto out;
8220
8221                 goto again;
8222         }
8223
8224         ret = splice_to_pipe(pipe, &spd);
8225 out:
8226         splice_shrink_spd(&spd);
8227
8228         return ret;
8229 }
8230
8231 static const struct file_operations tracing_buffers_fops = {
8232         .open           = tracing_buffers_open,
8233         .read           = tracing_buffers_read,
8234         .poll           = tracing_buffers_poll,
8235         .release        = tracing_buffers_release,
8236         .splice_read    = tracing_buffers_splice_read,
8237         .llseek         = no_llseek,
8238 };
8239
8240 static ssize_t
8241 tracing_stats_read(struct file *filp, char __user *ubuf,
8242                    size_t count, loff_t *ppos)
8243 {
8244         struct inode *inode = file_inode(filp);
8245         struct trace_array *tr = inode->i_private;
8246         struct array_buffer *trace_buf = &tr->array_buffer;
8247         int cpu = tracing_get_cpu(inode);
8248         struct trace_seq *s;
8249         unsigned long cnt;
8250         unsigned long long t;
8251         unsigned long usec_rem;
8252
8253         s = kmalloc(sizeof(*s), GFP_KERNEL);
8254         if (!s)
8255                 return -ENOMEM;
8256
8257         trace_seq_init(s);
8258
8259         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8260         trace_seq_printf(s, "entries: %ld\n", cnt);
8261
8262         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8263         trace_seq_printf(s, "overrun: %ld\n", cnt);
8264
8265         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8266         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8267
8268         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8269         trace_seq_printf(s, "bytes: %ld\n", cnt);
8270
8271         if (trace_clocks[tr->clock_id].in_ns) {
8272                 /* local or global for trace_clock */
8273                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8274                 usec_rem = do_div(t, USEC_PER_SEC);
8275                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8276                                                                 t, usec_rem);
8277
8278                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8279                 usec_rem = do_div(t, USEC_PER_SEC);
8280                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8281         } else {
8282                 /* counter or tsc mode for trace_clock */
8283                 trace_seq_printf(s, "oldest event ts: %llu\n",
8284                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8285
8286                 trace_seq_printf(s, "now ts: %llu\n",
8287                                 ring_buffer_time_stamp(trace_buf->buffer));
8288         }
8289
8290         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8291         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8292
8293         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8294         trace_seq_printf(s, "read events: %ld\n", cnt);
8295
8296         count = simple_read_from_buffer(ubuf, count, ppos,
8297                                         s->buffer, trace_seq_used(s));
8298
8299         kfree(s);
8300
8301         return count;
8302 }
8303
8304 static const struct file_operations tracing_stats_fops = {
8305         .open           = tracing_open_generic_tr,
8306         .read           = tracing_stats_read,
8307         .llseek         = generic_file_llseek,
8308         .release        = tracing_release_generic_tr,
8309 };
8310
8311 #ifdef CONFIG_DYNAMIC_FTRACE
8312
8313 static ssize_t
8314 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8315                   size_t cnt, loff_t *ppos)
8316 {
8317         ssize_t ret;
8318         char *buf;
8319         int r;
8320
8321         /* 256 should be plenty to hold the amount needed */
8322         buf = kmalloc(256, GFP_KERNEL);
8323         if (!buf)
8324                 return -ENOMEM;
8325
8326         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8327                       ftrace_update_tot_cnt,
8328                       ftrace_number_of_pages,
8329                       ftrace_number_of_groups);
8330
8331         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8332         kfree(buf);
8333         return ret;
8334 }
8335
8336 static const struct file_operations tracing_dyn_info_fops = {
8337         .open           = tracing_open_generic,
8338         .read           = tracing_read_dyn_info,
8339         .llseek         = generic_file_llseek,
8340 };
8341 #endif /* CONFIG_DYNAMIC_FTRACE */
8342
8343 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Function probe callback without a counter: take a snapshot of @tr on
 * every hit. @ip, @parent_ip, @ops and @data are unused.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8351
8352 static void
8353 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8354                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8355                       void *data)
8356 {
8357         struct ftrace_func_mapper *mapper = data;
8358         long *count = NULL;
8359
8360         if (mapper)
8361                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8362
8363         if (count) {
8364
8365                 if (*count <= 0)
8366                         return;
8367
8368                 (*count)--;
8369         }
8370
8371         tracing_snapshot_instance(tr);
8372 }
8373
8374 static int
8375 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8376                       struct ftrace_probe_ops *ops, void *data)
8377 {
8378         struct ftrace_func_mapper *mapper = data;
8379         long *count = NULL;
8380
8381         seq_printf(m, "%ps:", (void *)ip);
8382
8383         seq_puts(m, "snapshot");
8384
8385         if (mapper)
8386                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8387
8388         if (count)
8389                 seq_printf(m, ":count=%ld\n", *count);
8390         else
8391                 seq_puts(m, ":unlimited\n");
8392
8393         return 0;
8394 }
8395
8396 static int
8397 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8398                      unsigned long ip, void *init_data, void **data)
8399 {
8400         struct ftrace_func_mapper *mapper = *data;
8401
8402         if (!mapper) {
8403                 mapper = allocate_ftrace_func_mapper();
8404                 if (!mapper)
8405                         return -ENOMEM;
8406                 *data = mapper;
8407         }
8408
8409         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8410 }
8411
8412 static void
8413 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8414                      unsigned long ip, void *data)
8415 {
8416         struct ftrace_func_mapper *mapper = data;
8417
8418         if (!ip) {
8419                 if (!mapper)
8420                         return;
8421                 free_ftrace_func_mapper(mapper, NULL);
8422                 return;
8423         }
8424
8425         ftrace_func_mapper_remove_ip(mapper, ip);
8426 }
8427
8428 static struct ftrace_probe_ops snapshot_probe_ops = {
8429         .func                   = ftrace_snapshot,
8430         .print                  = ftrace_snapshot_print,
8431 };
8432
8433 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8434         .func                   = ftrace_count_snapshot,
8435         .print                  = ftrace_snapshot_print,
8436         .init                   = ftrace_snapshot_init,
8437         .free                   = ftrace_snapshot_free,
8438 };
8439
8440 static int
8441 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8442                                char *glob, char *cmd, char *param, int enable)
8443 {
8444         struct ftrace_probe_ops *ops;
8445         void *count = (void *)-1;
8446         char *number;
8447         int ret;
8448
8449         if (!tr)
8450                 return -ENODEV;
8451
8452         /* hash funcs only work with set_ftrace_filter */
8453         if (!enable)
8454                 return -EINVAL;
8455
8456         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8457
8458         if (glob[0] == '!')
8459                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8460
8461         if (!param)
8462                 goto out_reg;
8463
8464         number = strsep(&param, ":");
8465
8466         if (!strlen(number))
8467                 goto out_reg;
8468
8469         /*
8470          * We use the callback data field (which is a pointer)
8471          * as our counter.
8472          */
8473         ret = kstrtoul(number, 0, (unsigned long *)&count);
8474         if (ret)
8475                 return ret;
8476
8477  out_reg:
8478         ret = tracing_alloc_snapshot_instance(tr);
8479         if (ret < 0)
8480                 goto out;
8481
8482         ret = register_ftrace_function_probe(glob, tr, ops, count);
8483
8484  out:
8485         return ret < 0 ? ret : 0;
8486 }
8487
8488 static struct ftrace_func_command ftrace_snapshot_cmd = {
8489         .name                   = "snapshot",
8490         .func                   = ftrace_trace_snapshot_callback,
8491 };
8492
8493 static __init int register_snapshot_cmd(void)
8494 {
8495         return register_ftrace_command(&ftrace_snapshot_cmd);
8496 }
8497 #else
8498 static inline __init int register_snapshot_cmd(void) { return 0; }
8499 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8500
8501 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8502 {
8503         if (WARN_ON(!tr->dir))
8504                 return ERR_PTR(-ENODEV);
8505
8506         /* Top directory uses NULL as the parent */
8507         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8508                 return NULL;
8509
8510         /* All sub buffers have a descriptor */
8511         return tr->dir;
8512 }
8513
8514 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8515 {
8516         struct dentry *d_tracer;
8517
8518         if (tr->percpu_dir)
8519                 return tr->percpu_dir;
8520
8521         d_tracer = tracing_get_dentry(tr);
8522         if (IS_ERR(d_tracer))
8523                 return NULL;
8524
8525         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8526
8527         MEM_FAIL(!tr->percpu_dir,
8528                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8529
8530         return tr->percpu_dir;
8531 }
8532
8533 static struct dentry *
8534 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8535                       void *data, long cpu, const struct file_operations *fops)
8536 {
8537         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8538
8539         if (ret) /* See tracing_get_cpu() */
8540                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8541         return ret;
8542 }
8543
8544 static void
8545 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8546 {
8547         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8548         struct dentry *d_cpu;
8549         char cpu_dir[30]; /* 30 characters should be more than enough */
8550
8551         if (!d_percpu)
8552                 return;
8553
8554         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8555         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8556         if (!d_cpu) {
8557                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8558                 return;
8559         }
8560
8561         /* per cpu trace_pipe */
8562         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8563                                 tr, cpu, &tracing_pipe_fops);
8564
8565         /* per cpu trace */
8566         trace_create_cpu_file("trace", 0644, d_cpu,
8567                                 tr, cpu, &tracing_fops);
8568
8569         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8570                                 tr, cpu, &tracing_buffers_fops);
8571
8572         trace_create_cpu_file("stats", 0444, d_cpu,
8573                                 tr, cpu, &tracing_stats_fops);
8574
8575         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8576                                 tr, cpu, &tracing_entries_fops);
8577
8578 #ifdef CONFIG_TRACER_SNAPSHOT
8579         trace_create_cpu_file("snapshot", 0644, d_cpu,
8580                                 tr, cpu, &snapshot_fops);
8581
8582         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8583                                 tr, cpu, &snapshot_raw_fops);
8584 #endif
8585 }
8586
8587 #ifdef CONFIG_FTRACE_SELFTEST
8588 /* Let selftest have access to static functions in this file */
8589 #include "trace_selftest.c"
8590 #endif
8591
8592 static ssize_t
8593 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8594                         loff_t *ppos)
8595 {
8596         struct trace_option_dentry *topt = filp->private_data;
8597         char *buf;
8598
8599         if (topt->flags->val & topt->opt->bit)
8600                 buf = "1\n";
8601         else
8602                 buf = "0\n";
8603
8604         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8605 }
8606
8607 static ssize_t
8608 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8609                          loff_t *ppos)
8610 {
8611         struct trace_option_dentry *topt = filp->private_data;
8612         unsigned long val;
8613         int ret;
8614
8615         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8616         if (ret)
8617                 return ret;
8618
8619         if (val != 0 && val != 1)
8620                 return -EINVAL;
8621
8622         if (!!(topt->flags->val & topt->opt->bit) != val) {
8623                 mutex_lock(&trace_types_lock);
8624                 ret = __set_tracer_option(topt->tr, topt->flags,
8625                                           topt->opt, !val);
8626                 mutex_unlock(&trace_types_lock);
8627                 if (ret)
8628                         return ret;
8629         }
8630
8631         *ppos += cnt;
8632
8633         return cnt;
8634 }
8635
8636
8637 static const struct file_operations trace_options_fops = {
8638         .open = tracing_open_generic,
8639         .read = trace_options_read,
8640         .write = trace_options_write,
8641         .llseek = generic_file_llseek,
8642 };
8643
8644 /*
8645  * In order to pass in both the trace_array descriptor as well as the index
8646  * to the flag that the trace option file represents, the trace_array
8647  * has a character array of trace_flags_index[], which holds the index
8648  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8649  * The address of this character array is passed to the flag option file
8650  * read/write callbacks.
8651  *
8652  * In order to extract both the index and the trace_array descriptor,
8653  * get_tr_index() uses the following algorithm.
8654  *
8655  *   idx = *ptr;
8656  *
8657  * As the pointer itself contains the address of the index (remember
8658  * index[1] == 1).
8659  *
8660  * Then to get the trace_array descriptor, by subtracting that index
8661  * from the ptr, we get to the start of the index itself.
8662  *
8663  *   ptr - idx == &index[0]
8664  *
8665  * Then a simple container_of() from that pointer gets us to the
8666  * trace_array descriptor.
8667  */
8668 static void get_tr_index(void *data, struct trace_array **ptr,
8669                          unsigned int *pindex)
8670 {
8671         *pindex = *(unsigned char *)data;
8672
8673         *ptr = container_of(data - *pindex, struct trace_array,
8674                             trace_flags_index);
8675 }
8676
8677 static ssize_t
8678 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8679                         loff_t *ppos)
8680 {
8681         void *tr_index = filp->private_data;
8682         struct trace_array *tr;
8683         unsigned int index;
8684         char *buf;
8685
8686         get_tr_index(tr_index, &tr, &index);
8687
8688         if (tr->trace_flags & (1 << index))
8689                 buf = "1\n";
8690         else
8691                 buf = "0\n";
8692
8693         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8694 }
8695
8696 static ssize_t
8697 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8698                          loff_t *ppos)
8699 {
8700         void *tr_index = filp->private_data;
8701         struct trace_array *tr;
8702         unsigned int index;
8703         unsigned long val;
8704         int ret;
8705
8706         get_tr_index(tr_index, &tr, &index);
8707
8708         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8709         if (ret)
8710                 return ret;
8711
8712         if (val != 0 && val != 1)
8713                 return -EINVAL;
8714
8715         mutex_lock(&event_mutex);
8716         mutex_lock(&trace_types_lock);
8717         ret = set_tracer_flag(tr, 1 << index, val);
8718         mutex_unlock(&trace_types_lock);
8719         mutex_unlock(&event_mutex);
8720
8721         if (ret < 0)
8722                 return ret;
8723
8724         *ppos += cnt;
8725
8726         return cnt;
8727 }
8728
8729 static const struct file_operations trace_options_core_fops = {
8730         .open = tracing_open_generic,
8731         .read = trace_options_core_read,
8732         .write = trace_options_core_write,
8733         .llseek = generic_file_llseek,
8734 };
8735
8736 struct dentry *trace_create_file(const char *name,
8737                                  umode_t mode,
8738                                  struct dentry *parent,
8739                                  void *data,
8740                                  const struct file_operations *fops)
8741 {
8742         struct dentry *ret;
8743
8744         ret = tracefs_create_file(name, mode, parent, data, fops);
8745         if (!ret)
8746                 pr_warn("Could not create tracefs '%s' entry\n", name);
8747
8748         return ret;
8749 }
8750
8751
8752 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8753 {
8754         struct dentry *d_tracer;
8755
8756         if (tr->options)
8757                 return tr->options;
8758
8759         d_tracer = tracing_get_dentry(tr);
8760         if (IS_ERR(d_tracer))
8761                 return NULL;
8762
8763         tr->options = tracefs_create_dir("options", d_tracer);
8764         if (!tr->options) {
8765                 pr_warn("Could not create tracefs directory 'options'\n");
8766                 return NULL;
8767         }
8768
8769         return tr->options;
8770 }
8771
8772 static void
8773 create_trace_option_file(struct trace_array *tr,
8774                          struct trace_option_dentry *topt,
8775                          struct tracer_flags *flags,
8776                          struct tracer_opt *opt)
8777 {
8778         struct dentry *t_options;
8779
8780         t_options = trace_options_init_dentry(tr);
8781         if (!t_options)
8782                 return;
8783
8784         topt->flags = flags;
8785         topt->opt = opt;
8786         topt->tr = tr;
8787
8788         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8789                                     &trace_options_fops);
8790
8791 }
8792
8793 static void
8794 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8795 {
8796         struct trace_option_dentry *topts;
8797         struct trace_options *tr_topts;
8798         struct tracer_flags *flags;
8799         struct tracer_opt *opts;
8800         int cnt;
8801         int i;
8802
8803         if (!tracer)
8804                 return;
8805
8806         flags = tracer->flags;
8807
8808         if (!flags || !flags->opts)
8809                 return;
8810
8811         /*
8812          * If this is an instance, only create flags for tracers
8813          * the instance may have.
8814          */
8815         if (!trace_ok_for_array(tracer, tr))
8816                 return;
8817
8818         for (i = 0; i < tr->nr_topts; i++) {
8819                 /* Make sure there's no duplicate flags. */
8820                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8821                         return;
8822         }
8823
8824         opts = flags->opts;
8825
8826         for (cnt = 0; opts[cnt].name; cnt++)
8827                 ;
8828
8829         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8830         if (!topts)
8831                 return;
8832
8833         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8834                             GFP_KERNEL);
8835         if (!tr_topts) {
8836                 kfree(topts);
8837                 return;
8838         }
8839
8840         tr->topts = tr_topts;
8841         tr->topts[tr->nr_topts].tracer = tracer;
8842         tr->topts[tr->nr_topts].topts = topts;
8843         tr->nr_topts++;
8844
8845         for (cnt = 0; opts[cnt].name; cnt++) {
8846                 create_trace_option_file(tr, &topts[cnt], flags,
8847                                          &opts[cnt]);
8848                 MEM_FAIL(topts[cnt].entry == NULL,
8849                           "Failed to create trace option: %s",
8850                           opts[cnt].name);
8851         }
8852 }
8853
8854 static struct dentry *
8855 create_trace_option_core_file(struct trace_array *tr,
8856                               const char *option, long index)
8857 {
8858         struct dentry *t_options;
8859
8860         t_options = trace_options_init_dentry(tr);
8861         if (!t_options)
8862                 return NULL;
8863
8864         return trace_create_file(option, 0644, t_options,
8865                                  (void *)&tr->trace_flags_index[index],
8866                                  &trace_options_core_fops);
8867 }
8868
8869 static void create_trace_options_dir(struct trace_array *tr)
8870 {
8871         struct dentry *t_options;
8872         bool top_level = tr == &global_trace;
8873         int i;
8874
8875         t_options = trace_options_init_dentry(tr);
8876         if (!t_options)
8877                 return;
8878
8879         for (i = 0; trace_options[i]; i++) {
8880                 if (top_level ||
8881                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8882                         create_trace_option_core_file(tr, trace_options[i], i);
8883         }
8884 }
8885
8886 static ssize_t
8887 rb_simple_read(struct file *filp, char __user *ubuf,
8888                size_t cnt, loff_t *ppos)
8889 {
8890         struct trace_array *tr = filp->private_data;
8891         char buf[64];
8892         int r;
8893
8894         r = tracer_tracing_is_on(tr);
8895         r = sprintf(buf, "%d\n", r);
8896
8897         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8898 }
8899
8900 static ssize_t
8901 rb_simple_write(struct file *filp, const char __user *ubuf,
8902                 size_t cnt, loff_t *ppos)
8903 {
8904         struct trace_array *tr = filp->private_data;
8905         struct trace_buffer *buffer = tr->array_buffer.buffer;
8906         unsigned long val;
8907         int ret;
8908
8909         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8910         if (ret)
8911                 return ret;
8912
8913         if (buffer) {
8914                 mutex_lock(&trace_types_lock);
8915                 if (!!val == tracer_tracing_is_on(tr)) {
8916                         val = 0; /* do nothing */
8917                 } else if (val) {
8918                         tracer_tracing_on(tr);
8919                         if (tr->current_trace->start)
8920                                 tr->current_trace->start(tr);
8921                 } else {
8922                         tracer_tracing_off(tr);
8923                         if (tr->current_trace->stop)
8924                                 tr->current_trace->stop(tr);
8925                 }
8926                 mutex_unlock(&trace_types_lock);
8927         }
8928
8929         (*ppos)++;
8930
8931         return cnt;
8932 }
8933
8934 static const struct file_operations rb_simple_fops = {
8935         .open           = tracing_open_generic_tr,
8936         .read           = rb_simple_read,
8937         .write          = rb_simple_write,
8938         .release        = tracing_release_generic_tr,
8939         .llseek         = default_llseek,
8940 };
8941
8942 static ssize_t
8943 buffer_percent_read(struct file *filp, char __user *ubuf,
8944                     size_t cnt, loff_t *ppos)
8945 {
8946         struct trace_array *tr = filp->private_data;
8947         char buf[64];
8948         int r;
8949
8950         r = tr->buffer_percent;
8951         r = sprintf(buf, "%d\n", r);
8952
8953         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8954 }
8955
8956 static ssize_t
8957 buffer_percent_write(struct file *filp, const char __user *ubuf,
8958                      size_t cnt, loff_t *ppos)
8959 {
8960         struct trace_array *tr = filp->private_data;
8961         unsigned long val;
8962         int ret;
8963
8964         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8965         if (ret)
8966                 return ret;
8967
8968         if (val > 100)
8969                 return -EINVAL;
8970
8971         if (!val)
8972                 val = 1;
8973
8974         tr->buffer_percent = val;
8975
8976         (*ppos)++;
8977
8978         return cnt;
8979 }
8980
8981 static const struct file_operations buffer_percent_fops = {
8982         .open           = tracing_open_generic_tr,
8983         .read           = buffer_percent_read,
8984         .write          = buffer_percent_write,
8985         .release        = tracing_release_generic_tr,
8986         .llseek         = default_llseek,
8987 };
8988
/*
 * Dentry of the tracefs "instances" directory. It is assigned by
 * create_trace_instances(); while it is still NULL,
 * trace_array_create() does not create a per-instance directory.
 */
static struct dentry *trace_instance_dir;

/* Forward declaration: defined below, needed by trace_array_create_dir(). */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8993
8994 static int
8995 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8996 {
8997         enum ring_buffer_flags rb_flags;
8998
8999         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9000
9001         buf->tr = tr;
9002
9003         buf->buffer = ring_buffer_alloc(size, rb_flags);
9004         if (!buf->buffer)
9005                 return -ENOMEM;
9006
9007         buf->data = alloc_percpu(struct trace_array_cpu);
9008         if (!buf->data) {
9009                 ring_buffer_free(buf->buffer);
9010                 buf->buffer = NULL;
9011                 return -ENOMEM;
9012         }
9013
9014         /* Allocate the first page for all buffers */
9015         set_buffer_entries(&tr->array_buffer,
9016                            ring_buffer_size(tr->array_buffer.buffer, 0));
9017
9018         return 0;
9019 }
9020
9021 static int allocate_trace_buffers(struct trace_array *tr, int size)
9022 {
9023         int ret;
9024
9025         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9026         if (ret)
9027                 return ret;
9028
9029 #ifdef CONFIG_TRACER_MAX_TRACE
9030         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9031                                     allocate_snapshot ? size : 1);
9032         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9033                 ring_buffer_free(tr->array_buffer.buffer);
9034                 tr->array_buffer.buffer = NULL;
9035                 free_percpu(tr->array_buffer.data);
9036                 tr->array_buffer.data = NULL;
9037                 return -ENOMEM;
9038         }
9039         tr->allocated_snapshot = allocate_snapshot;
9040
9041         /*
9042          * Only the top level trace array gets its snapshot allocated
9043          * from the kernel command line.
9044          */
9045         allocate_snapshot = false;
9046 #endif
9047
9048         return 0;
9049 }
9050
9051 static void free_trace_buffer(struct array_buffer *buf)
9052 {
9053         if (buf->buffer) {
9054                 ring_buffer_free(buf->buffer);
9055                 buf->buffer = NULL;
9056                 free_percpu(buf->data);
9057                 buf->data = NULL;
9058         }
9059 }
9060
9061 static void free_trace_buffers(struct trace_array *tr)
9062 {
9063         if (!tr)
9064                 return;
9065
9066         free_trace_buffer(&tr->array_buffer);
9067
9068 #ifdef CONFIG_TRACER_MAX_TRACE
9069         free_trace_buffer(&tr->max_buffer);
9070 #endif
9071 }
9072
9073 static void init_trace_flags_index(struct trace_array *tr)
9074 {
9075         int i;
9076
9077         /* Used by the trace options files */
9078         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9079                 tr->trace_flags_index[i] = i;
9080 }
9081
9082 static void __update_tracer_options(struct trace_array *tr)
9083 {
9084         struct tracer *t;
9085
9086         for (t = trace_types; t; t = t->next)
9087                 add_tracer_options(tr, t);
9088 }
9089
/*
 * Locked wrapper around __update_tracer_options(): takes
 * trace_types_lock for the duration of the update.
 */
static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}
9096
9097 /* Must have trace_types_lock held */
9098 struct trace_array *trace_array_find(const char *instance)
9099 {
9100         struct trace_array *tr, *found = NULL;
9101
9102         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9103                 if (tr->name && strcmp(tr->name, instance) == 0) {
9104                         found = tr;
9105                         break;
9106                 }
9107         }
9108
9109         return found;
9110 }
9111
9112 struct trace_array *trace_array_find_get(const char *instance)
9113 {
9114         struct trace_array *tr;
9115
9116         mutex_lock(&trace_types_lock);
9117         tr = trace_array_find(instance);
9118         if (tr)
9119                 tr->ref++;
9120         mutex_unlock(&trace_types_lock);
9121
9122         return tr;
9123 }
9124
9125 static int trace_array_create_dir(struct trace_array *tr)
9126 {
9127         int ret;
9128
9129         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9130         if (!tr->dir)
9131                 return -EINVAL;
9132
9133         ret = event_trace_add_tracer(tr->dir, tr);
9134         if (ret)
9135                 tracefs_remove(tr->dir);
9136
9137         init_tracer_tracefs(tr, tr->dir);
9138         __update_tracer_options(tr);
9139
9140         return ret;
9141 }
9142
/*
 * Allocate and initialise a new trace array instance called @name.
 *
 * On success the array is added to ftrace_trace_arrays, its reference
 * count is incremented and the pointer is returned. On failure all
 * resources acquired so far are released and an ERR_PTR() is returned:
 * -ENOMEM for every failure up to the buffer/ops allocation, or the
 * error returned by trace_array_create_dir().
 *
 * The callers visible in this file (instance_mkdir() and
 * trace_array_get_by_name()) hold event_mutex and trace_types_lock
 * around this call.
 */
static struct trace_array *trace_array_create(const char *name)
{
	struct trace_array *tr;
	int ret;

	/* Error code used by all allocation failures below. */
	ret = -ENOMEM;
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	/* Start from the global flags, minus the ones in ZEROED_TRACE_FLAGS. */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	tr->current_trace = &nop_trace;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	/*
	 * If the "instances" directory already exists, create the
	 * per-instance directory now; otherwise only the early event
	 * setup is done here and create_trace_instances() creates the
	 * directory later for every named array on the list.
	 */
	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

	/*
	 * Common error path. It may run on a partially initialised @tr;
	 * @tr was zeroed by kzalloc() and free_trace_buffers() checks for
	 * unset buffers. NOTE(review): presumably the other helpers here
	 * tolerate unset fields as well - confirm.
	 */
 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9207
9208 static int instance_mkdir(const char *name)
9209 {
9210         struct trace_array *tr;
9211         int ret;
9212
9213         mutex_lock(&event_mutex);
9214         mutex_lock(&trace_types_lock);
9215
9216         ret = -EEXIST;
9217         if (trace_array_find(name))
9218                 goto out_unlock;
9219
9220         tr = trace_array_create(name);
9221
9222         ret = PTR_ERR_OR_ZERO(tr);
9223
9224 out_unlock:
9225         mutex_unlock(&trace_types_lock);
9226         mutex_unlock(&event_mutex);
9227         return ret;
9228 }
9229
9230 /**
9231  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9232  * @name: The name of the trace array to be looked up/created.
9233  *
9234  * Returns pointer to trace array with given name.
9235  * NULL, if it cannot be created.
9236  *
9237  * NOTE: This function increments the reference counter associated with the
9238  * trace array returned. This makes sure it cannot be freed while in use.
9239  * Use trace_array_put() once the trace array is no longer needed.
9240  * If the trace_array is to be freed, trace_array_destroy() needs to
9241  * be called after the trace_array_put(), or simply let user space delete
9242  * it from the tracefs instances directory. But until the
9243  * trace_array_put() is called, user space can not delete it.
9244  *
9245  */
9246 struct trace_array *trace_array_get_by_name(const char *name)
9247 {
9248         struct trace_array *tr;
9249
9250         mutex_lock(&event_mutex);
9251         mutex_lock(&trace_types_lock);
9252
9253         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9254                 if (tr->name && strcmp(tr->name, name) == 0)
9255                         goto out_unlock;
9256         }
9257
9258         tr = trace_array_create(name);
9259
9260         if (IS_ERR(tr))
9261                 tr = NULL;
9262 out_unlock:
9263         if (tr)
9264                 tr->ref++;
9265
9266         mutex_unlock(&trace_types_lock);
9267         mutex_unlock(&event_mutex);
9268         return tr;
9269 }
9270 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9271
/*
 * Tear down and free the trace array @tr.
 *
 * Returns -EBUSY without touching @tr when it is still referenced
 * (tr->ref above 1, or tr->trace_ref set while a current tracer is
 * present), 0 once the array has been removed and freed.
 *
 * The callers visible in this file (trace_array_destroy() and
 * instance_rmdir()) hold event_mutex and trace_types_lock.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	/* Unlink first so the array can no longer be found by name. */
	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);

	/* Free each tracer's option array, then the table holding them. */
	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9308
9309 int trace_array_destroy(struct trace_array *this_tr)
9310 {
9311         struct trace_array *tr;
9312         int ret;
9313
9314         if (!this_tr)
9315                 return -EINVAL;
9316
9317         mutex_lock(&event_mutex);
9318         mutex_lock(&trace_types_lock);
9319
9320         ret = -ENODEV;
9321
9322         /* Making sure trace array exists before destroying it. */
9323         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9324                 if (tr == this_tr) {
9325                         ret = __remove_instance(tr);
9326                         break;
9327                 }
9328         }
9329
9330         mutex_unlock(&trace_types_lock);
9331         mutex_unlock(&event_mutex);
9332
9333         return ret;
9334 }
9335 EXPORT_SYMBOL_GPL(trace_array_destroy);
9336
9337 static int instance_rmdir(const char *name)
9338 {
9339         struct trace_array *tr;
9340         int ret;
9341
9342         mutex_lock(&event_mutex);
9343         mutex_lock(&trace_types_lock);
9344
9345         ret = -ENODEV;
9346         tr = trace_array_find(name);
9347         if (tr)
9348                 ret = __remove_instance(tr);
9349
9350         mutex_unlock(&trace_types_lock);
9351         mutex_unlock(&event_mutex);
9352
9353         return ret;
9354 }
9355
9356 static __init void create_trace_instances(struct dentry *d_tracer)
9357 {
9358         struct trace_array *tr;
9359
9360         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9361                                                          instance_mkdir,
9362                                                          instance_rmdir);
9363         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9364                 return;
9365
9366         mutex_lock(&event_mutex);
9367         mutex_lock(&trace_types_lock);
9368
9369         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9370                 if (!tr->name)
9371                         continue;
9372                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9373                              "Failed to create instance directory\n"))
9374                         break;
9375         }
9376
9377         mutex_unlock(&trace_types_lock);
9378         mutex_unlock(&event_mutex);
9379 }
9380
9381 static void
9382 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9383 {
9384         struct trace_event_file *file;
9385         int cpu;
9386
9387         trace_create_file("available_tracers", 0444, d_tracer,
9388                         tr, &show_traces_fops);
9389
9390         trace_create_file("current_tracer", 0644, d_tracer,
9391                         tr, &set_tracer_fops);
9392
9393         trace_create_file("tracing_cpumask", 0644, d_tracer,
9394                           tr, &tracing_cpumask_fops);
9395
9396         trace_create_file("trace_options", 0644, d_tracer,
9397                           tr, &tracing_iter_fops);
9398
9399         trace_create_file("trace", 0644, d_tracer,
9400                           tr, &tracing_fops);
9401
9402         trace_create_file("trace_pipe", 0444, d_tracer,
9403                           tr, &tracing_pipe_fops);
9404
9405         trace_create_file("buffer_size_kb", 0644, d_tracer,
9406                           tr, &tracing_entries_fops);
9407
9408         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9409                           tr, &tracing_total_entries_fops);
9410
9411         trace_create_file("free_buffer", 0200, d_tracer,
9412                           tr, &tracing_free_buffer_fops);
9413
9414         trace_create_file("trace_marker", 0220, d_tracer,
9415                           tr, &tracing_mark_fops);
9416
9417         file = __find_event_file(tr, "ftrace", "print");
9418         if (file && file->dir)
9419                 trace_create_file("trigger", 0644, file->dir, file,
9420                                   &event_trigger_fops);
9421         tr->trace_marker_file = file;
9422
9423         trace_create_file("trace_marker_raw", 0220, d_tracer,
9424                           tr, &tracing_mark_raw_fops);
9425
9426         trace_create_file("trace_clock", 0644, d_tracer, tr,
9427                           &trace_clock_fops);
9428
9429         trace_create_file("tracing_on", 0644, d_tracer,
9430                           tr, &rb_simple_fops);
9431
9432         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9433                           &trace_time_stamp_mode_fops);
9434
9435         tr->buffer_percent = 50;
9436
9437         trace_create_file("buffer_percent", 0444, d_tracer,
9438                         tr, &buffer_percent_fops);
9439
9440         create_trace_options_dir(tr);
9441
9442 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9443         trace_create_maxlat_file(tr, d_tracer);
9444 #endif
9445
9446         if (ftrace_create_function_files(tr, d_tracer))
9447                 MEM_FAIL(1, "Could not allocate function filter files");
9448
9449 #ifdef CONFIG_TRACER_SNAPSHOT
9450         trace_create_file("snapshot", 0644, d_tracer,
9451                           tr, &snapshot_fops);
9452 #endif
9453
9454         trace_create_file("error_log", 0644, d_tracer,
9455                           tr, &tracing_err_log_fops);
9456
9457         for_each_tracing_cpu(cpu)
9458                 tracing_init_tracefs_percpu(tr, cpu);
9459
9460         ftrace_init_tracefs(tr, d_tracer);
9461 }
9462
9463 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9464 {
9465         struct vfsmount *mnt;
9466         struct file_system_type *type;
9467
9468         /*
9469          * To maintain backward compatibility for tools that mount
9470          * debugfs to get to the tracing facility, tracefs is automatically
9471          * mounted to the debugfs/tracing directory.
9472          */
9473         type = get_fs_type("tracefs");
9474         if (!type)
9475                 return NULL;
9476         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9477         put_filesystem(type);
9478         if (IS_ERR(mnt))
9479                 return NULL;
9480         mntget(mnt);
9481
9482         return mnt;
9483 }
9484
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code.
 *
 * Returns -EPERM if tracefs is locked down, -ENODEV if tracefs has not
 * been initialized yet, and 0 otherwise. Note that the result of
 * debugfs_create_automount() is stored in the top level trace array's
 * dir field without being checked.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses  NULL as parent */
	/* A non-NULL tr->dir means an earlier call already did the setup. */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);

	return 0;
}
9519
/*
 * Start and end of the array of built-in trace_eval_map pointers. The
 * symbols are defined outside this file (presumably by the linker
 * script - TODO confirm).
 */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/*
 * Workqueue and work item used by trace_eval_init() to insert the
 * built-in eval maps; trace_eval_sync() destroys the workqueue.
 */
static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
9525
9526 static void __init eval_map_work_func(struct work_struct *work)
9527 {
9528         int len;
9529
9530         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9531         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9532 }
9533
9534 static int __init trace_eval_init(void)
9535 {
9536         INIT_WORK(&eval_map_work, eval_map_work_func);
9537
9538         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9539         if (!eval_map_wq) {
9540                 pr_err("Unable to allocate eval_map_wq\n");
9541                 /* Do work here */
9542                 eval_map_work_func(&eval_map_work);
9543                 return -ENOMEM;
9544         }
9545
9546         queue_work(eval_map_wq, &eval_map_work);
9547         return 0;
9548 }
9549
9550 static int __init trace_eval_sync(void)
9551 {
9552         /* Make sure the eval map updates are finished */
9553         if (eval_map_wq)
9554                 destroy_workqueue(eval_map_wq);
9555         return 0;
9556 }
9557
9558 late_initcall_sync(trace_eval_sync);
9559
9560
9561 #ifdef CONFIG_MODULES
9562 static void trace_module_add_evals(struct module *mod)
9563 {
9564         if (!mod->num_trace_evals)
9565                 return;
9566
9567         /*
9568          * Modules with bad taint do not have events created, do
9569          * not bother with enums either.
9570          */
9571         if (trace_module_has_bad_taint(mod))
9572                 return;
9573
9574         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9575 }
9576
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map section owned by module @mod from the
 * trace_eval_maps chain. Does nothing if the module has no eval maps
 * or no section of the chain belongs to it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *cur, *tail;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	/*
	 * Walk the chain section by section, remembering in @link the
	 * pointer that leads to the section being looked at.
	 */
	for (cur = trace_eval_maps; cur; cur = tail->tail.next) {
		if (cur->head.mod == mod)
			break;
		tail = trace_eval_jmp_to_tail(cur);
		link = &tail->tail.next;
	}

	if (cur) {
		/* Bypass the module's section, then release it. */
		*link = trace_eval_jmp_to_tail(cur)->tail.next;
		kfree(cur);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
/* Without the eval map file there is nothing to remove. */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9608
9609 static int trace_module_notify(struct notifier_block *self,
9610                                unsigned long val, void *data)
9611 {
9612         struct module *mod = data;
9613
9614         switch (val) {
9615         case MODULE_STATE_COMING:
9616                 trace_module_add_evals(mod);
9617                 break;
9618         case MODULE_STATE_GOING:
9619                 trace_module_remove_evals(mod);
9620                 break;
9621         }
9622
9623         return NOTIFY_OK;
9624 }
9625
9626 static struct notifier_block trace_module_nb = {
9627         .notifier_call = trace_module_notify,
9628         .priority = 0,
9629 };
9630 #endif /* CONFIG_MODULES */
9631
/*
 * fs_initcall that creates the top level tracing files.
 *
 * Sets up the top level directory, the event files and the files of
 * the global trace array, then the files that exist only at the top
 * level, the eval map handling, the module notifier and the
 * "instances" directory. Always returns 0, including when
 * tracing_init_dentry() fails; in that case no files are created.
 */
static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	event_trace_init();

	/* A NULL parent dentry is what the top level trace array uses. */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", 0644, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", 0444, NULL,
			NULL, &tracing_saved_tgids_fops);

	/* The return value (-ENOMEM on the synchronous fallback) is ignored. */
	trace_eval_init();

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);

	return 0;
}

fs_initcall(tracer_init_tracefs);
9683
9684 static int trace_panic_handler(struct notifier_block *this,
9685                                unsigned long event, void *unused)
9686 {
9687         if (ftrace_dump_on_oops)
9688                 ftrace_dump(ftrace_dump_on_oops);
9689         return NOTIFY_OK;
9690 }
9691
9692 static struct notifier_block trace_panic_notifier = {
9693         .notifier_call  = trace_panic_handler,
9694         .next           = NULL,
9695         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9696 };
9697
9698 static int trace_die_handler(struct notifier_block *self,
9699                              unsigned long val,
9700                              void *data)
9701 {
9702         switch (val) {
9703         case DIE_OOPS:
9704                 if (ftrace_dump_on_oops)
9705                         ftrace_dump(ftrace_dump_on_oops);
9706                 break;
9707         default:
9708                 break;
9709         }
9710         return NOTIFY_OK;
9711 }
9712
9713 static struct notifier_block trace_die_notifier = {
9714         .notifier_call = trace_die_handler,
9715         .priority = 200
9716 };
9717
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 * trace_printk_seq() truncates its output to this many characters.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define KERN_TRACE here so that there is one place to modify
 * if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
9730
/*
 * Print the contents of trace_seq @s with printk() at KERN_TRACE level
 * and then reinitialise @s. The output is cut off at TRACE_MAX_PRINT
 * characters.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	/* Reset the sequence so it can be filled again. */
	trace_seq_init(s);
}
9753
9754 void trace_init_global_iter(struct trace_iterator *iter)
9755 {
9756         iter->tr = &global_trace;
9757         iter->trace = iter->tr->current_trace;
9758         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9759         iter->array_buffer = &global_trace.array_buffer;
9760
9761         if (iter->trace && iter->trace->open)
9762                 iter->trace->open(iter);
9763
9764         /* Annotate start of buffers if we had overruns */
9765         if (ring_buffer_overruns(iter->array_buffer->buffer))
9766                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9767
9768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9769         if (trace_clocks[iter->tr->clock_id].in_ns)
9770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9771 }
9772
/*
 * Dump the contents of the global trace buffer with printk().
 *
 * @oops_dump_mode selects what is dumped: DUMP_ALL prints the buffers
 * of all CPUs, DUMP_ORIG only the buffer of the CPU running the dump,
 * DUMP_NONE prints nothing; any other value falls back to all CPUs
 * after printing a message.
 *
 * Only one dump may run at a time; a concurrent call returns at once.
 * Tracing is turned off before dumping and is not turned back on here.
 * Interrupts are disabled for the duration of the dump.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);
	printk_nmi_direct_enter();

	/* Simulate the iterator */
	trace_init_global_iter(&iter);
	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter.temp = static_temp_buf;
	iter.temp_size = STATIC_TEMP_BUF_SIZE;
	iter.fmt = static_fmt_buf;
	iter.fmt_size = STATIC_FMT_BUF_SIZE;

	/* Bump the per-CPU disabled counters; undone at out_enable. */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	/* Remember the flag so it can be restored at out_enable. */
	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Print the opening separator before the first entry only. */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* Dumping can take long; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	printk_nmi_direct_exit();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
9886
9887 #define WRITE_BUFSIZE  4096
9888
9889 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9890                                 size_t count, loff_t *ppos,
9891                                 int (*createfn)(const char *))
9892 {
9893         char *kbuf, *buf, *tmp;
9894         int ret = 0;
9895         size_t done = 0;
9896         size_t size;
9897
9898         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9899         if (!kbuf)
9900                 return -ENOMEM;
9901
9902         while (done < count) {
9903                 size = count - done;
9904
9905                 if (size >= WRITE_BUFSIZE)
9906                         size = WRITE_BUFSIZE - 1;
9907
9908                 if (copy_from_user(kbuf, buffer + done, size)) {
9909                         ret = -EFAULT;
9910                         goto out;
9911                 }
9912                 kbuf[size] = '\0';
9913                 buf = kbuf;
9914                 do {
9915                         tmp = strchr(buf, '\n');
9916                         if (tmp) {
9917                                 *tmp = '\0';
9918                                 size = tmp - buf + 1;
9919                         } else {
9920                                 size = strlen(buf);
9921                                 if (done + size < count) {
9922                                         if (buf != kbuf)
9923                                                 break;
9924                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9925                                         pr_warn("Line length is too long: Should be less than %d\n",
9926                                                 WRITE_BUFSIZE - 2);
9927                                         ret = -EINVAL;
9928                                         goto out;
9929                                 }
9930                         }
9931                         done += size;
9932
9933                         /* Remove comments */
9934                         tmp = strchr(buf, '#');
9935
9936                         if (tmp)
9937                                 *tmp = '\0';
9938
9939                         ret = createfn(buf);
9940                         if (ret)
9941                                 goto out;
9942                         buf += size;
9943
9944                 } while (done < count);
9945         }
9946         ret = done;
9947
9948 out:
9949         kfree(kbuf);
9950
9951         return ret;
9952 }
9953
9954 __init static int tracer_alloc_buffers(void)
9955 {
9956         int ring_buf_size;
9957         int ret = -ENOMEM;
9958
9959
9960         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9961                 pr_warn("Tracing disabled due to lockdown\n");
9962                 return -EPERM;
9963         }
9964
9965         /*
9966          * Make sure we don't accidentally add more trace options
9967          * than we have bits for.
9968          */
9969         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9970
9971         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9972                 goto out;
9973
9974         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9975                 goto out_free_buffer_mask;
9976
9977         /* Only allocate trace_printk buffers if a trace_printk exists */
9978         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9979                 /* Must be called before global_trace.buffer is allocated */
9980                 trace_printk_init_buffers();
9981
9982         /* To save memory, keep the ring buffer size to its minimum */
9983         if (ring_buffer_expanded)
9984                 ring_buf_size = trace_buf_size;
9985         else
9986                 ring_buf_size = 1;
9987
9988         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9989         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9990
9991         raw_spin_lock_init(&global_trace.start_lock);
9992
9993         /*
9994          * The prepare callbacks allocates some memory for the ring buffer. We
9995          * don't free the buffer if the CPU goes down. If we were to free
9996          * the buffer, then the user would lose any trace that was in the
9997          * buffer. The memory will be removed once the "instance" is removed.
9998          */
9999         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10000                                       "trace/RB:preapre", trace_rb_cpu_prepare,
10001                                       NULL);
10002         if (ret < 0)
10003                 goto out_free_cpumask;
10004         /* Used for event triggers */
10005         ret = -ENOMEM;
10006         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10007         if (!temp_buffer)
10008                 goto out_rm_hp_state;
10009
10010         if (trace_create_savedcmd() < 0)
10011                 goto out_free_temp_buffer;
10012
10013         /* TODO: make the number of buffers hot pluggable with CPUS */
10014         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10015                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10016                 goto out_free_savedcmd;
10017         }
10018
10019         if (global_trace.buffer_disabled)
10020                 tracing_off();
10021
10022         if (trace_boot_clock) {
10023                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10024                 if (ret < 0)
10025                         pr_warn("Trace clock %s not defined, going back to default\n",
10026                                 trace_boot_clock);
10027         }
10028
10029         /*
10030          * register_tracer() might reference current_trace, so it
10031          * needs to be set before we register anything. This is
10032          * just a bootstrap of current_trace anyway.
10033          */
10034         global_trace.current_trace = &nop_trace;
10035
10036         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10037
10038         ftrace_init_global_array_ops(&global_trace);
10039
10040         init_trace_flags_index(&global_trace);
10041
10042         register_tracer(&nop_trace);
10043
10044         /* Function tracing may start here (via kernel command line) */
10045         init_function_trace();
10046
10047         /* All seems OK, enable tracing */
10048         tracing_disabled = 0;
10049
10050         atomic_notifier_chain_register(&panic_notifier_list,
10051                                        &trace_panic_notifier);
10052
10053         register_die_notifier(&trace_die_notifier);
10054
10055         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10056
10057         INIT_LIST_HEAD(&global_trace.systems);
10058         INIT_LIST_HEAD(&global_trace.events);
10059         INIT_LIST_HEAD(&global_trace.hist_vars);
10060         INIT_LIST_HEAD(&global_trace.err_log);
10061         list_add(&global_trace.list, &ftrace_trace_arrays);
10062
10063         apply_trace_boot_options();
10064
10065         register_snapshot_cmd();
10066
10067         test_can_verify();
10068
10069         return 0;
10070
10071 out_free_savedcmd:
10072         free_saved_cmdlines_buffer(savedcmd);
10073 out_free_temp_buffer:
10074         ring_buffer_free(temp_buffer);
10075 out_rm_hp_state:
10076         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10077 out_free_cpumask:
10078         free_cpumask_var(global_trace.tracing_cpumask);
10079 out_free_buffer_mask:
10080         free_cpumask_var(tracing_buffer_mask);
10081 out:
10082         return ret;
10083 }
10084
10085 void __init early_trace_init(void)
10086 {
10087         if (tracepoint_printk) {
10088                 tracepoint_print_iter =
10089                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10090                 if (MEM_FAIL(!tracepoint_print_iter,
10091                              "Failed to allocate trace iterator\n"))
10092                         tracepoint_printk = 0;
10093                 else
10094                         static_key_enable(&tracepoint_printk_key.key);
10095         }
10096         tracer_alloc_buffers();
10097 }
10098
10099 void __init trace_init(void)
10100 {
10101         trace_event_init();
10102 }
10103
10104 __init static void clear_boot_tracer(void)
10105 {
10106         /*
10107          * The default tracer at boot buffer is an init section.
10108          * This function is called in lateinit. If we did not
10109          * find the boot tracer, then clear it out, to prevent
10110          * later registration from accessing the buffer that is
10111          * about to be freed.
10112          */
10113         if (!default_bootup_tracer)
10114                 return;
10115
10116         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10117                default_bootup_tracer);
10118         default_bootup_tracer = NULL;
10119 }
10120
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the global trace array to the "global" clock when no clock was
 * requested at boot and sched_clock is not stable. Nothing is changed when
 * tracefs is locked down; a warning is printed instead.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
/* Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK there is nothing to adjust. */
static inline void tracing_set_default_clock(void) { }
#endif
10142
10143 __init static int late_trace_init(void)
10144 {
10145         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10146                 static_key_disable(&tracepoint_printk_key.key);
10147                 tracepoint_printk = 0;
10148         }
10149
10150         tracing_set_default_clock();
10151         clear_boot_tracer();
10152         return 0;
10153 }
10154
10155 late_initcall_sync(late_trace_init);