Merge tag 'trace-v5.16-6' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are entering export into the list but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525         return trace_pid_list_is_set(filtered_pids, search_pid);
526 }
527
528 /**
529  * trace_ignore_this_task - should a task be ignored for tracing
530  * @filtered_pids: The list of pids to check
531  * @filtered_no_pids: The list of pids not to be traced
532  * @task: The task that should be ignored if not filtered
533  *
534  * Checks if @task should be traced or not from @filtered_pids.
535  * Returns true if @task should *NOT* be traced.
536  * Returns false if @task should be traced.
537  */
538 bool
539 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
540                        struct trace_pid_list *filtered_no_pids,
541                        struct task_struct *task)
542 {
543         /*
544          * If filtered_no_pids is not empty, and the task's pid is listed
545          * in filtered_no_pids, then return true.
546          * Otherwise, if filtered_pids is empty, that means we can
547          * trace all tasks. If it has content, then only trace pids
548          * within filtered_pids.
549          */
550
551         return (filtered_pids &&
552                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
553                 (filtered_no_pids &&
554                  trace_find_filtered_pid(filtered_no_pids, task->pid));
555 }
556
557 /**
558  * trace_filter_add_remove_task - Add or remove a task from a pid_list
559  * @pid_list: The list to modify
560  * @self: The current task for fork or NULL for exit
561  * @task: The task to add or remove
562  *
563  * If adding a task, if @self is defined, the task is only added if @self
564  * is also included in @pid_list. This happens on fork and tasks should
565  * only be added when the parent is listed. If @self is NULL, then the
566  * @task pid will be removed from the list, which would happen on exit
567  * of a task.
568  */
569 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
570                                   struct task_struct *self,
571                                   struct task_struct *task)
572 {
573         if (!pid_list)
574                 return;
575
576         /* For forks, we only add if the forking task is listed */
577         if (self) {
578                 if (!trace_find_filtered_pid(pid_list, self->pid))
579                         return;
580         }
581
582         /* "self" is set for forks, and NULL for exits */
583         if (self)
584                 trace_pid_list_set(pid_list, task->pid);
585         else
586                 trace_pid_list_clear(pid_list, task->pid);
587 }
588
589 /**
590  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
591  * @pid_list: The pid list to show
592  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
593  * @pos: The position of the file
594  *
595  * This is used by the seq_file "next" operation to iterate the pids
596  * listed in a trace_pid_list structure.
597  *
598  * Returns the pid+1 as we want to display pid of zero, but NULL would
599  * stop the iteration.
600  */
601 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
602 {
603         long pid = (unsigned long)v;
604         unsigned int next;
605
606         (*pos)++;
607
608         /* pid already is +1 of the actual previous bit */
609         if (trace_pid_list_next(pid_list, pid, &next) < 0)
610                 return NULL;
611
612         pid = next;
613
614         /* Return pid + 1 to allow zero to be represented */
615         return (void *)(pid + 1);
616 }
617
618 /**
619  * trace_pid_start - Used for seq_file to start reading pid lists
620  * @pid_list: The pid list to show
621  * @pos: The position of the file
622  *
623  * This is used by seq_file "start" operation to start the iteration
624  * of listing pids.
625  *
626  * Returns the pid+1 as we want to display pid of zero, but NULL would
627  * stop the iteration.
628  */
629 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
630 {
631         unsigned long pid;
632         unsigned int first;
633         loff_t l = 0;
634
635         if (trace_pid_list_first(pid_list, &first) < 0)
636                 return NULL;
637
638         pid = first;
639
640         /* Return pid + 1 so that zero can be the exit value */
641         for (pid++; pid && l < *pos;
642              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
643                 ;
644         return (void *)pid;
645 }
646
647 /**
648  * trace_pid_show - show the current pid in seq_file processing
649  * @m: The seq_file structure to write into
650  * @v: A void pointer of the pid (+1) value to display
651  *
652  * Can be directly used by seq_file operations to display the current
653  * pid value.
654  */
655 int trace_pid_show(struct seq_file *m, void *v)
656 {
657         unsigned long pid = (unsigned long)v - 1;
658
659         seq_printf(m, "%lu\n", pid);
660         return 0;
661 }
662
663 /* 128 should be much more than enough */
664 #define PID_BUF_SIZE            127
665
666 int trace_pid_write(struct trace_pid_list *filtered_pids,
667                     struct trace_pid_list **new_pid_list,
668                     const char __user *ubuf, size_t cnt)
669 {
670         struct trace_pid_list *pid_list;
671         struct trace_parser parser;
672         unsigned long val;
673         int nr_pids = 0;
674         ssize_t read = 0;
675         ssize_t ret;
676         loff_t pos;
677         pid_t pid;
678
679         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
680                 return -ENOMEM;
681
682         /*
683          * Always recreate a new array. The write is an all or nothing
684          * operation. Always create a new array when adding new pids by
685          * the user. If the operation fails, then the current list is
686          * not modified.
687          */
688         pid_list = trace_pid_list_alloc();
689         if (!pid_list) {
690                 trace_parser_put(&parser);
691                 return -ENOMEM;
692         }
693
694         if (filtered_pids) {
695                 /* copy the current bits to the new max */
696                 ret = trace_pid_list_first(filtered_pids, &pid);
697                 while (!ret) {
698                         trace_pid_list_set(pid_list, pid);
699                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
700                         nr_pids++;
701                 }
702         }
703
704         ret = 0;
705         while (cnt > 0) {
706
707                 pos = 0;
708
709                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
710                 if (ret < 0 || !trace_parser_loaded(&parser))
711                         break;
712
713                 read += ret;
714                 ubuf += ret;
715                 cnt -= ret;
716
717                 ret = -EINVAL;
718                 if (kstrtoul(parser.buffer, 0, &val))
719                         break;
720
721                 pid = (pid_t)val;
722
723                 if (trace_pid_list_set(pid_list, pid) < 0) {
724                         ret = -1;
725                         break;
726                 }
727                 nr_pids++;
728
729                 trace_parser_clear(&parser);
730                 ret = 0;
731         }
732         trace_parser_put(&parser);
733
734         if (ret < 0) {
735                 trace_pid_list_free(pid_list);
736                 return ret;
737         }
738
739         if (!nr_pids) {
740                 /* Cleared the list of pids */
741                 trace_pid_list_free(pid_list);
742                 read = ret;
743                 pid_list = NULL;
744         }
745
746         *new_pid_list = pid_list;
747
748         return read;
749 }
750
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753         u64 ts;
754
755         /* Early boot up does not have a buffer yet */
756         if (!buf->buffer)
757                 return trace_clock_local();
758
759         ts = ring_buffer_time_stamp(buf->buffer);
760         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761
762         return ts;
763 }
764
765 u64 ftrace_now(int cpu)
766 {
767         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769
770 /**
771  * tracing_is_enabled - Show if global_trace has been enabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781         /*
782          * For quick access (irqsoff uses this in fast path), just
783          * return the mirror variable of the state of the ring buffer.
784          * It's a little racy, but we don't really care.
785          */
786         smp_rmb();
787         return !global_trace.buffer_disabled;
788 }
789
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low number of 16384.
796  * If the dump on oops happens, it will be much appreciated
797  * to not have to wait for all that output. Anyway this can be
798  * boot time and run time configurable.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
801
802 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer            *trace_types __read_mostly;
806
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811
812 /*
813  * serialize the access of the ring buffer
814  *
815  * ring buffer serializes readers, but it is low level protection.
816  * The validity of the events (which returns by ring_buffer_peek() ..etc)
817  * are not protected by ring buffer.
818  *
819  * The content of events may become garbage if we allow other process consumes
820  * these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not reader page) in ring buffer, and this page will be rewritten
823  *      by events producer.
824  *   B) The page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to system.
826  *
827  * These primitives allow multi process access to different cpu ring buffer
828  * concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multi read-only access are also serialized.
832  */
833
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837
838 static inline void trace_access_lock(int cpu)
839 {
840         if (cpu == RING_BUFFER_ALL_CPUS) {
841                 /* gain it for accessing the whole ring buffer. */
842                 down_write(&all_cpu_access_lock);
843         } else {
844                 /* gain it for accessing a cpu ring buffer. */
845
846                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847                 down_read(&all_cpu_access_lock);
848
849                 /* Secondly block other access to this @cpu ring buffer. */
850                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
851         }
852 }
853
854 static inline void trace_access_unlock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 up_write(&all_cpu_access_lock);
858         } else {
859                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860                 up_read(&all_cpu_access_lock);
861         }
862 }
863
864 static inline void trace_access_lock_init(void)
865 {
866         int cpu;
867
868         for_each_possible_cpu(cpu)
869                 mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871
872 #else
873
874 static DEFINE_MUTEX(access_lock);
875
876 static inline void trace_access_lock(int cpu)
877 {
878         (void)cpu;
879         mutex_lock(&access_lock);
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884         (void)cpu;
885         mutex_unlock(&access_lock);
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890 }
891
892 #endif
893
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896                                  unsigned int trace_ctx,
897                                  int skip, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899                                       struct trace_buffer *buffer,
900                                       unsigned int trace_ctx,
901                                       int skip, struct pt_regs *regs);
902
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905                                         unsigned int trace_ctx,
906                                         int skip, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910                                       struct trace_buffer *buffer,
911                                       unsigned long trace_ctx,
912                                       int skip, struct pt_regs *regs)
913 {
914 }
915
916 #endif
917
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920                   int type, unsigned int trace_ctx)
921 {
922         struct trace_entry *ent = ring_buffer_event_data(event);
923
924         tracing_generic_entry_update(ent, type, trace_ctx);
925 }
926
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929                           int type,
930                           unsigned long len,
931                           unsigned int trace_ctx)
932 {
933         struct ring_buffer_event *event;
934
935         event = ring_buffer_lock_reserve(buffer, len);
936         if (event != NULL)
937                 trace_event_setup(event, type, trace_ctx);
938
939         return event;
940 }
941
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944         if (tr->array_buffer.buffer)
945                 ring_buffer_record_on(tr->array_buffer.buffer);
946         /*
947          * This flag is looked at when buffers haven't been allocated
948          * yet, or by some tracers (like irqsoff), that just want to
949          * know if the ring buffer has been disabled, but it can handle
950          * races of where it gets disabled but we still do a record.
951          * As the check is in the fast path of the tracers, it is more
952          * important to be fast than accurate.
953          */
954         tr->buffer_disabled = 0;
955         /* Make the flag seen by readers */
956         smp_wmb();
957 }
958
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967         tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970
971
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975         __this_cpu_write(trace_taskinfo_save, true);
976
977         /* If this is the temp buffer, we need to commit fully */
978         if (this_cpu_read(trace_buffered_event) == event) {
979                 /* Length is in event->array[0] */
980                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
981                 /* Release the temp buffer */
982                 this_cpu_dec(trace_buffered_event_cnt);
983         } else
984                 ring_buffer_unlock_commit(buffer, event);
985 }
986
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:    The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995         struct ring_buffer_event *event;
996         struct trace_buffer *buffer;
997         struct print_entry *entry;
998         unsigned int trace_ctx;
999         int alloc;
1000
1001         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1002                 return 0;
1003
1004         if (unlikely(tracing_selftest_running || tracing_disabled))
1005                 return 0;
1006
1007         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1008
1009         trace_ctx = tracing_gen_ctx();
1010         buffer = global_trace.array_buffer.buffer;
1011         ring_buffer_nest_start(buffer);
1012         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1013                                             trace_ctx);
1014         if (!event) {
1015                 size = 0;
1016                 goto out;
1017         }
1018
1019         entry = ring_buffer_event_data(event);
1020         entry->ip = ip;
1021
1022         memcpy(&entry->buf, str, size);
1023
1024         /* Add a newline if necessary */
1025         if (entry->buf[size - 1] != '\n') {
1026                 entry->buf[size] = '\n';
1027                 entry->buf[size + 1] = '\0';
1028         } else
1029                 entry->buf[size] = '\0';
1030
1031         __buffer_unlock_commit(buffer, event);
1032         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1033  out:
1034         ring_buffer_nest_end(buffer);
1035         return size;
1036 }
1037 EXPORT_SYMBOL_GPL(__trace_puts);
1038
1039 /**
1040  * __trace_bputs - write the pointer to a constant string into trace buffer
1041  * @ip:    The address of the caller
1042  * @str:   The constant string to write to the buffer to
1043  */
1044 int __trace_bputs(unsigned long ip, const char *str)
1045 {
1046         struct ring_buffer_event *event;
1047         struct trace_buffer *buffer;
1048         struct bputs_entry *entry;
1049         unsigned int trace_ctx;
1050         int size = sizeof(struct bputs_entry);
1051         int ret = 0;
1052
1053         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1054                 return 0;
1055
1056         if (unlikely(tracing_selftest_running || tracing_disabled))
1057                 return 0;
1058
1059         trace_ctx = tracing_gen_ctx();
1060         buffer = global_trace.array_buffer.buffer;
1061
1062         ring_buffer_nest_start(buffer);
1063         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1064                                             trace_ctx);
1065         if (!event)
1066                 goto out;
1067
1068         entry = ring_buffer_event_data(event);
1069         entry->ip                       = ip;
1070         entry->str                      = str;
1071
1072         __buffer_unlock_commit(buffer, event);
1073         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1074
1075         ret = 1;
1076  out:
1077         ring_buffer_nest_end(buffer);
1078         return ret;
1079 }
1080 EXPORT_SYMBOL_GPL(__trace_bputs);
1081
1082 #ifdef CONFIG_TRACER_SNAPSHOT
1083 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1084                                            void *cond_data)
1085 {
1086         struct tracer *tracer = tr->current_trace;
1087         unsigned long flags;
1088
1089         if (in_nmi()) {
1090                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1091                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1092                 return;
1093         }
1094
1095         if (!tr->allocated_snapshot) {
1096                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1097                 internal_trace_puts("*** stopping trace here!   ***\n");
1098                 tracing_off();
1099                 return;
1100         }
1101
1102         /* Note, snapshot can not be used when the tracer uses it */
1103         if (tracer->use_max_tr) {
1104                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1105                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1106                 return;
1107         }
1108
1109         local_irq_save(flags);
1110         update_max_tr(tr, current, smp_processor_id(), cond_data);
1111         local_irq_restore(flags);
1112 }
1113
1114 void tracing_snapshot_instance(struct trace_array *tr)
1115 {
1116         tracing_snapshot_instance_cond(tr, NULL);
1117 }
1118
1119 /**
1120  * tracing_snapshot - take a snapshot of the current buffer.
1121  *
1122  * This causes a swap between the snapshot buffer and the current live
1123  * tracing buffer. You can use this to take snapshots of the live
1124  * trace when some condition is triggered, but continue to trace.
1125  *
1126  * Note, make sure to allocate the snapshot with either
1127  * a tracing_snapshot_alloc(), or by doing it manually
1128  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1129  *
1130  * If the snapshot buffer is not allocated, it will stop tracing.
1131  * Basically making a permanent snapshot.
1132  */
1133 void tracing_snapshot(void)
1134 {
1135         struct trace_array *tr = &global_trace;
1136
1137         tracing_snapshot_instance(tr);
1138 }
1139 EXPORT_SYMBOL_GPL(tracing_snapshot);
1140
1141 /**
1142  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1143  * @tr:         The tracing instance to snapshot
1144  * @cond_data:  The data to be tested conditionally, and possibly saved
1145  *
1146  * This is the same as tracing_snapshot() except that the snapshot is
1147  * conditional - the snapshot will only happen if the
1148  * cond_snapshot.update() implementation receiving the cond_data
1149  * returns true, which means that the trace array's cond_snapshot
1150  * update() operation used the cond_data to determine whether the
1151  * snapshot should be taken, and if it was, presumably saved it along
1152  * with the snapshot.
1153  */
1154 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1155 {
1156         tracing_snapshot_instance_cond(tr, cond_data);
1157 }
1158 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1159
1160 /**
1161  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1162  * @tr:         The tracing instance
1163  *
1164  * When the user enables a conditional snapshot using
1165  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1166  * with the snapshot.  This accessor is used to retrieve it.
1167  *
1168  * Should not be called from cond_snapshot.update(), since it takes
1169  * the tr->max_lock lock, which the code calling
1170  * cond_snapshot.update() has already done.
1171  *
1172  * Returns the cond_data associated with the trace array's snapshot.
1173  */
1174 void *tracing_cond_snapshot_data(struct trace_array *tr)
1175 {
1176         void *cond_data = NULL;
1177
1178         arch_spin_lock(&tr->max_lock);
1179
1180         if (tr->cond_snapshot)
1181                 cond_data = tr->cond_snapshot->cond_data;
1182
1183         arch_spin_unlock(&tr->max_lock);
1184
1185         return cond_data;
1186 }
1187 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1188
1189 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1190                                         struct array_buffer *size_buf, int cpu_id);
1191 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1192
1193 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1194 {
1195         int ret;
1196
1197         if (!tr->allocated_snapshot) {
1198
1199                 /* allocate spare buffer */
1200                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1201                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1202                 if (ret < 0)
1203                         return ret;
1204
1205                 tr->allocated_snapshot = true;
1206         }
1207
1208         return 0;
1209 }
1210
1211 static void free_snapshot(struct trace_array *tr)
1212 {
1213         /*
1214          * We don't free the ring buffer. instead, resize it because
1215          * The max_tr ring buffer has some state (e.g. ring->clock) and
1216          * we want preserve it.
1217          */
1218         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1219         set_buffer_entries(&tr->max_buffer, 1);
1220         tracing_reset_online_cpus(&tr->max_buffer);
1221         tr->allocated_snapshot = false;
1222 }
1223
1224 /**
1225  * tracing_alloc_snapshot - allocate snapshot buffer.
1226  *
1227  * This only allocates the snapshot buffer if it isn't already
1228  * allocated - it doesn't also take a snapshot.
1229  *
1230  * This is meant to be used in cases where the snapshot buffer needs
1231  * to be set up for events that can't sleep but need to be able to
1232  * trigger a snapshot.
1233  */
1234 int tracing_alloc_snapshot(void)
1235 {
1236         struct trace_array *tr = &global_trace;
1237         int ret;
1238
1239         ret = tracing_alloc_snapshot_instance(tr);
1240         WARN_ON(ret < 0);
1241
1242         return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245
1246 /**
1247  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1248  *
1249  * This is similar to tracing_snapshot(), but it will allocate the
1250  * snapshot buffer if it isn't already allocated. Use this only
1251  * where it is safe to sleep, as the allocation may sleep.
1252  *
1253  * This causes a swap between the snapshot buffer and the current live
1254  * tracing buffer. You can use this to take snapshots of the live
1255  * trace when some condition is triggered, but continue to trace.
1256  */
1257 void tracing_snapshot_alloc(void)
1258 {
1259         int ret;
1260
1261         ret = tracing_alloc_snapshot();
1262         if (ret < 0)
1263                 return;
1264
1265         tracing_snapshot();
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1268
1269 /**
1270  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1271  * @tr:         The tracing instance
1272  * @cond_data:  User data to associate with the snapshot
1273  * @update:     Implementation of the cond_snapshot update function
1274  *
1275  * Check whether the conditional snapshot for the given instance has
1276  * already been enabled, or if the current tracer is already using a
1277  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1278  * save the cond_data and update function inside.
1279  *
1280  * Returns 0 if successful, error otherwise.
1281  */
1282 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1283                                  cond_update_fn_t update)
1284 {
1285         struct cond_snapshot *cond_snapshot;
1286         int ret = 0;
1287
1288         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1289         if (!cond_snapshot)
1290                 return -ENOMEM;
1291
1292         cond_snapshot->cond_data = cond_data;
1293         cond_snapshot->update = update;
1294
1295         mutex_lock(&trace_types_lock);
1296
1297         ret = tracing_alloc_snapshot_instance(tr);
1298         if (ret)
1299                 goto fail_unlock;
1300
1301         if (tr->current_trace->use_max_tr) {
1302                 ret = -EBUSY;
1303                 goto fail_unlock;
1304         }
1305
1306         /*
1307          * The cond_snapshot can only change to NULL without the
1308          * trace_types_lock. We don't care if we race with it going
1309          * to NULL, but we want to make sure that it's not set to
1310          * something other than NULL when we get here, which we can
1311          * do safely with only holding the trace_types_lock and not
1312          * having to take the max_lock.
1313          */
1314         if (tr->cond_snapshot) {
1315                 ret = -EBUSY;
1316                 goto fail_unlock;
1317         }
1318
1319         arch_spin_lock(&tr->max_lock);
1320         tr->cond_snapshot = cond_snapshot;
1321         arch_spin_unlock(&tr->max_lock);
1322
1323         mutex_unlock(&trace_types_lock);
1324
1325         return ret;
1326
1327  fail_unlock:
1328         mutex_unlock(&trace_types_lock);
1329         kfree(cond_snapshot);
1330         return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1333
1334 /**
1335  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1336  * @tr:         The tracing instance
1337  *
1338  * Check whether the conditional snapshot for the given instance is
1339  * enabled; if so, free the cond_snapshot associated with it,
1340  * otherwise return -EINVAL.
1341  *
1342  * Returns 0 if successful, error otherwise.
1343  */
1344 int tracing_snapshot_cond_disable(struct trace_array *tr)
1345 {
1346         int ret = 0;
1347
1348         arch_spin_lock(&tr->max_lock);
1349
1350         if (!tr->cond_snapshot)
1351                 ret = -EINVAL;
1352         else {
1353                 kfree(tr->cond_snapshot);
1354                 tr->cond_snapshot = NULL;
1355         }
1356
1357         arch_spin_unlock(&tr->max_lock);
1358
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1362 #else
1363 void tracing_snapshot(void)
1364 {
1365         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot);
1368 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1369 {
1370         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1371 }
1372 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1373 int tracing_alloc_snapshot(void)
1374 {
1375         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1376         return -ENODEV;
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1379 void tracing_snapshot_alloc(void)
1380 {
1381         /* Give warning */
1382         tracing_snapshot();
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1385 void *tracing_cond_snapshot_data(struct trace_array *tr)
1386 {
1387         return NULL;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1390 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1391 {
1392         return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1395 int tracing_snapshot_cond_disable(struct trace_array *tr)
1396 {
1397         return false;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1400 #endif /* CONFIG_TRACER_SNAPSHOT */
1401
1402 void tracer_tracing_off(struct trace_array *tr)
1403 {
1404         if (tr->array_buffer.buffer)
1405                 ring_buffer_record_off(tr->array_buffer.buffer);
1406         /*
1407          * This flag is looked at when buffers haven't been allocated
1408          * yet, or by some tracers (like irqsoff), that just want to
1409          * know if the ring buffer has been disabled, but it can handle
1410          * races of where it gets disabled but we still do a record.
1411          * As the check is in the fast path of the tracers, it is more
1412          * important to be fast than accurate.
1413          */
1414         tr->buffer_disabled = 1;
1415         /* Make the flag seen by readers */
1416         smp_wmb();
1417 }
1418
1419 /**
1420  * tracing_off - turn off tracing buffers
1421  *
1422  * This function stops the tracing buffers from recording data.
1423  * It does not disable any overhead the tracers themselves may
1424  * be causing. This function simply causes all recording to
1425  * the ring buffers to fail.
1426  */
1427 void tracing_off(void)
1428 {
1429         tracer_tracing_off(&global_trace);
1430 }
1431 EXPORT_SYMBOL_GPL(tracing_off);
1432
1433 void disable_trace_on_warning(void)
1434 {
1435         if (__disable_trace_on_warning) {
1436                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1437                         "Disabling tracing due to warning\n");
1438                 tracing_off();
1439         }
1440 }
1441
1442 /**
1443  * tracer_tracing_is_on - show real state of ring buffer enabled
1444  * @tr : the trace array to know if ring buffer is enabled
1445  *
1446  * Shows real state of the ring buffer if it is enabled or not.
1447  */
1448 bool tracer_tracing_is_on(struct trace_array *tr)
1449 {
1450         if (tr->array_buffer.buffer)
1451                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1452         return !tr->buffer_disabled;
1453 }
1454
1455 /**
1456  * tracing_is_on - show state of ring buffers enabled
1457  */
1458 int tracing_is_on(void)
1459 {
1460         return tracer_tracing_is_on(&global_trace);
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_is_on);
1463
1464 static int __init set_buf_size(char *str)
1465 {
1466         unsigned long buf_size;
1467
1468         if (!str)
1469                 return 0;
1470         buf_size = memparse(str, &str);
1471         /* nr_entries can not be zero */
1472         if (buf_size == 0)
1473                 return 0;
1474         trace_buf_size = buf_size;
1475         return 1;
1476 }
1477 __setup("trace_buf_size=", set_buf_size);
1478
1479 static int __init set_tracing_thresh(char *str)
1480 {
1481         unsigned long threshold;
1482         int ret;
1483
1484         if (!str)
1485                 return 0;
1486         ret = kstrtoul(str, 0, &threshold);
1487         if (ret < 0)
1488                 return 0;
1489         tracing_thresh = threshold * 1000;
1490         return 1;
1491 }
1492 __setup("tracing_thresh=", set_tracing_thresh);
1493
1494 unsigned long nsecs_to_usecs(unsigned long nsecs)
1495 {
1496         return nsecs / 1000;
1497 }
1498
1499 /*
1500  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1501  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1502  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1503  * of strings in the order that the evals (enum) were defined.
1504  */
1505 #undef C
1506 #define C(a, b) b
1507
1508 /* These must match the bit positions in trace_iterator_flags */
1509 static const char *trace_options[] = {
1510         TRACE_FLAGS
1511         NULL
1512 };
1513
1514 static struct {
1515         u64 (*func)(void);
1516         const char *name;
1517         int in_ns;              /* is this clock in nanoseconds? */
1518 } trace_clocks[] = {
1519         { trace_clock_local,            "local",        1 },
1520         { trace_clock_global,           "global",       1 },
1521         { trace_clock_counter,          "counter",      0 },
1522         { trace_clock_jiffies,          "uptime",       0 },
1523         { trace_clock,                  "perf",         1 },
1524         { ktime_get_mono_fast_ns,       "mono",         1 },
1525         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1526         { ktime_get_boot_fast_ns,       "boot",         1 },
1527         ARCH_TRACE_CLOCKS
1528 };
1529
1530 bool trace_clock_in_ns(struct trace_array *tr)
1531 {
1532         if (trace_clocks[tr->clock_id].in_ns)
1533                 return true;
1534
1535         return false;
1536 }
1537
1538 /*
1539  * trace_parser_get_init - gets the buffer for trace parser
1540  */
1541 int trace_parser_get_init(struct trace_parser *parser, int size)
1542 {
1543         memset(parser, 0, sizeof(*parser));
1544
1545         parser->buffer = kmalloc(size, GFP_KERNEL);
1546         if (!parser->buffer)
1547                 return 1;
1548
1549         parser->size = size;
1550         return 0;
1551 }
1552
1553 /*
1554  * trace_parser_put - frees the buffer for trace parser
1555  */
1556 void trace_parser_put(struct trace_parser *parser)
1557 {
1558         kfree(parser->buffer);
1559         parser->buffer = NULL;
1560 }
1561
1562 /*
1563  * trace_get_user - reads the user input string separated by  space
1564  * (matched by isspace(ch))
1565  *
1566  * For each string found the 'struct trace_parser' is updated,
1567  * and the function returns.
1568  *
1569  * Returns number of bytes read.
1570  *
1571  * See kernel/trace/trace.h for 'struct trace_parser' details.
1572  */
1573 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1574         size_t cnt, loff_t *ppos)
1575 {
1576         char ch;
1577         size_t read = 0;
1578         ssize_t ret;
1579
1580         if (!*ppos)
1581                 trace_parser_clear(parser);
1582
1583         ret = get_user(ch, ubuf++);
1584         if (ret)
1585                 goto out;
1586
1587         read++;
1588         cnt--;
1589
1590         /*
1591          * The parser is not finished with the last write,
1592          * continue reading the user input without skipping spaces.
1593          */
1594         if (!parser->cont) {
1595                 /* skip white space */
1596                 while (cnt && isspace(ch)) {
1597                         ret = get_user(ch, ubuf++);
1598                         if (ret)
1599                                 goto out;
1600                         read++;
1601                         cnt--;
1602                 }
1603
1604                 parser->idx = 0;
1605
1606                 /* only spaces were written */
1607                 if (isspace(ch) || !ch) {
1608                         *ppos += read;
1609                         ret = read;
1610                         goto out;
1611                 }
1612         }
1613
1614         /* read the non-space input */
1615         while (cnt && !isspace(ch) && ch) {
1616                 if (parser->idx < parser->size - 1)
1617                         parser->buffer[parser->idx++] = ch;
1618                 else {
1619                         ret = -EINVAL;
1620                         goto out;
1621                 }
1622                 ret = get_user(ch, ubuf++);
1623                 if (ret)
1624                         goto out;
1625                 read++;
1626                 cnt--;
1627         }
1628
1629         /* We either got finished input or we have to wait for another call. */
1630         if (isspace(ch) || !ch) {
1631                 parser->buffer[parser->idx] = 0;
1632                 parser->cont = false;
1633         } else if (parser->idx < parser->size - 1) {
1634                 parser->cont = true;
1635                 parser->buffer[parser->idx++] = ch;
1636                 /* Make sure the parsed string always terminates with '\0'. */
1637                 parser->buffer[parser->idx] = 0;
1638         } else {
1639                 ret = -EINVAL;
1640                 goto out;
1641         }
1642
1643         *ppos += read;
1644         ret = read;
1645
1646 out:
1647         return ret;
1648 }
1649
1650 /* TODO add a seq_buf_to_buffer() */
1651 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1652 {
1653         int len;
1654
1655         if (trace_seq_used(s) <= s->seq.readpos)
1656                 return -EBUSY;
1657
1658         len = trace_seq_used(s) - s->seq.readpos;
1659         if (cnt > len)
1660                 cnt = len;
1661         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1662
1663         s->seq.readpos += cnt;
1664         return cnt;
1665 }
1666
1667 unsigned long __read_mostly     tracing_thresh;
1668 static const struct file_operations tracing_max_lat_fops;
1669
1670 #ifdef LATENCY_FS_NOTIFY
1671
1672 static struct workqueue_struct *fsnotify_wq;
1673
1674 static void latency_fsnotify_workfn(struct work_struct *work)
1675 {
1676         struct trace_array *tr = container_of(work, struct trace_array,
1677                                               fsnotify_work);
1678         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1679 }
1680
1681 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1682 {
1683         struct trace_array *tr = container_of(iwork, struct trace_array,
1684                                               fsnotify_irqwork);
1685         queue_work(fsnotify_wq, &tr->fsnotify_work);
1686 }
1687
1688 static void trace_create_maxlat_file(struct trace_array *tr,
1689                                      struct dentry *d_tracer)
1690 {
1691         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1692         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1693         tr->d_max_latency = trace_create_file("tracing_max_latency",
1694                                               TRACE_MODE_WRITE,
1695                                               d_tracer, &tr->max_latency,
1696                                               &tracing_max_lat_fops);
1697 }
1698
1699 __init static int latency_fsnotify_init(void)
1700 {
1701         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1702                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1703         if (!fsnotify_wq) {
1704                 pr_err("Unable to allocate tr_max_lat_wq\n");
1705                 return -ENOMEM;
1706         }
1707         return 0;
1708 }
1709
1710 late_initcall_sync(latency_fsnotify_init);
1711
1712 void latency_fsnotify(struct trace_array *tr)
1713 {
1714         if (!fsnotify_wq)
1715                 return;
1716         /*
1717          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1718          * possible that we are called from __schedule() or do_idle(), which
1719          * could cause a deadlock.
1720          */
1721         irq_work_queue(&tr->fsnotify_irqwork);
1722 }
1723
1724 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1725         || defined(CONFIG_OSNOISE_TRACER)
1726
1727 #define trace_create_maxlat_file(tr, d_tracer)                          \
1728         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1729                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1730
1731 #else
1732 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1733 #endif
1734
1735 #ifdef CONFIG_TRACER_MAX_TRACE
1736 /*
1737  * Copy the new maximum trace into the separate maximum-trace
1738  * structure. (this way the maximum trace is permanently saved,
1739  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1740  */
1741 static void
1742 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1743 {
1744         struct array_buffer *trace_buf = &tr->array_buffer;
1745         struct array_buffer *max_buf = &tr->max_buffer;
1746         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1747         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1748
1749         max_buf->cpu = cpu;
1750         max_buf->time_start = data->preempt_timestamp;
1751
1752         max_data->saved_latency = tr->max_latency;
1753         max_data->critical_start = data->critical_start;
1754         max_data->critical_end = data->critical_end;
1755
1756         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1757         max_data->pid = tsk->pid;
1758         /*
1759          * If tsk == current, then use current_uid(), as that does not use
1760          * RCU. The irq tracer can be called out of RCU scope.
1761          */
1762         if (tsk == current)
1763                 max_data->uid = current_uid();
1764         else
1765                 max_data->uid = task_uid(tsk);
1766
1767         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1768         max_data->policy = tsk->policy;
1769         max_data->rt_priority = tsk->rt_priority;
1770
1771         /* record this tasks comm */
1772         tracing_record_cmdline(tsk);
1773         latency_fsnotify(tr);
1774 }
1775
1776 /**
1777  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1778  * @tr: tracer
1779  * @tsk: the task with the latency
1780  * @cpu: The cpu that initiated the trace.
1781  * @cond_data: User data associated with a conditional snapshot
1782  *
1783  * Flip the buffers between the @tr and the max_tr and record information
1784  * about which task was the cause of this latency.
1785  */
1786 void
1787 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1788               void *cond_data)
1789 {
1790         if (tr->stop_count)
1791                 return;
1792
1793         WARN_ON_ONCE(!irqs_disabled());
1794
1795         if (!tr->allocated_snapshot) {
1796                 /* Only the nop tracer should hit this when disabling */
1797                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1798                 return;
1799         }
1800
1801         arch_spin_lock(&tr->max_lock);
1802
1803         /* Inherit the recordable setting from array_buffer */
1804         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1805                 ring_buffer_record_on(tr->max_buffer.buffer);
1806         else
1807                 ring_buffer_record_off(tr->max_buffer.buffer);
1808
1809 #ifdef CONFIG_TRACER_SNAPSHOT
1810         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1811                 goto out_unlock;
1812 #endif
1813         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1814
1815         __update_max_tr(tr, tsk, cpu);
1816
1817  out_unlock:
1818         arch_spin_unlock(&tr->max_lock);
1819 }
1820
1821 /**
1822  * update_max_tr_single - only copy one trace over, and reset the rest
1823  * @tr: tracer
1824  * @tsk: task with the latency
1825  * @cpu: the cpu of the buffer to copy.
1826  *
1827  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1828  */
1829 void
1830 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1831 {
1832         int ret;
1833
1834         if (tr->stop_count)
1835                 return;
1836
1837         WARN_ON_ONCE(!irqs_disabled());
1838         if (!tr->allocated_snapshot) {
1839                 /* Only the nop tracer should hit this when disabling */
1840                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1841                 return;
1842         }
1843
1844         arch_spin_lock(&tr->max_lock);
1845
1846         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1847
1848         if (ret == -EBUSY) {
1849                 /*
1850                  * We failed to swap the buffer due to a commit taking
1851                  * place on this CPU. We fail to record, but we reset
1852                  * the max trace buffer (no one writes directly to it)
1853                  * and flag that it failed.
1854                  */
1855                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1856                         "Failed to swap buffers due to commit in progress\n");
1857         }
1858
1859         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1860
1861         __update_max_tr(tr, tsk, cpu);
1862         arch_spin_unlock(&tr->max_lock);
1863 }
1864 #endif /* CONFIG_TRACER_MAX_TRACE */
1865
1866 static int wait_on_pipe(struct trace_iterator *iter, int full)
1867 {
1868         /* Iterators are static, they should be filled or empty */
1869         if (trace_buffer_iter(iter, iter->cpu_file))
1870                 return 0;
1871
1872         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1873                                 full);
1874 }
1875
1876 #ifdef CONFIG_FTRACE_STARTUP_TEST
1877 static bool selftests_can_run;
1878
1879 struct trace_selftests {
1880         struct list_head                list;
1881         struct tracer                   *type;
1882 };
1883
1884 static LIST_HEAD(postponed_selftests);
1885
1886 static int save_selftest(struct tracer *type)
1887 {
1888         struct trace_selftests *selftest;
1889
1890         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1891         if (!selftest)
1892                 return -ENOMEM;
1893
1894         selftest->type = type;
1895         list_add(&selftest->list, &postponed_selftests);
1896         return 0;
1897 }
1898
1899 static int run_tracer_selftest(struct tracer *type)
1900 {
1901         struct trace_array *tr = &global_trace;
1902         struct tracer *saved_tracer = tr->current_trace;
1903         int ret;
1904
1905         if (!type->selftest || tracing_selftest_disabled)
1906                 return 0;
1907
1908         /*
1909          * If a tracer registers early in boot up (before scheduling is
1910          * initialized and such), then do not run its selftests yet.
1911          * Instead, run it a little later in the boot process.
1912          */
1913         if (!selftests_can_run)
1914                 return save_selftest(type);
1915
1916         if (!tracing_is_on()) {
1917                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1918                         type->name);
1919                 return 0;
1920         }
1921
1922         /*
1923          * Run a selftest on this tracer.
1924          * Here we reset the trace buffer, and set the current
1925          * tracer to be this tracer. The tracer can then run some
1926          * internal tracing to verify that everything is in order.
1927          * If we fail, we do not register this tracer.
1928          */
1929         tracing_reset_online_cpus(&tr->array_buffer);
1930
1931         tr->current_trace = type;
1932
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934         if (type->use_max_tr) {
1935                 /* If we expanded the buffers, make sure the max is expanded too */
1936                 if (ring_buffer_expanded)
1937                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1938                                            RING_BUFFER_ALL_CPUS);
1939                 tr->allocated_snapshot = true;
1940         }
1941 #endif
1942
1943         /* the test is responsible for initializing and enabling */
1944         pr_info("Testing tracer %s: ", type->name);
1945         ret = type->selftest(type, tr);
1946         /* the test is responsible for resetting too */
1947         tr->current_trace = saved_tracer;
1948         if (ret) {
1949                 printk(KERN_CONT "FAILED!\n");
1950                 /* Add the warning after printing 'FAILED' */
1951                 WARN_ON(1);
1952                 return -1;
1953         }
1954         /* Only reset on passing, to avoid touching corrupted buffers */
1955         tracing_reset_online_cpus(&tr->array_buffer);
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 tr->allocated_snapshot = false;
1960
1961                 /* Shrink the max buffer again */
1962                 if (ring_buffer_expanded)
1963                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1964                                            RING_BUFFER_ALL_CPUS);
1965         }
1966 #endif
1967
1968         printk(KERN_CONT "PASSED\n");
1969         return 0;
1970 }
1971
1972 static __init int init_trace_selftests(void)
1973 {
1974         struct trace_selftests *p, *n;
1975         struct tracer *t, **last;
1976         int ret;
1977
1978         selftests_can_run = true;
1979
1980         mutex_lock(&trace_types_lock);
1981
1982         if (list_empty(&postponed_selftests))
1983                 goto out;
1984
1985         pr_info("Running postponed tracer tests:\n");
1986
1987         tracing_selftest_running = true;
1988         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1989                 /* This loop can take minutes when sanitizers are enabled, so
1990                  * lets make sure we allow RCU processing.
1991                  */
1992                 cond_resched();
1993                 ret = run_tracer_selftest(p->type);
1994                 /* If the test fails, then warn and remove from available_tracers */
1995                 if (ret < 0) {
1996                         WARN(1, "tracer: %s failed selftest, disabling\n",
1997                              p->type->name);
1998                         last = &trace_types;
1999                         for (t = trace_types; t; t = t->next) {
2000                                 if (t == p->type) {
2001                                         *last = t->next;
2002                                         break;
2003                                 }
2004                                 last = &t->next;
2005                         }
2006                 }
2007                 list_del(&p->list);
2008                 kfree(p);
2009         }
2010         tracing_selftest_running = false;
2011
2012  out:
2013         mutex_unlock(&trace_types_lock);
2014
2015         return 0;
2016 }
2017 core_initcall(init_trace_selftests);
2018 #else
2019 static inline int run_tracer_selftest(struct tracer *type)
2020 {
2021         return 0;
2022 }
2023 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2024
2025 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2026
2027 static void __init apply_trace_boot_options(void);
2028
2029 /**
2030  * register_tracer - register a tracer with the ftrace system.
2031  * @type: the plugin for the tracer
2032  *
2033  * Register a new plugin tracer.
2034  */
2035 int __init register_tracer(struct tracer *type)
2036 {
2037         struct tracer *t;
2038         int ret = 0;
2039
2040         if (!type->name) {
2041                 pr_info("Tracer must have a name\n");
2042                 return -1;
2043         }
2044
2045         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2046                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2047                 return -1;
2048         }
2049
2050         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2051                 pr_warn("Can not register tracer %s due to lockdown\n",
2052                            type->name);
2053                 return -EPERM;
2054         }
2055
2056         mutex_lock(&trace_types_lock);
2057
2058         tracing_selftest_running = true;
2059
2060         for (t = trace_types; t; t = t->next) {
2061                 if (strcmp(type->name, t->name) == 0) {
2062                         /* already found */
2063                         pr_info("Tracer %s already registered\n",
2064                                 type->name);
2065                         ret = -1;
2066                         goto out;
2067                 }
2068         }
2069
2070         if (!type->set_flag)
2071                 type->set_flag = &dummy_set_flag;
2072         if (!type->flags) {
2073                 /*allocate a dummy tracer_flags*/
2074                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2075                 if (!type->flags) {
2076                         ret = -ENOMEM;
2077                         goto out;
2078                 }
2079                 type->flags->val = 0;
2080                 type->flags->opts = dummy_tracer_opt;
2081         } else
2082                 if (!type->flags->opts)
2083                         type->flags->opts = dummy_tracer_opt;
2084
2085         /* store the tracer for __set_tracer_option */
2086         type->flags->trace = type;
2087
2088         ret = run_tracer_selftest(type);
2089         if (ret < 0)
2090                 goto out;
2091
2092         type->next = trace_types;
2093         trace_types = type;
2094         add_tracer_options(&global_trace, type);
2095
2096  out:
2097         tracing_selftest_running = false;
2098         mutex_unlock(&trace_types_lock);
2099
2100         if (ret || !default_bootup_tracer)
2101                 goto out_unlock;
2102
2103         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2104                 goto out_unlock;
2105
2106         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2107         /* Do we want this tracer to start on bootup? */
2108         tracing_set_tracer(&global_trace, type->name);
2109         default_bootup_tracer = NULL;
2110
2111         apply_trace_boot_options();
2112
2113         /* disable other selftests, since this will break it. */
2114         disable_tracing_selftest("running a tracer");
2115
2116  out_unlock:
2117         return ret;
2118 }
2119
2120 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2121 {
2122         struct trace_buffer *buffer = buf->buffer;
2123
2124         if (!buffer)
2125                 return;
2126
2127         ring_buffer_record_disable(buffer);
2128
2129         /* Make sure all commits have finished */
2130         synchronize_rcu();
2131         ring_buffer_reset_cpu(buffer, cpu);
2132
2133         ring_buffer_record_enable(buffer);
2134 }
2135
2136 void tracing_reset_online_cpus(struct array_buffer *buf)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147
2148         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2149
2150         ring_buffer_reset_online_cpus(buffer);
2151
2152         ring_buffer_record_enable(buffer);
2153 }
2154
2155 /* Must have trace_types_lock held */
2156 void tracing_reset_all_online_cpus(void)
2157 {
2158         struct trace_array *tr;
2159
2160         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2161                 if (!tr->clear_trace)
2162                         continue;
2163                 tr->clear_trace = false;
2164                 tracing_reset_online_cpus(&tr->array_buffer);
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166                 tracing_reset_online_cpus(&tr->max_buffer);
2167 #endif
2168         }
2169 }
2170
2171 /*
2172  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2173  * is the tgid last observed corresponding to pid=i.
2174  */
2175 static int *tgid_map;
2176
2177 /* The maximum valid index into tgid_map. */
2178 static size_t tgid_map_max;
2179
2180 #define SAVED_CMDLINES_DEFAULT 128
2181 #define NO_CMDLINE_MAP UINT_MAX
2182 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2183 struct saved_cmdlines_buffer {
2184         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2185         unsigned *map_cmdline_to_pid;
2186         unsigned cmdline_num;
2187         int cmdline_idx;
2188         char *saved_cmdlines;
2189 };
2190 static struct saved_cmdlines_buffer *savedcmd;
2191
2192 static inline char *get_saved_cmdlines(int idx)
2193 {
2194         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2195 }
2196
2197 static inline void set_cmdline(int idx, const char *cmdline)
2198 {
2199         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2200 }
2201
2202 static int allocate_cmdlines_buffer(unsigned int val,
2203                                     struct saved_cmdlines_buffer *s)
2204 {
2205         s->map_cmdline_to_pid = kmalloc_array(val,
2206                                               sizeof(*s->map_cmdline_to_pid),
2207                                               GFP_KERNEL);
2208         if (!s->map_cmdline_to_pid)
2209                 return -ENOMEM;
2210
2211         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2212         if (!s->saved_cmdlines) {
2213                 kfree(s->map_cmdline_to_pid);
2214                 return -ENOMEM;
2215         }
2216
2217         s->cmdline_idx = 0;
2218         s->cmdline_num = val;
2219         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2220                sizeof(s->map_pid_to_cmdline));
2221         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2222                val * sizeof(*s->map_cmdline_to_pid));
2223
2224         return 0;
2225 }
2226
2227 static int trace_create_savedcmd(void)
2228 {
2229         int ret;
2230
2231         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2232         if (!savedcmd)
2233                 return -ENOMEM;
2234
2235         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2236         if (ret < 0) {
2237                 kfree(savedcmd);
2238                 savedcmd = NULL;
2239                 return -ENOMEM;
2240         }
2241
2242         return 0;
2243 }
2244
2245 int is_tracing_stopped(void)
2246 {
2247         return global_trace.stop_count;
2248 }
2249
2250 /**
2251  * tracing_start - quick start of the tracer
2252  *
2253  * If tracing is enabled but was stopped by tracing_stop,
2254  * this will start the tracer back up.
2255  */
2256 void tracing_start(void)
2257 {
2258         struct trace_buffer *buffer;
2259         unsigned long flags;
2260
2261         if (tracing_disabled)
2262                 return;
2263
2264         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2265         if (--global_trace.stop_count) {
2266                 if (global_trace.stop_count < 0) {
2267                         /* Someone screwed up their debugging */
2268                         WARN_ON_ONCE(1);
2269                         global_trace.stop_count = 0;
2270                 }
2271                 goto out;
2272         }
2273
2274         /* Prevent the buffers from switching */
2275         arch_spin_lock(&global_trace.max_lock);
2276
2277         buffer = global_trace.array_buffer.buffer;
2278         if (buffer)
2279                 ring_buffer_record_enable(buffer);
2280
2281 #ifdef CONFIG_TRACER_MAX_TRACE
2282         buffer = global_trace.max_buffer.buffer;
2283         if (buffer)
2284                 ring_buffer_record_enable(buffer);
2285 #endif
2286
2287         arch_spin_unlock(&global_trace.max_lock);
2288
2289  out:
2290         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2291 }
2292
2293 static void tracing_start_tr(struct trace_array *tr)
2294 {
2295         struct trace_buffer *buffer;
2296         unsigned long flags;
2297
2298         if (tracing_disabled)
2299                 return;
2300
2301         /* If global, we need to also start the max tracer */
2302         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2303                 return tracing_start();
2304
2305         raw_spin_lock_irqsave(&tr->start_lock, flags);
2306
2307         if (--tr->stop_count) {
2308                 if (tr->stop_count < 0) {
2309                         /* Someone screwed up their debugging */
2310                         WARN_ON_ONCE(1);
2311                         tr->stop_count = 0;
2312                 }
2313                 goto out;
2314         }
2315
2316         buffer = tr->array_buffer.buffer;
2317         if (buffer)
2318                 ring_buffer_record_enable(buffer);
2319
2320  out:
2321         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2322 }
2323
2324 /**
2325  * tracing_stop - quick stop of the tracer
2326  *
2327  * Light weight way to stop tracing. Use in conjunction with
2328  * tracing_start.
2329  */
2330 void tracing_stop(void)
2331 {
2332         struct trace_buffer *buffer;
2333         unsigned long flags;
2334
2335         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2336         if (global_trace.stop_count++)
2337                 goto out;
2338
2339         /* Prevent the buffers from switching */
2340         arch_spin_lock(&global_trace.max_lock);
2341
2342         buffer = global_trace.array_buffer.buffer;
2343         if (buffer)
2344                 ring_buffer_record_disable(buffer);
2345
2346 #ifdef CONFIG_TRACER_MAX_TRACE
2347         buffer = global_trace.max_buffer.buffer;
2348         if (buffer)
2349                 ring_buffer_record_disable(buffer);
2350 #endif
2351
2352         arch_spin_unlock(&global_trace.max_lock);
2353
2354  out:
2355         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2356 }
2357
2358 static void tracing_stop_tr(struct trace_array *tr)
2359 {
2360         struct trace_buffer *buffer;
2361         unsigned long flags;
2362
2363         /* If global, we need to also stop the max tracer */
2364         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2365                 return tracing_stop();
2366
2367         raw_spin_lock_irqsave(&tr->start_lock, flags);
2368         if (tr->stop_count++)
2369                 goto out;
2370
2371         buffer = tr->array_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374
2375  out:
2376         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2377 }
2378
2379 static int trace_save_cmdline(struct task_struct *tsk)
2380 {
2381         unsigned tpid, idx;
2382
2383         /* treat recording of idle task as a success */
2384         if (!tsk->pid)
2385                 return 1;
2386
2387         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2388
2389         /*
2390          * It's not the end of the world if we don't get
2391          * the lock, but we also don't want to spin
2392          * nor do we want to disable interrupts,
2393          * so if we miss here, then better luck next time.
2394          */
2395         if (!arch_spin_trylock(&trace_cmdline_lock))
2396                 return 0;
2397
2398         idx = savedcmd->map_pid_to_cmdline[tpid];
2399         if (idx == NO_CMDLINE_MAP) {
2400                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2401
2402                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2403                 savedcmd->cmdline_idx = idx;
2404         }
2405
2406         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2407         set_cmdline(idx, tsk->comm);
2408
2409         arch_spin_unlock(&trace_cmdline_lock);
2410
2411         return 1;
2412 }
2413
2414 static void __trace_find_cmdline(int pid, char comm[])
2415 {
2416         unsigned map;
2417         int tpid;
2418
2419         if (!pid) {
2420                 strcpy(comm, "<idle>");
2421                 return;
2422         }
2423
2424         if (WARN_ON_ONCE(pid < 0)) {
2425                 strcpy(comm, "<XXX>");
2426                 return;
2427         }
2428
2429         tpid = pid & (PID_MAX_DEFAULT - 1);
2430         map = savedcmd->map_pid_to_cmdline[tpid];
2431         if (map != NO_CMDLINE_MAP) {
2432                 tpid = savedcmd->map_cmdline_to_pid[map];
2433                 if (tpid == pid) {
2434                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2435                         return;
2436                 }
2437         }
2438         strcpy(comm, "<...>");
2439 }
2440
2441 void trace_find_cmdline(int pid, char comm[])
2442 {
2443         preempt_disable();
2444         arch_spin_lock(&trace_cmdline_lock);
2445
2446         __trace_find_cmdline(pid, comm);
2447
2448         arch_spin_unlock(&trace_cmdline_lock);
2449         preempt_enable();
2450 }
2451
2452 static int *trace_find_tgid_ptr(int pid)
2453 {
2454         /*
2455          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2456          * if we observe a non-NULL tgid_map then we also observe the correct
2457          * tgid_map_max.
2458          */
2459         int *map = smp_load_acquire(&tgid_map);
2460
2461         if (unlikely(!map || pid > tgid_map_max))
2462                 return NULL;
2463
2464         return &map[pid];
2465 }
2466
2467 int trace_find_tgid(int pid)
2468 {
2469         int *ptr = trace_find_tgid_ptr(pid);
2470
2471         return ptr ? *ptr : 0;
2472 }
2473
2474 static int trace_save_tgid(struct task_struct *tsk)
2475 {
2476         int *ptr;
2477
2478         /* treat recording of idle task as a success */
2479         if (!tsk->pid)
2480                 return 1;
2481
2482         ptr = trace_find_tgid_ptr(tsk->pid);
2483         if (!ptr)
2484                 return 0;
2485
2486         *ptr = tsk->tgid;
2487         return 1;
2488 }
2489
2490 static bool tracing_record_taskinfo_skip(int flags)
2491 {
2492         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493                 return true;
2494         if (!__this_cpu_read(trace_taskinfo_save))
2495                 return true;
2496         return false;
2497 }
2498
2499 /**
2500  * tracing_record_taskinfo - record the task info of a task
2501  *
2502  * @task:  task to record
2503  * @flags: TRACE_RECORD_CMDLINE for recording comm
2504  *         TRACE_RECORD_TGID for recording tgid
2505  */
2506 void tracing_record_taskinfo(struct task_struct *task, int flags)
2507 {
2508         bool done;
2509
2510         if (tracing_record_taskinfo_skip(flags))
2511                 return;
2512
2513         /*
2514          * Record as much task information as possible. If some fail, continue
2515          * to try to record the others.
2516          */
2517         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2518         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2519
2520         /* If recording any information failed, retry again soon. */
2521         if (!done)
2522                 return;
2523
2524         __this_cpu_write(trace_taskinfo_save, false);
2525 }
2526
2527 /**
2528  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2529  *
2530  * @prev: previous task during sched_switch
2531  * @next: next task during sched_switch
2532  * @flags: TRACE_RECORD_CMDLINE for recording comm
2533  *         TRACE_RECORD_TGID for recording tgid
2534  */
2535 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2536                                           struct task_struct *next, int flags)
2537 {
2538         bool done;
2539
2540         if (tracing_record_taskinfo_skip(flags))
2541                 return;
2542
2543         /*
2544          * Record as much task information as possible. If some fail, continue
2545          * to try to record the others.
2546          */
2547         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2548         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2549         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2550         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2551
2552         /* If recording any information failed, retry again soon. */
2553         if (!done)
2554                 return;
2555
2556         __this_cpu_write(trace_taskinfo_save, false);
2557 }
2558
2559 /* Helpers to record a specific task information */
2560 void tracing_record_cmdline(struct task_struct *task)
2561 {
2562         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2563 }
2564
2565 void tracing_record_tgid(struct task_struct *task)
2566 {
2567         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2568 }
2569
2570 /*
2571  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2572  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2573  * simplifies those functions and keeps them in sync.
2574  */
2575 enum print_line_t trace_handle_return(struct trace_seq *s)
2576 {
2577         return trace_seq_has_overflowed(s) ?
2578                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2579 }
2580 EXPORT_SYMBOL_GPL(trace_handle_return);
2581
2582 static unsigned short migration_disable_value(void)
2583 {
2584 #if defined(CONFIG_SMP)
2585         return current->migration_disabled;
2586 #else
2587         return 0;
2588 #endif
2589 }
2590
2591 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2592 {
2593         unsigned int trace_flags = irqs_status;
2594         unsigned int pc;
2595
2596         pc = preempt_count();
2597
2598         if (pc & NMI_MASK)
2599                 trace_flags |= TRACE_FLAG_NMI;
2600         if (pc & HARDIRQ_MASK)
2601                 trace_flags |= TRACE_FLAG_HARDIRQ;
2602         if (in_serving_softirq())
2603                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2604
2605         if (tif_need_resched())
2606                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2607         if (test_preempt_need_resched())
2608                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2609         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2610                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2611 }
2612
2613 struct ring_buffer_event *
2614 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2615                           int type,
2616                           unsigned long len,
2617                           unsigned int trace_ctx)
2618 {
2619         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2620 }
2621
2622 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2623 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2624 static int trace_buffered_event_ref;
2625
2626 /**
2627  * trace_buffered_event_enable - enable buffering events
2628  *
2629  * When events are being filtered, it is quicker to use a temporary
2630  * buffer to write the event data into if there's a likely chance
2631  * that it will not be committed. The discard of the ring buffer
2632  * is not as fast as committing, and is much slower than copying
2633  * a commit.
2634  *
2635  * When an event is to be filtered, allocate per cpu buffers to
2636  * write the event data into, and if the event is filtered and discarded
2637  * it is simply dropped, otherwise, the entire data is to be committed
2638  * in one shot.
2639  */
2640 void trace_buffered_event_enable(void)
2641 {
2642         struct ring_buffer_event *event;
2643         struct page *page;
2644         int cpu;
2645
2646         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647
2648         if (trace_buffered_event_ref++)
2649                 return;
2650
2651         for_each_tracing_cpu(cpu) {
2652                 page = alloc_pages_node(cpu_to_node(cpu),
2653                                         GFP_KERNEL | __GFP_NORETRY, 0);
2654                 if (!page)
2655                         goto failed;
2656
2657                 event = page_address(page);
2658                 memset(event, 0, sizeof(*event));
2659
2660                 per_cpu(trace_buffered_event, cpu) = event;
2661
2662                 preempt_disable();
2663                 if (cpu == smp_processor_id() &&
2664                     __this_cpu_read(trace_buffered_event) !=
2665                     per_cpu(trace_buffered_event, cpu))
2666                         WARN_ON_ONCE(1);
2667                 preempt_enable();
2668         }
2669
2670         return;
2671  failed:
2672         trace_buffered_event_disable();
2673 }
2674
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677         /* Probably not needed, but do it anyway */
2678         smp_rmb();
2679         this_cpu_dec(trace_buffered_event_cnt);
2680 }
2681
2682 static void disable_trace_buffered_event(void *data)
2683 {
2684         this_cpu_inc(trace_buffered_event_cnt);
2685 }
2686
2687 /**
2688  * trace_buffered_event_disable - disable buffering events
2689  *
2690  * When a filter is removed, it is faster to not use the buffered
2691  * events, and to commit directly into the ring buffer. Free up
2692  * the temp buffers when there are no more users. This requires
2693  * special synchronization with current events.
2694  */
2695 void trace_buffered_event_disable(void)
2696 {
2697         int cpu;
2698
2699         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2700
2701         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2702                 return;
2703
2704         if (--trace_buffered_event_ref)
2705                 return;
2706
2707         preempt_disable();
2708         /* For each CPU, set the buffer as used. */
2709         smp_call_function_many(tracing_buffer_mask,
2710                                disable_trace_buffered_event, NULL, 1);
2711         preempt_enable();
2712
2713         /* Wait for all current users to finish */
2714         synchronize_rcu();
2715
2716         for_each_tracing_cpu(cpu) {
2717                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2718                 per_cpu(trace_buffered_event, cpu) = NULL;
2719         }
2720         /*
2721          * Make sure trace_buffered_event is NULL before clearing
2722          * trace_buffered_event_cnt.
2723          */
2724         smp_wmb();
2725
2726         preempt_disable();
2727         /* Do the work on each cpu */
2728         smp_call_function_many(tracing_buffer_mask,
2729                                enable_trace_buffered_event, NULL, 1);
2730         preempt_enable();
2731 }
2732
2733 static struct trace_buffer *temp_buffer;
2734
2735 struct ring_buffer_event *
2736 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2737                           struct trace_event_file *trace_file,
2738                           int type, unsigned long len,
2739                           unsigned int trace_ctx)
2740 {
2741         struct ring_buffer_event *entry;
2742         struct trace_array *tr = trace_file->tr;
2743         int val;
2744
2745         *current_rb = tr->array_buffer.buffer;
2746
2747         if (!tr->no_filter_buffering_ref &&
2748             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2749             (entry = this_cpu_read(trace_buffered_event))) {
2750                 /*
2751                  * Filtering is on, so try to use the per cpu buffer first.
2752                  * This buffer will simulate a ring_buffer_event,
2753                  * where the type_len is zero and the array[0] will
2754                  * hold the full length.
2755                  * (see include/linux/ring-buffer.h for details on
2756                  *  how the ring_buffer_event is structured).
2757                  *
2758                  * Using a temp buffer during filtering and copying it
2759                  * on a matched filter is quicker than writing directly
2760                  * into the ring buffer and then discarding it when
2761                  * it doesn't match. That is because the discard
2762                  * requires several atomic operations to get right.
2763                  * Copying on match and doing nothing on a failed match
2764                  * is still quicker than no copy on match, but having
2765                  * to discard out of the ring buffer on a failed match.
2766                  */
2767                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2768
2769                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2770
2771                 /*
2772                  * Preemption is disabled, but interrupts and NMIs
2773                  * can still come in now. If that happens after
2774                  * the above increment, then it will have to go
2775                  * back to the old method of allocating the event
2776                  * on the ring buffer, and if the filter fails, it
2777                  * will have to call ring_buffer_discard_commit()
2778                  * to remove it.
2779                  *
2780                  * Need to also check the unlikely case that the
2781                  * length is bigger than the temp buffer size.
2782                  * If that happens, then the reserve is pretty much
2783                  * guaranteed to fail, as the ring buffer currently
2784                  * only allows events less than a page. But that may
2785                  * change in the future, so let the ring buffer reserve
2786                  * handle the failure in that case.
2787                  */
2788                 if (val == 1 && likely(len <= max_len)) {
2789                         trace_event_setup(entry, type, trace_ctx);
2790                         entry->array[0] = len;
2791                         return entry;
2792                 }
2793                 this_cpu_dec(trace_buffered_event_cnt);
2794         }
2795
2796         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2797                                             trace_ctx);
2798         /*
2799          * If tracing is off, but we have triggers enabled
2800          * we still need to look at the event data. Use the temp_buffer
2801          * to store the trace event for the trigger to use. It's recursive
2802          * safe and will not be recorded anywhere.
2803          */
2804         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2805                 *current_rb = temp_buffer;
2806                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2807                                                     trace_ctx);
2808         }
2809         return entry;
2810 }
2811 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2812
2813 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2814 static DEFINE_MUTEX(tracepoint_printk_mutex);
2815
2816 static void output_printk(struct trace_event_buffer *fbuffer)
2817 {
2818         struct trace_event_call *event_call;
2819         struct trace_event_file *file;
2820         struct trace_event *event;
2821         unsigned long flags;
2822         struct trace_iterator *iter = tracepoint_print_iter;
2823
2824         /* We should never get here if iter is NULL */
2825         if (WARN_ON_ONCE(!iter))
2826                 return;
2827
2828         event_call = fbuffer->trace_file->event_call;
2829         if (!event_call || !event_call->event.funcs ||
2830             !event_call->event.funcs->trace)
2831                 return;
2832
2833         file = fbuffer->trace_file;
2834         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2835             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2836              !filter_match_preds(file->filter, fbuffer->entry)))
2837                 return;
2838
2839         event = &fbuffer->trace_file->event_call->event;
2840
2841         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2842         trace_seq_init(&iter->seq);
2843         iter->ent = fbuffer->entry;
2844         event_call->event.funcs->trace(iter, 0, event);
2845         trace_seq_putc(&iter->seq, 0);
2846         printk("%s", iter->seq.buffer);
2847
2848         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2849 }
2850
2851 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2852                              void *buffer, size_t *lenp,
2853                              loff_t *ppos)
2854 {
2855         int save_tracepoint_printk;
2856         int ret;
2857
2858         mutex_lock(&tracepoint_printk_mutex);
2859         save_tracepoint_printk = tracepoint_printk;
2860
2861         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2862
2863         /*
2864          * This will force exiting early, as tracepoint_printk
2865          * is always zero when tracepoint_printk_iter is not allocated
2866          */
2867         if (!tracepoint_print_iter)
2868                 tracepoint_printk = 0;
2869
2870         if (save_tracepoint_printk == tracepoint_printk)
2871                 goto out;
2872
2873         if (tracepoint_printk)
2874                 static_key_enable(&tracepoint_printk_key.key);
2875         else
2876                 static_key_disable(&tracepoint_printk_key.key);
2877
2878  out:
2879         mutex_unlock(&tracepoint_printk_mutex);
2880
2881         return ret;
2882 }
2883
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886         enum event_trigger_type tt = ETT_NONE;
2887         struct trace_event_file *file = fbuffer->trace_file;
2888
2889         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890                         fbuffer->entry, &tt))
2891                 goto discard;
2892
2893         if (static_key_false(&tracepoint_printk_key.key))
2894                 output_printk(fbuffer);
2895
2896         if (static_branch_unlikely(&trace_event_exports_enabled))
2897                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898
2899         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901
2902 discard:
2903         if (tt)
2904                 event_triggers_post_call(file, tt);
2905
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917
2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919                                      struct trace_buffer *buffer,
2920                                      struct ring_buffer_event *event,
2921                                      unsigned int trace_ctx,
2922                                      struct pt_regs *regs)
2923 {
2924         __buffer_unlock_commit(buffer, event);
2925
2926         /*
2927          * If regs is not set, then skip the necessary functions.
2928          * Note, we can still get here via blktrace, wakeup tracer
2929          * and mmiotrace, but that's ok if they lose a function or
2930          * two. They are not that meaningful.
2931          */
2932         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933         ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941                                    struct ring_buffer_event *event)
2942 {
2943         __buffer_unlock_commit(buffer, event);
2944 }
2945
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948                parent_ip, unsigned int trace_ctx)
2949 {
2950         struct trace_event_call *call = &event_function;
2951         struct trace_buffer *buffer = tr->array_buffer.buffer;
2952         struct ring_buffer_event *event;
2953         struct ftrace_entry *entry;
2954
2955         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2956                                             trace_ctx);
2957         if (!event)
2958                 return;
2959         entry   = ring_buffer_event_data(event);
2960         entry->ip                       = ip;
2961         entry->parent_ip                = parent_ip;
2962
2963         if (!call_filter_check_discard(call, entry, buffer, event)) {
2964                 if (static_branch_unlikely(&trace_function_exports_enabled))
2965                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2966                 __buffer_unlock_commit(buffer, event);
2967         }
2968 }
2969
2970 #ifdef CONFIG_STACKTRACE
2971
2972 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2973 #define FTRACE_KSTACK_NESTING   4
2974
2975 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2976
2977 struct ftrace_stack {
2978         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2979 };
2980
2981
2982 struct ftrace_stacks {
2983         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2984 };
2985
2986 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2987 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2988
2989 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2990                                  unsigned int trace_ctx,
2991                                  int skip, struct pt_regs *regs)
2992 {
2993         struct trace_event_call *call = &event_kernel_stack;
2994         struct ring_buffer_event *event;
2995         unsigned int size, nr_entries;
2996         struct ftrace_stack *fstack;
2997         struct stack_entry *entry;
2998         int stackidx;
2999
3000         /*
3001          * Add one, for this function and the call to save_stack_trace()
3002          * If regs is set, then these functions will not be in the way.
3003          */
3004 #ifndef CONFIG_UNWINDER_ORC
3005         if (!regs)
3006                 skip++;
3007 #endif
3008
3009         preempt_disable_notrace();
3010
3011         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3012
3013         /* This should never happen. If it does, yell once and skip */
3014         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3015                 goto out;
3016
3017         /*
3018          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3019          * interrupt will either see the value pre increment or post
3020          * increment. If the interrupt happens pre increment it will have
3021          * restored the counter when it returns.  We just need a barrier to
3022          * keep gcc from moving things around.
3023          */
3024         barrier();
3025
3026         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3027         size = ARRAY_SIZE(fstack->calls);
3028
3029         if (regs) {
3030                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3031                                                    size, skip);
3032         } else {
3033                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3034         }
3035
3036         size = nr_entries * sizeof(unsigned long);
3037         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3038                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3039                                     trace_ctx);
3040         if (!event)
3041                 goto out;
3042         entry = ring_buffer_event_data(event);
3043
3044         memcpy(&entry->caller, fstack->calls, size);
3045         entry->size = nr_entries;
3046
3047         if (!call_filter_check_discard(call, entry, buffer, event))
3048                 __buffer_unlock_commit(buffer, event);
3049
3050  out:
3051         /* Again, don't let gcc optimize things here */
3052         barrier();
3053         __this_cpu_dec(ftrace_stack_reserve);
3054         preempt_enable_notrace();
3055
3056 }
3057
3058 static inline void ftrace_trace_stack(struct trace_array *tr,
3059                                       struct trace_buffer *buffer,
3060                                       unsigned int trace_ctx,
3061                                       int skip, struct pt_regs *regs)
3062 {
3063         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3064                 return;
3065
3066         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3067 }
3068
3069 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3070                    int skip)
3071 {
3072         struct trace_buffer *buffer = tr->array_buffer.buffer;
3073
3074         if (rcu_is_watching()) {
3075                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3076                 return;
3077         }
3078
3079         /*
3080          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3081          * but if the above rcu_is_watching() failed, then the NMI
3082          * triggered someplace critical, and rcu_irq_enter() should
3083          * not be called from NMI.
3084          */
3085         if (unlikely(in_nmi()))
3086                 return;
3087
3088         rcu_irq_enter_irqson();
3089         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090         rcu_irq_exit_irqson();
3091 }
3092
3093 /**
3094  * trace_dump_stack - record a stack back trace in the trace buffer
3095  * @skip: Number of functions to skip (helper handlers)
3096  */
3097 void trace_dump_stack(int skip)
3098 {
3099         if (tracing_disabled || tracing_selftest_running)
3100                 return;
3101
3102 #ifndef CONFIG_UNWINDER_ORC
3103         /* Skip 1 to skip this function. */
3104         skip++;
3105 #endif
3106         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3107                              tracing_gen_ctx(), skip, NULL);
3108 }
3109 EXPORT_SYMBOL_GPL(trace_dump_stack);
3110
3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3112 static DEFINE_PER_CPU(int, user_stack_count);
3113
3114 static void
3115 ftrace_trace_userstack(struct trace_array *tr,
3116                        struct trace_buffer *buffer, unsigned int trace_ctx)
3117 {
3118         struct trace_event_call *call = &event_user_stack;
3119         struct ring_buffer_event *event;
3120         struct userstack_entry *entry;
3121
3122         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3123                 return;
3124
3125         /*
3126          * NMIs can not handle page faults, even with fix ups.
3127          * The save user stack can (and often does) fault.
3128          */
3129         if (unlikely(in_nmi()))
3130                 return;
3131
3132         /*
3133          * prevent recursion, since the user stack tracing may
3134          * trigger other kernel events.
3135          */
3136         preempt_disable();
3137         if (__this_cpu_read(user_stack_count))
3138                 goto out;
3139
3140         __this_cpu_inc(user_stack_count);
3141
3142         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3143                                             sizeof(*entry), trace_ctx);
3144         if (!event)
3145                 goto out_drop_count;
3146         entry   = ring_buffer_event_data(event);
3147
3148         entry->tgid             = current->tgid;
3149         memset(&entry->caller, 0, sizeof(entry->caller));
3150
3151         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3152         if (!call_filter_check_discard(call, entry, buffer, event))
3153                 __buffer_unlock_commit(buffer, event);
3154
3155  out_drop_count:
3156         __this_cpu_dec(user_stack_count);
3157  out:
3158         preempt_enable();
3159 }
3160 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3161 static void ftrace_trace_userstack(struct trace_array *tr,
3162                                    struct trace_buffer *buffer,
3163                                    unsigned int trace_ctx)
3164 {
3165 }
3166 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3167
3168 #endif /* CONFIG_STACKTRACE */
3169
3170 static inline void
3171 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3172                           unsigned long long delta)
3173 {
3174         entry->bottom_delta_ts = delta & U32_MAX;
3175         entry->top_delta_ts = (delta >> 32);
3176 }
3177
3178 void trace_last_func_repeats(struct trace_array *tr,
3179                              struct trace_func_repeats *last_info,
3180                              unsigned int trace_ctx)
3181 {
3182         struct trace_buffer *buffer = tr->array_buffer.buffer;
3183         struct func_repeats_entry *entry;
3184         struct ring_buffer_event *event;
3185         u64 delta;
3186
3187         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3188                                             sizeof(*entry), trace_ctx);
3189         if (!event)
3190                 return;
3191
3192         delta = ring_buffer_event_time_stamp(buffer, event) -
3193                 last_info->ts_last_call;
3194
3195         entry = ring_buffer_event_data(event);
3196         entry->ip = last_info->ip;
3197         entry->parent_ip = last_info->parent_ip;
3198         entry->count = last_info->count;
3199         func_repeats_set_delta_ts(entry, delta);
3200
3201         __buffer_unlock_commit(buffer, event);
3202 }
3203
3204 /* created for use with alloc_percpu */
3205 struct trace_buffer_struct {
3206         int nesting;
3207         char buffer[4][TRACE_BUF_SIZE];
3208 };
3209
3210 static struct trace_buffer_struct *trace_percpu_buffer;
3211
3212 /*
3213  * This allows for lockless recording.  If we're nested too deeply, then
3214  * this returns NULL.
3215  */
3216 static char *get_trace_buf(void)
3217 {
3218         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3219
3220         if (!buffer || buffer->nesting >= 4)
3221                 return NULL;
3222
3223         buffer->nesting++;
3224
3225         /* Interrupts must see nesting incremented before we use the buffer */
3226         barrier();
3227         return &buffer->buffer[buffer->nesting - 1][0];
3228 }
3229
3230 static void put_trace_buf(void)
3231 {
3232         /* Don't let the decrement of nesting leak before this */
3233         barrier();
3234         this_cpu_dec(trace_percpu_buffer->nesting);
3235 }
3236
3237 static int alloc_percpu_trace_buffer(void)
3238 {
3239         struct trace_buffer_struct *buffers;
3240
3241         if (trace_percpu_buffer)
3242                 return 0;
3243
3244         buffers = alloc_percpu(struct trace_buffer_struct);
3245         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3246                 return -ENOMEM;
3247
3248         trace_percpu_buffer = buffers;
3249         return 0;
3250 }
3251
3252 static int buffers_allocated;
3253
3254 void trace_printk_init_buffers(void)
3255 {
3256         if (buffers_allocated)
3257                 return;
3258
3259         if (alloc_percpu_trace_buffer())
3260                 return;
3261
3262         /* trace_printk() is for debug use only. Don't use it in production. */
3263
3264         pr_warn("\n");
3265         pr_warn("**********************************************************\n");
3266         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3267         pr_warn("**                                                      **\n");
3268         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3269         pr_warn("**                                                      **\n");
3270         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3271         pr_warn("** unsafe for production use.                           **\n");
3272         pr_warn("**                                                      **\n");
3273         pr_warn("** If you see this message and you are not debugging    **\n");
3274         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3275         pr_warn("**                                                      **\n");
3276         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3277         pr_warn("**********************************************************\n");
3278
3279         /* Expand the buffers to set size */
3280         tracing_update_buffers();
3281
3282         buffers_allocated = 1;
3283
3284         /*
3285          * trace_printk_init_buffers() can be called by modules.
3286          * If that happens, then we need to start cmdline recording
3287          * directly here. If the global_trace.buffer is already
3288          * allocated here, then this was called by module code.
3289          */
3290         if (global_trace.array_buffer.buffer)
3291                 tracing_start_cmdline_record();
3292 }
3293 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3294
3295 void trace_printk_start_comm(void)
3296 {
3297         /* Start tracing comms if trace printk is set */
3298         if (!buffers_allocated)
3299                 return;
3300         tracing_start_cmdline_record();
3301 }
3302
3303 static void trace_printk_start_stop_comm(int enabled)
3304 {
3305         if (!buffers_allocated)
3306                 return;
3307
3308         if (enabled)
3309                 tracing_start_cmdline_record();
3310         else
3311                 tracing_stop_cmdline_record();
3312 }
3313
3314 /**
3315  * trace_vbprintk - write binary msg to tracing buffer
3316  * @ip:    The address of the caller
3317  * @fmt:   The string format to write to the buffer
3318  * @args:  Arguments for @fmt
3319  */
3320 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3321 {
3322         struct trace_event_call *call = &event_bprint;
3323         struct ring_buffer_event *event;
3324         struct trace_buffer *buffer;
3325         struct trace_array *tr = &global_trace;
3326         struct bprint_entry *entry;
3327         unsigned int trace_ctx;
3328         char *tbuffer;
3329         int len = 0, size;
3330
3331         if (unlikely(tracing_selftest_running || tracing_disabled))
3332                 return 0;
3333
3334         /* Don't pollute graph traces with trace_vprintk internals */
3335         pause_graph_tracing();
3336
3337         trace_ctx = tracing_gen_ctx();
3338         preempt_disable_notrace();
3339
3340         tbuffer = get_trace_buf();
3341         if (!tbuffer) {
3342                 len = 0;
3343                 goto out_nobuffer;
3344         }
3345
3346         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3347
3348         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3349                 goto out_put;
3350
3351         size = sizeof(*entry) + sizeof(u32) * len;
3352         buffer = tr->array_buffer.buffer;
3353         ring_buffer_nest_start(buffer);
3354         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3355                                             trace_ctx);
3356         if (!event)
3357                 goto out;
3358         entry = ring_buffer_event_data(event);
3359         entry->ip                       = ip;
3360         entry->fmt                      = fmt;
3361
3362         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3363         if (!call_filter_check_discard(call, entry, buffer, event)) {
3364                 __buffer_unlock_commit(buffer, event);
3365                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3366         }
3367
3368 out:
3369         ring_buffer_nest_end(buffer);
3370 out_put:
3371         put_trace_buf();
3372
3373 out_nobuffer:
3374         preempt_enable_notrace();
3375         unpause_graph_tracing();
3376
3377         return len;
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vbprintk);
3380
3381 __printf(3, 0)
3382 static int
3383 __trace_array_vprintk(struct trace_buffer *buffer,
3384                       unsigned long ip, const char *fmt, va_list args)
3385 {
3386         struct trace_event_call *call = &event_print;
3387         struct ring_buffer_event *event;
3388         int len = 0, size;
3389         struct print_entry *entry;
3390         unsigned int trace_ctx;
3391         char *tbuffer;
3392
3393         if (tracing_disabled || tracing_selftest_running)
3394                 return 0;
3395
3396         /* Don't pollute graph traces with trace_vprintk internals */
3397         pause_graph_tracing();
3398
3399         trace_ctx = tracing_gen_ctx();
3400         preempt_disable_notrace();
3401
3402
3403         tbuffer = get_trace_buf();
3404         if (!tbuffer) {
3405                 len = 0;
3406                 goto out_nobuffer;
3407         }
3408
3409         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3410
3411         size = sizeof(*entry) + len + 1;
3412         ring_buffer_nest_start(buffer);
3413         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3414                                             trace_ctx);
3415         if (!event)
3416                 goto out;
3417         entry = ring_buffer_event_data(event);
3418         entry->ip = ip;
3419
3420         memcpy(&entry->buf, tbuffer, len + 1);
3421         if (!call_filter_check_discard(call, entry, buffer, event)) {
3422                 __buffer_unlock_commit(buffer, event);
3423                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3424         }
3425
3426 out:
3427         ring_buffer_nest_end(buffer);
3428         put_trace_buf();
3429
3430 out_nobuffer:
3431         preempt_enable_notrace();
3432         unpause_graph_tracing();
3433
3434         return len;
3435 }
3436
3437 __printf(3, 0)
3438 int trace_array_vprintk(struct trace_array *tr,
3439                         unsigned long ip, const char *fmt, va_list args)
3440 {
3441         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442 }
3443
3444 /**
3445  * trace_array_printk - Print a message to a specific instance
3446  * @tr: The instance trace_array descriptor
3447  * @ip: The instruction pointer that this is called from.
3448  * @fmt: The format to print (printf format)
3449  *
3450  * If a subsystem sets up its own instance, they have the right to
3451  * printk strings into their tracing instance buffer using this
3452  * function. Note, this function will not write into the top level
3453  * buffer (use trace_printk() for that), as writing into the top level
3454  * buffer should only have events that can be individually disabled.
3455  * trace_printk() is only used for debugging a kernel, and should not
3456  * be ever incorporated in normal use.
3457  *
3458  * trace_array_printk() can be used, as it will not add noise to the
3459  * top level tracing buffer.
3460  *
3461  * Note, trace_array_init_printk() must be called on @tr before this
3462  * can be used.
3463  */
3464 __printf(3, 0)
3465 int trace_array_printk(struct trace_array *tr,
3466                        unsigned long ip, const char *fmt, ...)
3467 {
3468         int ret;
3469         va_list ap;
3470
3471         if (!tr)
3472                 return -ENOENT;
3473
3474         /* This is only allowed for created instances */
3475         if (tr == &global_trace)
3476                 return 0;
3477
3478         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3479                 return 0;
3480
3481         va_start(ap, fmt);
3482         ret = trace_array_vprintk(tr, ip, fmt, ap);
3483         va_end(ap);
3484         return ret;
3485 }
3486 EXPORT_SYMBOL_GPL(trace_array_printk);
3487
3488 /**
3489  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3490  * @tr: The trace array to initialize the buffers for
3491  *
3492  * As trace_array_printk() only writes into instances, they are OK to
3493  * have in the kernel (unlike trace_printk()). This needs to be called
3494  * before trace_array_printk() can be used on a trace_array.
3495  */
3496 int trace_array_init_printk(struct trace_array *tr)
3497 {
3498         if (!tr)
3499                 return -ENOENT;
3500
3501         /* This is only allowed for created instances */
3502         if (tr == &global_trace)
3503                 return -EINVAL;
3504
3505         return alloc_percpu_trace_buffer();
3506 }
3507 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3508
3509 __printf(3, 4)
3510 int trace_array_printk_buf(struct trace_buffer *buffer,
3511                            unsigned long ip, const char *fmt, ...)
3512 {
3513         int ret;
3514         va_list ap;
3515
3516         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3517                 return 0;
3518
3519         va_start(ap, fmt);
3520         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3521         va_end(ap);
3522         return ret;
3523 }
3524
3525 __printf(2, 0)
3526 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3527 {
3528         return trace_array_vprintk(&global_trace, ip, fmt, args);
3529 }
3530 EXPORT_SYMBOL_GPL(trace_vprintk);
3531
3532 static void trace_iterator_increment(struct trace_iterator *iter)
3533 {
3534         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3535
3536         iter->idx++;
3537         if (buf_iter)
3538                 ring_buffer_iter_advance(buf_iter);
3539 }
3540
3541 static struct trace_entry *
3542 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3543                 unsigned long *lost_events)
3544 {
3545         struct ring_buffer_event *event;
3546         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3547
3548         if (buf_iter) {
3549                 event = ring_buffer_iter_peek(buf_iter, ts);
3550                 if (lost_events)
3551                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3552                                 (unsigned long)-1 : 0;
3553         } else {
3554                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3555                                          lost_events);
3556         }
3557
3558         if (event) {
3559                 iter->ent_size = ring_buffer_event_length(event);
3560                 return ring_buffer_event_data(event);
3561         }
3562         iter->ent_size = 0;
3563         return NULL;
3564 }
3565
3566 static struct trace_entry *
3567 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3568                   unsigned long *missing_events, u64 *ent_ts)
3569 {
3570         struct trace_buffer *buffer = iter->array_buffer->buffer;
3571         struct trace_entry *ent, *next = NULL;
3572         unsigned long lost_events = 0, next_lost = 0;
3573         int cpu_file = iter->cpu_file;
3574         u64 next_ts = 0, ts;
3575         int next_cpu = -1;
3576         int next_size = 0;
3577         int cpu;
3578
3579         /*
3580          * If we are in a per_cpu trace file, don't bother by iterating over
3581          * all cpu and peek directly.
3582          */
3583         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3584                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3585                         return NULL;
3586                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3587                 if (ent_cpu)
3588                         *ent_cpu = cpu_file;
3589
3590                 return ent;
3591         }
3592
3593         for_each_tracing_cpu(cpu) {
3594
3595                 if (ring_buffer_empty_cpu(buffer, cpu))
3596                         continue;
3597
3598                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3599
3600                 /*
3601                  * Pick the entry with the smallest timestamp:
3602                  */
3603                 if (ent && (!next || ts < next_ts)) {
3604                         next = ent;
3605                         next_cpu = cpu;
3606                         next_ts = ts;
3607                         next_lost = lost_events;
3608                         next_size = iter->ent_size;
3609                 }
3610         }
3611
3612         iter->ent_size = next_size;
3613
3614         if (ent_cpu)
3615                 *ent_cpu = next_cpu;
3616
3617         if (ent_ts)
3618                 *ent_ts = next_ts;
3619
3620         if (missing_events)
3621                 *missing_events = next_lost;
3622
3623         return next;
3624 }
3625
3626 #define STATIC_FMT_BUF_SIZE     128
3627 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3628
3629 static char *trace_iter_expand_format(struct trace_iterator *iter)
3630 {
3631         char *tmp;
3632
3633         /*
3634          * iter->tr is NULL when used with tp_printk, which makes
3635          * this get called where it is not safe to call krealloc().
3636          */
3637         if (!iter->tr || iter->fmt == static_fmt_buf)
3638                 return NULL;
3639
3640         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3641                        GFP_KERNEL);
3642         if (tmp) {
3643                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3644                 iter->fmt = tmp;
3645         }
3646
3647         return tmp;
3648 }
3649
3650 /* Returns true if the string is safe to dereference from an event */
3651 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3652 {
3653         unsigned long addr = (unsigned long)str;
3654         struct trace_event *trace_event;
3655         struct trace_event_call *event;
3656
3657         /* OK if part of the event data */
3658         if ((addr >= (unsigned long)iter->ent) &&
3659             (addr < (unsigned long)iter->ent + iter->ent_size))
3660                 return true;
3661
3662         /* OK if part of the temp seq buffer */
3663         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3664             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3665                 return true;
3666
3667         /* Core rodata can not be freed */
3668         if (is_kernel_rodata(addr))
3669                 return true;
3670
3671         if (trace_is_tracepoint_string(str))
3672                 return true;
3673
3674         /*
3675          * Now this could be a module event, referencing core module
3676          * data, which is OK.
3677          */
3678         if (!iter->ent)
3679                 return false;
3680
3681         trace_event = ftrace_find_event(iter->ent->type);
3682         if (!trace_event)
3683                 return false;
3684
3685         event = container_of(trace_event, struct trace_event_call, event);
3686         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3687                 return false;
3688
3689         /* Would rather have rodata, but this will suffice */
3690         if (within_module_core(addr, event->module))
3691                 return true;
3692
3693         return false;
3694 }
3695
3696 static const char *show_buffer(struct trace_seq *s)
3697 {
3698         struct seq_buf *seq = &s->seq;
3699
3700         seq_buf_terminate(seq);
3701
3702         return seq->buffer;
3703 }
3704
3705 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3706
3707 static int test_can_verify_check(const char *fmt, ...)
3708 {
3709         char buf[16];
3710         va_list ap;
3711         int ret;
3712
3713         /*
3714          * The verifier is dependent on vsnprintf() modifies the va_list
3715          * passed to it, where it is sent as a reference. Some architectures
3716          * (like x86_32) passes it by value, which means that vsnprintf()
3717          * does not modify the va_list passed to it, and the verifier
3718          * would then need to be able to understand all the values that
3719          * vsnprintf can use. If it is passed by value, then the verifier
3720          * is disabled.
3721          */
3722         va_start(ap, fmt);
3723         vsnprintf(buf, 16, "%d", ap);
3724         ret = va_arg(ap, int);
3725         va_end(ap);
3726
3727         return ret;
3728 }
3729
3730 static void test_can_verify(void)
3731 {
3732         if (!test_can_verify_check("%d %d", 0, 1)) {
3733                 pr_info("trace event string verifier disabled\n");
3734                 static_branch_inc(&trace_no_verify);
3735         }
3736 }
3737
3738 /**
3739  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3740  * @iter: The iterator that holds the seq buffer and the event being printed
3741  * @fmt: The format used to print the event
3742  * @ap: The va_list holding the data to print from @fmt.
3743  *
3744  * This writes the data into the @iter->seq buffer using the data from
3745  * @fmt and @ap. If the format has a %s, then the source of the string
3746  * is examined to make sure it is safe to print, otherwise it will
3747  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3748  * pointer.
3749  */
3750 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3751                          va_list ap)
3752 {
3753         const char *p = fmt;
3754         const char *str;
3755         int i, j;
3756
3757         if (WARN_ON_ONCE(!fmt))
3758                 return;
3759
3760         if (static_branch_unlikely(&trace_no_verify))
3761                 goto print;
3762
3763         /* Don't bother checking when doing a ftrace_dump() */
3764         if (iter->fmt == static_fmt_buf)
3765                 goto print;
3766
3767         while (*p) {
3768                 bool star = false;
3769                 int len = 0;
3770
3771                 j = 0;
3772
3773                 /* We only care about %s and variants */
3774                 for (i = 0; p[i]; i++) {
3775                         if (i + 1 >= iter->fmt_size) {
3776                                 /*
3777                                  * If we can't expand the copy buffer,
3778                                  * just print it.
3779                                  */
3780                                 if (!trace_iter_expand_format(iter))
3781                                         goto print;
3782                         }
3783
3784                         if (p[i] == '\\' && p[i+1]) {
3785                                 i++;
3786                                 continue;
3787                         }
3788                         if (p[i] == '%') {
3789                                 /* Need to test cases like %08.*s */
3790                                 for (j = 1; p[i+j]; j++) {
3791                                         if (isdigit(p[i+j]) ||
3792                                             p[i+j] == '.')
3793                                                 continue;
3794                                         if (p[i+j] == '*') {
3795                                                 star = true;
3796                                                 continue;
3797                                         }
3798                                         break;
3799                                 }
3800                                 if (p[i+j] == 's')
3801                                         break;
3802                                 star = false;
3803                         }
3804                         j = 0;
3805                 }
3806                 /* If no %s found then just print normally */
3807                 if (!p[i])
3808                         break;
3809
3810                 /* Copy up to the %s, and print that */
3811                 strncpy(iter->fmt, p, i);
3812                 iter->fmt[i] = '\0';
3813                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3814
3815                 /*
3816                  * If iter->seq is full, the above call no longer guarantees
3817                  * that ap is in sync with fmt processing, and further calls
3818                  * to va_arg() can return wrong positional arguments.
3819                  *
3820                  * Ensure that ap is no longer used in this case.
3821                  */
3822                 if (iter->seq.full) {
3823                         p = "";
3824                         break;
3825                 }
3826
3827                 if (star)
3828                         len = va_arg(ap, int);
3829
3830                 /* The ap now points to the string data of the %s */
3831                 str = va_arg(ap, const char *);
3832
3833                 /*
3834                  * If you hit this warning, it is likely that the
3835                  * trace event in question used %s on a string that
3836                  * was saved at the time of the event, but may not be
3837                  * around when the trace is read. Use __string(),
3838                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3839                  * instead. See samples/trace_events/trace-events-sample.h
3840                  * for reference.
3841                  */
3842                 if (WARN_ONCE(!trace_safe_str(iter, str),
3843                               "fmt: '%s' current_buffer: '%s'",
3844                               fmt, show_buffer(&iter->seq))) {
3845                         int ret;
3846
3847                         /* Try to safely read the string */
3848                         if (star) {
3849                                 if (len + 1 > iter->fmt_size)
3850                                         len = iter->fmt_size - 1;
3851                                 if (len < 0)
3852                                         len = 0;
3853                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3854                                 iter->fmt[len] = 0;
3855                                 star = false;
3856                         } else {
3857                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3858                                                                   iter->fmt_size);
3859                         }
3860                         if (ret < 0)
3861                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3862                         else
3863                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3864                                                  str, iter->fmt);
3865                         str = "[UNSAFE-MEMORY]";
3866                         strcpy(iter->fmt, "%s");
3867                 } else {
3868                         strncpy(iter->fmt, p + i, j + 1);
3869                         iter->fmt[j+1] = '\0';
3870                 }
3871                 if (star)
3872                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3873                 else
3874                         trace_seq_printf(&iter->seq, iter->fmt, str);
3875
3876                 p += i + j + 1;
3877         }
3878  print:
3879         if (*p)
3880                 trace_seq_vprintf(&iter->seq, p, ap);
3881 }
3882
3883 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3884 {
3885         const char *p, *new_fmt;
3886         char *q;
3887
3888         if (WARN_ON_ONCE(!fmt))
3889                 return fmt;
3890
3891         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3892                 return fmt;
3893
3894         p = fmt;
3895         new_fmt = q = iter->fmt;
3896         while (*p) {
3897                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3898                         if (!trace_iter_expand_format(iter))
3899                                 return fmt;
3900
3901                         q += iter->fmt - new_fmt;
3902                         new_fmt = iter->fmt;
3903                 }
3904
3905                 *q++ = *p++;
3906
3907                 /* Replace %p with %px */
3908                 if (p[-1] == '%') {
3909                         if (p[0] == '%') {
3910                                 *q++ = *p++;
3911                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3912                                 *q++ = *p++;
3913                                 *q++ = 'x';
3914                         }
3915                 }
3916         }
3917         *q = '\0';
3918
3919         return new_fmt;
3920 }
3921
3922 #define STATIC_TEMP_BUF_SIZE    128
3923 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3924
3925 /* Find the next real entry, without updating the iterator itself */
3926 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3927                                           int *ent_cpu, u64 *ent_ts)
3928 {
3929         /* __find_next_entry will reset ent_size */
3930         int ent_size = iter->ent_size;
3931         struct trace_entry *entry;
3932
3933         /*
3934          * If called from ftrace_dump(), then the iter->temp buffer
3935          * will be the static_temp_buf and not created from kmalloc.
3936          * If the entry size is greater than the buffer, we can
3937          * not save it. Just return NULL in that case. This is only
3938          * used to add markers when two consecutive events' time
3939          * stamps have a large delta. See trace_print_lat_context()
3940          */
3941         if (iter->temp == static_temp_buf &&
3942             STATIC_TEMP_BUF_SIZE < ent_size)
3943                 return NULL;
3944
3945         /*
3946          * The __find_next_entry() may call peek_next_entry(), which may
3947          * call ring_buffer_peek() that may make the contents of iter->ent
3948          * undefined. Need to copy iter->ent now.
3949          */
3950         if (iter->ent && iter->ent != iter->temp) {
3951                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3952                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3953                         void *temp;
3954                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3955                         if (!temp)
3956                                 return NULL;
3957                         kfree(iter->temp);
3958                         iter->temp = temp;
3959                         iter->temp_size = iter->ent_size;
3960                 }
3961                 memcpy(iter->temp, iter->ent, iter->ent_size);
3962                 iter->ent = iter->temp;
3963         }
3964         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3965         /* Put back the original ent_size */
3966         iter->ent_size = ent_size;
3967
3968         return entry;
3969 }
3970
3971 /* Find the next real entry, and increment the iterator to the next entry */
3972 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3973 {
3974         iter->ent = __find_next_entry(iter, &iter->cpu,
3975                                       &iter->lost_events, &iter->ts);
3976
3977         if (iter->ent)
3978                 trace_iterator_increment(iter);
3979
3980         return iter->ent ? iter : NULL;
3981 }
3982
3983 static void trace_consume(struct trace_iterator *iter)
3984 {
3985         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3986                             &iter->lost_events);
3987 }
3988
3989 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3990 {
3991         struct trace_iterator *iter = m->private;
3992         int i = (int)*pos;
3993         void *ent;
3994
3995         WARN_ON_ONCE(iter->leftover);
3996
3997         (*pos)++;
3998
3999         /* can't go backwards */
4000         if (iter->idx > i)
4001                 return NULL;
4002
4003         if (iter->idx < 0)
4004                 ent = trace_find_next_entry_inc(iter);
4005         else
4006                 ent = iter;
4007
4008         while (ent && iter->idx < i)
4009                 ent = trace_find_next_entry_inc(iter);
4010
4011         iter->pos = *pos;
4012
4013         return ent;
4014 }
4015
4016 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4017 {
4018         struct ring_buffer_iter *buf_iter;
4019         unsigned long entries = 0;
4020         u64 ts;
4021
4022         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4023
4024         buf_iter = trace_buffer_iter(iter, cpu);
4025         if (!buf_iter)
4026                 return;
4027
4028         ring_buffer_iter_reset(buf_iter);
4029
4030         /*
4031          * We could have the case with the max latency tracers
4032          * that a reset never took place on a cpu. This is evident
4033          * by the timestamp being before the start of the buffer.
4034          */
4035         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4036                 if (ts >= iter->array_buffer->time_start)
4037                         break;
4038                 entries++;
4039                 ring_buffer_iter_advance(buf_iter);
4040         }
4041
4042         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4043 }
4044
4045 /*
4046  * The current tracer is copied to avoid a global locking
4047  * all around.
4048  */
4049 static void *s_start(struct seq_file *m, loff_t *pos)
4050 {
4051         struct trace_iterator *iter = m->private;
4052         struct trace_array *tr = iter->tr;
4053         int cpu_file = iter->cpu_file;
4054         void *p = NULL;
4055         loff_t l = 0;
4056         int cpu;
4057
4058         /*
4059          * copy the tracer to avoid using a global lock all around.
4060          * iter->trace is a copy of current_trace, the pointer to the
4061          * name may be used instead of a strcmp(), as iter->trace->name
4062          * will point to the same string as current_trace->name.
4063          */
4064         mutex_lock(&trace_types_lock);
4065         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4066                 *iter->trace = *tr->current_trace;
4067         mutex_unlock(&trace_types_lock);
4068
4069 #ifdef CONFIG_TRACER_MAX_TRACE
4070         if (iter->snapshot && iter->trace->use_max_tr)
4071                 return ERR_PTR(-EBUSY);
4072 #endif
4073
4074         if (*pos != iter->pos) {
4075                 iter->ent = NULL;
4076                 iter->cpu = 0;
4077                 iter->idx = -1;
4078
4079                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4080                         for_each_tracing_cpu(cpu)
4081                                 tracing_iter_reset(iter, cpu);
4082                 } else
4083                         tracing_iter_reset(iter, cpu_file);
4084
4085                 iter->leftover = 0;
4086                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4087                         ;
4088
4089         } else {
4090                 /*
4091                  * If we overflowed the seq_file before, then we want
4092                  * to just reuse the trace_seq buffer again.
4093                  */
4094                 if (iter->leftover)
4095                         p = iter;
4096                 else {
4097                         l = *pos - 1;
4098                         p = s_next(m, p, &l);
4099                 }
4100         }
4101
4102         trace_event_read_lock();
4103         trace_access_lock(cpu_file);
4104         return p;
4105 }
4106
4107 static void s_stop(struct seq_file *m, void *p)
4108 {
4109         struct trace_iterator *iter = m->private;
4110
4111 #ifdef CONFIG_TRACER_MAX_TRACE
4112         if (iter->snapshot && iter->trace->use_max_tr)
4113                 return;
4114 #endif
4115
4116         trace_access_unlock(iter->cpu_file);
4117         trace_event_read_unlock();
4118 }
4119
4120 static void
4121 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4122                       unsigned long *entries, int cpu)
4123 {
4124         unsigned long count;
4125
4126         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4127         /*
4128          * If this buffer has skipped entries, then we hold all
4129          * entries for the trace and we need to ignore the
4130          * ones before the time stamp.
4131          */
4132         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4133                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4134                 /* total is the same as the entries */
4135                 *total = count;
4136         } else
4137                 *total = count +
4138                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4139         *entries = count;
4140 }
4141
4142 static void
4143 get_total_entries(struct array_buffer *buf,
4144                   unsigned long *total, unsigned long *entries)
4145 {
4146         unsigned long t, e;
4147         int cpu;
4148
4149         *total = 0;
4150         *entries = 0;
4151
4152         for_each_tracing_cpu(cpu) {
4153                 get_total_entries_cpu(buf, &t, &e, cpu);
4154                 *total += t;
4155                 *entries += e;
4156         }
4157 }
4158
4159 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4160 {
4161         unsigned long total, entries;
4162
4163         if (!tr)
4164                 tr = &global_trace;
4165
4166         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4167
4168         return entries;
4169 }
4170
4171 unsigned long trace_total_entries(struct trace_array *tr)
4172 {
4173         unsigned long total, entries;
4174
4175         if (!tr)
4176                 tr = &global_trace;
4177
4178         get_total_entries(&tr->array_buffer, &total, &entries);
4179
4180         return entries;
4181 }
4182
4183 static void print_lat_help_header(struct seq_file *m)
4184 {
4185         seq_puts(m, "#                    _------=> CPU#            \n"
4186                     "#                   / _-----=> irqs-off        \n"
4187                     "#                  | / _----=> need-resched    \n"
4188                     "#                  || / _---=> hardirq/softirq \n"
4189                     "#                  ||| / _--=> preempt-depth   \n"
4190                     "#                  |||| / _-=> migrate-disable \n"
4191                     "#                  ||||| /     delay           \n"
4192                     "#  cmd     pid     |||||| time  |   caller     \n"
4193                     "#     \\   /        ||||||  \\    |    /       \n");
4194 }
4195
4196 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4197 {
4198         unsigned long total;
4199         unsigned long entries;
4200
4201         get_total_entries(buf, &total, &entries);
4202         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4203                    entries, total, num_online_cpus());
4204         seq_puts(m, "#\n");
4205 }
4206
4207 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4208                                    unsigned int flags)
4209 {
4210         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211
4212         print_event_info(buf, m);
4213
4214         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4215         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4216 }
4217
4218 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4219                                        unsigned int flags)
4220 {
4221         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222         const char *space = "            ";
4223         int prec = tgid ? 12 : 2;
4224
4225         print_event_info(buf, m);
4226
4227         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4228         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4229         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4230         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4231         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4232         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4233         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4235 }
4236
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241         struct array_buffer *buf = iter->array_buffer;
4242         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243         struct tracer *type = iter->trace;
4244         unsigned long entries;
4245         unsigned long total;
4246         const char *name = "preemption";
4247
4248         name = type->name;
4249
4250         get_total_entries(buf, &total, &entries);
4251
4252         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253                    name, UTS_RELEASE);
4254         seq_puts(m, "# -----------------------------------"
4255                  "---------------------------------\n");
4256         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258                    nsecs_to_usecs(data->saved_latency),
4259                    entries,
4260                    total,
4261                    buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263                    "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265                    "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267                    "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269                    "preempt_rt",
4270 #else
4271                    "unknown",
4272 #endif
4273                    /* These are reserved for later use */
4274                    0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276         seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278         seq_puts(m, ")\n");
4279 #endif
4280         seq_puts(m, "#    -----------------\n");
4281         seq_printf(m, "#    | task: %.16s-%d "
4282                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283                    data->comm, data->pid,
4284                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285                    data->policy, data->rt_priority);
4286         seq_puts(m, "#    -----------------\n");
4287
4288         if (data->critical_start) {
4289                 seq_puts(m, "#  => started at: ");
4290                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291                 trace_print_seq(m, &iter->seq);
4292                 seq_puts(m, "\n#  => ended at:   ");
4293                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294                 trace_print_seq(m, &iter->seq);
4295                 seq_puts(m, "\n#\n");
4296         }
4297
4298         seq_puts(m, "#\n");
4299 }
4300
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303         struct trace_seq *s = &iter->seq;
4304         struct trace_array *tr = iter->tr;
4305
4306         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307                 return;
4308
4309         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310                 return;
4311
4312         if (cpumask_available(iter->started) &&
4313             cpumask_test_cpu(iter->cpu, iter->started))
4314                 return;
4315
4316         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317                 return;
4318
4319         if (cpumask_available(iter->started))
4320                 cpumask_set_cpu(iter->cpu, iter->started);
4321
4322         /* Don't print started cpu buffer for the first entry of the trace */
4323         if (iter->idx > 1)
4324                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325                                 iter->cpu);
4326 }
4327
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330         struct trace_array *tr = iter->tr;
4331         struct trace_seq *s = &iter->seq;
4332         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333         struct trace_entry *entry;
4334         struct trace_event *event;
4335
4336         entry = iter->ent;
4337
4338         test_cpu_buff_start(iter);
4339
4340         event = ftrace_find_event(entry->type);
4341
4342         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344                         trace_print_lat_context(iter);
4345                 else
4346                         trace_print_context(iter);
4347         }
4348
4349         if (trace_seq_has_overflowed(s))
4350                 return TRACE_TYPE_PARTIAL_LINE;
4351
4352         if (event)
4353                 return event->funcs->trace(iter, sym_flags, event);
4354
4355         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356
4357         return trace_handle_return(s);
4358 }
4359
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362         struct trace_array *tr = iter->tr;
4363         struct trace_seq *s = &iter->seq;
4364         struct trace_entry *entry;
4365         struct trace_event *event;
4366
4367         entry = iter->ent;
4368
4369         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370                 trace_seq_printf(s, "%d %d %llu ",
4371                                  entry->pid, iter->cpu, iter->ts);
4372
4373         if (trace_seq_has_overflowed(s))
4374                 return TRACE_TYPE_PARTIAL_LINE;
4375
4376         event = ftrace_find_event(entry->type);
4377         if (event)
4378                 return event->funcs->raw(iter, 0, event);
4379
4380         trace_seq_printf(s, "%d ?\n", entry->type);
4381
4382         return trace_handle_return(s);
4383 }
4384
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         unsigned char newline = '\n';
4390         struct trace_entry *entry;
4391         struct trace_event *event;
4392
4393         entry = iter->ent;
4394
4395         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4397                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4399                 if (trace_seq_has_overflowed(s))
4400                         return TRACE_TYPE_PARTIAL_LINE;
4401         }
4402
4403         event = ftrace_find_event(entry->type);
4404         if (event) {
4405                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406                 if (ret != TRACE_TYPE_HANDLED)
4407                         return ret;
4408         }
4409
4410         SEQ_PUT_FIELD(s, newline);
4411
4412         return trace_handle_return(s);
4413 }
4414
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417         struct trace_array *tr = iter->tr;
4418         struct trace_seq *s = &iter->seq;
4419         struct trace_entry *entry;
4420         struct trace_event *event;
4421
4422         entry = iter->ent;
4423
4424         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425                 SEQ_PUT_FIELD(s, entry->pid);
4426                 SEQ_PUT_FIELD(s, iter->cpu);
4427                 SEQ_PUT_FIELD(s, iter->ts);
4428                 if (trace_seq_has_overflowed(s))
4429                         return TRACE_TYPE_PARTIAL_LINE;
4430         }
4431
4432         event = ftrace_find_event(entry->type);
4433         return event ? event->funcs->binary(iter, 0, event) :
4434                 TRACE_TYPE_HANDLED;
4435 }
4436
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439         struct ring_buffer_iter *buf_iter;
4440         int cpu;
4441
4442         /* If we are looking at one CPU buffer, only check that one */
4443         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444                 cpu = iter->cpu_file;
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453                 return 1;
4454         }
4455
4456         for_each_tracing_cpu(cpu) {
4457                 buf_iter = trace_buffer_iter(iter, cpu);
4458                 if (buf_iter) {
4459                         if (!ring_buffer_iter_empty(buf_iter))
4460                                 return 0;
4461                 } else {
4462                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                 return 0;
4464                 }
4465         }
4466
4467         return 1;
4468 }
4469
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473         struct trace_array *tr = iter->tr;
4474         unsigned long trace_flags = tr->trace_flags;
4475         enum print_line_t ret;
4476
4477         if (iter->lost_events) {
4478                 if (iter->lost_events == (unsigned long)-1)
4479                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480                                          iter->cpu);
4481                 else
4482                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483                                          iter->cpu, iter->lost_events);
4484                 if (trace_seq_has_overflowed(&iter->seq))
4485                         return TRACE_TYPE_PARTIAL_LINE;
4486         }
4487
4488         if (iter->trace && iter->trace->print_line) {
4489                 ret = iter->trace->print_line(iter);
4490                 if (ret != TRACE_TYPE_UNHANDLED)
4491                         return ret;
4492         }
4493
4494         if (iter->ent->type == TRACE_BPUTS &&
4495                         trace_flags & TRACE_ITER_PRINTK &&
4496                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497                 return trace_print_bputs_msg_only(iter);
4498
4499         if (iter->ent->type == TRACE_BPRINT &&
4500                         trace_flags & TRACE_ITER_PRINTK &&
4501                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502                 return trace_print_bprintk_msg_only(iter);
4503
4504         if (iter->ent->type == TRACE_PRINT &&
4505                         trace_flags & TRACE_ITER_PRINTK &&
4506                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507                 return trace_print_printk_msg_only(iter);
4508
4509         if (trace_flags & TRACE_ITER_BIN)
4510                 return print_bin_fmt(iter);
4511
4512         if (trace_flags & TRACE_ITER_HEX)
4513                 return print_hex_fmt(iter);
4514
4515         if (trace_flags & TRACE_ITER_RAW)
4516                 return print_raw_fmt(iter);
4517
4518         return print_trace_fmt(iter);
4519 }
4520
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523         struct trace_iterator *iter = m->private;
4524         struct trace_array *tr = iter->tr;
4525
4526         /* print nothing if the buffers are empty */
4527         if (trace_empty(iter))
4528                 return;
4529
4530         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531                 print_trace_header(m, iter);
4532
4533         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534                 print_lat_help_header(m);
4535 }
4536
4537 void trace_default_header(struct seq_file *m)
4538 {
4539         struct trace_iterator *iter = m->private;
4540         struct trace_array *tr = iter->tr;
4541         unsigned long trace_flags = tr->trace_flags;
4542
4543         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544                 return;
4545
4546         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547                 /* print nothing if the buffers are empty */
4548                 if (trace_empty(iter))
4549                         return;
4550                 print_trace_header(m, iter);
4551                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4552                         print_lat_help_header(m);
4553         } else {
4554                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4556                                 print_func_help_header_irq(iter->array_buffer,
4557                                                            m, trace_flags);
4558                         else
4559                                 print_func_help_header(iter->array_buffer, m,
4560                                                        trace_flags);
4561                 }
4562         }
4563 }
4564
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567         if (!ftrace_is_dead())
4568                 return;
4569         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578                     "#                      Takes a snapshot of the main buffer.\n"
4579                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580                     "#                      (Doesn't have to be '2' works with any number that\n"
4581                     "#                       is not a '0' or '1')\n");
4582 }
4583
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592                     "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595                     "#                      (Doesn't have to be '2' works with any number that\n"
4596                     "#                       is not a '0' or '1')\n");
4597 }
4598
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601         if (iter->tr->allocated_snapshot)
4602                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603         else
4604                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605
4606         seq_puts(m, "# Snapshot commands:\n");
4607         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608                 show_snapshot_main_help(m);
4609         else
4610                 show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619         struct trace_iterator *iter = v;
4620         int ret;
4621
4622         if (iter->ent == NULL) {
4623                 if (iter->tr) {
4624                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625                         seq_puts(m, "#\n");
4626                         test_ftrace_alive(m);
4627                 }
4628                 if (iter->snapshot && trace_empty(iter))
4629                         print_snapshot_help(m, iter);
4630                 else if (iter->trace && iter->trace->print_header)
4631                         iter->trace->print_header(m);
4632                 else
4633                         trace_default_header(m);
4634
4635         } else if (iter->leftover) {
4636                 /*
4637                  * If we filled the seq_file buffer earlier, we
4638                  * want to just show it now.
4639                  */
4640                 ret = trace_print_seq(m, &iter->seq);
4641
4642                 /* ret should this time be zero, but you never know */
4643                 iter->leftover = ret;
4644
4645         } else {
4646                 print_trace_line(iter);
4647                 ret = trace_print_seq(m, &iter->seq);
4648                 /*
4649                  * If we overflow the seq_file buffer, then it will
4650                  * ask us for this data again at start up.
4651                  * Use that instead.
4652                  *  ret is 0 if seq_file write succeeded.
4653                  *        -1 otherwise.
4654                  */
4655                 iter->leftover = ret;
4656         }
4657
4658         return 0;
4659 }
4660
4661 /*
4662  * Should be used after trace_array_get(), trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667         if (inode->i_cdev) /* See trace_create_cpu_file() */
4668                 return (long)inode->i_cdev - 1;
4669         return RING_BUFFER_ALL_CPUS;
4670 }
4671
4672 static const struct seq_operations tracer_seq_ops = {
4673         .start          = s_start,
4674         .next           = s_next,
4675         .stop           = s_stop,
4676         .show           = s_show,
4677 };
4678
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682         struct trace_array *tr = inode->i_private;
4683         struct trace_iterator *iter;
4684         int cpu;
4685
4686         if (tracing_disabled)
4687                 return ERR_PTR(-ENODEV);
4688
4689         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690         if (!iter)
4691                 return ERR_PTR(-ENOMEM);
4692
4693         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694                                     GFP_KERNEL);
4695         if (!iter->buffer_iter)
4696                 goto release;
4697
4698         /*
4699          * trace_find_next_entry() may need to save off iter->ent.
4700          * It will place it into the iter->temp buffer. As most
4701          * events are less than 128, allocate a buffer of that size.
4702          * If one is greater, then trace_find_next_entry() will
4703          * allocate a new buffer to adjust for the bigger iter->ent.
4704          * It's not critical if it fails to get allocated here.
4705          */
4706         iter->temp = kmalloc(128, GFP_KERNEL);
4707         if (iter->temp)
4708                 iter->temp_size = 128;
4709
4710         /*
4711          * trace_event_printf() may need to modify given format
4712          * string to replace %p with %px so that it shows real address
4713          * instead of hash value. However, that is only for the event
4714          * tracing, other tracer may not need. Defer the allocation
4715          * until it is needed.
4716          */
4717         iter->fmt = NULL;
4718         iter->fmt_size = 0;
4719
4720         /*
4721          * We make a copy of the current tracer to avoid concurrent
4722          * changes on it while we are reading.
4723          */
4724         mutex_lock(&trace_types_lock);
4725         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726         if (!iter->trace)
4727                 goto fail;
4728
4729         *iter->trace = *tr->current_trace;
4730
4731         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732                 goto fail;
4733
4734         iter->tr = tr;
4735
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737         /* Currently only the top directory has a snapshot */
4738         if (tr->current_trace->print_max || snapshot)
4739                 iter->array_buffer = &tr->max_buffer;
4740         else
4741 #endif
4742                 iter->array_buffer = &tr->array_buffer;
4743         iter->snapshot = snapshot;
4744         iter->pos = -1;
4745         iter->cpu_file = tracing_get_cpu(inode);
4746         mutex_init(&iter->mutex);
4747
4748         /* Notify the tracer early; before we stop tracing. */
4749         if (iter->trace->open)
4750                 iter->trace->open(iter);
4751
4752         /* Annotate start of buffers if we had overruns */
4753         if (ring_buffer_overruns(iter->array_buffer->buffer))
4754                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755
4756         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757         if (trace_clocks[tr->clock_id].in_ns)
4758                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759
4760         /*
4761          * If pause-on-trace is enabled, then stop the trace while
4762          * dumping, unless this is the "snapshot" file
4763          */
4764         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765                 tracing_stop_tr(tr);
4766
4767         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768                 for_each_tracing_cpu(cpu) {
4769                         iter->buffer_iter[cpu] =
4770                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4771                                                          cpu, GFP_KERNEL);
4772                 }
4773                 ring_buffer_read_prepare_sync();
4774                 for_each_tracing_cpu(cpu) {
4775                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4776                         tracing_iter_reset(iter, cpu);
4777                 }
4778         } else {
4779                 cpu = iter->cpu_file;
4780                 iter->buffer_iter[cpu] =
4781                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                  cpu, GFP_KERNEL);
4783                 ring_buffer_read_prepare_sync();
4784                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4785                 tracing_iter_reset(iter, cpu);
4786         }
4787
4788         mutex_unlock(&trace_types_lock);
4789
4790         return iter;
4791
4792  fail:
4793         mutex_unlock(&trace_types_lock);
4794         kfree(iter->trace);
4795         kfree(iter->temp);
4796         kfree(iter->buffer_iter);
4797 release:
4798         seq_release_private(inode, file);
4799         return ERR_PTR(-ENOMEM);
4800 }
4801
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804         int ret;
4805
4806         ret = tracing_check_open_get_tr(NULL);
4807         if (ret)
4808                 return ret;
4809
4810         filp->private_data = inode->i_private;
4811         return 0;
4812 }
4813
4814 bool tracing_is_disabled(void)
4815 {
4816         return (tracing_disabled) ? true: false;
4817 }
4818
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825         struct trace_array *tr = inode->i_private;
4826         int ret;
4827
4828         ret = tracing_check_open_get_tr(tr);
4829         if (ret)
4830                 return ret;
4831
4832         filp->private_data = inode->i_private;
4833
4834         return 0;
4835 }
4836
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839         struct trace_array *tr = inode->i_private;
4840         struct seq_file *m = file->private_data;
4841         struct trace_iterator *iter;
4842         int cpu;
4843
4844         if (!(file->f_mode & FMODE_READ)) {
4845                 trace_array_put(tr);
4846                 return 0;
4847         }
4848
4849         /* Writes do not use seq_file */
4850         iter = m->private;
4851         mutex_lock(&trace_types_lock);
4852
4853         for_each_tracing_cpu(cpu) {
4854                 if (iter->buffer_iter[cpu])
4855                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856         }
4857
4858         if (iter->trace && iter->trace->close)
4859                 iter->trace->close(iter);
4860
4861         if (!iter->snapshot && tr->stop_count)
4862                 /* reenable tracing if it was previously enabled */
4863                 tracing_start_tr(tr);
4864
4865         __trace_array_put(tr);
4866
4867         mutex_unlock(&trace_types_lock);
4868
4869         mutex_destroy(&iter->mutex);
4870         free_cpumask_var(iter->started);
4871         kfree(iter->fmt);
4872         kfree(iter->temp);
4873         kfree(iter->trace);
4874         kfree(iter->buffer_iter);
4875         seq_release_private(inode, file);
4876
4877         return 0;
4878 }
4879
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882         struct trace_array *tr = inode->i_private;
4883
4884         trace_array_put(tr);
4885         return 0;
4886 }
4887
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890         struct trace_array *tr = inode->i_private;
4891
4892         trace_array_put(tr);
4893
4894         return single_release(inode, file);
4895 }
4896
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899         struct trace_array *tr = inode->i_private;
4900         struct trace_iterator *iter;
4901         int ret;
4902
4903         ret = tracing_check_open_get_tr(tr);
4904         if (ret)
4905                 return ret;
4906
4907         /* If this file was open for write, then erase contents */
4908         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909                 int cpu = tracing_get_cpu(inode);
4910                 struct array_buffer *trace_buf = &tr->array_buffer;
4911
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913                 if (tr->current_trace->print_max)
4914                         trace_buf = &tr->max_buffer;
4915 #endif
4916
4917                 if (cpu == RING_BUFFER_ALL_CPUS)
4918                         tracing_reset_online_cpus(trace_buf);
4919                 else
4920                         tracing_reset_cpu(trace_buf, cpu);
4921         }
4922
4923         if (file->f_mode & FMODE_READ) {
4924                 iter = __tracing_open(inode, file, false);
4925                 if (IS_ERR(iter))
4926                         ret = PTR_ERR(iter);
4927                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929         }
4930
4931         if (ret < 0)
4932                 trace_array_put(tr);
4933
4934         return ret;
4935 }
4936
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952         while (t && !trace_ok_for_array(t, tr))
4953                 t = t->next;
4954
4955         return t;
4956 }
4957
4958 static void *
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961         struct trace_array *tr = m->private;
4962         struct tracer *t = v;
4963
4964         (*pos)++;
4965
4966         if (t)
4967                 t = get_tracer_for_array(tr, t->next);
4968
4969         return t;
4970 }
4971
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974         struct trace_array *tr = m->private;
4975         struct tracer *t;
4976         loff_t l = 0;
4977
4978         mutex_lock(&trace_types_lock);
4979
4980         t = get_tracer_for_array(tr, trace_types);
4981         for (; t && l < *pos; t = t_next(m, t, &l))
4982                         ;
4983
4984         return t;
4985 }
4986
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989         mutex_unlock(&trace_types_lock);
4990 }
4991
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994         struct tracer *t = v;
4995
4996         if (!t)
4997                 return 0;
4998
4999         seq_puts(m, t->name);
5000         if (t->next)
5001                 seq_putc(m, ' ');
5002         else
5003                 seq_putc(m, '\n');
5004
5005         return 0;
5006 }
5007
5008 static const struct seq_operations show_traces_seq_ops = {
5009         .start          = t_start,
5010         .next           = t_next,
5011         .stop           = t_stop,
5012         .show           = t_show,
5013 };
5014
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017         struct trace_array *tr = inode->i_private;
5018         struct seq_file *m;
5019         int ret;
5020
5021         ret = tracing_check_open_get_tr(tr);
5022         if (ret)
5023                 return ret;
5024
5025         ret = seq_open(file, &show_traces_seq_ops);
5026         if (ret) {
5027                 trace_array_put(tr);
5028                 return ret;
5029         }
5030
5031         m = file->private_data;
5032         m->private = tr;
5033
5034         return 0;
5035 }
5036
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039         struct trace_array *tr = inode->i_private;
5040
5041         trace_array_put(tr);
5042         return seq_release(inode, file);
5043 }
5044
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047                    size_t count, loff_t *ppos)
5048 {
5049         return count;
5050 }
5051
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054         int ret;
5055
5056         if (file->f_mode & FMODE_READ)
5057                 ret = seq_lseek(file, offset, whence);
5058         else
5059                 file->f_pos = ret = 0;
5060
5061         return ret;
5062 }
5063
5064 static const struct file_operations tracing_fops = {
5065         .open           = tracing_open,
5066         .read           = seq_read,
5067         .write          = tracing_write_stub,
5068         .llseek         = tracing_lseek,
5069         .release        = tracing_release,
5070 };
5071
5072 static const struct file_operations show_traces_fops = {
5073         .open           = show_traces_open,
5074         .read           = seq_read,
5075         .llseek         = seq_lseek,
5076         .release        = show_traces_release,
5077 };
5078
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081                      size_t count, loff_t *ppos)
5082 {
5083         struct trace_array *tr = file_inode(filp)->i_private;
5084         char *mask_str;
5085         int len;
5086
5087         len = snprintf(NULL, 0, "%*pb\n",
5088                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089         mask_str = kmalloc(len, GFP_KERNEL);
5090         if (!mask_str)
5091                 return -ENOMEM;
5092
5093         len = snprintf(mask_str, len, "%*pb\n",
5094                        cpumask_pr_args(tr->tracing_cpumask));
5095         if (len >= count) {
5096                 count = -EINVAL;
5097                 goto out_err;
5098         }
5099         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100
5101 out_err:
5102         kfree(mask_str);
5103
5104         return count;
5105 }
5106
5107 int tracing_set_cpumask(struct trace_array *tr,
5108                         cpumask_var_t tracing_cpumask_new)
5109 {
5110         int cpu;
5111
5112         if (!tr)
5113                 return -EINVAL;
5114
5115         local_irq_disable();
5116         arch_spin_lock(&tr->max_lock);
5117         for_each_tracing_cpu(cpu) {
5118                 /*
5119                  * Increase/decrease the disabled counter if we are
5120                  * about to flip a bit in the cpumask:
5121                  */
5122                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126                 }
5127                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131                 }
5132         }
5133         arch_spin_unlock(&tr->max_lock);
5134         local_irq_enable();
5135
5136         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137
5138         return 0;
5139 }
5140
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143                       size_t count, loff_t *ppos)
5144 {
5145         struct trace_array *tr = file_inode(filp)->i_private;
5146         cpumask_var_t tracing_cpumask_new;
5147         int err;
5148
5149         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150                 return -ENOMEM;
5151
5152         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153         if (err)
5154                 goto err_free;
5155
5156         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157         if (err)
5158                 goto err_free;
5159
5160         free_cpumask_var(tracing_cpumask_new);
5161
5162         return count;
5163
5164 err_free:
5165         free_cpumask_var(tracing_cpumask_new);
5166
5167         return err;
5168 }
5169
5170 static const struct file_operations tracing_cpumask_fops = {
5171         .open           = tracing_open_generic_tr,
5172         .read           = tracing_cpumask_read,
5173         .write          = tracing_cpumask_write,
5174         .release        = tracing_release_generic_tr,
5175         .llseek         = generic_file_llseek,
5176 };
5177
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180         struct tracer_opt *trace_opts;
5181         struct trace_array *tr = m->private;
5182         u32 tracer_flags;
5183         int i;
5184
5185         mutex_lock(&trace_types_lock);
5186         tracer_flags = tr->current_trace->flags->val;
5187         trace_opts = tr->current_trace->flags->opts;
5188
5189         for (i = 0; trace_options[i]; i++) {
5190                 if (tr->trace_flags & (1 << i))
5191                         seq_printf(m, "%s\n", trace_options[i]);
5192                 else
5193                         seq_printf(m, "no%s\n", trace_options[i]);
5194         }
5195
5196         for (i = 0; trace_opts[i].name; i++) {
5197                 if (tracer_flags & trace_opts[i].bit)
5198                         seq_printf(m, "%s\n", trace_opts[i].name);
5199                 else
5200                         seq_printf(m, "no%s\n", trace_opts[i].name);
5201         }
5202         mutex_unlock(&trace_types_lock);
5203
5204         return 0;
5205 }
5206
5207 static int __set_tracer_option(struct trace_array *tr,
5208                                struct tracer_flags *tracer_flags,
5209                                struct tracer_opt *opts, int neg)
5210 {
5211         struct tracer *trace = tracer_flags->trace;
5212         int ret;
5213
5214         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215         if (ret)
5216                 return ret;
5217
5218         if (neg)
5219                 tracer_flags->val &= ~opts->bit;
5220         else
5221                 tracer_flags->val |= opts->bit;
5222         return 0;
5223 }
5224
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228         struct tracer *trace = tr->current_trace;
5229         struct tracer_flags *tracer_flags = trace->flags;
5230         struct tracer_opt *opts = NULL;
5231         int i;
5232
5233         for (i = 0; tracer_flags->opts[i].name; i++) {
5234                 opts = &tracer_flags->opts[i];
5235
5236                 if (strcmp(cmp, opts->name) == 0)
5237                         return __set_tracer_option(tr, trace->flags, opts, neg);
5238         }
5239
5240         return -EINVAL;
5241 }
5242
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247                 return -1;
5248
5249         return 0;
5250 }
5251
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254         int *map;
5255
5256         if ((mask == TRACE_ITER_RECORD_TGID) ||
5257             (mask == TRACE_ITER_RECORD_CMD))
5258                 lockdep_assert_held(&event_mutex);
5259
5260         /* do nothing if flag is already set */
5261         if (!!(tr->trace_flags & mask) == !!enabled)
5262                 return 0;
5263
5264         /* Give the tracer a chance to approve the change */
5265         if (tr->current_trace->flag_changed)
5266                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267                         return -EINVAL;
5268
5269         if (enabled)
5270                 tr->trace_flags |= mask;
5271         else
5272                 tr->trace_flags &= ~mask;
5273
5274         if (mask == TRACE_ITER_RECORD_CMD)
5275                 trace_event_enable_cmd_record(enabled);
5276
5277         if (mask == TRACE_ITER_RECORD_TGID) {
5278                 if (!tgid_map) {
5279                         tgid_map_max = pid_max;
5280                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281                                        GFP_KERNEL);
5282
5283                         /*
5284                          * Pairs with smp_load_acquire() in
5285                          * trace_find_tgid_ptr() to ensure that if it observes
5286                          * the tgid_map we just allocated then it also observes
5287                          * the corresponding tgid_map_max value.
5288                          */
5289                         smp_store_release(&tgid_map, map);
5290                 }
5291                 if (!tgid_map) {
5292                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293                         return -ENOMEM;
5294                 }
5295
5296                 trace_event_enable_tgid_record(enabled);
5297         }
5298
5299         if (mask == TRACE_ITER_EVENT_FORK)
5300                 trace_event_follow_fork(tr, enabled);
5301
5302         if (mask == TRACE_ITER_FUNC_FORK)
5303                 ftrace_pid_follow_fork(tr, enabled);
5304
5305         if (mask == TRACE_ITER_OVERWRITE) {
5306                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310         }
5311
5312         if (mask == TRACE_ITER_PRINTK) {
5313                 trace_printk_start_stop_comm(enabled);
5314                 trace_printk_control(enabled);
5315         }
5316
5317         return 0;
5318 }
5319
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322         char *cmp;
5323         int neg = 0;
5324         int ret;
5325         size_t orig_len = strlen(option);
5326         int len;
5327
5328         cmp = strstrip(option);
5329
5330         len = str_has_prefix(cmp, "no");
5331         if (len)
5332                 neg = 1;
5333
5334         cmp += len;
5335
5336         mutex_lock(&event_mutex);
5337         mutex_lock(&trace_types_lock);
5338
5339         ret = match_string(trace_options, -1, cmp);
5340         /* If no option could be set, test the specific tracer options */
5341         if (ret < 0)
5342                 ret = set_tracer_option(tr, cmp, neg);
5343         else
5344                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5345
5346         mutex_unlock(&trace_types_lock);
5347         mutex_unlock(&event_mutex);
5348
5349         /*
5350          * If the first trailing whitespace is replaced with '\0' by strstrip,
5351          * turn it back into a space.
5352          */
5353         if (orig_len > strlen(option))
5354                 option[strlen(option)] = ' ';
5355
5356         return ret;
5357 }
5358
5359 static void __init apply_trace_boot_options(void)
5360 {
5361         char *buf = trace_boot_options_buf;
5362         char *option;
5363
5364         while (true) {
5365                 option = strsep(&buf, ",");
5366
5367                 if (!option)
5368                         break;
5369
5370                 if (*option)
5371                         trace_set_options(&global_trace, option);
5372
5373                 /* Put back the comma to allow this to be called again */
5374                 if (buf)
5375                         *(buf - 1) = ',';
5376         }
5377 }
5378
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381                         size_t cnt, loff_t *ppos)
5382 {
5383         struct seq_file *m = filp->private_data;
5384         struct trace_array *tr = m->private;
5385         char buf[64];
5386         int ret;
5387
5388         if (cnt >= sizeof(buf))
5389                 return -EINVAL;
5390
5391         if (copy_from_user(buf, ubuf, cnt))
5392                 return -EFAULT;
5393
5394         buf[cnt] = 0;
5395
5396         ret = trace_set_options(tr, buf);
5397         if (ret < 0)
5398                 return ret;
5399
5400         *ppos += cnt;
5401
5402         return cnt;
5403 }
5404
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407         struct trace_array *tr = inode->i_private;
5408         int ret;
5409
5410         ret = tracing_check_open_get_tr(tr);
5411         if (ret)
5412                 return ret;
5413
5414         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415         if (ret < 0)
5416                 trace_array_put(tr);
5417
5418         return ret;
5419 }
5420
5421 static const struct file_operations tracing_iter_fops = {
5422         .open           = tracing_trace_options_open,
5423         .read           = seq_read,
5424         .llseek         = seq_lseek,
5425         .release        = tracing_single_release_tr,
5426         .write          = tracing_trace_options_write,
5427 };
5428
5429 static const char readme_msg[] =
5430         "tracing mini-HOWTO:\n\n"
5431         "# echo 0 > tracing_on : quick way to disable tracing\n"
5432         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433         " Important files:\n"
5434         "  trace\t\t\t- The static contents of the buffer\n"
5435         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437         "  current_tracer\t- function and latency tracers\n"
5438         "  available_tracers\t- list of configured tracers for current_tracer\n"
5439         "  error_log\t- error log for failed commands (that support it)\n"
5440         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442         "  trace_clock\t\t-change the clock used to order events\n"
5443         "       local:   Per cpu clock but may not be synced across CPUs\n"
5444         "      global:   Synced across CPUs but slows tracing down.\n"
5445         "     counter:   Not a clock, but just an increment\n"
5446         "      uptime:   Jiffy counter from time of boot\n"
5447         "        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449         "     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5452         "       delta:   Delta difference against a buffer-wide timestamp\n"
5453         "    absolute:   Absolute (standalone) timestamp\n"
5454         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5455         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5456         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5457         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458         "\t\t\t  Remove sub-buffer with rmdir\n"
5459         "  trace_options\t\t- Set format or modify how tracing happens\n"
5460         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5461         "\t\t\t  option name\n"
5462         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464         "\n  available_filter_functions - list of functions that can be filtered on\n"
5465         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466         "\t\t\t  functions\n"
5467         "\t     accepts: func_full_name or glob-matching-pattern\n"
5468         "\t     modules: Can select a group via module\n"
5469         "\t      Format: :mod:<module-name>\n"
5470         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471         "\t    triggers: a command to perform when function is hit\n"
5472         "\t      Format: <function>:<trigger>[:count]\n"
5473         "\t     trigger: traceon, traceoff\n"
5474         "\t\t      enable_event:<system>:<event>\n"
5475         "\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477         "\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480         "\t\t      snapshot\n"
5481 #endif
5482         "\t\t      dump\n"
5483         "\t\t      cpudump\n"
5484         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486         "\t     The first one will disable tracing every time do_fault is hit\n"
5487         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488         "\t       The first time do trap is hit and it disables tracing, the\n"
5489         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5490         "\t       the counter will not decrement. It only decrements when the\n"
5491         "\t       trigger did work\n"
5492         "\t     To remove trigger without count:\n"
5493         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494         "\t     To remove trigger with a count:\n"
5495         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498         "\t    modules: Can select a group via module command :mod:\n"
5499         "\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503         "\t\t    (function)\n"
5504         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505         "\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514         "\t\t\t  snapshot buffer. Read the contents for more\n"
5515         "\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518         "  stack_trace\t\t- Shows the max stack trace when active\n"
5519         "  stack_max_size\t- Shows current max stack size that was traced\n"
5520         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5521         "\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524         "\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540         "\t  accepts: event-definitions (one definition per line)\n"
5541         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5547         "\t           -:[<group>/]<event>\n"
5548 #ifdef CONFIG_KPROBE_EVENTS
5549         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5550   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5551 #endif
5552 #ifdef CONFIG_UPROBE_EVENTS
5553   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5554 #endif
5555         "\t     args: <name>=fetcharg[:type]\n"
5556         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5557 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5558         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5559 #else
5560         "\t           $stack<index>, $stack, $retval, $comm,\n"
5561 #endif
5562         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5563         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5564         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5565         "\t           <type>\\[<array-size>\\]\n"
5566 #ifdef CONFIG_HIST_TRIGGERS
5567         "\t    field: <stype> <name>;\n"
5568         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5569         "\t           [unsigned] char/int/long\n"
5570 #endif
5571         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5572         "\t            of the <attached-group>/<attached-event>.\n"
5573 #endif
5574         "  events/\t\t- Directory containing all trace event subsystems:\n"
5575         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5576         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5577         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5578         "\t\t\t  events\n"
5579         "      filter\t\t- If set, only events passing filter are traced\n"
5580         "  events/<system>/<event>/\t- Directory containing control files for\n"
5581         "\t\t\t  <event>:\n"
5582         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5583         "      filter\t\t- If set, only events passing filter are traced\n"
5584         "      trigger\t\t- If set, a command to perform when event is hit\n"
5585         "\t    Format: <trigger>[:count][if <filter>]\n"
5586         "\t   trigger: traceon, traceoff\n"
5587         "\t            enable_event:<system>:<event>\n"
5588         "\t            disable_event:<system>:<event>\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590         "\t            enable_hist:<system>:<event>\n"
5591         "\t            disable_hist:<system>:<event>\n"
5592 #endif
5593 #ifdef CONFIG_STACKTRACE
5594         "\t\t    stacktrace\n"
5595 #endif
5596 #ifdef CONFIG_TRACER_SNAPSHOT
5597         "\t\t    snapshot\n"
5598 #endif
5599 #ifdef CONFIG_HIST_TRIGGERS
5600         "\t\t    hist (see below)\n"
5601 #endif
5602         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5603         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5604         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5605         "\t                  events/block/block_unplug/trigger\n"
5606         "\t   The first disables tracing every time block_unplug is hit.\n"
5607         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5608         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5609         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5610         "\t   Like function triggers, the counter is only decremented if it\n"
5611         "\t    enabled or disabled tracing.\n"
5612         "\t   To remove a trigger without a count:\n"
5613         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5614         "\t   To remove a trigger with a count:\n"
5615         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5616         "\t   Filters can be ignored when removing a trigger.\n"
5617 #ifdef CONFIG_HIST_TRIGGERS
5618         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5619         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5620         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5621         "\t            [:values=<field1[,field2,...]>]\n"
5622         "\t            [:sort=<field1[,field2,...]>]\n"
5623         "\t            [:size=#entries]\n"
5624         "\t            [:pause][:continue][:clear]\n"
5625         "\t            [:name=histname1]\n"
5626         "\t            [:<handler>.<action>]\n"
5627         "\t            [if <filter>]\n\n"
5628         "\t    Note, special fields can be used as well:\n"
5629         "\t            common_timestamp - to record current timestamp\n"
5630         "\t            common_cpu - to record the CPU the event happened on\n"
5631         "\n"
5632         "\t    A hist trigger variable can be:\n"
5633         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5634         "\t        - a reference to another variable e.g. y=$x,\n"
5635         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5636         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5637         "\n"
5638         "\t    hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
5639         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5640         "\t    variable reference, field or numeric literal.\n"
5641         "\n"
5642         "\t    When a matching event is hit, an entry is added to a hash\n"
5643         "\t    table using the key(s) and value(s) named, and the value of a\n"
5644         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5645         "\t    correspond to fields in the event's format description.  Keys\n"
5646         "\t    can be any field, or the special string 'stacktrace'.\n"
5647         "\t    Compound keys consisting of up to two fields can be specified\n"
5648         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5649         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5650         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5651         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5652         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5653         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5654         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5655         "\t    its histogram data will be shared with other triggers of the\n"
5656         "\t    same name, and trigger hits will update this common data.\n\n"
5657         "\t    Reading the 'hist' file for the event will dump the hash\n"
5658         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5659         "\t    triggers attached to an event, there will be a table for each\n"
5660         "\t    trigger in the output.  The table displayed for a named\n"
5661         "\t    trigger will be the same as any other instance having the\n"
5662         "\t    same name.  The default format used to display a given field\n"
5663         "\t    can be modified by appending any of the following modifiers\n"
5664         "\t    to the field name, as applicable:\n\n"
5665         "\t            .hex        display a number as a hex value\n"
5666         "\t            .sym        display an address as a symbol\n"
5667         "\t            .sym-offset display an address as a symbol and offset\n"
5668         "\t            .execname   display a common_pid as a program name\n"
5669         "\t            .syscall    display a syscall id as a syscall name\n"
5670         "\t            .log2       display log2 value rather than raw number\n"
5671         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5672         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5673         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5674         "\t    trigger or to start a hist trigger but not log any events\n"
5675         "\t    until told to do so.  'continue' can be used to start or\n"
5676         "\t    restart a paused hist trigger.\n\n"
5677         "\t    The 'clear' parameter will clear the contents of a running\n"
5678         "\t    hist trigger and leave its current paused/active state\n"
5679         "\t    unchanged.\n\n"
5680         "\t    The enable_hist and disable_hist triggers can be used to\n"
5681         "\t    have one event conditionally start and stop another event's\n"
5682         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5683         "\t    the enable_event and disable_event triggers.\n\n"
5684         "\t    Hist trigger handlers and actions are executed whenever a\n"
5685         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5686         "\t        <handler>.<action>\n\n"
5687         "\t    The available handlers are:\n\n"
5688         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5689         "\t        onmax(var)               - invoke if var exceeds current max\n"
5690         "\t        onchange(var)            - invoke action if var changes\n\n"
5691         "\t    The available actions are:\n\n"
5692         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5693         "\t        save(field,...)                      - save current event fields\n"
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5696 #endif
5697 #ifdef CONFIG_SYNTH_EVENTS
5698         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5699         "\t  Write into this file to define/undefine new synthetic events.\n"
5700         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5701 #endif
5702 #endif
5703 ;
5704
5705 static ssize_t
5706 tracing_readme_read(struct file *filp, char __user *ubuf,
5707                        size_t cnt, loff_t *ppos)
5708 {
5709         return simple_read_from_buffer(ubuf, cnt, ppos,
5710                                         readme_msg, strlen(readme_msg));
5711 }
5712
5713 static const struct file_operations tracing_readme_fops = {
5714         .open           = tracing_open_generic,
5715         .read           = tracing_readme_read,
5716         .llseek         = generic_file_llseek,
5717 };
5718
5719 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5720 {
5721         int pid = ++(*pos);
5722
5723         return trace_find_tgid_ptr(pid);
5724 }
5725
5726 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5727 {
5728         int pid = *pos;
5729
5730         return trace_find_tgid_ptr(pid);
5731 }
5732
5733 static void saved_tgids_stop(struct seq_file *m, void *v)
5734 {
5735 }
5736
5737 static int saved_tgids_show(struct seq_file *m, void *v)
5738 {
5739         int *entry = (int *)v;
5740         int pid = entry - tgid_map;
5741         int tgid = *entry;
5742
5743         if (tgid == 0)
5744                 return SEQ_SKIP;
5745
5746         seq_printf(m, "%d %d\n", pid, tgid);
5747         return 0;
5748 }
5749
5750 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5751         .start          = saved_tgids_start,
5752         .stop           = saved_tgids_stop,
5753         .next           = saved_tgids_next,
5754         .show           = saved_tgids_show,
5755 };
5756
5757 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5758 {
5759         int ret;
5760
5761         ret = tracing_check_open_get_tr(NULL);
5762         if (ret)
5763                 return ret;
5764
5765         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5766 }
5767
5768
5769 static const struct file_operations tracing_saved_tgids_fops = {
5770         .open           = tracing_saved_tgids_open,
5771         .read           = seq_read,
5772         .llseek         = seq_lseek,
5773         .release        = seq_release,
5774 };
5775
5776 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5777 {
5778         unsigned int *ptr = v;
5779
5780         if (*pos || m->count)
5781                 ptr++;
5782
5783         (*pos)++;
5784
5785         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5786              ptr++) {
5787                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5788                         continue;
5789
5790                 return ptr;
5791         }
5792
5793         return NULL;
5794 }
5795
5796 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5797 {
5798         void *v;
5799         loff_t l = 0;
5800
5801         preempt_disable();
5802         arch_spin_lock(&trace_cmdline_lock);
5803
5804         v = &savedcmd->map_cmdline_to_pid[0];
5805         while (l <= *pos) {
5806                 v = saved_cmdlines_next(m, v, &l);
5807                 if (!v)
5808                         return NULL;
5809         }
5810
5811         return v;
5812 }
5813
5814 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5815 {
5816         arch_spin_unlock(&trace_cmdline_lock);
5817         preempt_enable();
5818 }
5819
5820 static int saved_cmdlines_show(struct seq_file *m, void *v)
5821 {
5822         char buf[TASK_COMM_LEN];
5823         unsigned int *pid = v;
5824
5825         __trace_find_cmdline(*pid, buf);
5826         seq_printf(m, "%d %s\n", *pid, buf);
5827         return 0;
5828 }
5829
5830 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5831         .start          = saved_cmdlines_start,
5832         .next           = saved_cmdlines_next,
5833         .stop           = saved_cmdlines_stop,
5834         .show           = saved_cmdlines_show,
5835 };
5836
5837 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5838 {
5839         int ret;
5840
5841         ret = tracing_check_open_get_tr(NULL);
5842         if (ret)
5843                 return ret;
5844
5845         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5846 }
5847
5848 static const struct file_operations tracing_saved_cmdlines_fops = {
5849         .open           = tracing_saved_cmdlines_open,
5850         .read           = seq_read,
5851         .llseek         = seq_lseek,
5852         .release        = seq_release,
5853 };
5854
5855 static ssize_t
5856 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5857                                  size_t cnt, loff_t *ppos)
5858 {
5859         char buf[64];
5860         int r;
5861
5862         arch_spin_lock(&trace_cmdline_lock);
5863         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5864         arch_spin_unlock(&trace_cmdline_lock);
5865
5866         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5867 }
5868
5869 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5870 {
5871         kfree(s->saved_cmdlines);
5872         kfree(s->map_cmdline_to_pid);
5873         kfree(s);
5874 }
5875
5876 static int tracing_resize_saved_cmdlines(unsigned int val)
5877 {
5878         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5879
5880         s = kmalloc(sizeof(*s), GFP_KERNEL);
5881         if (!s)
5882                 return -ENOMEM;
5883
5884         if (allocate_cmdlines_buffer(val, s) < 0) {
5885                 kfree(s);
5886                 return -ENOMEM;
5887         }
5888
5889         arch_spin_lock(&trace_cmdline_lock);
5890         savedcmd_temp = savedcmd;
5891         savedcmd = s;
5892         arch_spin_unlock(&trace_cmdline_lock);
5893         free_saved_cmdlines_buffer(savedcmd_temp);
5894
5895         return 0;
5896 }
5897
5898 static ssize_t
5899 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5900                                   size_t cnt, loff_t *ppos)
5901 {
5902         unsigned long val;
5903         int ret;
5904
5905         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5906         if (ret)
5907                 return ret;
5908
5909         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5910         if (!val || val > PID_MAX_DEFAULT)
5911                 return -EINVAL;
5912
5913         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5914         if (ret < 0)
5915                 return ret;
5916
5917         *ppos += cnt;
5918
5919         return cnt;
5920 }
5921
5922 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5923         .open           = tracing_open_generic,
5924         .read           = tracing_saved_cmdlines_size_read,
5925         .write          = tracing_saved_cmdlines_size_write,
5926 };
5927
5928 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5929 static union trace_eval_map_item *
5930 update_eval_map(union trace_eval_map_item *ptr)
5931 {
5932         if (!ptr->map.eval_string) {
5933                 if (ptr->tail.next) {
5934                         ptr = ptr->tail.next;
5935                         /* Set ptr to the next real item (skip head) */
5936                         ptr++;
5937                 } else
5938                         return NULL;
5939         }
5940         return ptr;
5941 }
5942
5943 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5944 {
5945         union trace_eval_map_item *ptr = v;
5946
5947         /*
5948          * Paranoid! If ptr points to end, we don't want to increment past it.
5949          * This really should never happen.
5950          */
5951         (*pos)++;
5952         ptr = update_eval_map(ptr);
5953         if (WARN_ON_ONCE(!ptr))
5954                 return NULL;
5955
5956         ptr++;
5957         ptr = update_eval_map(ptr);
5958
5959         return ptr;
5960 }
5961
5962 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5963 {
5964         union trace_eval_map_item *v;
5965         loff_t l = 0;
5966
5967         mutex_lock(&trace_eval_mutex);
5968
5969         v = trace_eval_maps;
5970         if (v)
5971                 v++;
5972
5973         while (v && l < *pos) {
5974                 v = eval_map_next(m, v, &l);
5975         }
5976
5977         return v;
5978 }
5979
5980 static void eval_map_stop(struct seq_file *m, void *v)
5981 {
5982         mutex_unlock(&trace_eval_mutex);
5983 }
5984
5985 static int eval_map_show(struct seq_file *m, void *v)
5986 {
5987         union trace_eval_map_item *ptr = v;
5988
5989         seq_printf(m, "%s %ld (%s)\n",
5990                    ptr->map.eval_string, ptr->map.eval_value,
5991                    ptr->map.system);
5992
5993         return 0;
5994 }
5995
5996 static const struct seq_operations tracing_eval_map_seq_ops = {
5997         .start          = eval_map_start,
5998         .next           = eval_map_next,
5999         .stop           = eval_map_stop,
6000         .show           = eval_map_show,
6001 };
6002
6003 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6004 {
6005         int ret;
6006
6007         ret = tracing_check_open_get_tr(NULL);
6008         if (ret)
6009                 return ret;
6010
6011         return seq_open(filp, &tracing_eval_map_seq_ops);
6012 }
6013
6014 static const struct file_operations tracing_eval_map_fops = {
6015         .open           = tracing_eval_map_open,
6016         .read           = seq_read,
6017         .llseek         = seq_lseek,
6018         .release        = seq_release,
6019 };
6020
6021 static inline union trace_eval_map_item *
6022 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6023 {
6024         /* Return tail of array given the head */
6025         return ptr + ptr->head.length + 1;
6026 }
6027
6028 static void
6029 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6030                            int len)
6031 {
6032         struct trace_eval_map **stop;
6033         struct trace_eval_map **map;
6034         union trace_eval_map_item *map_array;
6035         union trace_eval_map_item *ptr;
6036
6037         stop = start + len;
6038
6039         /*
6040          * The trace_eval_maps contains the map plus a head and tail item,
6041          * where the head holds the module and length of array, and the
6042          * tail holds a pointer to the next list.
6043          */
6044         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6045         if (!map_array) {
6046                 pr_warn("Unable to allocate trace eval mapping\n");
6047                 return;
6048         }
6049
6050         mutex_lock(&trace_eval_mutex);
6051
6052         if (!trace_eval_maps)
6053                 trace_eval_maps = map_array;
6054         else {
6055                 ptr = trace_eval_maps;
6056                 for (;;) {
6057                         ptr = trace_eval_jmp_to_tail(ptr);
6058                         if (!ptr->tail.next)
6059                                 break;
6060                         ptr = ptr->tail.next;
6061
6062                 }
6063                 ptr->tail.next = map_array;
6064         }
6065         map_array->head.mod = mod;
6066         map_array->head.length = len;
6067         map_array++;
6068
6069         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6070                 map_array->map = **map;
6071                 map_array++;
6072         }
6073         memset(map_array, 0, sizeof(*map_array));
6074
6075         mutex_unlock(&trace_eval_mutex);
6076 }
6077
6078 static void trace_create_eval_file(struct dentry *d_tracer)
6079 {
6080         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6081                           NULL, &tracing_eval_map_fops);
6082 }
6083
6084 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6085 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6086 static inline void trace_insert_eval_map_file(struct module *mod,
6087                               struct trace_eval_map **start, int len) { }
6088 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6089
6090 static void trace_insert_eval_map(struct module *mod,
6091                                   struct trace_eval_map **start, int len)
6092 {
6093         struct trace_eval_map **map;
6094
6095         if (len <= 0)
6096                 return;
6097
6098         map = start;
6099
6100         trace_event_eval_update(map, len);
6101
6102         trace_insert_eval_map_file(mod, start, len);
6103 }
6104
6105 static ssize_t
6106 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6107                        size_t cnt, loff_t *ppos)
6108 {
6109         struct trace_array *tr = filp->private_data;
6110         char buf[MAX_TRACER_SIZE+2];
6111         int r;
6112
6113         mutex_lock(&trace_types_lock);
6114         r = sprintf(buf, "%s\n", tr->current_trace->name);
6115         mutex_unlock(&trace_types_lock);
6116
6117         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118 }
6119
6120 int tracer_init(struct tracer *t, struct trace_array *tr)
6121 {
6122         tracing_reset_online_cpus(&tr->array_buffer);
6123         return t->init(tr);
6124 }
6125
6126 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6127 {
6128         int cpu;
6129
6130         for_each_tracing_cpu(cpu)
6131                 per_cpu_ptr(buf->data, cpu)->entries = val;
6132 }
6133
6134 #ifdef CONFIG_TRACER_MAX_TRACE
6135 /* resize @tr's buffer to the size of @size_tr's entries */
6136 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6137                                         struct array_buffer *size_buf, int cpu_id)
6138 {
6139         int cpu, ret = 0;
6140
6141         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6142                 for_each_tracing_cpu(cpu) {
6143                         ret = ring_buffer_resize(trace_buf->buffer,
6144                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6145                         if (ret < 0)
6146                                 break;
6147                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6148                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6149                 }
6150         } else {
6151                 ret = ring_buffer_resize(trace_buf->buffer,
6152                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6153                 if (ret == 0)
6154                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6155                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6156         }
6157
6158         return ret;
6159 }
6160 #endif /* CONFIG_TRACER_MAX_TRACE */
6161
6162 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6163                                         unsigned long size, int cpu)
6164 {
6165         int ret;
6166
6167         /*
6168          * If kernel or user changes the size of the ring buffer
6169          * we use the size that was given, and we can forget about
6170          * expanding it later.
6171          */
6172         ring_buffer_expanded = true;
6173
6174         /* May be called before buffers are initialized */
6175         if (!tr->array_buffer.buffer)
6176                 return 0;
6177
6178         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6179         if (ret < 0)
6180                 return ret;
6181
6182 #ifdef CONFIG_TRACER_MAX_TRACE
6183         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6184             !tr->current_trace->use_max_tr)
6185                 goto out;
6186
6187         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6188         if (ret < 0) {
6189                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6190                                                      &tr->array_buffer, cpu);
6191                 if (r < 0) {
6192                         /*
6193                          * AARGH! We are left with different
6194                          * size max buffer!!!!
6195                          * The max buffer is our "snapshot" buffer.
6196                          * When a tracer needs a snapshot (one of the
6197                          * latency tracers), it swaps the max buffer
6198                          * with the saved snap shot. We succeeded to
6199                          * update the size of the main buffer, but failed to
6200                          * update the size of the max buffer. But when we tried
6201                          * to reset the main buffer to the original size, we
6202                          * failed there too. This is very unlikely to
6203                          * happen, but if it does, warn and kill all
6204                          * tracing.
6205                          */
6206                         WARN_ON(1);
6207                         tracing_disabled = 1;
6208                 }
6209                 return ret;
6210         }
6211
6212         if (cpu == RING_BUFFER_ALL_CPUS)
6213                 set_buffer_entries(&tr->max_buffer, size);
6214         else
6215                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6216
6217  out:
6218 #endif /* CONFIG_TRACER_MAX_TRACE */
6219
6220         if (cpu == RING_BUFFER_ALL_CPUS)
6221                 set_buffer_entries(&tr->array_buffer, size);
6222         else
6223                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6224
6225         return ret;
6226 }
6227
6228 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6229                                   unsigned long size, int cpu_id)
6230 {
6231         int ret;
6232
6233         mutex_lock(&trace_types_lock);
6234
6235         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6236                 /* make sure, this cpu is enabled in the mask */
6237                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6238                         ret = -EINVAL;
6239                         goto out;
6240                 }
6241         }
6242
6243         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6244         if (ret < 0)
6245                 ret = -ENOMEM;
6246
6247 out:
6248         mutex_unlock(&trace_types_lock);
6249
6250         return ret;
6251 }
6252
6253
6254 /**
6255  * tracing_update_buffers - used by tracing facility to expand ring buffers
6256  *
6257  * To save on memory when the tracing is never used on a system with it
6258  * configured in. The ring buffers are set to a minimum size. But once
6259  * a user starts to use the tracing facility, then they need to grow
6260  * to their default size.
6261  *
6262  * This function is to be called when a tracer is about to be used.
6263  */
6264 int tracing_update_buffers(void)
6265 {
6266         int ret = 0;
6267
6268         mutex_lock(&trace_types_lock);
6269         if (!ring_buffer_expanded)
6270                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6271                                                 RING_BUFFER_ALL_CPUS);
6272         mutex_unlock(&trace_types_lock);
6273
6274         return ret;
6275 }
6276
6277 struct trace_option_dentry;
6278
6279 static void
6280 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6281
6282 /*
6283  * Used to clear out the tracer before deletion of an instance.
6284  * Must have trace_types_lock held.
6285  */
6286 static void tracing_set_nop(struct trace_array *tr)
6287 {
6288         if (tr->current_trace == &nop_trace)
6289                 return;
6290         
6291         tr->current_trace->enabled--;
6292
6293         if (tr->current_trace->reset)
6294                 tr->current_trace->reset(tr);
6295
6296         tr->current_trace = &nop_trace;
6297 }
6298
6299 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6300 {
6301         /* Only enable if the directory has been created already. */
6302         if (!tr->dir)
6303                 return;
6304
6305         create_trace_option_files(tr, t);
6306 }
6307
6308 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6309 {
6310         struct tracer *t;
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312         bool had_max_tr;
6313 #endif
6314         int ret = 0;
6315
6316         mutex_lock(&trace_types_lock);
6317
6318         if (!ring_buffer_expanded) {
6319                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6320                                                 RING_BUFFER_ALL_CPUS);
6321                 if (ret < 0)
6322                         goto out;
6323                 ret = 0;
6324         }
6325
6326         for (t = trace_types; t; t = t->next) {
6327                 if (strcmp(t->name, buf) == 0)
6328                         break;
6329         }
6330         if (!t) {
6331                 ret = -EINVAL;
6332                 goto out;
6333         }
6334         if (t == tr->current_trace)
6335                 goto out;
6336
6337 #ifdef CONFIG_TRACER_SNAPSHOT
6338         if (t->use_max_tr) {
6339                 arch_spin_lock(&tr->max_lock);
6340                 if (tr->cond_snapshot)
6341                         ret = -EBUSY;
6342                 arch_spin_unlock(&tr->max_lock);
6343                 if (ret)
6344                         goto out;
6345         }
6346 #endif
6347         /* Some tracers won't work on kernel command line */
6348         if (system_state < SYSTEM_RUNNING && t->noboot) {
6349                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6350                         t->name);
6351                 goto out;
6352         }
6353
6354         /* Some tracers are only allowed for the top level buffer */
6355         if (!trace_ok_for_array(t, tr)) {
6356                 ret = -EINVAL;
6357                 goto out;
6358         }
6359
6360         /* If trace pipe files are being read, we can't change the tracer */
6361         if (tr->trace_ref) {
6362                 ret = -EBUSY;
6363                 goto out;
6364         }
6365
6366         trace_branch_disable();
6367
6368         tr->current_trace->enabled--;
6369
6370         if (tr->current_trace->reset)
6371                 tr->current_trace->reset(tr);
6372
6373         /* Current trace needs to be nop_trace before synchronize_rcu */
6374         tr->current_trace = &nop_trace;
6375
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377         had_max_tr = tr->allocated_snapshot;
6378
6379         if (had_max_tr && !t->use_max_tr) {
6380                 /*
6381                  * We need to make sure that the update_max_tr sees that
6382                  * current_trace changed to nop_trace to keep it from
6383                  * swapping the buffers after we resize it.
6384                  * The update_max_tr is called from interrupts disabled
6385                  * so a synchronized_sched() is sufficient.
6386                  */
6387                 synchronize_rcu();
6388                 free_snapshot(tr);
6389         }
6390 #endif
6391
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393         if (t->use_max_tr && !had_max_tr) {
6394                 ret = tracing_alloc_snapshot_instance(tr);
6395                 if (ret < 0)
6396                         goto out;
6397         }
6398 #endif
6399
6400         if (t->init) {
6401                 ret = tracer_init(t, tr);
6402                 if (ret)
6403                         goto out;
6404         }
6405
6406         tr->current_trace = t;
6407         tr->current_trace->enabled++;
6408         trace_branch_enable(tr);
6409  out:
6410         mutex_unlock(&trace_types_lock);
6411
6412         return ret;
6413 }
6414
6415 static ssize_t
6416 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6417                         size_t cnt, loff_t *ppos)
6418 {
6419         struct trace_array *tr = filp->private_data;
6420         char buf[MAX_TRACER_SIZE+1];
6421         int i;
6422         size_t ret;
6423         int err;
6424
6425         ret = cnt;
6426
6427         if (cnt > MAX_TRACER_SIZE)
6428                 cnt = MAX_TRACER_SIZE;
6429
6430         if (copy_from_user(buf, ubuf, cnt))
6431                 return -EFAULT;
6432
6433         buf[cnt] = 0;
6434
6435         /* strip ending whitespace. */
6436         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6437                 buf[i] = 0;
6438
6439         err = tracing_set_tracer(tr, buf);
6440         if (err)
6441                 return err;
6442
6443         *ppos += ret;
6444
6445         return ret;
6446 }
6447
6448 static ssize_t
6449 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6450                    size_t cnt, loff_t *ppos)
6451 {
6452         char buf[64];
6453         int r;
6454
6455         r = snprintf(buf, sizeof(buf), "%ld\n",
6456                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6457         if (r > sizeof(buf))
6458                 r = sizeof(buf);
6459         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6460 }
6461
6462 static ssize_t
6463 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6464                     size_t cnt, loff_t *ppos)
6465 {
6466         unsigned long val;
6467         int ret;
6468
6469         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6470         if (ret)
6471                 return ret;
6472
6473         *ptr = val * 1000;
6474
6475         return cnt;
6476 }
6477
6478 static ssize_t
6479 tracing_thresh_read(struct file *filp, char __user *ubuf,
6480                     size_t cnt, loff_t *ppos)
6481 {
6482         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6483 }
6484
6485 static ssize_t
6486 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6487                      size_t cnt, loff_t *ppos)
6488 {
6489         struct trace_array *tr = filp->private_data;
6490         int ret;
6491
6492         mutex_lock(&trace_types_lock);
6493         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6494         if (ret < 0)
6495                 goto out;
6496
6497         if (tr->current_trace->update_thresh) {
6498                 ret = tr->current_trace->update_thresh(tr);
6499                 if (ret < 0)
6500                         goto out;
6501         }
6502
6503         ret = cnt;
6504 out:
6505         mutex_unlock(&trace_types_lock);
6506
6507         return ret;
6508 }
6509
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511
6512 static ssize_t
6513 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6514                      size_t cnt, loff_t *ppos)
6515 {
6516         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6517 }
6518
6519 static ssize_t
6520 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521                       size_t cnt, loff_t *ppos)
6522 {
6523         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6524 }
6525
6526 #endif
6527
6528 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6529 {
6530         struct trace_array *tr = inode->i_private;
6531         struct trace_iterator *iter;
6532         int ret;
6533
6534         ret = tracing_check_open_get_tr(tr);
6535         if (ret)
6536                 return ret;
6537
6538         mutex_lock(&trace_types_lock);
6539
6540         /* create a buffer to store the information to pass to userspace */
6541         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6542         if (!iter) {
6543                 ret = -ENOMEM;
6544                 __trace_array_put(tr);
6545                 goto out;
6546         }
6547
6548         trace_seq_init(&iter->seq);
6549         iter->trace = tr->current_trace;
6550
6551         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6552                 ret = -ENOMEM;
6553                 goto fail;
6554         }
6555
6556         /* trace pipe does not show start of buffer */
6557         cpumask_setall(iter->started);
6558
6559         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6560                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6561
6562         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6563         if (trace_clocks[tr->clock_id].in_ns)
6564                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6565
6566         iter->tr = tr;
6567         iter->array_buffer = &tr->array_buffer;
6568         iter->cpu_file = tracing_get_cpu(inode);
6569         mutex_init(&iter->mutex);
6570         filp->private_data = iter;
6571
6572         if (iter->trace->pipe_open)
6573                 iter->trace->pipe_open(iter);
6574
6575         nonseekable_open(inode, filp);
6576
6577         tr->trace_ref++;
6578 out:
6579         mutex_unlock(&trace_types_lock);
6580         return ret;
6581
6582 fail:
6583         kfree(iter);
6584         __trace_array_put(tr);
6585         mutex_unlock(&trace_types_lock);
6586         return ret;
6587 }
6588
6589 static int tracing_release_pipe(struct inode *inode, struct file *file)
6590 {
6591         struct trace_iterator *iter = file->private_data;
6592         struct trace_array *tr = inode->i_private;
6593
6594         mutex_lock(&trace_types_lock);
6595
6596         tr->trace_ref--;
6597
6598         if (iter->trace->pipe_close)
6599                 iter->trace->pipe_close(iter);
6600
6601         mutex_unlock(&trace_types_lock);
6602
6603         free_cpumask_var(iter->started);
6604         mutex_destroy(&iter->mutex);
6605         kfree(iter);
6606
6607         trace_array_put(tr);
6608
6609         return 0;
6610 }
6611
6612 static __poll_t
6613 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6614 {
6615         struct trace_array *tr = iter->tr;
6616
6617         /* Iterators are static, they should be filled or empty */
6618         if (trace_buffer_iter(iter, iter->cpu_file))
6619                 return EPOLLIN | EPOLLRDNORM;
6620
6621         if (tr->trace_flags & TRACE_ITER_BLOCK)
6622                 /*
6623                  * Always select as readable when in blocking mode
6624                  */
6625                 return EPOLLIN | EPOLLRDNORM;
6626         else
6627                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6628                                              filp, poll_table);
6629 }
6630
6631 static __poll_t
6632 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6633 {
6634         struct trace_iterator *iter = filp->private_data;
6635
6636         return trace_poll(iter, filp, poll_table);
6637 }
6638
6639 /* Must be called with iter->mutex held. */
6640 static int tracing_wait_pipe(struct file *filp)
6641 {
6642         struct trace_iterator *iter = filp->private_data;
6643         int ret;
6644
6645         while (trace_empty(iter)) {
6646
6647                 if ((filp->f_flags & O_NONBLOCK)) {
6648                         return -EAGAIN;
6649                 }
6650
6651                 /*
6652                  * We block until we read something and tracing is disabled.
6653                  * We still block if tracing is disabled, but we have never
6654                  * read anything. This allows a user to cat this file, and
6655                  * then enable tracing. But after we have read something,
6656                  * we give an EOF when tracing is again disabled.
6657                  *
6658                  * iter->pos will be 0 if we haven't read anything.
6659                  */
6660                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6661                         break;
6662
6663                 mutex_unlock(&iter->mutex);
6664
6665                 ret = wait_on_pipe(iter, 0);
6666
6667                 mutex_lock(&iter->mutex);
6668
6669                 if (ret)
6670                         return ret;
6671         }
6672
6673         return 1;
6674 }
6675
6676 /*
6677  * Consumer reader.
6678  */
6679 static ssize_t
6680 tracing_read_pipe(struct file *filp, char __user *ubuf,
6681                   size_t cnt, loff_t *ppos)
6682 {
6683         struct trace_iterator *iter = filp->private_data;
6684         ssize_t sret;
6685
6686         /*
6687          * Avoid more than one consumer on a single file descriptor
6688          * This is just a matter of traces coherency, the ring buffer itself
6689          * is protected.
6690          */
6691         mutex_lock(&iter->mutex);
6692
6693         /* return any leftover data */
6694         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6695         if (sret != -EBUSY)
6696                 goto out;
6697
6698         trace_seq_init(&iter->seq);
6699
6700         if (iter->trace->read) {
6701                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6702                 if (sret)
6703                         goto out;
6704         }
6705
6706 waitagain:
6707         sret = tracing_wait_pipe(filp);
6708         if (sret <= 0)
6709                 goto out;
6710
6711         /* stop when tracing is finished */
6712         if (trace_empty(iter)) {
6713                 sret = 0;
6714                 goto out;
6715         }
6716
6717         if (cnt >= PAGE_SIZE)
6718                 cnt = PAGE_SIZE - 1;
6719
6720         /* reset all but tr, trace, and overruns */
6721         memset_startat(iter, 0, seq);
6722         cpumask_clear(iter->started);
6723         trace_seq_init(&iter->seq);
6724         iter->pos = -1;
6725
6726         trace_event_read_lock();
6727         trace_access_lock(iter->cpu_file);
6728         while (trace_find_next_entry_inc(iter) != NULL) {
6729                 enum print_line_t ret;
6730                 int save_len = iter->seq.seq.len;
6731
6732                 ret = print_trace_line(iter);
6733                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6734                         /* don't print partial lines */
6735                         iter->seq.seq.len = save_len;
6736                         break;
6737                 }
6738                 if (ret != TRACE_TYPE_NO_CONSUME)
6739                         trace_consume(iter);
6740
6741                 if (trace_seq_used(&iter->seq) >= cnt)
6742                         break;
6743
6744                 /*
6745                  * Setting the full flag means we reached the trace_seq buffer
6746                  * size and we should leave by partial output condition above.
6747                  * One of the trace_seq_* functions is not used properly.
6748                  */
6749                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6750                           iter->ent->type);
6751         }
6752         trace_access_unlock(iter->cpu_file);
6753         trace_event_read_unlock();
6754
6755         /* Now copy what we have to the user */
6756         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6757         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6758                 trace_seq_init(&iter->seq);
6759
6760         /*
6761          * If there was nothing to send to user, in spite of consuming trace
6762          * entries, go back to wait for more entries.
6763          */
6764         if (sret == -EBUSY)
6765                 goto waitagain;
6766
6767 out:
6768         mutex_unlock(&iter->mutex);
6769
6770         return sret;
6771 }
6772
6773 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6774                                      unsigned int idx)
6775 {
6776         __free_page(spd->pages[idx]);
6777 }
6778
6779 static size_t
6780 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6781 {
6782         size_t count;
6783         int save_len;
6784         int ret;
6785
6786         /* Seq buffer is page-sized, exactly what we need. */
6787         for (;;) {
6788                 save_len = iter->seq.seq.len;
6789                 ret = print_trace_line(iter);
6790
6791                 if (trace_seq_has_overflowed(&iter->seq)) {
6792                         iter->seq.seq.len = save_len;
6793                         break;
6794                 }
6795
6796                 /*
6797                  * This should not be hit, because it should only
6798                  * be set if the iter->seq overflowed. But check it
6799                  * anyway to be safe.
6800                  */
6801                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6802                         iter->seq.seq.len = save_len;
6803                         break;
6804                 }
6805
6806                 count = trace_seq_used(&iter->seq) - save_len;
6807                 if (rem < count) {
6808                         rem = 0;
6809                         iter->seq.seq.len = save_len;
6810                         break;
6811                 }
6812
6813                 if (ret != TRACE_TYPE_NO_CONSUME)
6814                         trace_consume(iter);
6815                 rem -= count;
6816                 if (!trace_find_next_entry_inc(iter))   {
6817                         rem = 0;
6818                         iter->ent = NULL;
6819                         break;
6820                 }
6821         }
6822
6823         return rem;
6824 }
6825
6826 static ssize_t tracing_splice_read_pipe(struct file *filp,
6827                                         loff_t *ppos,
6828                                         struct pipe_inode_info *pipe,
6829                                         size_t len,
6830                                         unsigned int flags)
6831 {
6832         struct page *pages_def[PIPE_DEF_BUFFERS];
6833         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6834         struct trace_iterator *iter = filp->private_data;
6835         struct splice_pipe_desc spd = {
6836                 .pages          = pages_def,
6837                 .partial        = partial_def,
6838                 .nr_pages       = 0, /* This gets updated below. */
6839                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6840                 .ops            = &default_pipe_buf_ops,
6841                 .spd_release    = tracing_spd_release_pipe,
6842         };
6843         ssize_t ret;
6844         size_t rem;
6845         unsigned int i;
6846
6847         if (splice_grow_spd(pipe, &spd))
6848                 return -ENOMEM;
6849
6850         mutex_lock(&iter->mutex);
6851
6852         if (iter->trace->splice_read) {
6853                 ret = iter->trace->splice_read(iter, filp,
6854                                                ppos, pipe, len, flags);
6855                 if (ret)
6856                         goto out_err;
6857         }
6858
6859         ret = tracing_wait_pipe(filp);
6860         if (ret <= 0)
6861                 goto out_err;
6862
6863         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6864                 ret = -EFAULT;
6865                 goto out_err;
6866         }
6867
6868         trace_event_read_lock();
6869         trace_access_lock(iter->cpu_file);
6870
6871         /* Fill as many pages as possible. */
6872         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6873                 spd.pages[i] = alloc_page(GFP_KERNEL);
6874                 if (!spd.pages[i])
6875                         break;
6876
6877                 rem = tracing_fill_pipe_page(rem, iter);
6878
6879                 /* Copy the data into the page, so we can start over. */
6880                 ret = trace_seq_to_buffer(&iter->seq,
6881                                           page_address(spd.pages[i]),
6882                                           trace_seq_used(&iter->seq));
6883                 if (ret < 0) {
6884                         __free_page(spd.pages[i]);
6885                         break;
6886                 }
6887                 spd.partial[i].offset = 0;
6888                 spd.partial[i].len = trace_seq_used(&iter->seq);
6889
6890                 trace_seq_init(&iter->seq);
6891         }
6892
6893         trace_access_unlock(iter->cpu_file);
6894         trace_event_read_unlock();
6895         mutex_unlock(&iter->mutex);
6896
6897         spd.nr_pages = i;
6898
6899         if (i)
6900                 ret = splice_to_pipe(pipe, &spd);
6901         else
6902                 ret = 0;
6903 out:
6904         splice_shrink_spd(&spd);
6905         return ret;
6906
6907 out_err:
6908         mutex_unlock(&iter->mutex);
6909         goto out;
6910 }
6911
6912 static ssize_t
6913 tracing_entries_read(struct file *filp, char __user *ubuf,
6914                      size_t cnt, loff_t *ppos)
6915 {
6916         struct inode *inode = file_inode(filp);
6917         struct trace_array *tr = inode->i_private;
6918         int cpu = tracing_get_cpu(inode);
6919         char buf[64];
6920         int r = 0;
6921         ssize_t ret;
6922
6923         mutex_lock(&trace_types_lock);
6924
6925         if (cpu == RING_BUFFER_ALL_CPUS) {
6926                 int cpu, buf_size_same;
6927                 unsigned long size;
6928
6929                 size = 0;
6930                 buf_size_same = 1;
6931                 /* check if all cpu sizes are same */
6932                 for_each_tracing_cpu(cpu) {
6933                         /* fill in the size from first enabled cpu */
6934                         if (size == 0)
6935                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6936                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6937                                 buf_size_same = 0;
6938                                 break;
6939                         }
6940                 }
6941
6942                 if (buf_size_same) {
6943                         if (!ring_buffer_expanded)
6944                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6945                                             size >> 10,
6946                                             trace_buf_size >> 10);
6947                         else
6948                                 r = sprintf(buf, "%lu\n", size >> 10);
6949                 } else
6950                         r = sprintf(buf, "X\n");
6951         } else
6952                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6953
6954         mutex_unlock(&trace_types_lock);
6955
6956         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6957         return ret;
6958 }
6959
6960 static ssize_t
6961 tracing_entries_write(struct file *filp, const char __user *ubuf,
6962                       size_t cnt, loff_t *ppos)
6963 {
6964         struct inode *inode = file_inode(filp);
6965         struct trace_array *tr = inode->i_private;
6966         unsigned long val;
6967         int ret;
6968
6969         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6970         if (ret)
6971                 return ret;
6972
6973         /* must have at least 1 entry */
6974         if (!val)
6975                 return -EINVAL;
6976
6977         /* value is in KB */
6978         val <<= 10;
6979         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6980         if (ret < 0)
6981                 return ret;
6982
6983         *ppos += cnt;
6984
6985         return cnt;
6986 }
6987
6988 static ssize_t
6989 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6990                                 size_t cnt, loff_t *ppos)
6991 {
6992         struct trace_array *tr = filp->private_data;
6993         char buf[64];
6994         int r, cpu;
6995         unsigned long size = 0, expanded_size = 0;
6996
6997         mutex_lock(&trace_types_lock);
6998         for_each_tracing_cpu(cpu) {
6999                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7000                 if (!ring_buffer_expanded)
7001                         expanded_size += trace_buf_size >> 10;
7002         }
7003         if (ring_buffer_expanded)
7004                 r = sprintf(buf, "%lu\n", size);
7005         else
7006                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7007         mutex_unlock(&trace_types_lock);
7008
7009         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7010 }
7011
7012 static ssize_t
7013 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7014                           size_t cnt, loff_t *ppos)
7015 {
7016         /*
7017          * There is no need to read what the user has written, this function
7018          * is just to make sure that there is no error when "echo" is used
7019          */
7020
7021         *ppos += cnt;
7022
7023         return cnt;
7024 }
7025
7026 static int
7027 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7028 {
7029         struct trace_array *tr = inode->i_private;
7030
7031         /* disable tracing ? */
7032         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7033                 tracer_tracing_off(tr);
7034         /* resize the ring buffer to 0 */
7035         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7036
7037         trace_array_put(tr);
7038
7039         return 0;
7040 }
7041
7042 static ssize_t
7043 tracing_mark_write(struct file *filp, const char __user *ubuf,
7044                                         size_t cnt, loff_t *fpos)
7045 {
7046         struct trace_array *tr = filp->private_data;
7047         struct ring_buffer_event *event;
7048         enum event_trigger_type tt = ETT_NONE;
7049         struct trace_buffer *buffer;
7050         struct print_entry *entry;
7051         ssize_t written;
7052         int size;
7053         int len;
7054
7055 /* Used in tracing_mark_raw_write() as well */
7056 #define FAULTED_STR "<faulted>"
7057 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7058
7059         if (tracing_disabled)
7060                 return -EINVAL;
7061
7062         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7063                 return -EINVAL;
7064
7065         if (cnt > TRACE_BUF_SIZE)
7066                 cnt = TRACE_BUF_SIZE;
7067
7068         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7069
7070         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7071
7072         /* If less than "<faulted>", then make sure we can still add that */
7073         if (cnt < FAULTED_SIZE)
7074                 size += FAULTED_SIZE - cnt;
7075
7076         buffer = tr->array_buffer.buffer;
7077         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7078                                             tracing_gen_ctx());
7079         if (unlikely(!event))
7080                 /* Ring buffer disabled, return as if not open for write */
7081                 return -EBADF;
7082
7083         entry = ring_buffer_event_data(event);
7084         entry->ip = _THIS_IP_;
7085
7086         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7087         if (len) {
7088                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7089                 cnt = FAULTED_SIZE;
7090                 written = -EFAULT;
7091         } else
7092                 written = cnt;
7093
7094         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7095                 /* do not add \n before testing triggers, but add \0 */
7096                 entry->buf[cnt] = '\0';
7097                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7098         }
7099
7100         if (entry->buf[cnt - 1] != '\n') {
7101                 entry->buf[cnt] = '\n';
7102                 entry->buf[cnt + 1] = '\0';
7103         } else
7104                 entry->buf[cnt] = '\0';
7105
7106         if (static_branch_unlikely(&trace_marker_exports_enabled))
7107                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7108         __buffer_unlock_commit(buffer, event);
7109
7110         if (tt)
7111                 event_triggers_post_call(tr->trace_marker_file, tt);
7112
7113         if (written > 0)
7114                 *fpos += written;
7115
7116         return written;
7117 }
7118
7119 /* Limit it for now to 3K (including tag) */
7120 #define RAW_DATA_MAX_SIZE (1024*3)
7121
7122 static ssize_t
7123 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7124                                         size_t cnt, loff_t *fpos)
7125 {
7126         struct trace_array *tr = filp->private_data;
7127         struct ring_buffer_event *event;
7128         struct trace_buffer *buffer;
7129         struct raw_data_entry *entry;
7130         ssize_t written;
7131         int size;
7132         int len;
7133
7134 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7135
7136         if (tracing_disabled)
7137                 return -EINVAL;
7138
7139         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7140                 return -EINVAL;
7141
7142         /* The marker must at least have a tag id */
7143         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7144                 return -EINVAL;
7145
7146         if (cnt > TRACE_BUF_SIZE)
7147                 cnt = TRACE_BUF_SIZE;
7148
7149         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7150
7151         size = sizeof(*entry) + cnt;
7152         if (cnt < FAULT_SIZE_ID)
7153                 size += FAULT_SIZE_ID - cnt;
7154
7155         buffer = tr->array_buffer.buffer;
7156         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7157                                             tracing_gen_ctx());
7158         if (!event)
7159                 /* Ring buffer disabled, return as if not open for write */
7160                 return -EBADF;
7161
7162         entry = ring_buffer_event_data(event);
7163
7164         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7165         if (len) {
7166                 entry->id = -1;
7167                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7168                 written = -EFAULT;
7169         } else
7170                 written = cnt;
7171
7172         __buffer_unlock_commit(buffer, event);
7173
7174         if (written > 0)
7175                 *fpos += written;
7176
7177         return written;
7178 }
7179
7180 static int tracing_clock_show(struct seq_file *m, void *v)
7181 {
7182         struct trace_array *tr = m->private;
7183         int i;
7184
7185         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7186                 seq_printf(m,
7187                         "%s%s%s%s", i ? " " : "",
7188                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7189                         i == tr->clock_id ? "]" : "");
7190         seq_putc(m, '\n');
7191
7192         return 0;
7193 }
7194
7195 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7196 {
7197         int i;
7198
7199         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7200                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7201                         break;
7202         }
7203         if (i == ARRAY_SIZE(trace_clocks))
7204                 return -EINVAL;
7205
7206         mutex_lock(&trace_types_lock);
7207
7208         tr->clock_id = i;
7209
7210         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7211
7212         /*
7213          * New clock may not be consistent with the previous clock.
7214          * Reset the buffer so that it doesn't have incomparable timestamps.
7215          */
7216         tracing_reset_online_cpus(&tr->array_buffer);
7217
7218 #ifdef CONFIG_TRACER_MAX_TRACE
7219         if (tr->max_buffer.buffer)
7220                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7221         tracing_reset_online_cpus(&tr->max_buffer);
7222 #endif
7223
7224         mutex_unlock(&trace_types_lock);
7225
7226         return 0;
7227 }
7228
7229 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7230                                    size_t cnt, loff_t *fpos)
7231 {
7232         struct seq_file *m = filp->private_data;
7233         struct trace_array *tr = m->private;
7234         char buf[64];
7235         const char *clockstr;
7236         int ret;
7237
7238         if (cnt >= sizeof(buf))
7239                 return -EINVAL;
7240
7241         if (copy_from_user(buf, ubuf, cnt))
7242                 return -EFAULT;
7243
7244         buf[cnt] = 0;
7245
7246         clockstr = strstrip(buf);
7247
7248         ret = tracing_set_clock(tr, clockstr);
7249         if (ret)
7250                 return ret;
7251
7252         *fpos += cnt;
7253
7254         return cnt;
7255 }
7256
7257 static int tracing_clock_open(struct inode *inode, struct file *file)
7258 {
7259         struct trace_array *tr = inode->i_private;
7260         int ret;
7261
7262         ret = tracing_check_open_get_tr(tr);
7263         if (ret)
7264                 return ret;
7265
7266         ret = single_open(file, tracing_clock_show, inode->i_private);
7267         if (ret < 0)
7268                 trace_array_put(tr);
7269
7270         return ret;
7271 }
7272
7273 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7274 {
7275         struct trace_array *tr = m->private;
7276
7277         mutex_lock(&trace_types_lock);
7278
7279         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7280                 seq_puts(m, "delta [absolute]\n");
7281         else
7282                 seq_puts(m, "[delta] absolute\n");
7283
7284         mutex_unlock(&trace_types_lock);
7285
7286         return 0;
7287 }
7288
7289 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7290 {
7291         struct trace_array *tr = inode->i_private;
7292         int ret;
7293
7294         ret = tracing_check_open_get_tr(tr);
7295         if (ret)
7296                 return ret;
7297
7298         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7299         if (ret < 0)
7300                 trace_array_put(tr);
7301
7302         return ret;
7303 }
7304
7305 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7306 {
7307         if (rbe == this_cpu_read(trace_buffered_event))
7308                 return ring_buffer_time_stamp(buffer);
7309
7310         return ring_buffer_event_time_stamp(buffer, rbe);
7311 }
7312
7313 /*
7314  * Set or disable using the per CPU trace_buffer_event when possible.
7315  */
7316 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7317 {
7318         int ret = 0;
7319
7320         mutex_lock(&trace_types_lock);
7321
7322         if (set && tr->no_filter_buffering_ref++)
7323                 goto out;
7324
7325         if (!set) {
7326                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7327                         ret = -EINVAL;
7328                         goto out;
7329                 }
7330
7331                 --tr->no_filter_buffering_ref;
7332         }
7333  out:
7334         mutex_unlock(&trace_types_lock);
7335
7336         return ret;
7337 }
7338
7339 struct ftrace_buffer_info {
7340         struct trace_iterator   iter;
7341         void                    *spare;
7342         unsigned int            spare_cpu;
7343         unsigned int            read;
7344 };
7345
7346 #ifdef CONFIG_TRACER_SNAPSHOT
7347 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7348 {
7349         struct trace_array *tr = inode->i_private;
7350         struct trace_iterator *iter;
7351         struct seq_file *m;
7352         int ret;
7353
7354         ret = tracing_check_open_get_tr(tr);
7355         if (ret)
7356                 return ret;
7357
7358         if (file->f_mode & FMODE_READ) {
7359                 iter = __tracing_open(inode, file, true);
7360                 if (IS_ERR(iter))
7361                         ret = PTR_ERR(iter);
7362         } else {
7363                 /* Writes still need the seq_file to hold the private data */
7364                 ret = -ENOMEM;
7365                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7366                 if (!m)
7367                         goto out;
7368                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7369                 if (!iter) {
7370                         kfree(m);
7371                         goto out;
7372                 }
7373                 ret = 0;
7374
7375                 iter->tr = tr;
7376                 iter->array_buffer = &tr->max_buffer;
7377                 iter->cpu_file = tracing_get_cpu(inode);
7378                 m->private = iter;
7379                 file->private_data = m;
7380         }
7381 out:
7382         if (ret < 0)
7383                 trace_array_put(tr);
7384
7385         return ret;
7386 }
7387
7388 static ssize_t
7389 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7390                        loff_t *ppos)
7391 {
7392         struct seq_file *m = filp->private_data;
7393         struct trace_iterator *iter = m->private;
7394         struct trace_array *tr = iter->tr;
7395         unsigned long val;
7396         int ret;
7397
7398         ret = tracing_update_buffers();
7399         if (ret < 0)
7400                 return ret;
7401
7402         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7403         if (ret)
7404                 return ret;
7405
7406         mutex_lock(&trace_types_lock);
7407
7408         if (tr->current_trace->use_max_tr) {
7409                 ret = -EBUSY;
7410                 goto out;
7411         }
7412
7413         arch_spin_lock(&tr->max_lock);
7414         if (tr->cond_snapshot)
7415                 ret = -EBUSY;
7416         arch_spin_unlock(&tr->max_lock);
7417         if (ret)
7418                 goto out;
7419
7420         switch (val) {
7421         case 0:
7422                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7423                         ret = -EINVAL;
7424                         break;
7425                 }
7426                 if (tr->allocated_snapshot)
7427                         free_snapshot(tr);
7428                 break;
7429         case 1:
7430 /* Only allow per-cpu swap if the ring buffer supports it */
7431 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7432                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7433                         ret = -EINVAL;
7434                         break;
7435                 }
7436 #endif
7437                 if (tr->allocated_snapshot)
7438                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7439                                         &tr->array_buffer, iter->cpu_file);
7440                 else
7441                         ret = tracing_alloc_snapshot_instance(tr);
7442                 if (ret < 0)
7443                         break;
7444                 local_irq_disable();
7445                 /* Now, we're going to swap */
7446                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7447                         update_max_tr(tr, current, smp_processor_id(), NULL);
7448                 else
7449                         update_max_tr_single(tr, current, iter->cpu_file);
7450                 local_irq_enable();
7451                 break;
7452         default:
7453                 if (tr->allocated_snapshot) {
7454                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7455                                 tracing_reset_online_cpus(&tr->max_buffer);
7456                         else
7457                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7458                 }
7459                 break;
7460         }
7461
7462         if (ret >= 0) {
7463                 *ppos += cnt;
7464                 ret = cnt;
7465         }
7466 out:
7467         mutex_unlock(&trace_types_lock);
7468         return ret;
7469 }
7470
7471 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7472 {
7473         struct seq_file *m = file->private_data;
7474         int ret;
7475
7476         ret = tracing_release(inode, file);
7477
7478         if (file->f_mode & FMODE_READ)
7479                 return ret;
7480
7481         /* If write only, the seq_file is just a stub */
7482         if (m)
7483                 kfree(m->private);
7484         kfree(m);
7485
7486         return 0;
7487 }
7488
7489 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7490 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7491                                     size_t count, loff_t *ppos);
7492 static int tracing_buffers_release(struct inode *inode, struct file *file);
7493 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7494                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7495
7496 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7497 {
7498         struct ftrace_buffer_info *info;
7499         int ret;
7500
7501         /* The following checks for tracefs lockdown */
7502         ret = tracing_buffers_open(inode, filp);
7503         if (ret < 0)
7504                 return ret;
7505
7506         info = filp->private_data;
7507
7508         if (info->iter.trace->use_max_tr) {
7509                 tracing_buffers_release(inode, filp);
7510                 return -EBUSY;
7511         }
7512
7513         info->iter.snapshot = true;
7514         info->iter.array_buffer = &info->iter.tr->max_buffer;
7515
7516         return ret;
7517 }
7518
7519 #endif /* CONFIG_TRACER_SNAPSHOT */
7520
7521
7522 static const struct file_operations tracing_thresh_fops = {
7523         .open           = tracing_open_generic,
7524         .read           = tracing_thresh_read,
7525         .write          = tracing_thresh_write,
7526         .llseek         = generic_file_llseek,
7527 };
7528
7529 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7530 static const struct file_operations tracing_max_lat_fops = {
7531         .open           = tracing_open_generic,
7532         .read           = tracing_max_lat_read,
7533         .write          = tracing_max_lat_write,
7534         .llseek         = generic_file_llseek,
7535 };
7536 #endif
7537
7538 static const struct file_operations set_tracer_fops = {
7539         .open           = tracing_open_generic,
7540         .read           = tracing_set_trace_read,
7541         .write          = tracing_set_trace_write,
7542         .llseek         = generic_file_llseek,
7543 };
7544
7545 static const struct file_operations tracing_pipe_fops = {
7546         .open           = tracing_open_pipe,
7547         .poll           = tracing_poll_pipe,
7548         .read           = tracing_read_pipe,
7549         .splice_read    = tracing_splice_read_pipe,
7550         .release        = tracing_release_pipe,
7551         .llseek         = no_llseek,
7552 };
7553
7554 static const struct file_operations tracing_entries_fops = {
7555         .open           = tracing_open_generic_tr,
7556         .read           = tracing_entries_read,
7557         .write          = tracing_entries_write,
7558         .llseek         = generic_file_llseek,
7559         .release        = tracing_release_generic_tr,
7560 };
7561
7562 static const struct file_operations tracing_total_entries_fops = {
7563         .open           = tracing_open_generic_tr,
7564         .read           = tracing_total_entries_read,
7565         .llseek         = generic_file_llseek,
7566         .release        = tracing_release_generic_tr,
7567 };
7568
7569 static const struct file_operations tracing_free_buffer_fops = {
7570         .open           = tracing_open_generic_tr,
7571         .write          = tracing_free_buffer_write,
7572         .release        = tracing_free_buffer_release,
7573 };
7574
7575 static const struct file_operations tracing_mark_fops = {
7576         .open           = tracing_open_generic_tr,
7577         .write          = tracing_mark_write,
7578         .llseek         = generic_file_llseek,
7579         .release        = tracing_release_generic_tr,
7580 };
7581
7582 static const struct file_operations tracing_mark_raw_fops = {
7583         .open           = tracing_open_generic_tr,
7584         .write          = tracing_mark_raw_write,
7585         .llseek         = generic_file_llseek,
7586         .release        = tracing_release_generic_tr,
7587 };
7588
7589 static const struct file_operations trace_clock_fops = {
7590         .open           = tracing_clock_open,
7591         .read           = seq_read,
7592         .llseek         = seq_lseek,
7593         .release        = tracing_single_release_tr,
7594         .write          = tracing_clock_write,
7595 };
7596
7597 static const struct file_operations trace_time_stamp_mode_fops = {
7598         .open           = tracing_time_stamp_mode_open,
7599         .read           = seq_read,
7600         .llseek         = seq_lseek,
7601         .release        = tracing_single_release_tr,
7602 };
7603
7604 #ifdef CONFIG_TRACER_SNAPSHOT
7605 static const struct file_operations snapshot_fops = {
7606         .open           = tracing_snapshot_open,
7607         .read           = seq_read,
7608         .write          = tracing_snapshot_write,
7609         .llseek         = tracing_lseek,
7610         .release        = tracing_snapshot_release,
7611 };
7612
7613 static const struct file_operations snapshot_raw_fops = {
7614         .open           = snapshot_raw_open,
7615         .read           = tracing_buffers_read,
7616         .release        = tracing_buffers_release,
7617         .splice_read    = tracing_buffers_splice_read,
7618         .llseek         = no_llseek,
7619 };
7620
7621 #endif /* CONFIG_TRACER_SNAPSHOT */
7622
7623 /*
7624  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7625  * @filp: The active open file structure
7626  * @ubuf: The userspace provided buffer to read value into
7627  * @cnt: The maximum number of bytes to read
7628  * @ppos: The current "file" position
7629  *
7630  * This function implements the write interface for a struct trace_min_max_param.
7631  * The filp->private_data must point to a trace_min_max_param structure that
7632  * defines where to write the value, the min and the max acceptable values,
7633  * and a lock to protect the write.
7634  */
7635 static ssize_t
7636 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7637 {
7638         struct trace_min_max_param *param = filp->private_data;
7639         u64 val;
7640         int err;
7641
7642         if (!param)
7643                 return -EFAULT;
7644
7645         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7646         if (err)
7647                 return err;
7648
7649         if (param->lock)
7650                 mutex_lock(param->lock);
7651
7652         if (param->min && val < *param->min)
7653                 err = -EINVAL;
7654
7655         if (param->max && val > *param->max)
7656                 err = -EINVAL;
7657
7658         if (!err)
7659                 *param->val = val;
7660
7661         if (param->lock)
7662                 mutex_unlock(param->lock);
7663
7664         if (err)
7665                 return err;
7666
7667         return cnt;
7668 }
7669
7670 /*
7671  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7672  * @filp: The active open file structure
7673  * @ubuf: The userspace provided buffer to read value into
7674  * @cnt: The maximum number of bytes to read
7675  * @ppos: The current "file" position
7676  *
7677  * This function implements the read interface for a struct trace_min_max_param.
7678  * The filp->private_data must point to a trace_min_max_param struct with valid
7679  * data.
7680  */
7681 static ssize_t
7682 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7683 {
7684         struct trace_min_max_param *param = filp->private_data;
7685         char buf[U64_STR_SIZE];
7686         int len;
7687         u64 val;
7688
7689         if (!param)
7690                 return -EFAULT;
7691
7692         val = *param->val;
7693
7694         if (cnt > sizeof(buf))
7695                 cnt = sizeof(buf);
7696
7697         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7698
7699         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7700 }
7701
7702 const struct file_operations trace_min_max_fops = {
7703         .open           = tracing_open_generic,
7704         .read           = trace_min_max_read,
7705         .write          = trace_min_max_write,
7706 };
7707
7708 #define TRACING_LOG_ERRS_MAX    8
7709 #define TRACING_LOG_LOC_MAX     128
7710
7711 #define CMD_PREFIX "  Command: "
7712
7713 struct err_info {
7714         const char      **errs; /* ptr to loc-specific array of err strings */
7715         u8              type;   /* index into errs -> specific err string */
7716         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7717         u64             ts;
7718 };
7719
7720 struct tracing_log_err {
7721         struct list_head        list;
7722         struct err_info         info;
7723         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7724         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7725 };
7726
7727 static DEFINE_MUTEX(tracing_err_log_lock);
7728
7729 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7730 {
7731         struct tracing_log_err *err;
7732
7733         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7734                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7735                 if (!err)
7736                         err = ERR_PTR(-ENOMEM);
7737                 tr->n_err_log_entries++;
7738
7739                 return err;
7740         }
7741
7742         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7743         list_del(&err->list);
7744
7745         return err;
7746 }
7747
7748 /**
7749  * err_pos - find the position of a string within a command for error careting
7750  * @cmd: The tracing command that caused the error
7751  * @str: The string to position the caret at within @cmd
7752  *
7753  * Finds the position of the first occurrence of @str within @cmd.  The
7754  * return value can be passed to tracing_log_err() for caret placement
7755  * within @cmd.
7756  *
7757  * Returns the index within @cmd of the first occurrence of @str or 0
7758  * if @str was not found.
7759  */
7760 unsigned int err_pos(char *cmd, const char *str)
7761 {
7762         char *found;
7763
7764         if (WARN_ON(!strlen(cmd)))
7765                 return 0;
7766
7767         found = strstr(cmd, str);
7768         if (found)
7769                 return found - cmd;
7770
7771         return 0;
7772 }
7773
7774 /**
7775  * tracing_log_err - write an error to the tracing error log
7776  * @tr: The associated trace array for the error (NULL for top level array)
7777  * @loc: A string describing where the error occurred
7778  * @cmd: The tracing command that caused the error
7779  * @errs: The array of loc-specific static error strings
7780  * @type: The index into errs[], which produces the specific static err string
7781  * @pos: The position the caret should be placed in the cmd
7782  *
7783  * Writes an error into tracing/error_log of the form:
7784  *
7785  * <loc>: error: <text>
7786  *   Command: <cmd>
7787  *              ^
7788  *
7789  * tracing/error_log is a small log file containing the last
7790  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7791  * unless there has been a tracing error, and the error log can be
7792  * cleared and have its memory freed by writing the empty string in
7793  * truncation mode to it i.e. echo > tracing/error_log.
7794  *
7795  * NOTE: the @errs array along with the @type param are used to
7796  * produce a static error string - this string is not copied and saved
7797  * when the error is logged - only a pointer to it is saved.  See
7798  * existing callers for examples of how static strings are typically
7799  * defined for use with tracing_log_err().
7800  */
7801 void tracing_log_err(struct trace_array *tr,
7802                      const char *loc, const char *cmd,
7803                      const char **errs, u8 type, u8 pos)
7804 {
7805         struct tracing_log_err *err;
7806
7807         if (!tr)
7808                 tr = &global_trace;
7809
7810         mutex_lock(&tracing_err_log_lock);
7811         err = get_tracing_log_err(tr);
7812         if (PTR_ERR(err) == -ENOMEM) {
7813                 mutex_unlock(&tracing_err_log_lock);
7814                 return;
7815         }
7816
7817         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7818         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7819
7820         err->info.errs = errs;
7821         err->info.type = type;
7822         err->info.pos = pos;
7823         err->info.ts = local_clock();
7824
7825         list_add_tail(&err->list, &tr->err_log);
7826         mutex_unlock(&tracing_err_log_lock);
7827 }
7828
7829 static void clear_tracing_err_log(struct trace_array *tr)
7830 {
7831         struct tracing_log_err *err, *next;
7832
7833         mutex_lock(&tracing_err_log_lock);
7834         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7835                 list_del(&err->list);
7836                 kfree(err);
7837         }
7838
7839         tr->n_err_log_entries = 0;
7840         mutex_unlock(&tracing_err_log_lock);
7841 }
7842
7843 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7844 {
7845         struct trace_array *tr = m->private;
7846
7847         mutex_lock(&tracing_err_log_lock);
7848
7849         return seq_list_start(&tr->err_log, *pos);
7850 }
7851
7852 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7853 {
7854         struct trace_array *tr = m->private;
7855
7856         return seq_list_next(v, &tr->err_log, pos);
7857 }
7858
7859 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7860 {
7861         mutex_unlock(&tracing_err_log_lock);
7862 }
7863
7864 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7865 {
7866         u8 i;
7867
7868         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7869                 seq_putc(m, ' ');
7870         for (i = 0; i < pos; i++)
7871                 seq_putc(m, ' ');
7872         seq_puts(m, "^\n");
7873 }
7874
7875 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7876 {
7877         struct tracing_log_err *err = v;
7878
7879         if (err) {
7880                 const char *err_text = err->info.errs[err->info.type];
7881                 u64 sec = err->info.ts;
7882                 u32 nsec;
7883
7884                 nsec = do_div(sec, NSEC_PER_SEC);
7885                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7886                            err->loc, err_text);
7887                 seq_printf(m, "%s", err->cmd);
7888                 tracing_err_log_show_pos(m, err->info.pos);
7889         }
7890
7891         return 0;
7892 }
7893
7894 static const struct seq_operations tracing_err_log_seq_ops = {
7895         .start  = tracing_err_log_seq_start,
7896         .next   = tracing_err_log_seq_next,
7897         .stop   = tracing_err_log_seq_stop,
7898         .show   = tracing_err_log_seq_show
7899 };
7900
7901 static int tracing_err_log_open(struct inode *inode, struct file *file)
7902 {
7903         struct trace_array *tr = inode->i_private;
7904         int ret = 0;
7905
7906         ret = tracing_check_open_get_tr(tr);
7907         if (ret)
7908                 return ret;
7909
7910         /* If this file was opened for write, then erase contents */
7911         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7912                 clear_tracing_err_log(tr);
7913
7914         if (file->f_mode & FMODE_READ) {
7915                 ret = seq_open(file, &tracing_err_log_seq_ops);
7916                 if (!ret) {
7917                         struct seq_file *m = file->private_data;
7918                         m->private = tr;
7919                 } else {
7920                         trace_array_put(tr);
7921                 }
7922         }
7923         return ret;
7924 }
7925
7926 static ssize_t tracing_err_log_write(struct file *file,
7927                                      const char __user *buffer,
7928                                      size_t count, loff_t *ppos)
7929 {
7930         return count;
7931 }
7932
7933 static int tracing_err_log_release(struct inode *inode, struct file *file)
7934 {
7935         struct trace_array *tr = inode->i_private;
7936
7937         trace_array_put(tr);
7938
7939         if (file->f_mode & FMODE_READ)
7940                 seq_release(inode, file);
7941
7942         return 0;
7943 }
7944
7945 static const struct file_operations tracing_err_log_fops = {
7946         .open           = tracing_err_log_open,
7947         .write          = tracing_err_log_write,
7948         .read           = seq_read,
7949         .llseek         = seq_lseek,
7950         .release        = tracing_err_log_release,
7951 };
7952
7953 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7954 {
7955         struct trace_array *tr = inode->i_private;
7956         struct ftrace_buffer_info *info;
7957         int ret;
7958
7959         ret = tracing_check_open_get_tr(tr);
7960         if (ret)
7961                 return ret;
7962
7963         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7964         if (!info) {
7965                 trace_array_put(tr);
7966                 return -ENOMEM;
7967         }
7968
7969         mutex_lock(&trace_types_lock);
7970
7971         info->iter.tr           = tr;
7972         info->iter.cpu_file     = tracing_get_cpu(inode);
7973         info->iter.trace        = tr->current_trace;
7974         info->iter.array_buffer = &tr->array_buffer;
7975         info->spare             = NULL;
7976         /* Force reading ring buffer for first read */
7977         info->read              = (unsigned int)-1;
7978
7979         filp->private_data = info;
7980
7981         tr->trace_ref++;
7982
7983         mutex_unlock(&trace_types_lock);
7984
7985         ret = nonseekable_open(inode, filp);
7986         if (ret < 0)
7987                 trace_array_put(tr);
7988
7989         return ret;
7990 }
7991
7992 static __poll_t
7993 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7994 {
7995         struct ftrace_buffer_info *info = filp->private_data;
7996         struct trace_iterator *iter = &info->iter;
7997
7998         return trace_poll(iter, filp, poll_table);
7999 }
8000
8001 static ssize_t
8002 tracing_buffers_read(struct file *filp, char __user *ubuf,
8003                      size_t count, loff_t *ppos)
8004 {
8005         struct ftrace_buffer_info *info = filp->private_data;
8006         struct trace_iterator *iter = &info->iter;
8007         ssize_t ret = 0;
8008         ssize_t size;
8009
8010         if (!count)
8011                 return 0;
8012
8013 #ifdef CONFIG_TRACER_MAX_TRACE
8014         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8015                 return -EBUSY;
8016 #endif
8017
8018         if (!info->spare) {
8019                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8020                                                           iter->cpu_file);
8021                 if (IS_ERR(info->spare)) {
8022                         ret = PTR_ERR(info->spare);
8023                         info->spare = NULL;
8024                 } else {
8025                         info->spare_cpu = iter->cpu_file;
8026                 }
8027         }
8028         if (!info->spare)
8029                 return ret;
8030
8031         /* Do we have previous read data to read? */
8032         if (info->read < PAGE_SIZE)
8033                 goto read;
8034
8035  again:
8036         trace_access_lock(iter->cpu_file);
8037         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8038                                     &info->spare,
8039                                     count,
8040                                     iter->cpu_file, 0);
8041         trace_access_unlock(iter->cpu_file);
8042
8043         if (ret < 0) {
8044                 if (trace_empty(iter)) {
8045                         if ((filp->f_flags & O_NONBLOCK))
8046                                 return -EAGAIN;
8047
8048                         ret = wait_on_pipe(iter, 0);
8049                         if (ret)
8050                                 return ret;
8051
8052                         goto again;
8053                 }
8054                 return 0;
8055         }
8056
8057         info->read = 0;
8058  read:
8059         size = PAGE_SIZE - info->read;
8060         if (size > count)
8061                 size = count;
8062
8063         ret = copy_to_user(ubuf, info->spare + info->read, size);
8064         if (ret == size)
8065                 return -EFAULT;
8066
8067         size -= ret;
8068
8069         *ppos += size;
8070         info->read += size;
8071
8072         return size;
8073 }
8074
8075 static int tracing_buffers_release(struct inode *inode, struct file *file)
8076 {
8077         struct ftrace_buffer_info *info = file->private_data;
8078         struct trace_iterator *iter = &info->iter;
8079
8080         mutex_lock(&trace_types_lock);
8081
8082         iter->tr->trace_ref--;
8083
8084         __trace_array_put(iter->tr);
8085
8086         if (info->spare)
8087                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8088                                            info->spare_cpu, info->spare);
8089         kvfree(info);
8090
8091         mutex_unlock(&trace_types_lock);
8092
8093         return 0;
8094 }
8095
8096 struct buffer_ref {
8097         struct trace_buffer     *buffer;
8098         void                    *page;
8099         int                     cpu;
8100         refcount_t              refcount;
8101 };
8102
8103 static void buffer_ref_release(struct buffer_ref *ref)
8104 {
8105         if (!refcount_dec_and_test(&ref->refcount))
8106                 return;
8107         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8108         kfree(ref);
8109 }
8110
8111 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8112                                     struct pipe_buffer *buf)
8113 {
8114         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8115
8116         buffer_ref_release(ref);
8117         buf->private = 0;
8118 }
8119
8120 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8121                                 struct pipe_buffer *buf)
8122 {
8123         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8124
8125         if (refcount_read(&ref->refcount) > INT_MAX/2)
8126                 return false;
8127
8128         refcount_inc(&ref->refcount);
8129         return true;
8130 }
8131
8132 /* Pipe buffer operations for a buffer. */
8133 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8134         .release                = buffer_pipe_buf_release,
8135         .get                    = buffer_pipe_buf_get,
8136 };
8137
8138 /*
8139  * Callback from splice_to_pipe(), if we need to release some pages
8140  * at the end of the spd in case we error'ed out in filling the pipe.
8141  */
8142 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8143 {
8144         struct buffer_ref *ref =
8145                 (struct buffer_ref *)spd->partial[i].private;
8146
8147         buffer_ref_release(ref);
8148         spd->partial[i].private = 0;
8149 }
8150
8151 static ssize_t
8152 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8153                             struct pipe_inode_info *pipe, size_t len,
8154                             unsigned int flags)
8155 {
8156         struct ftrace_buffer_info *info = file->private_data;
8157         struct trace_iterator *iter = &info->iter;
8158         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8159         struct page *pages_def[PIPE_DEF_BUFFERS];
8160         struct splice_pipe_desc spd = {
8161                 .pages          = pages_def,
8162                 .partial        = partial_def,
8163                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8164                 .ops            = &buffer_pipe_buf_ops,
8165                 .spd_release    = buffer_spd_release,
8166         };
8167         struct buffer_ref *ref;
8168         int entries, i;
8169         ssize_t ret = 0;
8170
8171 #ifdef CONFIG_TRACER_MAX_TRACE
8172         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8173                 return -EBUSY;
8174 #endif
8175
8176         if (*ppos & (PAGE_SIZE - 1))
8177                 return -EINVAL;
8178
8179         if (len & (PAGE_SIZE - 1)) {
8180                 if (len < PAGE_SIZE)
8181                         return -EINVAL;
8182                 len &= PAGE_MASK;
8183         }
8184
8185         if (splice_grow_spd(pipe, &spd))
8186                 return -ENOMEM;
8187
8188  again:
8189         trace_access_lock(iter->cpu_file);
8190         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8191
8192         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8193                 struct page *page;
8194                 int r;
8195
8196                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8197                 if (!ref) {
8198                         ret = -ENOMEM;
8199                         break;
8200                 }
8201
8202                 refcount_set(&ref->refcount, 1);
8203                 ref->buffer = iter->array_buffer->buffer;
8204                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8205                 if (IS_ERR(ref->page)) {
8206                         ret = PTR_ERR(ref->page);
8207                         ref->page = NULL;
8208                         kfree(ref);
8209                         break;
8210                 }
8211                 ref->cpu = iter->cpu_file;
8212
8213                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8214                                           len, iter->cpu_file, 1);
8215                 if (r < 0) {
8216                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8217                                                    ref->page);
8218                         kfree(ref);
8219                         break;
8220                 }
8221
8222                 page = virt_to_page(ref->page);
8223
8224                 spd.pages[i] = page;
8225                 spd.partial[i].len = PAGE_SIZE;
8226                 spd.partial[i].offset = 0;
8227                 spd.partial[i].private = (unsigned long)ref;
8228                 spd.nr_pages++;
8229                 *ppos += PAGE_SIZE;
8230
8231                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8232         }
8233
8234         trace_access_unlock(iter->cpu_file);
8235         spd.nr_pages = i;
8236
8237         /* did we read anything? */
8238         if (!spd.nr_pages) {
8239                 if (ret)
8240                         goto out;
8241
8242                 ret = -EAGAIN;
8243                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8244                         goto out;
8245
8246                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8247                 if (ret)
8248                         goto out;
8249
8250                 goto again;
8251         }
8252
8253         ret = splice_to_pipe(pipe, &spd);
8254 out:
8255         splice_shrink_spd(&spd);
8256
8257         return ret;
8258 }
8259
8260 static const struct file_operations tracing_buffers_fops = {
8261         .open           = tracing_buffers_open,
8262         .read           = tracing_buffers_read,
8263         .poll           = tracing_buffers_poll,
8264         .release        = tracing_buffers_release,
8265         .splice_read    = tracing_buffers_splice_read,
8266         .llseek         = no_llseek,
8267 };
8268
8269 static ssize_t
8270 tracing_stats_read(struct file *filp, char __user *ubuf,
8271                    size_t count, loff_t *ppos)
8272 {
8273         struct inode *inode = file_inode(filp);
8274         struct trace_array *tr = inode->i_private;
8275         struct array_buffer *trace_buf = &tr->array_buffer;
8276         int cpu = tracing_get_cpu(inode);
8277         struct trace_seq *s;
8278         unsigned long cnt;
8279         unsigned long long t;
8280         unsigned long usec_rem;
8281
8282         s = kmalloc(sizeof(*s), GFP_KERNEL);
8283         if (!s)
8284                 return -ENOMEM;
8285
8286         trace_seq_init(s);
8287
8288         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8289         trace_seq_printf(s, "entries: %ld\n", cnt);
8290
8291         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8292         trace_seq_printf(s, "overrun: %ld\n", cnt);
8293
8294         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8295         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8296
8297         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8298         trace_seq_printf(s, "bytes: %ld\n", cnt);
8299
8300         if (trace_clocks[tr->clock_id].in_ns) {
8301                 /* local or global for trace_clock */
8302                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8303                 usec_rem = do_div(t, USEC_PER_SEC);
8304                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8305                                                                 t, usec_rem);
8306
8307                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8308                 usec_rem = do_div(t, USEC_PER_SEC);
8309                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8310         } else {
8311                 /* counter or tsc mode for trace_clock */
8312                 trace_seq_printf(s, "oldest event ts: %llu\n",
8313                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8314
8315                 trace_seq_printf(s, "now ts: %llu\n",
8316                                 ring_buffer_time_stamp(trace_buf->buffer));
8317         }
8318
8319         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8320         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8321
8322         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8323         trace_seq_printf(s, "read events: %ld\n", cnt);
8324
8325         count = simple_read_from_buffer(ubuf, count, ppos,
8326                                         s->buffer, trace_seq_used(s));
8327
8328         kfree(s);
8329
8330         return count;
8331 }
8332
8333 static const struct file_operations tracing_stats_fops = {
8334         .open           = tracing_open_generic_tr,
8335         .read           = tracing_stats_read,
8336         .llseek         = generic_file_llseek,
8337         .release        = tracing_release_generic_tr,
8338 };
8339
8340 #ifdef CONFIG_DYNAMIC_FTRACE
8341
8342 static ssize_t
8343 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8344                   size_t cnt, loff_t *ppos)
8345 {
8346         ssize_t ret;
8347         char *buf;
8348         int r;
8349
8350         /* 256 should be plenty to hold the amount needed */
8351         buf = kmalloc(256, GFP_KERNEL);
8352         if (!buf)
8353                 return -ENOMEM;
8354
8355         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8356                       ftrace_update_tot_cnt,
8357                       ftrace_number_of_pages,
8358                       ftrace_number_of_groups);
8359
8360         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8361         kfree(buf);
8362         return ret;
8363 }
8364
8365 static const struct file_operations tracing_dyn_info_fops = {
8366         .open           = tracing_open_generic,
8367         .read           = tracing_read_dyn_info,
8368         .llseek         = generic_file_llseek,
8369 };
8370 #endif /* CONFIG_DYNAMIC_FTRACE */
8371
8372 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8373 static void
8374 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8375                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8376                 void *data)
8377 {
8378         tracing_snapshot_instance(tr);
8379 }
8380
8381 static void
8382 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8383                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8384                       void *data)
8385 {
8386         struct ftrace_func_mapper *mapper = data;
8387         long *count = NULL;
8388
8389         if (mapper)
8390                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8391
8392         if (count) {
8393
8394                 if (*count <= 0)
8395                         return;
8396
8397                 (*count)--;
8398         }
8399
8400         tracing_snapshot_instance(tr);
8401 }
8402
8403 static int
8404 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8405                       struct ftrace_probe_ops *ops, void *data)
8406 {
8407         struct ftrace_func_mapper *mapper = data;
8408         long *count = NULL;
8409
8410         seq_printf(m, "%ps:", (void *)ip);
8411
8412         seq_puts(m, "snapshot");
8413
8414         if (mapper)
8415                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8416
8417         if (count)
8418                 seq_printf(m, ":count=%ld\n", *count);
8419         else
8420                 seq_puts(m, ":unlimited\n");
8421
8422         return 0;
8423 }
8424
8425 static int
8426 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8427                      unsigned long ip, void *init_data, void **data)
8428 {
8429         struct ftrace_func_mapper *mapper = *data;
8430
8431         if (!mapper) {
8432                 mapper = allocate_ftrace_func_mapper();
8433                 if (!mapper)
8434                         return -ENOMEM;
8435                 *data = mapper;
8436         }
8437
8438         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8439 }
8440
8441 static void
8442 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8443                      unsigned long ip, void *data)
8444 {
8445         struct ftrace_func_mapper *mapper = data;
8446
8447         if (!ip) {
8448                 if (!mapper)
8449                         return;
8450                 free_ftrace_func_mapper(mapper, NULL);
8451                 return;
8452         }
8453
8454         ftrace_func_mapper_remove_ip(mapper, ip);
8455 }
8456
8457 static struct ftrace_probe_ops snapshot_probe_ops = {
8458         .func                   = ftrace_snapshot,
8459         .print                  = ftrace_snapshot_print,
8460 };
8461
8462 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8463         .func                   = ftrace_count_snapshot,
8464         .print                  = ftrace_snapshot_print,
8465         .init                   = ftrace_snapshot_init,
8466         .free                   = ftrace_snapshot_free,
8467 };
8468
8469 static int
8470 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8471                                char *glob, char *cmd, char *param, int enable)
8472 {
8473         struct ftrace_probe_ops *ops;
8474         void *count = (void *)-1;
8475         char *number;
8476         int ret;
8477
8478         if (!tr)
8479                 return -ENODEV;
8480
8481         /* hash funcs only work with set_ftrace_filter */
8482         if (!enable)
8483                 return -EINVAL;
8484
8485         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8486
8487         if (glob[0] == '!')
8488                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8489
8490         if (!param)
8491                 goto out_reg;
8492
8493         number = strsep(&param, ":");
8494
8495         if (!strlen(number))
8496                 goto out_reg;
8497
8498         /*
8499          * We use the callback data field (which is a pointer)
8500          * as our counter.
8501          */
8502         ret = kstrtoul(number, 0, (unsigned long *)&count);
8503         if (ret)
8504                 return ret;
8505
8506  out_reg:
8507         ret = tracing_alloc_snapshot_instance(tr);
8508         if (ret < 0)
8509                 goto out;
8510
8511         ret = register_ftrace_function_probe(glob, tr, ops, count);
8512
8513  out:
8514         return ret < 0 ? ret : 0;
8515 }
8516
8517 static struct ftrace_func_command ftrace_snapshot_cmd = {
8518         .name                   = "snapshot",
8519         .func                   = ftrace_trace_snapshot_callback,
8520 };
8521
8522 static __init int register_snapshot_cmd(void)
8523 {
8524         return register_ftrace_command(&ftrace_snapshot_cmd);
8525 }
8526 #else
8527 static inline __init int register_snapshot_cmd(void) { return 0; }
8528 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8529
8530 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8531 {
8532         if (WARN_ON(!tr->dir))
8533                 return ERR_PTR(-ENODEV);
8534
8535         /* Top directory uses NULL as the parent */
8536         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8537                 return NULL;
8538
8539         /* All sub buffers have a descriptor */
8540         return tr->dir;
8541 }
8542
8543 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8544 {
8545         struct dentry *d_tracer;
8546
8547         if (tr->percpu_dir)
8548                 return tr->percpu_dir;
8549
8550         d_tracer = tracing_get_dentry(tr);
8551         if (IS_ERR(d_tracer))
8552                 return NULL;
8553
8554         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8555
8556         MEM_FAIL(!tr->percpu_dir,
8557                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8558
8559         return tr->percpu_dir;
8560 }
8561
8562 static struct dentry *
8563 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8564                       void *data, long cpu, const struct file_operations *fops)
8565 {
8566         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8567
8568         if (ret) /* See tracing_get_cpu() */
8569                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8570         return ret;
8571 }
8572
8573 static void
8574 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8575 {
8576         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8577         struct dentry *d_cpu;
8578         char cpu_dir[30]; /* 30 characters should be more than enough */
8579
8580         if (!d_percpu)
8581                 return;
8582
8583         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8584         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8585         if (!d_cpu) {
8586                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8587                 return;
8588         }
8589
8590         /* per cpu trace_pipe */
8591         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8592                                 tr, cpu, &tracing_pipe_fops);
8593
8594         /* per cpu trace */
8595         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8596                                 tr, cpu, &tracing_fops);
8597
8598         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8599                                 tr, cpu, &tracing_buffers_fops);
8600
8601         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8602                                 tr, cpu, &tracing_stats_fops);
8603
8604         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8605                                 tr, cpu, &tracing_entries_fops);
8606
8607 #ifdef CONFIG_TRACER_SNAPSHOT
8608         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8609                                 tr, cpu, &snapshot_fops);
8610
8611         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8612                                 tr, cpu, &snapshot_raw_fops);
8613 #endif
8614 }
8615
8616 #ifdef CONFIG_FTRACE_SELFTEST
8617 /* Let selftest have access to static functions in this file */
8618 #include "trace_selftest.c"
8619 #endif
8620
8621 static ssize_t
8622 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8623                         loff_t *ppos)
8624 {
8625         struct trace_option_dentry *topt = filp->private_data;
8626         char *buf;
8627
8628         if (topt->flags->val & topt->opt->bit)
8629                 buf = "1\n";
8630         else
8631                 buf = "0\n";
8632
8633         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8634 }
8635
8636 static ssize_t
8637 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8638                          loff_t *ppos)
8639 {
8640         struct trace_option_dentry *topt = filp->private_data;
8641         unsigned long val;
8642         int ret;
8643
8644         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8645         if (ret)
8646                 return ret;
8647
8648         if (val != 0 && val != 1)
8649                 return -EINVAL;
8650
8651         if (!!(topt->flags->val & topt->opt->bit) != val) {
8652                 mutex_lock(&trace_types_lock);
8653                 ret = __set_tracer_option(topt->tr, topt->flags,
8654                                           topt->opt, !val);
8655                 mutex_unlock(&trace_types_lock);
8656                 if (ret)
8657                         return ret;
8658         }
8659
8660         *ppos += cnt;
8661
8662         return cnt;
8663 }
8664
8665
8666 static const struct file_operations trace_options_fops = {
8667         .open = tracing_open_generic,
8668         .read = trace_options_read,
8669         .write = trace_options_write,
8670         .llseek = generic_file_llseek,
8671 };
8672
8673 /*
8674  * In order to pass in both the trace_array descriptor as well as the index
8675  * to the flag that the trace option file represents, the trace_array
8676  * has a character array of trace_flags_index[], which holds the index
8677  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8678  * The address of this character array is passed to the flag option file
8679  * read/write callbacks.
8680  *
8681  * In order to extract both the index and the trace_array descriptor,
8682  * get_tr_index() uses the following algorithm.
8683  *
8684  *   idx = *ptr;
8685  *
8686  * As the pointer itself contains the address of the index (remember
8687  * index[1] == 1).
8688  *
8689  * Then to get the trace_array descriptor, by subtracting that index
8690  * from the ptr, we get to the start of the index itself.
8691  *
8692  *   ptr - idx == &index[0]
8693  *
8694  * Then a simple container_of() from that pointer gets us to the
8695  * trace_array descriptor.
8696  */
8697 static void get_tr_index(void *data, struct trace_array **ptr,
8698                          unsigned int *pindex)
8699 {
8700         *pindex = *(unsigned char *)data;
8701
8702         *ptr = container_of(data - *pindex, struct trace_array,
8703                             trace_flags_index);
8704 }
8705
8706 static ssize_t
8707 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8708                         loff_t *ppos)
8709 {
8710         void *tr_index = filp->private_data;
8711         struct trace_array *tr;
8712         unsigned int index;
8713         char *buf;
8714
8715         get_tr_index(tr_index, &tr, &index);
8716
8717         if (tr->trace_flags & (1 << index))
8718                 buf = "1\n";
8719         else
8720                 buf = "0\n";
8721
8722         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8723 }
8724
8725 static ssize_t
8726 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8727                          loff_t *ppos)
8728 {
8729         void *tr_index = filp->private_data;
8730         struct trace_array *tr;
8731         unsigned int index;
8732         unsigned long val;
8733         int ret;
8734
8735         get_tr_index(tr_index, &tr, &index);
8736
8737         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8738         if (ret)
8739                 return ret;
8740
8741         if (val != 0 && val != 1)
8742                 return -EINVAL;
8743
8744         mutex_lock(&event_mutex);
8745         mutex_lock(&trace_types_lock);
8746         ret = set_tracer_flag(tr, 1 << index, val);
8747         mutex_unlock(&trace_types_lock);
8748         mutex_unlock(&event_mutex);
8749
8750         if (ret < 0)
8751                 return ret;
8752
8753         *ppos += cnt;
8754
8755         return cnt;
8756 }
8757
8758 static const struct file_operations trace_options_core_fops = {
8759         .open = tracing_open_generic,
8760         .read = trace_options_core_read,
8761         .write = trace_options_core_write,
8762         .llseek = generic_file_llseek,
8763 };
8764
8765 struct dentry *trace_create_file(const char *name,
8766                                  umode_t mode,
8767                                  struct dentry *parent,
8768                                  void *data,
8769                                  const struct file_operations *fops)
8770 {
8771         struct dentry *ret;
8772
8773         ret = tracefs_create_file(name, mode, parent, data, fops);
8774         if (!ret)
8775                 pr_warn("Could not create tracefs '%s' entry\n", name);
8776
8777         return ret;
8778 }
8779
8780
8781 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8782 {
8783         struct dentry *d_tracer;
8784
8785         if (tr->options)
8786                 return tr->options;
8787
8788         d_tracer = tracing_get_dentry(tr);
8789         if (IS_ERR(d_tracer))
8790                 return NULL;
8791
8792         tr->options = tracefs_create_dir("options", d_tracer);
8793         if (!tr->options) {
8794                 pr_warn("Could not create tracefs directory 'options'\n");
8795                 return NULL;
8796         }
8797
8798         return tr->options;
8799 }
8800
8801 static void
8802 create_trace_option_file(struct trace_array *tr,
8803                          struct trace_option_dentry *topt,
8804                          struct tracer_flags *flags,
8805                          struct tracer_opt *opt)
8806 {
8807         struct dentry *t_options;
8808
8809         t_options = trace_options_init_dentry(tr);
8810         if (!t_options)
8811                 return;
8812
8813         topt->flags = flags;
8814         topt->opt = opt;
8815         topt->tr = tr;
8816
8817         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8818                                         t_options, topt, &trace_options_fops);
8819
8820 }
8821
8822 static void
8823 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8824 {
8825         struct trace_option_dentry *topts;
8826         struct trace_options *tr_topts;
8827         struct tracer_flags *flags;
8828         struct tracer_opt *opts;
8829         int cnt;
8830         int i;
8831
8832         if (!tracer)
8833                 return;
8834
8835         flags = tracer->flags;
8836
8837         if (!flags || !flags->opts)
8838                 return;
8839
8840         /*
8841          * If this is an instance, only create flags for tracers
8842          * the instance may have.
8843          */
8844         if (!trace_ok_for_array(tracer, tr))
8845                 return;
8846
8847         for (i = 0; i < tr->nr_topts; i++) {
8848                 /* Make sure there's no duplicate flags. */
8849                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8850                         return;
8851         }
8852
8853         opts = flags->opts;
8854
8855         for (cnt = 0; opts[cnt].name; cnt++)
8856                 ;
8857
8858         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8859         if (!topts)
8860                 return;
8861
8862         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8863                             GFP_KERNEL);
8864         if (!tr_topts) {
8865                 kfree(topts);
8866                 return;
8867         }
8868
8869         tr->topts = tr_topts;
8870         tr->topts[tr->nr_topts].tracer = tracer;
8871         tr->topts[tr->nr_topts].topts = topts;
8872         tr->nr_topts++;
8873
8874         for (cnt = 0; opts[cnt].name; cnt++) {
8875                 create_trace_option_file(tr, &topts[cnt], flags,
8876                                          &opts[cnt]);
8877                 MEM_FAIL(topts[cnt].entry == NULL,
8878                           "Failed to create trace option: %s",
8879                           opts[cnt].name);
8880         }
8881 }
8882
8883 static struct dentry *
8884 create_trace_option_core_file(struct trace_array *tr,
8885                               const char *option, long index)
8886 {
8887         struct dentry *t_options;
8888
8889         t_options = trace_options_init_dentry(tr);
8890         if (!t_options)
8891                 return NULL;
8892
8893         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8894                                  (void *)&tr->trace_flags_index[index],
8895                                  &trace_options_core_fops);
8896 }
8897
8898 static void create_trace_options_dir(struct trace_array *tr)
8899 {
8900         struct dentry *t_options;
8901         bool top_level = tr == &global_trace;
8902         int i;
8903
8904         t_options = trace_options_init_dentry(tr);
8905         if (!t_options)
8906                 return;
8907
8908         for (i = 0; trace_options[i]; i++) {
8909                 if (top_level ||
8910                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8911                         create_trace_option_core_file(tr, trace_options[i], i);
8912         }
8913 }
8914
8915 static ssize_t
8916 rb_simple_read(struct file *filp, char __user *ubuf,
8917                size_t cnt, loff_t *ppos)
8918 {
8919         struct trace_array *tr = filp->private_data;
8920         char buf[64];
8921         int r;
8922
8923         r = tracer_tracing_is_on(tr);
8924         r = sprintf(buf, "%d\n", r);
8925
8926         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8927 }
8928
8929 static ssize_t
8930 rb_simple_write(struct file *filp, const char __user *ubuf,
8931                 size_t cnt, loff_t *ppos)
8932 {
8933         struct trace_array *tr = filp->private_data;
8934         struct trace_buffer *buffer = tr->array_buffer.buffer;
8935         unsigned long val;
8936         int ret;
8937
8938         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8939         if (ret)
8940                 return ret;
8941
8942         if (buffer) {
8943                 mutex_lock(&trace_types_lock);
8944                 if (!!val == tracer_tracing_is_on(tr)) {
8945                         val = 0; /* do nothing */
8946                 } else if (val) {
8947                         tracer_tracing_on(tr);
8948                         if (tr->current_trace->start)
8949                                 tr->current_trace->start(tr);
8950                 } else {
8951                         tracer_tracing_off(tr);
8952                         if (tr->current_trace->stop)
8953                                 tr->current_trace->stop(tr);
8954                 }
8955                 mutex_unlock(&trace_types_lock);
8956         }
8957
8958         (*ppos)++;
8959
8960         return cnt;
8961 }
8962
8963 static const struct file_operations rb_simple_fops = {
8964         .open           = tracing_open_generic_tr,
8965         .read           = rb_simple_read,
8966         .write          = rb_simple_write,
8967         .release        = tracing_release_generic_tr,
8968         .llseek         = default_llseek,
8969 };
8970
8971 static ssize_t
8972 buffer_percent_read(struct file *filp, char __user *ubuf,
8973                     size_t cnt, loff_t *ppos)
8974 {
8975         struct trace_array *tr = filp->private_data;
8976         char buf[64];
8977         int r;
8978
8979         r = tr->buffer_percent;
8980         r = sprintf(buf, "%d\n", r);
8981
8982         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8983 }
8984
8985 static ssize_t
8986 buffer_percent_write(struct file *filp, const char __user *ubuf,
8987                      size_t cnt, loff_t *ppos)
8988 {
8989         struct trace_array *tr = filp->private_data;
8990         unsigned long val;
8991         int ret;
8992
8993         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8994         if (ret)
8995                 return ret;
8996
8997         if (val > 100)
8998                 return -EINVAL;
8999
9000         if (!val)
9001                 val = 1;
9002
9003         tr->buffer_percent = val;
9004
9005         (*ppos)++;
9006
9007         return cnt;
9008 }
9009
9010 static const struct file_operations buffer_percent_fops = {
9011         .open           = tracing_open_generic_tr,
9012         .read           = buffer_percent_read,
9013         .write          = buffer_percent_write,
9014         .release        = tracing_release_generic_tr,
9015         .llseek         = default_llseek,
9016 };
9017
9018 static struct dentry *trace_instance_dir;
9019
9020 static void
9021 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9022
9023 static int
9024 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9025 {
9026         enum ring_buffer_flags rb_flags;
9027
9028         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9029
9030         buf->tr = tr;
9031
9032         buf->buffer = ring_buffer_alloc(size, rb_flags);
9033         if (!buf->buffer)
9034                 return -ENOMEM;
9035
9036         buf->data = alloc_percpu(struct trace_array_cpu);
9037         if (!buf->data) {
9038                 ring_buffer_free(buf->buffer);
9039                 buf->buffer = NULL;
9040                 return -ENOMEM;
9041         }
9042
9043         /* Allocate the first page for all buffers */
9044         set_buffer_entries(&tr->array_buffer,
9045                            ring_buffer_size(tr->array_buffer.buffer, 0));
9046
9047         return 0;
9048 }
9049
9050 static int allocate_trace_buffers(struct trace_array *tr, int size)
9051 {
9052         int ret;
9053
9054         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9055         if (ret)
9056                 return ret;
9057
9058 #ifdef CONFIG_TRACER_MAX_TRACE
9059         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9060                                     allocate_snapshot ? size : 1);
9061         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9062                 ring_buffer_free(tr->array_buffer.buffer);
9063                 tr->array_buffer.buffer = NULL;
9064                 free_percpu(tr->array_buffer.data);
9065                 tr->array_buffer.data = NULL;
9066                 return -ENOMEM;
9067         }
9068         tr->allocated_snapshot = allocate_snapshot;
9069
9070         /*
9071          * Only the top level trace array gets its snapshot allocated
9072          * from the kernel command line.
9073          */
9074         allocate_snapshot = false;
9075 #endif
9076
9077         return 0;
9078 }
9079
9080 static void free_trace_buffer(struct array_buffer *buf)
9081 {
9082         if (buf->buffer) {
9083                 ring_buffer_free(buf->buffer);
9084                 buf->buffer = NULL;
9085                 free_percpu(buf->data);
9086                 buf->data = NULL;
9087         }
9088 }
9089
9090 static void free_trace_buffers(struct trace_array *tr)
9091 {
9092         if (!tr)
9093                 return;
9094
9095         free_trace_buffer(&tr->array_buffer);
9096
9097 #ifdef CONFIG_TRACER_MAX_TRACE
9098         free_trace_buffer(&tr->max_buffer);
9099 #endif
9100 }
9101
9102 static void init_trace_flags_index(struct trace_array *tr)
9103 {
9104         int i;
9105
9106         /* Used by the trace options files */
9107         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9108                 tr->trace_flags_index[i] = i;
9109 }
9110
9111 static void __update_tracer_options(struct trace_array *tr)
9112 {
9113         struct tracer *t;
9114
9115         for (t = trace_types; t; t = t->next)
9116                 add_tracer_options(tr, t);
9117 }
9118
9119 static void update_tracer_options(struct trace_array *tr)
9120 {
9121         mutex_lock(&trace_types_lock);
9122         __update_tracer_options(tr);
9123         mutex_unlock(&trace_types_lock);
9124 }
9125
9126 /* Must have trace_types_lock held */
9127 struct trace_array *trace_array_find(const char *instance)
9128 {
9129         struct trace_array *tr, *found = NULL;
9130
9131         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9132                 if (tr->name && strcmp(tr->name, instance) == 0) {
9133                         found = tr;
9134                         break;
9135                 }
9136         }
9137
9138         return found;
9139 }
9140
9141 struct trace_array *trace_array_find_get(const char *instance)
9142 {
9143         struct trace_array *tr;
9144
9145         mutex_lock(&trace_types_lock);
9146         tr = trace_array_find(instance);
9147         if (tr)
9148                 tr->ref++;
9149         mutex_unlock(&trace_types_lock);
9150
9151         return tr;
9152 }
9153
9154 static int trace_array_create_dir(struct trace_array *tr)
9155 {
9156         int ret;
9157
9158         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9159         if (!tr->dir)
9160                 return -EINVAL;
9161
9162         ret = event_trace_add_tracer(tr->dir, tr);
9163         if (ret) {
9164                 tracefs_remove(tr->dir);
9165                 return ret;
9166         }
9167
9168         init_tracer_tracefs(tr, tr->dir);
9169         __update_tracer_options(tr);
9170
9171         return ret;
9172 }
9173
9174 static struct trace_array *trace_array_create(const char *name)
9175 {
9176         struct trace_array *tr;
9177         int ret;
9178
9179         ret = -ENOMEM;
9180         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9181         if (!tr)
9182                 return ERR_PTR(ret);
9183
9184         tr->name = kstrdup(name, GFP_KERNEL);
9185         if (!tr->name)
9186                 goto out_free_tr;
9187
9188         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9189                 goto out_free_tr;
9190
9191         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9192
9193         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9194
9195         raw_spin_lock_init(&tr->start_lock);
9196
9197         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9198
9199         tr->current_trace = &nop_trace;
9200
9201         INIT_LIST_HEAD(&tr->systems);
9202         INIT_LIST_HEAD(&tr->events);
9203         INIT_LIST_HEAD(&tr->hist_vars);
9204         INIT_LIST_HEAD(&tr->err_log);
9205
9206         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9207                 goto out_free_tr;
9208
9209         if (ftrace_allocate_ftrace_ops(tr) < 0)
9210                 goto out_free_tr;
9211
9212         ftrace_init_trace_array(tr);
9213
9214         init_trace_flags_index(tr);
9215
9216         if (trace_instance_dir) {
9217                 ret = trace_array_create_dir(tr);
9218                 if (ret)
9219                         goto out_free_tr;
9220         } else
9221                 __trace_early_add_events(tr);
9222
9223         list_add(&tr->list, &ftrace_trace_arrays);
9224
9225         tr->ref++;
9226
9227         return tr;
9228
9229  out_free_tr:
9230         ftrace_free_ftrace_ops(tr);
9231         free_trace_buffers(tr);
9232         free_cpumask_var(tr->tracing_cpumask);
9233         kfree(tr->name);
9234         kfree(tr);
9235
9236         return ERR_PTR(ret);
9237 }
9238
9239 static int instance_mkdir(const char *name)
9240 {
9241         struct trace_array *tr;
9242         int ret;
9243
9244         mutex_lock(&event_mutex);
9245         mutex_lock(&trace_types_lock);
9246
9247         ret = -EEXIST;
9248         if (trace_array_find(name))
9249                 goto out_unlock;
9250
9251         tr = trace_array_create(name);
9252
9253         ret = PTR_ERR_OR_ZERO(tr);
9254
9255 out_unlock:
9256         mutex_unlock(&trace_types_lock);
9257         mutex_unlock(&event_mutex);
9258         return ret;
9259 }
9260
9261 /**
9262  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9263  * @name: The name of the trace array to be looked up/created.
9264  *
9265  * Returns pointer to trace array with given name.
9266  * NULL, if it cannot be created.
9267  *
9268  * NOTE: This function increments the reference counter associated with the
9269  * trace array returned. This makes sure it cannot be freed while in use.
9270  * Use trace_array_put() once the trace array is no longer needed.
9271  * If the trace_array is to be freed, trace_array_destroy() needs to
9272  * be called after the trace_array_put(), or simply let user space delete
9273  * it from the tracefs instances directory. But until the
9274  * trace_array_put() is called, user space can not delete it.
9275  *
9276  */
9277 struct trace_array *trace_array_get_by_name(const char *name)
9278 {
9279         struct trace_array *tr;
9280
9281         mutex_lock(&event_mutex);
9282         mutex_lock(&trace_types_lock);
9283
9284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9285                 if (tr->name && strcmp(tr->name, name) == 0)
9286                         goto out_unlock;
9287         }
9288
9289         tr = trace_array_create(name);
9290
9291         if (IS_ERR(tr))
9292                 tr = NULL;
9293 out_unlock:
9294         if (tr)
9295                 tr->ref++;
9296
9297         mutex_unlock(&trace_types_lock);
9298         mutex_unlock(&event_mutex);
9299         return tr;
9300 }
9301 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9302
9303 static int __remove_instance(struct trace_array *tr)
9304 {
9305         int i;
9306
9307         /* Reference counter for a newly created trace array = 1. */
9308         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9309                 return -EBUSY;
9310
9311         list_del(&tr->list);
9312
9313         /* Disable all the flags that were enabled coming in */
9314         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9315                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9316                         set_tracer_flag(tr, 1 << i, 0);
9317         }
9318
9319         tracing_set_nop(tr);
9320         clear_ftrace_function_probes(tr);
9321         event_trace_del_tracer(tr);
9322         ftrace_clear_pids(tr);
9323         ftrace_destroy_function_files(tr);
9324         tracefs_remove(tr->dir);
9325         free_percpu(tr->last_func_repeats);
9326         free_trace_buffers(tr);
9327
9328         for (i = 0; i < tr->nr_topts; i++) {
9329                 kfree(tr->topts[i].topts);
9330         }
9331         kfree(tr->topts);
9332
9333         free_cpumask_var(tr->tracing_cpumask);
9334         kfree(tr->name);
9335         kfree(tr);
9336
9337         return 0;
9338 }
9339
9340 int trace_array_destroy(struct trace_array *this_tr)
9341 {
9342         struct trace_array *tr;
9343         int ret;
9344
9345         if (!this_tr)
9346                 return -EINVAL;
9347
9348         mutex_lock(&event_mutex);
9349         mutex_lock(&trace_types_lock);
9350
9351         ret = -ENODEV;
9352
9353         /* Making sure trace array exists before destroying it. */
9354         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9355                 if (tr == this_tr) {
9356                         ret = __remove_instance(tr);
9357                         break;
9358                 }
9359         }
9360
9361         mutex_unlock(&trace_types_lock);
9362         mutex_unlock(&event_mutex);
9363
9364         return ret;
9365 }
9366 EXPORT_SYMBOL_GPL(trace_array_destroy);
9367
9368 static int instance_rmdir(const char *name)
9369 {
9370         struct trace_array *tr;
9371         int ret;
9372
9373         mutex_lock(&event_mutex);
9374         mutex_lock(&trace_types_lock);
9375
9376         ret = -ENODEV;
9377         tr = trace_array_find(name);
9378         if (tr)
9379                 ret = __remove_instance(tr);
9380
9381         mutex_unlock(&trace_types_lock);
9382         mutex_unlock(&event_mutex);
9383
9384         return ret;
9385 }
9386
9387 static __init void create_trace_instances(struct dentry *d_tracer)
9388 {
9389         struct trace_array *tr;
9390
9391         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9392                                                          instance_mkdir,
9393                                                          instance_rmdir);
9394         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9395                 return;
9396
9397         mutex_lock(&event_mutex);
9398         mutex_lock(&trace_types_lock);
9399
9400         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9401                 if (!tr->name)
9402                         continue;
9403                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9404                              "Failed to create instance directory\n"))
9405                         break;
9406         }
9407
9408         mutex_unlock(&trace_types_lock);
9409         mutex_unlock(&event_mutex);
9410 }
9411
9412 static void
9413 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9414 {
9415         struct trace_event_file *file;
9416         int cpu;
9417
9418         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9419                         tr, &show_traces_fops);
9420
9421         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9422                         tr, &set_tracer_fops);
9423
9424         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9425                           tr, &tracing_cpumask_fops);
9426
9427         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9428                           tr, &tracing_iter_fops);
9429
9430         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9431                           tr, &tracing_fops);
9432
9433         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9434                           tr, &tracing_pipe_fops);
9435
9436         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9437                           tr, &tracing_entries_fops);
9438
9439         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9440                           tr, &tracing_total_entries_fops);
9441
9442         trace_create_file("free_buffer", 0200, d_tracer,
9443                           tr, &tracing_free_buffer_fops);
9444
9445         trace_create_file("trace_marker", 0220, d_tracer,
9446                           tr, &tracing_mark_fops);
9447
9448         file = __find_event_file(tr, "ftrace", "print");
9449         if (file && file->dir)
9450                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9451                                   file, &event_trigger_fops);
9452         tr->trace_marker_file = file;
9453
9454         trace_create_file("trace_marker_raw", 0220, d_tracer,
9455                           tr, &tracing_mark_raw_fops);
9456
9457         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9458                           &trace_clock_fops);
9459
9460         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9461                           tr, &rb_simple_fops);
9462
9463         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9464                           &trace_time_stamp_mode_fops);
9465
9466         tr->buffer_percent = 50;
9467
9468         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9469                         tr, &buffer_percent_fops);
9470
9471         create_trace_options_dir(tr);
9472
9473         trace_create_maxlat_file(tr, d_tracer);
9474
9475         if (ftrace_create_function_files(tr, d_tracer))
9476                 MEM_FAIL(1, "Could not allocate function filter files");
9477
9478 #ifdef CONFIG_TRACER_SNAPSHOT
9479         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9480                           tr, &snapshot_fops);
9481 #endif
9482
9483         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9484                           tr, &tracing_err_log_fops);
9485
9486         for_each_tracing_cpu(cpu)
9487                 tracing_init_tracefs_percpu(tr, cpu);
9488
9489         ftrace_init_tracefs(tr, d_tracer);
9490 }
9491
9492 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9493 {
9494         struct vfsmount *mnt;
9495         struct file_system_type *type;
9496
9497         /*
9498          * To maintain backward compatibility for tools that mount
9499          * debugfs to get to the tracing facility, tracefs is automatically
9500          * mounted to the debugfs/tracing directory.
9501          */
9502         type = get_fs_type("tracefs");
9503         if (!type)
9504                 return NULL;
9505         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9506         put_filesystem(type);
9507         if (IS_ERR(mnt))
9508                 return NULL;
9509         mntget(mnt);
9510
9511         return mnt;
9512 }
9513
9514 /**
9515  * tracing_init_dentry - initialize top level trace array
9516  *
9517  * This is called when creating files or directories in the tracing
9518  * directory. It is called via fs_initcall() by any of the boot up code
9519  * and expects to return the dentry of the top level tracing directory.
9520  */
9521 int tracing_init_dentry(void)
9522 {
9523         struct trace_array *tr = &global_trace;
9524
9525         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9526                 pr_warn("Tracing disabled due to lockdown\n");
9527                 return -EPERM;
9528         }
9529
9530         /* The top level trace array uses  NULL as parent */
9531         if (tr->dir)
9532                 return 0;
9533
9534         if (WARN_ON(!tracefs_initialized()))
9535                 return -ENODEV;
9536
9537         /*
9538          * As there may still be users that expect the tracing
9539          * files to exist in debugfs/tracing, we must automount
9540          * the tracefs file system there, so older tools still
9541          * work with the newer kernel.
9542          */
9543         tr->dir = debugfs_create_automount("tracing", NULL,
9544                                            trace_automount, NULL);
9545
9546         return 0;
9547 }
9548
9549 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9550 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9551
9552 static struct workqueue_struct *eval_map_wq __initdata;
9553 static struct work_struct eval_map_work __initdata;
9554
9555 static void __init eval_map_work_func(struct work_struct *work)
9556 {
9557         int len;
9558
9559         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9560         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9561 }
9562
9563 static int __init trace_eval_init(void)
9564 {
9565         INIT_WORK(&eval_map_work, eval_map_work_func);
9566
9567         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9568         if (!eval_map_wq) {
9569                 pr_err("Unable to allocate eval_map_wq\n");
9570                 /* Do work here */
9571                 eval_map_work_func(&eval_map_work);
9572                 return -ENOMEM;
9573         }
9574
9575         queue_work(eval_map_wq, &eval_map_work);
9576         return 0;
9577 }
9578
9579 static int __init trace_eval_sync(void)
9580 {
9581         /* Make sure the eval map updates are finished */
9582         if (eval_map_wq)
9583                 destroy_workqueue(eval_map_wq);
9584         return 0;
9585 }
9586
9587 late_initcall_sync(trace_eval_sync);
9588
9589
9590 #ifdef CONFIG_MODULES
9591 static void trace_module_add_evals(struct module *mod)
9592 {
9593         if (!mod->num_trace_evals)
9594                 return;
9595
9596         /*
9597          * Modules with bad taint do not have events created, do
9598          * not bother with enums either.
9599          */
9600         if (trace_module_has_bad_taint(mod))
9601                 return;
9602
9603         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9604 }
9605
9606 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9607 static void trace_module_remove_evals(struct module *mod)
9608 {
9609         union trace_eval_map_item *map;
9610         union trace_eval_map_item **last = &trace_eval_maps;
9611
9612         if (!mod->num_trace_evals)
9613                 return;
9614
9615         mutex_lock(&trace_eval_mutex);
9616
9617         map = trace_eval_maps;
9618
9619         while (map) {
9620                 if (map->head.mod == mod)
9621                         break;
9622                 map = trace_eval_jmp_to_tail(map);
9623                 last = &map->tail.next;
9624                 map = map->tail.next;
9625         }
9626         if (!map)
9627                 goto out;
9628
9629         *last = trace_eval_jmp_to_tail(map)->tail.next;
9630         kfree(map);
9631  out:
9632         mutex_unlock(&trace_eval_mutex);
9633 }
9634 #else
9635 static inline void trace_module_remove_evals(struct module *mod) { }
9636 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9637
9638 static int trace_module_notify(struct notifier_block *self,
9639                                unsigned long val, void *data)
9640 {
9641         struct module *mod = data;
9642
9643         switch (val) {
9644         case MODULE_STATE_COMING:
9645                 trace_module_add_evals(mod);
9646                 break;
9647         case MODULE_STATE_GOING:
9648                 trace_module_remove_evals(mod);
9649                 break;
9650         }
9651
9652         return NOTIFY_OK;
9653 }
9654
9655 static struct notifier_block trace_module_nb = {
9656         .notifier_call = trace_module_notify,
9657         .priority = 0,
9658 };
9659 #endif /* CONFIG_MODULES */
9660
9661 static __init int tracer_init_tracefs(void)
9662 {
9663         int ret;
9664
9665         trace_access_lock_init();
9666
9667         ret = tracing_init_dentry();
9668         if (ret)
9669                 return 0;
9670
9671         event_trace_init();
9672
9673         init_tracer_tracefs(&global_trace, NULL);
9674         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9675
9676         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9677                         &global_trace, &tracing_thresh_fops);
9678
9679         trace_create_file("README", TRACE_MODE_READ, NULL,
9680                         NULL, &tracing_readme_fops);
9681
9682         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9683                         NULL, &tracing_saved_cmdlines_fops);
9684
9685         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9686                           NULL, &tracing_saved_cmdlines_size_fops);
9687
9688         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9689                         NULL, &tracing_saved_tgids_fops);
9690
9691         trace_eval_init();
9692
9693         trace_create_eval_file(NULL);
9694
9695 #ifdef CONFIG_MODULES
9696         register_module_notifier(&trace_module_nb);
9697 #endif
9698
9699 #ifdef CONFIG_DYNAMIC_FTRACE
9700         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9701                         NULL, &tracing_dyn_info_fops);
9702 #endif
9703
9704         create_trace_instances(NULL);
9705
9706         update_tracer_options(&global_trace);
9707
9708         return 0;
9709 }
9710
9711 fs_initcall(tracer_init_tracefs);
9712
9713 static int trace_panic_handler(struct notifier_block *this,
9714                                unsigned long event, void *unused)
9715 {
9716         if (ftrace_dump_on_oops)
9717                 ftrace_dump(ftrace_dump_on_oops);
9718         return NOTIFY_OK;
9719 }
9720
9721 static struct notifier_block trace_panic_notifier = {
9722         .notifier_call  = trace_panic_handler,
9723         .next           = NULL,
9724         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9725 };
9726
9727 static int trace_die_handler(struct notifier_block *self,
9728                              unsigned long val,
9729                              void *data)
9730 {
9731         switch (val) {
9732         case DIE_OOPS:
9733                 if (ftrace_dump_on_oops)
9734                         ftrace_dump(ftrace_dump_on_oops);
9735                 break;
9736         default:
9737                 break;
9738         }
9739         return NOTIFY_OK;
9740 }
9741
9742 static struct notifier_block trace_die_notifier = {
9743         .notifier_call = trace_die_handler,
9744         .priority = 200
9745 };
9746
9747 /*
9748  * printk is set to max of 1024, we really don't need it that big.
9749  * Nothing should be printing 1000 characters anyway.
9750  */
9751 #define TRACE_MAX_PRINT         1000
9752
9753 /*
9754  * Define here KERN_TRACE so that we have one place to modify
9755  * it if we decide to change what log level the ftrace dump
9756  * should be at.
9757  */
9758 #define KERN_TRACE              KERN_EMERG
9759
9760 void
9761 trace_printk_seq(struct trace_seq *s)
9762 {
9763         /* Probably should print a warning here. */
9764         if (s->seq.len >= TRACE_MAX_PRINT)
9765                 s->seq.len = TRACE_MAX_PRINT;
9766
9767         /*
9768          * More paranoid code. Although the buffer size is set to
9769          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9770          * an extra layer of protection.
9771          */
9772         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9773                 s->seq.len = s->seq.size - 1;
9774
9775         /* should be zero ended, but we are paranoid. */
9776         s->buffer[s->seq.len] = 0;
9777
9778         printk(KERN_TRACE "%s", s->buffer);
9779
9780         trace_seq_init(s);
9781 }
9782
9783 void trace_init_global_iter(struct trace_iterator *iter)
9784 {
9785         iter->tr = &global_trace;
9786         iter->trace = iter->tr->current_trace;
9787         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9788         iter->array_buffer = &global_trace.array_buffer;
9789
9790         if (iter->trace && iter->trace->open)
9791                 iter->trace->open(iter);
9792
9793         /* Annotate start of buffers if we had overruns */
9794         if (ring_buffer_overruns(iter->array_buffer->buffer))
9795                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9796
9797         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9798         if (trace_clocks[iter->tr->clock_id].in_ns)
9799                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9800 }
9801
9802 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9803 {
9804         /* use static because iter can be a bit big for the stack */
9805         static struct trace_iterator iter;
9806         static atomic_t dump_running;
9807         struct trace_array *tr = &global_trace;
9808         unsigned int old_userobj;
9809         unsigned long flags;
9810         int cnt = 0, cpu;
9811
9812         /* Only allow one dump user at a time. */
9813         if (atomic_inc_return(&dump_running) != 1) {
9814                 atomic_dec(&dump_running);
9815                 return;
9816         }
9817
9818         /*
9819          * Always turn off tracing when we dump.
9820          * We don't need to show trace output of what happens
9821          * between multiple crashes.
9822          *
9823          * If the user does a sysrq-z, then they can re-enable
9824          * tracing with echo 1 > tracing_on.
9825          */
9826         tracing_off();
9827
9828         local_irq_save(flags);
9829
9830         /* Simulate the iterator */
9831         trace_init_global_iter(&iter);
9832         /* Can not use kmalloc for iter.temp and iter.fmt */
9833         iter.temp = static_temp_buf;
9834         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9835         iter.fmt = static_fmt_buf;
9836         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9837
9838         for_each_tracing_cpu(cpu) {
9839                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9840         }
9841
9842         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9843
9844         /* don't look at user memory in panic mode */
9845         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9846
9847         switch (oops_dump_mode) {
9848         case DUMP_ALL:
9849                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9850                 break;
9851         case DUMP_ORIG:
9852                 iter.cpu_file = raw_smp_processor_id();
9853                 break;
9854         case DUMP_NONE:
9855                 goto out_enable;
9856         default:
9857                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9858                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9859         }
9860
9861         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9862
9863         /* Did function tracer already get disabled? */
9864         if (ftrace_is_dead()) {
9865                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9866                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9867         }
9868
9869         /*
9870          * We need to stop all tracing on all CPUS to read
9871          * the next buffer. This is a bit expensive, but is
9872          * not done often. We fill all what we can read,
9873          * and then release the locks again.
9874          */
9875
9876         while (!trace_empty(&iter)) {
9877
9878                 if (!cnt)
9879                         printk(KERN_TRACE "---------------------------------\n");
9880
9881                 cnt++;
9882
9883                 trace_iterator_reset(&iter);
9884                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9885
9886                 if (trace_find_next_entry_inc(&iter) != NULL) {
9887                         int ret;
9888
9889                         ret = print_trace_line(&iter);
9890                         if (ret != TRACE_TYPE_NO_CONSUME)
9891                                 trace_consume(&iter);
9892                 }
9893                 touch_nmi_watchdog();
9894
9895                 trace_printk_seq(&iter.seq);
9896         }
9897
9898         if (!cnt)
9899                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9900         else
9901                 printk(KERN_TRACE "---------------------------------\n");
9902
9903  out_enable:
9904         tr->trace_flags |= old_userobj;
9905
9906         for_each_tracing_cpu(cpu) {
9907                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9908         }
9909         atomic_dec(&dump_running);
9910         local_irq_restore(flags);
9911 }
9912 EXPORT_SYMBOL_GPL(ftrace_dump);
9913
9914 #define WRITE_BUFSIZE  4096
9915
9916 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9917                                 size_t count, loff_t *ppos,
9918                                 int (*createfn)(const char *))
9919 {
9920         char *kbuf, *buf, *tmp;
9921         int ret = 0;
9922         size_t done = 0;
9923         size_t size;
9924
9925         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9926         if (!kbuf)
9927                 return -ENOMEM;
9928
9929         while (done < count) {
9930                 size = count - done;
9931
9932                 if (size >= WRITE_BUFSIZE)
9933                         size = WRITE_BUFSIZE - 1;
9934
9935                 if (copy_from_user(kbuf, buffer + done, size)) {
9936                         ret = -EFAULT;
9937                         goto out;
9938                 }
9939                 kbuf[size] = '\0';
9940                 buf = kbuf;
9941                 do {
9942                         tmp = strchr(buf, '\n');
9943                         if (tmp) {
9944                                 *tmp = '\0';
9945                                 size = tmp - buf + 1;
9946                         } else {
9947                                 size = strlen(buf);
9948                                 if (done + size < count) {
9949                                         if (buf != kbuf)
9950                                                 break;
9951                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9952                                         pr_warn("Line length is too long: Should be less than %d\n",
9953                                                 WRITE_BUFSIZE - 2);
9954                                         ret = -EINVAL;
9955                                         goto out;
9956                                 }
9957                         }
9958                         done += size;
9959
9960                         /* Remove comments */
9961                         tmp = strchr(buf, '#');
9962
9963                         if (tmp)
9964                                 *tmp = '\0';
9965
9966                         ret = createfn(buf);
9967                         if (ret)
9968                                 goto out;
9969                         buf += size;
9970
9971                 } while (done < count);
9972         }
9973         ret = done;
9974
9975 out:
9976         kfree(kbuf);
9977
9978         return ret;
9979 }
9980
9981 __init static int tracer_alloc_buffers(void)
9982 {
9983         int ring_buf_size;
9984         int ret = -ENOMEM;
9985
9986
9987         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9988                 pr_warn("Tracing disabled due to lockdown\n");
9989                 return -EPERM;
9990         }
9991
9992         /*
9993          * Make sure we don't accidentally add more trace options
9994          * than we have bits for.
9995          */
9996         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9997
9998         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9999                 goto out;
10000
10001         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10002                 goto out_free_buffer_mask;
10003
10004         /* Only allocate trace_printk buffers if a trace_printk exists */
10005         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10006                 /* Must be called before global_trace.buffer is allocated */
10007                 trace_printk_init_buffers();
10008
10009         /* To save memory, keep the ring buffer size to its minimum */
10010         if (ring_buffer_expanded)
10011                 ring_buf_size = trace_buf_size;
10012         else
10013                 ring_buf_size = 1;
10014
10015         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10016         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10017
10018         raw_spin_lock_init(&global_trace.start_lock);
10019
10020         /*
10021          * The prepare callbacks allocates some memory for the ring buffer. We
10022          * don't free the buffer if the CPU goes down. If we were to free
10023          * the buffer, then the user would lose any trace that was in the
10024          * buffer. The memory will be removed once the "instance" is removed.
10025          */
10026         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10027                                       "trace/RB:preapre", trace_rb_cpu_prepare,
10028                                       NULL);
10029         if (ret < 0)
10030                 goto out_free_cpumask;
10031         /* Used for event triggers */
10032         ret = -ENOMEM;
10033         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10034         if (!temp_buffer)
10035                 goto out_rm_hp_state;
10036
10037         if (trace_create_savedcmd() < 0)
10038                 goto out_free_temp_buffer;
10039
10040         /* TODO: make the number of buffers hot pluggable with CPUS */
10041         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10042                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10043                 goto out_free_savedcmd;
10044         }
10045
10046         if (global_trace.buffer_disabled)
10047                 tracing_off();
10048
10049         if (trace_boot_clock) {
10050                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10051                 if (ret < 0)
10052                         pr_warn("Trace clock %s not defined, going back to default\n",
10053                                 trace_boot_clock);
10054         }
10055
10056         /*
10057          * register_tracer() might reference current_trace, so it
10058          * needs to be set before we register anything. This is
10059          * just a bootstrap of current_trace anyway.
10060          */
10061         global_trace.current_trace = &nop_trace;
10062
10063         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10064
10065         ftrace_init_global_array_ops(&global_trace);
10066
10067         init_trace_flags_index(&global_trace);
10068
10069         register_tracer(&nop_trace);
10070
10071         /* Function tracing may start here (via kernel command line) */
10072         init_function_trace();
10073
10074         /* All seems OK, enable tracing */
10075         tracing_disabled = 0;
10076
10077         atomic_notifier_chain_register(&panic_notifier_list,
10078                                        &trace_panic_notifier);
10079
10080         register_die_notifier(&trace_die_notifier);
10081
10082         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10083
10084         INIT_LIST_HEAD(&global_trace.systems);
10085         INIT_LIST_HEAD(&global_trace.events);
10086         INIT_LIST_HEAD(&global_trace.hist_vars);
10087         INIT_LIST_HEAD(&global_trace.err_log);
10088         list_add(&global_trace.list, &ftrace_trace_arrays);
10089
10090         apply_trace_boot_options();
10091
10092         register_snapshot_cmd();
10093
10094         test_can_verify();
10095
10096         return 0;
10097
10098 out_free_savedcmd:
10099         free_saved_cmdlines_buffer(savedcmd);
10100 out_free_temp_buffer:
10101         ring_buffer_free(temp_buffer);
10102 out_rm_hp_state:
10103         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10104 out_free_cpumask:
10105         free_cpumask_var(global_trace.tracing_cpumask);
10106 out_free_buffer_mask:
10107         free_cpumask_var(tracing_buffer_mask);
10108 out:
10109         return ret;
10110 }
10111
10112 void __init early_trace_init(void)
10113 {
10114         if (tracepoint_printk) {
10115                 tracepoint_print_iter =
10116                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10117                 if (MEM_FAIL(!tracepoint_print_iter,
10118                              "Failed to allocate trace iterator\n"))
10119                         tracepoint_printk = 0;
10120                 else
10121                         static_key_enable(&tracepoint_printk_key.key);
10122         }
10123         tracer_alloc_buffers();
10124 }
10125
10126 void __init trace_init(void)
10127 {
10128         trace_event_init();
10129 }
10130
10131 __init static void clear_boot_tracer(void)
10132 {
10133         /*
10134          * The default tracer at boot buffer is an init section.
10135          * This function is called in lateinit. If we did not
10136          * find the boot tracer, then clear it out, to prevent
10137          * later registration from accessing the buffer that is
10138          * about to be freed.
10139          */
10140         if (!default_bootup_tracer)
10141                 return;
10142
10143         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10144                default_bootup_tracer);
10145         default_bootup_tracer = NULL;
10146 }
10147
10148 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10149 __init static void tracing_set_default_clock(void)
10150 {
10151         /* sched_clock_stable() is determined in late_initcall */
10152         if (!trace_boot_clock && !sched_clock_stable()) {
10153                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10154                         pr_warn("Can not set tracing clock due to lockdown\n");
10155                         return;
10156                 }
10157
10158                 printk(KERN_WARNING
10159                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10160                        "If you want to keep using the local clock, then add:\n"
10161                        "  \"trace_clock=local\"\n"
10162                        "on the kernel command line\n");
10163                 tracing_set_clock(&global_trace, "global");
10164         }
10165 }
10166 #else
10167 static inline void tracing_set_default_clock(void) { }
10168 #endif
10169
10170 __init static int late_trace_init(void)
10171 {
10172         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10173                 static_key_disable(&tracepoint_printk_key.key);
10174                 tracepoint_printk = 0;
10175         }
10176
10177         tracing_set_default_clock();
10178         clear_boot_tracer();
10179         return 0;
10180 }
10181
10182 late_initcall_sync(late_trace_init);