kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * has occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 and will be set to zero only if the
114  * initialization of the tracer is successful; that is the only
115  * place that clears it.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
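/*
 * For example (illustrative): to dump only the buffer of the CPU that
 * oopsed, either set it at run time with
 *
 *   echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * or boot with "ftrace_dump_on_oops=orig_cpu" on the kernel command line.
 */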
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
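/*
 * Illustrative layout (sketch): one saved array holding three eval maps
 * from a single module would look roughly like
 *
 *   [0] head: .mod = owning module (if any), .length = 3
 *   [1] map:  eval_string <-> eval_value
 *   [2] map:  eval_string <-> eval_value
 *   [3] map:  eval_string <-> eval_value
 *   [4] tail: .next = pointer to the next saved array (or NULL)
 */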
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
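/*
 * Putting the boot parameters above together, an illustrative command
 * line (assuming the function tracer is built in and these clock/option
 * names exist in the configuration) could be:
 *
 *   ftrace=function trace_clock=global trace_options=stacktrace \
 *       alloc_snapshot ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 */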
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
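/*
 * E.g. ns2usecs(1499) == 1 but ns2usecs(1500) == 2: the +500 rounds to
 * the nearest microsecond instead of truncating.
 */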
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are inserting export into the list, but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer inserted into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
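/*
 * Illustrative sketch of using the export API above; the my_export*
 * names are hypothetical and the write callback is only a stub.
 */
#if 0	/* example only */
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* forward the raw trace entry over a transport of your choice */
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);
}

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}
#endif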
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
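/*
 * Illustrative sketch of the get/put pairing described above; the
 * instance name and wrapper function are hypothetical, and this assumes
 * the single-argument trace_array_get_by_name().
 */
#if 0	/* example only */
static void example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("my_instance");
	if (!tr)
		return;

	/* ... use the instance ... */

	trace_array_put(tr);	/* let it be destroyed again */
}
#endif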
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 /**
516  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
517  * @filtered_pids: The list of pids to check
518  * @search_pid: The PID to find in @filtered_pids
519  *
520  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
521  */
522 bool
523 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
524 {
525         return trace_pid_list_is_set(filtered_pids, search_pid);
526 }
527
528 /**
529  * trace_ignore_this_task - should a task be ignored for tracing
530  * @filtered_pids: The list of pids to check
531  * @filtered_no_pids: The list of pids not to be traced
532  * @task: The task that should be ignored if not filtered
533  *
534  * Checks if @task should be traced or not from @filtered_pids.
535  * Returns true if @task should *NOT* be traced.
536  * Returns false if @task should be traced.
537  */
538 bool
539 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
540                        struct trace_pid_list *filtered_no_pids,
541                        struct task_struct *task)
542 {
543         /*
544          * If filtered_no_pids is not empty, and the task's pid is listed
545          * in filtered_no_pids, then return true.
546          * Otherwise, if filtered_pids is empty, that means we can
547          * trace all tasks. If it has content, then only trace pids
548          * within filtered_pids.
549          */
550
551         return (filtered_pids &&
552                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
553                 (filtered_no_pids &&
554                  trace_find_filtered_pid(filtered_no_pids, task->pid));
555 }
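/*
 * Worked example (illustrative): with filtered_pids = {100} and
 * filtered_no_pids = {200}:
 *
 *   pid 100 -> traced  (listed in filtered_pids, not in filtered_no_pids)
 *   pid 200 -> ignored (listed in filtered_no_pids)
 *   pid 300 -> ignored (filtered_pids is set but does not contain 300)
 */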
556
557 /**
558  * trace_filter_add_remove_task - Add or remove a task from a pid_list
559  * @pid_list: The list to modify
560  * @self: The current task for fork or NULL for exit
561  * @task: The task to add or remove
562  *
563  * If adding a task, if @self is defined, the task is only added if @self
564  * is also included in @pid_list. This happens on fork and tasks should
565  * only be added when the parent is listed. If @self is NULL, then the
566  * @task pid will be removed from the list, which would happen on exit
567  * of a task.
568  */
569 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
570                                   struct task_struct *self,
571                                   struct task_struct *task)
572 {
573         if (!pid_list)
574                 return;
575
576         /* For forks, we only add if the forking task is listed */
577         if (self) {
578                 if (!trace_find_filtered_pid(pid_list, self->pid))
579                         return;
580         }
581
582         /* "self" is set for forks, and NULL for exits */
583         if (self)
584                 trace_pid_list_set(pid_list, task->pid);
585         else
586                 trace_pid_list_clear(pid_list, task->pid);
587 }
588
589 /**
590  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
591  * @pid_list: The pid list to show
592  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
593  * @pos: The position of the file
594  *
595  * This is used by the seq_file "next" operation to iterate the pids
596  * listed in a trace_pid_list structure.
597  *
598  * Returns the pid+1 as we want to display pid of zero, but NULL would
599  * stop the iteration.
600  */
601 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
602 {
603         long pid = (unsigned long)v;
604         unsigned int next;
605
606         (*pos)++;
607
608         /* pid already is +1 of the actual previous bit */
609         if (trace_pid_list_next(pid_list, pid, &next) < 0)
610                 return NULL;
611
612         pid = next;
613
614         /* Return pid + 1 to allow zero to be represented */
615         return (void *)(pid + 1);
616 }
617
618 /**
619  * trace_pid_start - Used for seq_file to start reading pid lists
620  * @pid_list: The pid list to show
621  * @pos: The position of the file
622  *
623  * This is used by seq_file "start" operation to start the iteration
624  * of listing pids.
625  *
626  * Returns the pid+1 as we want to display pid of zero, but NULL would
627  * stop the iteration.
628  */
629 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
630 {
631         unsigned long pid;
632         unsigned int first;
633         loff_t l = 0;
634
635         if (trace_pid_list_first(pid_list, &first) < 0)
636                 return NULL;
637
638         pid = first;
639
640         /* Return pid + 1 so that zero can be the exit value */
641         for (pid++; pid && l < *pos;
642              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
643                 ;
644         return (void *)pid;
645 }
646
647 /**
648  * trace_pid_show - show the current pid in seq_file processing
649  * @m: The seq_file structure to write into
650  * @v: A void pointer of the pid (+1) value to display
651  *
652  * Can be directly used by seq_file operations to display the current
653  * pid value.
654  */
655 int trace_pid_show(struct seq_file *m, void *v)
656 {
657         unsigned long pid = (unsigned long)v - 1;
658
659         seq_printf(m, "%lu\n", pid);
660         return 0;
661 }
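/*
 * Illustrative sketch: the three helpers above are meant to back a
 * seq_file that lists pids. The wrappers and example_pid_list below are
 * hypothetical (and real users take the appropriate locks in p_start).
 */
#if 0	/* example only */
static void *p_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *p_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void p_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= p_start,
	.next	= p_next,
	.stop	= p_stop,
	.show	= trace_pid_show,
};
#endif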
662
663 /* 128 should be much more than enough */
664 #define PID_BUF_SIZE            127
665
666 int trace_pid_write(struct trace_pid_list *filtered_pids,
667                     struct trace_pid_list **new_pid_list,
668                     const char __user *ubuf, size_t cnt)
669 {
670         struct trace_pid_list *pid_list;
671         struct trace_parser parser;
672         unsigned long val;
673         int nr_pids = 0;
674         ssize_t read = 0;
675         ssize_t ret;
676         loff_t pos;
677         pid_t pid;
678
679         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
680                 return -ENOMEM;
681
682         /*
683          * Always recreate a new array. The write is an all-or-nothing
684          * operation: a new array is created whenever the user adds new
685          * pids, and if the operation fails, the current list is left
686          * unmodified.
687          */
688         pid_list = trace_pid_list_alloc();
689         if (!pid_list) {
690                 trace_parser_put(&parser);
691                 return -ENOMEM;
692         }
693
694         if (filtered_pids) {
695                 /* copy the current bits to the new max */
696                 ret = trace_pid_list_first(filtered_pids, &pid);
697                 while (!ret) {
698                         trace_pid_list_set(pid_list, pid);
699                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
700                         nr_pids++;
701                 }
702         }
703
704         ret = 0;
705         while (cnt > 0) {
706
707                 pos = 0;
708
709                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
710                 if (ret < 0 || !trace_parser_loaded(&parser))
711                         break;
712
713                 read += ret;
714                 ubuf += ret;
715                 cnt -= ret;
716
717                 ret = -EINVAL;
718                 if (kstrtoul(parser.buffer, 0, &val))
719                         break;
720
721                 pid = (pid_t)val;
722
723                 if (trace_pid_list_set(pid_list, pid) < 0) {
724                         ret = -1;
725                         break;
726                 }
727                 nr_pids++;
728
729                 trace_parser_clear(&parser);
730                 ret = 0;
731         }
732         trace_parser_put(&parser);
733
734         if (ret < 0) {
735                 trace_pid_list_free(pid_list);
736                 return ret;
737         }
738
739         if (!nr_pids) {
740                 /* Cleared the list of pids */
741                 trace_pid_list_free(pid_list);
742                 read = ret;
743                 pid_list = NULL;
744         }
745
746         *new_pid_list = pid_list;
747
748         return read;
749 }
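/*
 * Illustrative example: this is what backs writes to the pid filter
 * files, e.g. "echo 123 456 > set_event_pid". A new list is always
 * built (seeded from @filtered_pids when one is passed in) and the
 * parsed pids are added to it; if no pids end up in the list, it is
 * freed and *new_pid_list is set to NULL.
 */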
750
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753         u64 ts;
754
755         /* Early boot up does not have a buffer yet */
756         if (!buf->buffer)
757                 return trace_clock_local();
758
759         ts = ring_buffer_time_stamp(buf->buffer);
760         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761
762         return ts;
763 }
764
765 u64 ftrace_now(int cpu)
766 {
767         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769
770 /**
771  * tracing_is_enabled - Show if global_trace has been enabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781         /*
782          * For quick access (irqsoff uses this in fast path), just
783          * return the mirror variable of the state of the ring buffer.
784          * It's a little racy, but we don't really care.
785          */
786         smp_rmb();
787         return !global_trace.buffer_disabled;
788 }
789
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low value of 16384:
796  * if a dump on oops happens, it is much nicer not to have to
797  * wait for all that output. In any case, this is configurable
798  * at both boot time and run time.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
801
802 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer            *trace_types __read_mostly;
806
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811
812 /*
813  * Serialize access to the ring buffer.
814  *
815  * The ring buffer serializes readers, but that is only low-level protection.
816  * The validity of the events (returned by ring_buffer_peek(), etc.)
817  * is not protected by the ring buffer.
818  *
819  * The content of events may become garbage if we allow other processes to
820  * consume these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not a reader page) in the ring buffer, and this page will be
823  *      rewritten by the event producer.
824  *   B) the page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to the system.
826  *
827  * These primitives allow multiple processes to access different per-cpu
828  * ring buffers concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multiple read-only accesses are also serialized.
832  */
833
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837
838 static inline void trace_access_lock(int cpu)
839 {
840         if (cpu == RING_BUFFER_ALL_CPUS) {
841                 /* gain it for accessing the whole ring buffer. */
842                 down_write(&all_cpu_access_lock);
843         } else {
844                 /* gain it for accessing a cpu ring buffer. */
845
846                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847                 down_read(&all_cpu_access_lock);
848
849                 /* Secondly block other access to this @cpu ring buffer. */
850                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
851         }
852 }
853
854 static inline void trace_access_unlock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 up_write(&all_cpu_access_lock);
858         } else {
859                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860                 up_read(&all_cpu_access_lock);
861         }
862 }
863
864 static inline void trace_access_lock_init(void)
865 {
866         int cpu;
867
868         for_each_possible_cpu(cpu)
869                 mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871
872 #else
873
874 static DEFINE_MUTEX(access_lock);
875
876 static inline void trace_access_lock(int cpu)
877 {
878         (void)cpu;
879         mutex_lock(&access_lock);
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884         (void)cpu;
885         mutex_unlock(&access_lock);
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890 }
891
892 #endif
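/*
 * Illustrative usage sketch (simplified): a consuming reader brackets
 * its per-cpu reads with the primitives above, e.g.
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	// ... copy the event out ...
 *	trace_access_unlock(cpu);
 */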
893
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896                                  unsigned int trace_ctx,
897                                  int skip, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899                                       struct trace_buffer *buffer,
900                                       unsigned int trace_ctx,
901                                       int skip, struct pt_regs *regs);
902
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905                                         unsigned int trace_ctx,
906                                         int skip, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910                                       struct trace_buffer *buffer,
911                                       unsigned long trace_ctx,
912                                       int skip, struct pt_regs *regs)
913 {
914 }
915
916 #endif
917
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920                   int type, unsigned int trace_ctx)
921 {
922         struct trace_entry *ent = ring_buffer_event_data(event);
923
924         tracing_generic_entry_update(ent, type, trace_ctx);
925 }
926
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929                           int type,
930                           unsigned long len,
931                           unsigned int trace_ctx)
932 {
933         struct ring_buffer_event *event;
934
935         event = ring_buffer_lock_reserve(buffer, len);
936         if (event != NULL)
937                 trace_event_setup(event, type, trace_ctx);
938
939         return event;
940 }
941
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944         if (tr->array_buffer.buffer)
945                 ring_buffer_record_on(tr->array_buffer.buffer);
946         /*
947          * This flag is looked at when buffers haven't been allocated
948          * yet, or by some tracers (like irqsoff) that just want to
949          * know if the ring buffer has been disabled, but it can handle
950          * races where it gets disabled while we still do a record.
951          * As the check is in the fast path of the tracers, it is more
952          * important to be fast than accurate.
953          */
954         tr->buffer_disabled = 0;
955         /* Make the flag seen by readers */
956         smp_wmb();
957 }
958
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967         tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970
971
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975         __this_cpu_write(trace_taskinfo_save, true);
976
977         /* If this is the temp buffer, we need to commit fully */
978         if (this_cpu_read(trace_buffered_event) == event) {
979                 /* Length is in event->array[0] */
980                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
981                 /* Release the temp buffer */
982                 this_cpu_dec(trace_buffered_event_cnt);
983         } else
984                 ring_buffer_unlock_commit(buffer, event);
985 }
986
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:    The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995         struct ring_buffer_event *event;
996         struct trace_buffer *buffer;
997         struct print_entry *entry;
998         unsigned int trace_ctx;
999         int alloc;
1000
1001         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1002                 return 0;
1003
1004         if (unlikely(tracing_selftest_running || tracing_disabled))
1005                 return 0;
1006
1007         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1008
1009         trace_ctx = tracing_gen_ctx();
1010         buffer = global_trace.array_buffer.buffer;
1011         ring_buffer_nest_start(buffer);
1012         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1013                                             trace_ctx);
1014         if (!event) {
1015                 size = 0;
1016                 goto out;
1017         }
1018
1019         entry = ring_buffer_event_data(event);
1020         entry->ip = ip;
1021
1022         memcpy(&entry->buf, str, size);
1023
1024         /* Add a newline if necessary */
1025         if (entry->buf[size - 1] != '\n') {
1026                 entry->buf[size] = '\n';
1027                 entry->buf[size + 1] = '\0';
1028         } else
1029                 entry->buf[size] = '\0';
1030
1031         __buffer_unlock_commit(buffer, event);
1032         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1033  out:
1034         ring_buffer_nest_end(buffer);
1035         return size;
1036 }
1037 EXPORT_SYMBOL_GPL(__trace_puts);
1038
1039 /**
1040  * __trace_bputs - write the pointer to a constant string into trace buffer
1041  * @ip:    The address of the caller
1042  * @str:   The constant string to write to the buffer to
1043  */
1044 int __trace_bputs(unsigned long ip, const char *str)
1045 {
1046         struct ring_buffer_event *event;
1047         struct trace_buffer *buffer;
1048         struct bputs_entry *entry;
1049         unsigned int trace_ctx;
1050         int size = sizeof(struct bputs_entry);
1051         int ret = 0;
1052
1053         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1054                 return 0;
1055
1056         if (unlikely(tracing_selftest_running || tracing_disabled))
1057                 return 0;
1058
1059         trace_ctx = tracing_gen_ctx();
1060         buffer = global_trace.array_buffer.buffer;
1061
1062         ring_buffer_nest_start(buffer);
1063         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1064                                             trace_ctx);
1065         if (!event)
1066                 goto out;
1067
1068         entry = ring_buffer_event_data(event);
1069         entry->ip                       = ip;
1070         entry->str                      = str;
1071
1072         __buffer_unlock_commit(buffer, event);
1073         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1074
1075         ret = 1;
1076  out:
1077         ring_buffer_nest_end(buffer);
1078         return ret;
1079 }
1080 EXPORT_SYMBOL_GPL(__trace_bputs);
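/*
 * Callers normally reach __trace_puts()/__trace_bputs() through the
 * trace_puts() macro, e.g. (illustrative):
 *
 *	trace_puts("reached the slow path\n");
 *
 * which picks the bputs form when the string is a built-in constant.
 */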
1081
1082 #ifdef CONFIG_TRACER_SNAPSHOT
1083 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1084                                            void *cond_data)
1085 {
1086         struct tracer *tracer = tr->current_trace;
1087         unsigned long flags;
1088
1089         if (in_nmi()) {
1090                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1091                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1092                 return;
1093         }
1094
1095         if (!tr->allocated_snapshot) {
1096                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1097                 internal_trace_puts("*** stopping trace here!   ***\n");
1098                 tracing_off();
1099                 return;
1100         }
1101
1102         /* Note, snapshot can not be used when the tracer uses it */
1103         if (tracer->use_max_tr) {
1104                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1105                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1106                 return;
1107         }
1108
1109         local_irq_save(flags);
1110         update_max_tr(tr, current, smp_processor_id(), cond_data);
1111         local_irq_restore(flags);
1112 }
1113
1114 void tracing_snapshot_instance(struct trace_array *tr)
1115 {
1116         tracing_snapshot_instance_cond(tr, NULL);
1117 }
1118
1119 /**
1120  * tracing_snapshot - take a snapshot of the current buffer.
1121  *
1122  * This causes a swap between the snapshot buffer and the current live
1123  * tracing buffer. You can use this to take snapshots of the live
1124  * trace when some condition is triggered, but continue to trace.
1125  *
1126  * Note, make sure to allocate the snapshot first, either with
1127  * tracing_snapshot_alloc() or manually with:
1128  *   echo 1 > /sys/kernel/debug/tracing/snapshot
1129  *
1130  * If the snapshot buffer is not allocated, this will stop tracing,
1131  * basically making a permanent snapshot.
1132  */
1133 void tracing_snapshot(void)
1134 {
1135         struct trace_array *tr = &global_trace;
1136
1137         tracing_snapshot_instance(tr);
1138 }
1139 EXPORT_SYMBOL_GPL(tracing_snapshot);
1140
1141 /**
1142  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1143  * @tr:         The tracing instance to snapshot
1144  * @cond_data:  The data to be tested conditionally, and possibly saved
1145  *
1146  * This is the same as tracing_snapshot() except that the snapshot is
1147  * conditional - the snapshot will only happen if the
1148  * cond_snapshot.update() implementation receiving the cond_data
1149  * returns true, which means that the trace array's cond_snapshot
1150  * update() operation used the cond_data to determine whether the
1151  * snapshot should be taken, and if it was, presumably saved it along
1152  * with the snapshot.
1153  */
1154 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1155 {
1156         tracing_snapshot_instance_cond(tr, cond_data);
1157 }
1158 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1159
1160 /**
1161  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1162  * @tr:         The tracing instance
1163  *
1164  * When the user enables a conditional snapshot using
1165  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1166  * with the snapshot.  This accessor is used to retrieve it.
1167  *
1168  * Should not be called from cond_snapshot.update(), since it takes
1169  * the tr->max_lock lock, which the code calling
1170  * cond_snapshot.update() has already done.
1171  *
1172  * Returns the cond_data associated with the trace array's snapshot.
1173  */
1174 void *tracing_cond_snapshot_data(struct trace_array *tr)
1175 {
1176         void *cond_data = NULL;
1177
1178         arch_spin_lock(&tr->max_lock);
1179
1180         if (tr->cond_snapshot)
1181                 cond_data = tr->cond_snapshot->cond_data;
1182
1183         arch_spin_unlock(&tr->max_lock);
1184
1185         return cond_data;
1186 }
1187 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1188
1189 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1190                                         struct array_buffer *size_buf, int cpu_id);
1191 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1192
1193 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1194 {
1195         int ret;
1196
1197         if (!tr->allocated_snapshot) {
1198
1199                 /* allocate spare buffer */
1200                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1201                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1202                 if (ret < 0)
1203                         return ret;
1204
1205                 tr->allocated_snapshot = true;
1206         }
1207
1208         return 0;
1209 }
1210
1211 static void free_snapshot(struct trace_array *tr)
1212 {
1213         /*
1214          * We don't free the ring buffer; instead, we resize it because
1215          * the max_tr ring buffer has some state (e.g. ring->clock) and
1216          * we want to preserve it.
1217          */
1218         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1219         set_buffer_entries(&tr->max_buffer, 1);
1220         tracing_reset_online_cpus(&tr->max_buffer);
1221         tr->allocated_snapshot = false;
1222 }
1223
1224 /**
1225  * tracing_alloc_snapshot - allocate snapshot buffer.
1226  *
1227  * This only allocates the snapshot buffer if it isn't already
1228  * allocated - it doesn't also take a snapshot.
1229  *
1230  * This is meant to be used in cases where the snapshot buffer needs
1231  * to be set up for events that can't sleep but need to be able to
1232  * trigger a snapshot.
1233  */
1234 int tracing_alloc_snapshot(void)
1235 {
1236         struct trace_array *tr = &global_trace;
1237         int ret;
1238
1239         ret = tracing_alloc_snapshot_instance(tr);
1240         WARN_ON(ret < 0);
1241
1242         return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1245
1246 /**
1247  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1248  *
1249  * This is similar to tracing_snapshot(), but it will allocate the
1250  * snapshot buffer if it isn't already allocated. Use this only
1251  * where it is safe to sleep, as the allocation may sleep.
1252  *
1253  * This causes a swap between the snapshot buffer and the current live
1254  * tracing buffer. You can use this to take snapshots of the live
1255  * trace when some condition is triggered, but continue to trace.
1256  */
1257 void tracing_snapshot_alloc(void)
1258 {
1259         int ret;
1260
1261         ret = tracing_alloc_snapshot();
1262         if (ret < 0)
1263                 return;
1264
1265         tracing_snapshot();
1266 }
1267 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
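/*
 * Illustrative sketch of the two forms described above (the condition
 * is hypothetical):
 *
 *	// where sleeping is allowed, allocate the snapshot buffer up front
 *	tracing_alloc_snapshot();
 *	...
 *	// later, possibly from a context that cannot sleep
 *	if (example_condition_hit())
 *		tracing_snapshot();
 *
 *	// or, where sleeping is fine, allocate and snapshot in one call
 *	tracing_snapshot_alloc();
 */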
1268
1269 /**
1270  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1271  * @tr:         The tracing instance
1272  * @cond_data:  User data to associate with the snapshot
1273  * @update:     Implementation of the cond_snapshot update function
1274  *
1275  * Check whether the conditional snapshot for the given instance has
1276  * already been enabled, or if the current tracer is already using a
1277  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1278  * save the cond_data and update function inside.
1279  *
1280  * Returns 0 if successful, error otherwise.
1281  */
1282 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1283                                  cond_update_fn_t update)
1284 {
1285         struct cond_snapshot *cond_snapshot;
1286         int ret = 0;
1287
1288         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1289         if (!cond_snapshot)
1290                 return -ENOMEM;
1291
1292         cond_snapshot->cond_data = cond_data;
1293         cond_snapshot->update = update;
1294
1295         mutex_lock(&trace_types_lock);
1296
1297         ret = tracing_alloc_snapshot_instance(tr);
1298         if (ret)
1299                 goto fail_unlock;
1300
1301         if (tr->current_trace->use_max_tr) {
1302                 ret = -EBUSY;
1303                 goto fail_unlock;
1304         }
1305
1306         /*
1307          * The cond_snapshot can only change to NULL without the
1308          * trace_types_lock. We don't care if we race with it going
1309          * to NULL, but we want to make sure that it's not set to
1310          * something other than NULL when we get here, which we can
1311          * do safely with only holding the trace_types_lock and not
1312          * having to take the max_lock.
1313          */
1314         if (tr->cond_snapshot) {
1315                 ret = -EBUSY;
1316                 goto fail_unlock;
1317         }
1318
1319         arch_spin_lock(&tr->max_lock);
1320         tr->cond_snapshot = cond_snapshot;
1321         arch_spin_unlock(&tr->max_lock);
1322
1323         mutex_unlock(&trace_types_lock);
1324
1325         return ret;
1326
1327  fail_unlock:
1328         mutex_unlock(&trace_types_lock);
1329         kfree(cond_snapshot);
1330         return ret;
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1333
1334 /**
1335  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1336  * @tr:         The tracing instance
1337  *
1338  * Check whether the conditional snapshot for the given instance is
1339  * enabled; if so, free the cond_snapshot associated with it,
1340  * otherwise return -EINVAL.
1341  *
1342  * Returns 0 if successful, error otherwise.
1343  */
1344 int tracing_snapshot_cond_disable(struct trace_array *tr)
1345 {
1346         int ret = 0;
1347
1348         arch_spin_lock(&tr->max_lock);
1349
1350         if (!tr->cond_snapshot)
1351                 ret = -EINVAL;
1352         else {
1353                 kfree(tr->cond_snapshot);
1354                 tr->cond_snapshot = NULL;
1355         }
1356
1357         arch_spin_unlock(&tr->max_lock);
1358
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
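/*
 * Illustrative sketch of tying the conditional snapshot API together;
 * the example_* names and state structure are hypothetical.
 */
#if 0	/* example only */
struct example_state {
	bool	hit_threshold;
};

static bool example_update(struct trace_array *tr, void *cond_data)
{
	struct example_state *s = cond_data;

	/* the snapshot is only taken when this returns true */
	return s->hit_threshold;
}

static void example_setup(struct trace_array *tr, struct example_state *s)
{
	tracing_snapshot_cond_enable(tr, s, example_update);
	/* on the interesting path: tracing_snapshot_cond(tr, s); */
	/* and when done: tracing_snapshot_cond_disable(tr); */
}
#endif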
1362 #else
1363 void tracing_snapshot(void)
1364 {
1365         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot);
1368 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1369 {
1370         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1371 }
1372 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1373 int tracing_alloc_snapshot(void)
1374 {
1375         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1376         return -ENODEV;
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1379 void tracing_snapshot_alloc(void)
1380 {
1381         /* Give warning */
1382         tracing_snapshot();
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1385 void *tracing_cond_snapshot_data(struct trace_array *tr)
1386 {
1387         return NULL;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1390 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1391 {
1392         return -ENODEV;
1393 }
1394 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1395 int tracing_snapshot_cond_disable(struct trace_array *tr)
1396 {
1397         return false;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1400 #endif /* CONFIG_TRACER_SNAPSHOT */
1401
1402 void tracer_tracing_off(struct trace_array *tr)
1403 {
1404         if (tr->array_buffer.buffer)
1405                 ring_buffer_record_off(tr->array_buffer.buffer);
1406         /*
1407          * This flag is looked at when buffers haven't been allocated
1408          * yet, or by some tracers (like irqsoff) that just want to
1409          * know if the ring buffer has been disabled, but it can handle
1410          * races where it gets disabled while we still do a record.
1411          * As the check is in the fast path of the tracers, it is more
1412          * important to be fast than accurate.
1413          */
1414         tr->buffer_disabled = 1;
1415         /* Make the flag seen by readers */
1416         smp_wmb();
1417 }
1418
1419 /**
1420  * tracing_off - turn off tracing buffers
1421  *
1422  * This function stops the tracing buffers from recording data.
1423  * It does not disable any overhead the tracers themselves may
1424  * be causing. This function simply causes all recording to
1425  * the ring buffers to fail.
1426  */
1427 void tracing_off(void)
1428 {
1429         tracer_tracing_off(&global_trace);
1430 }
1431 EXPORT_SYMBOL_GPL(tracing_off);
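/*
 * Illustrative sketch: tracing_on()/tracing_off() can bracket a region
 * of interest from kernel code (the called function is hypothetical):
 *
 *	tracing_on();
 *	interesting_code_path();
 *	tracing_off();
 *
 * so the ring buffer stops recording right after the region, preserving
 * those events for inspection.
 */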
1432
1433 void disable_trace_on_warning(void)
1434 {
1435         if (__disable_trace_on_warning) {
1436                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1437                         "Disabling tracing due to warning\n");
1438                 tracing_off();
1439         }
1440 }
1441
1442 /**
1443  * tracer_tracing_is_on - show real state of ring buffer enabled
1444  * @tr : the trace array to know if ring buffer is enabled
1445  *
1446  * Shows real state of the ring buffer if it is enabled or not.
1447  */
1448 bool tracer_tracing_is_on(struct trace_array *tr)
1449 {
1450         if (tr->array_buffer.buffer)
1451                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1452         return !tr->buffer_disabled;
1453 }
1454
1455 /**
1456  * tracing_is_on - show state of ring buffers enabled
1457  */
1458 int tracing_is_on(void)
1459 {
1460         return tracer_tracing_is_on(&global_trace);
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_is_on);
1463
1464 static int __init set_buf_size(char *str)
1465 {
1466         unsigned long buf_size;
1467
1468         if (!str)
1469                 return 0;
1470         buf_size = memparse(str, &str);
1471         /* nr_entries can not be zero */
1472         if (buf_size == 0)
1473                 return 0;
1474         trace_buf_size = buf_size;
1475         return 1;
1476 }
1477 __setup("trace_buf_size=", set_buf_size);
1478
1479 static int __init set_tracing_thresh(char *str)
1480 {
1481         unsigned long threshold;
1482         int ret;
1483
1484         if (!str)
1485                 return 0;
1486         ret = kstrtoul(str, 0, &threshold);
1487         if (ret < 0)
1488                 return 0;
1489         tracing_thresh = threshold * 1000;
1490         return 1;
1491 }
1492 __setup("tracing_thresh=", set_tracing_thresh);
1493
1494 unsigned long nsecs_to_usecs(unsigned long nsecs)
1495 {
1496         return nsecs / 1000;
1497 }
1498
1499 /*
1500  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1501  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1502  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1503  * of strings in the order that the evals (enum) were defined.
1504  */
1505 #undef C
1506 #define C(a, b) b
1507
1508 /* These must match the bit positions in trace_iterator_flags */
1509 static const char *trace_options[] = {
1510         TRACE_FLAGS
1511         NULL
1512 };
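/*
 * Worked example (illustrative): if TRACE_FLAGS contained
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 * then the "#define C(a, b) b" above expands trace_options[] to
 *	{ "print-parent", "sym-offset", ..., NULL }
 * in the same order as the TRACE_ITER_* bits.
 */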
1513
1514 static struct {
1515         u64 (*func)(void);
1516         const char *name;
1517         int in_ns;              /* is this clock in nanoseconds? */
1518 } trace_clocks[] = {
1519         { trace_clock_local,            "local",        1 },
1520         { trace_clock_global,           "global",       1 },
1521         { trace_clock_counter,          "counter",      0 },
1522         { trace_clock_jiffies,          "uptime",       0 },
1523         { trace_clock,                  "perf",         1 },
1524         { ktime_get_mono_fast_ns,       "mono",         1 },
1525         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1526         { ktime_get_boot_fast_ns,       "boot",         1 },
1527         ARCH_TRACE_CLOCKS
1528 };
1529
1530 bool trace_clock_in_ns(struct trace_array *tr)
1531 {
1532         if (trace_clocks[tr->clock_id].in_ns)
1533                 return true;
1534
1535         return false;
1536 }
1537
1538 /*
1539  * trace_parser_get_init - gets the buffer for trace parser
1540  */
1541 int trace_parser_get_init(struct trace_parser *parser, int size)
1542 {
1543         memset(parser, 0, sizeof(*parser));
1544
1545         parser->buffer = kmalloc(size, GFP_KERNEL);
1546         if (!parser->buffer)
1547                 return 1;
1548
1549         parser->size = size;
1550         return 0;
1551 }
1552
1553 /*
1554  * trace_parser_put - frees the buffer for trace parser
1555  */
1556 void trace_parser_put(struct trace_parser *parser)
1557 {
1558         kfree(parser->buffer);
1559         parser->buffer = NULL;
1560 }
1561
1562 /*
1563  * trace_get_user - reads the user input string separated by space
1564  * (matched by isspace(ch))
1565  *
1566  * For each string found the 'struct trace_parser' is updated,
1567  * and the function returns.
1568  *
1569  * Returns number of bytes read.
1570  *
1571  * See kernel/trace/trace.h for 'struct trace_parser' details.
1572  */
1573 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1574         size_t cnt, loff_t *ppos)
1575 {
1576         char ch;
1577         size_t read = 0;
1578         ssize_t ret;
1579
1580         if (!*ppos)
1581                 trace_parser_clear(parser);
1582
1583         ret = get_user(ch, ubuf++);
1584         if (ret)
1585                 goto out;
1586
1587         read++;
1588         cnt--;
1589
1590         /*
1591          * If the parser did not finish with the last write (parser->cont),
1592          * continue reading the user input without skipping spaces.
1593          */
1594         if (!parser->cont) {
1595                 /* skip white space */
1596                 while (cnt && isspace(ch)) {
1597                         ret = get_user(ch, ubuf++);
1598                         if (ret)
1599                                 goto out;
1600                         read++;
1601                         cnt--;
1602                 }
1603
1604                 parser->idx = 0;
1605
1606                 /* only spaces were written */
1607                 if (isspace(ch) || !ch) {
1608                         *ppos += read;
1609                         ret = read;
1610                         goto out;
1611                 }
1612         }
1613
1614         /* read the non-space input */
1615         while (cnt && !isspace(ch) && ch) {
1616                 if (parser->idx < parser->size - 1)
1617                         parser->buffer[parser->idx++] = ch;
1618                 else {
1619                         ret = -EINVAL;
1620                         goto out;
1621                 }
1622                 ret = get_user(ch, ubuf++);
1623                 if (ret)
1624                         goto out;
1625                 read++;
1626                 cnt--;
1627         }
1628
1629         /* We either got finished input or we have to wait for another call. */
1630         if (isspace(ch) || !ch) {
1631                 parser->buffer[parser->idx] = 0;
1632                 parser->cont = false;
1633         } else if (parser->idx < parser->size - 1) {
1634                 parser->cont = true;
1635                 parser->buffer[parser->idx++] = ch;
1636                 /* Make sure the parsed string always terminates with '\0'. */
1637                 parser->buffer[parser->idx] = 0;
1638         } else {
1639                 ret = -EINVAL;
1640                 goto out;
1641         }
1642
1643         *ppos += read;
1644         ret = read;
1645
1646 out:
1647         return ret;
1648 }
1649
1650 /* TODO add a seq_buf_to_buffer() */
1651 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1652 {
1653         int len;
1654
1655         if (trace_seq_used(s) <= s->seq.readpos)
1656                 return -EBUSY;
1657
1658         len = trace_seq_used(s) - s->seq.readpos;
1659         if (cnt > len)
1660                 cnt = len;
1661         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1662
1663         s->seq.readpos += cnt;
1664         return cnt;
1665 }
1666
1667 unsigned long __read_mostly     tracing_thresh;
1668 static const struct file_operations tracing_max_lat_fops;
1669
1670 #ifdef LATENCY_FS_NOTIFY
1671
1672 static struct workqueue_struct *fsnotify_wq;
1673
1674 static void latency_fsnotify_workfn(struct work_struct *work)
1675 {
1676         struct trace_array *tr = container_of(work, struct trace_array,
1677                                               fsnotify_work);
1678         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1679 }
1680
1681 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1682 {
1683         struct trace_array *tr = container_of(iwork, struct trace_array,
1684                                               fsnotify_irqwork);
1685         queue_work(fsnotify_wq, &tr->fsnotify_work);
1686 }
1687
1688 static void trace_create_maxlat_file(struct trace_array *tr,
1689                                      struct dentry *d_tracer)
1690 {
1691         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1692         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1693         tr->d_max_latency = trace_create_file("tracing_max_latency",
1694                                               TRACE_MODE_WRITE,
1695                                               d_tracer, &tr->max_latency,
1696                                               &tracing_max_lat_fops);
1697 }
1698
1699 __init static int latency_fsnotify_init(void)
1700 {
1701         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1702                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1703         if (!fsnotify_wq) {
1704                 pr_err("Unable to allocate tr_max_lat_wq\n");
1705                 return -ENOMEM;
1706         }
1707         return 0;
1708 }
1709
1710 late_initcall_sync(latency_fsnotify_init);
1711
1712 void latency_fsnotify(struct trace_array *tr)
1713 {
1714         if (!fsnotify_wq)
1715                 return;
1716         /*
1717          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1718          * possible that we are called from __schedule() or do_idle(), which
1719          * could cause a deadlock.
1720          */
1721         irq_work_queue(&tr->fsnotify_irqwork);
1722 }
1723
1724 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1725         || defined(CONFIG_OSNOISE_TRACER)
1726
1727 #define trace_create_maxlat_file(tr, d_tracer)                          \
1728         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1729                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1730
1731 #else
1732 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1733 #endif
1734
1735 #ifdef CONFIG_TRACER_MAX_TRACE
1736 /*
1737  * Copy the new maximum trace into the separate maximum-trace
1738  * structure. (this way the maximum trace is permanently saved,
1739  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1740  */
1741 static void
1742 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1743 {
1744         struct array_buffer *trace_buf = &tr->array_buffer;
1745         struct array_buffer *max_buf = &tr->max_buffer;
1746         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1747         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1748
1749         max_buf->cpu = cpu;
1750         max_buf->time_start = data->preempt_timestamp;
1751
1752         max_data->saved_latency = tr->max_latency;
1753         max_data->critical_start = data->critical_start;
1754         max_data->critical_end = data->critical_end;
1755
1756         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1757         max_data->pid = tsk->pid;
1758         /*
1759          * If tsk == current, then use current_uid(), as that does not use
1760          * RCU. The irq tracer can be called out of RCU scope.
1761          */
1762         if (tsk == current)
1763                 max_data->uid = current_uid();
1764         else
1765                 max_data->uid = task_uid(tsk);
1766
1767         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1768         max_data->policy = tsk->policy;
1769         max_data->rt_priority = tsk->rt_priority;
1770
1771         /* record this task's comm */
1772         tracing_record_cmdline(tsk);
1773         latency_fsnotify(tr);
1774 }
1775
1776 /**
1777  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1778  * @tr: tracer
1779  * @tsk: the task with the latency
1780  * @cpu: The cpu that initiated the trace.
1781  * @cond_data: User data associated with a conditional snapshot
1782  *
1783  * Flip the buffers between the @tr and the max_tr and record information
1784  * about which task was the cause of this latency.
1785  */
1786 void
1787 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1788               void *cond_data)
1789 {
1790         if (tr->stop_count)
1791                 return;
1792
1793         WARN_ON_ONCE(!irqs_disabled());
1794
1795         if (!tr->allocated_snapshot) {
1796                 /* Only the nop tracer should hit this when disabling */
1797                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1798                 return;
1799         }
1800
1801         arch_spin_lock(&tr->max_lock);
1802
1803         /* Inherit the recordable setting from array_buffer */
1804         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1805                 ring_buffer_record_on(tr->max_buffer.buffer);
1806         else
1807                 ring_buffer_record_off(tr->max_buffer.buffer);
1808
1809 #ifdef CONFIG_TRACER_SNAPSHOT
1810         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1811                 goto out_unlock;
1812 #endif
1813         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1814
1815         __update_max_tr(tr, tsk, cpu);
1816
1817  out_unlock:
1818         arch_spin_unlock(&tr->max_lock);
1819 }
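/*
 * Illustrative sketch (not part of the original file): a latency tracer
 * records a new worst case by bumping tr->max_latency and then calling
 * update_max_tr() with interrupts disabled and a snapshot buffer allocated.
 * The function below is hypothetical; the wakeup tracer does the equivalent
 * when it measures a new maximum wakeup latency.
 */
#if 0
static void example_report_latency(struct trace_array *tr, unsigned long delta,
                                   int cpu)
{
        if (delta <= tr->max_latency)
                return;

        tr->max_latency = delta;
        /* Flip array_buffer/max_buffer and record current as the culprit */
        update_max_tr(tr, current, cpu, NULL);
}
#endif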
1820
1821 /**
1822  * update_max_tr_single - only copy one trace over, and reset the rest
1823  * @tr: tracer
1824  * @tsk: task with the latency
1825  * @cpu: the cpu of the buffer to copy.
1826  *
1827  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1828  */
1829 void
1830 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1831 {
1832         int ret;
1833
1834         if (tr->stop_count)
1835                 return;
1836
1837         WARN_ON_ONCE(!irqs_disabled());
1838         if (!tr->allocated_snapshot) {
1839                 /* Only the nop tracer should hit this when disabling */
1840                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1841                 return;
1842         }
1843
1844         arch_spin_lock(&tr->max_lock);
1845
1846         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1847
1848         if (ret == -EBUSY) {
1849                 /*
1850                  * We failed to swap the buffer due to a commit taking
1851                  * place on this CPU. We fail to record, but we reset
1852                  * the max trace buffer (no one writes directly to it)
1853                  * and flag that it failed.
1854                  */
1855                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1856                         "Failed to swap buffers due to commit in progress\n");
1857         }
1858
1859         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1860
1861         __update_max_tr(tr, tsk, cpu);
1862         arch_spin_unlock(&tr->max_lock);
1863 }
1864 #endif /* CONFIG_TRACER_MAX_TRACE */
1865
1866 static int wait_on_pipe(struct trace_iterator *iter, int full)
1867 {
1868         /* Iterators are static, they should be filled or empty */
1869         if (trace_buffer_iter(iter, iter->cpu_file))
1870                 return 0;
1871
1872         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1873                                 full);
1874 }
1875
1876 #ifdef CONFIG_FTRACE_STARTUP_TEST
1877 static bool selftests_can_run;
1878
1879 struct trace_selftests {
1880         struct list_head                list;
1881         struct tracer                   *type;
1882 };
1883
1884 static LIST_HEAD(postponed_selftests);
1885
1886 static int save_selftest(struct tracer *type)
1887 {
1888         struct trace_selftests *selftest;
1889
1890         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1891         if (!selftest)
1892                 return -ENOMEM;
1893
1894         selftest->type = type;
1895         list_add(&selftest->list, &postponed_selftests);
1896         return 0;
1897 }
1898
1899 static int run_tracer_selftest(struct tracer *type)
1900 {
1901         struct trace_array *tr = &global_trace;
1902         struct tracer *saved_tracer = tr->current_trace;
1903         int ret;
1904
1905         if (!type->selftest || tracing_selftest_disabled)
1906                 return 0;
1907
1908         /*
1909          * If a tracer registers early in boot up (before scheduling is
1910          * initialized and such), then do not run its selftests yet.
1911          * Instead, run it a little later in the boot process.
1912          */
1913         if (!selftests_can_run)
1914                 return save_selftest(type);
1915
1916         if (!tracing_is_on()) {
1917                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1918                         type->name);
1919                 return 0;
1920         }
1921
1922         /*
1923          * Run a selftest on this tracer.
1924          * Here we reset the trace buffer, and set the current
1925          * tracer to be this tracer. The tracer can then run some
1926          * internal tracing to verify that everything is in order.
1927          * If we fail, we do not register this tracer.
1928          */
1929         tracing_reset_online_cpus(&tr->array_buffer);
1930
1931         tr->current_trace = type;
1932
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934         if (type->use_max_tr) {
1935                 /* If we expanded the buffers, make sure the max is expanded too */
1936                 if (ring_buffer_expanded)
1937                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1938                                            RING_BUFFER_ALL_CPUS);
1939                 tr->allocated_snapshot = true;
1940         }
1941 #endif
1942
1943         /* the test is responsible for initializing and enabling */
1944         pr_info("Testing tracer %s: ", type->name);
1945         ret = type->selftest(type, tr);
1946         /* the test is responsible for resetting too */
1947         tr->current_trace = saved_tracer;
1948         if (ret) {
1949                 printk(KERN_CONT "FAILED!\n");
1950                 /* Add the warning after printing 'FAILED' */
1951                 WARN_ON(1);
1952                 return -1;
1953         }
1954         /* Only reset on passing, to avoid touching corrupted buffers */
1955         tracing_reset_online_cpus(&tr->array_buffer);
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 tr->allocated_snapshot = false;
1960
1961                 /* Shrink the max buffer again */
1962                 if (ring_buffer_expanded)
1963                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1964                                            RING_BUFFER_ALL_CPUS);
1965         }
1966 #endif
1967
1968         printk(KERN_CONT "PASSED\n");
1969         return 0;
1970 }
1971
1972 static __init int init_trace_selftests(void)
1973 {
1974         struct trace_selftests *p, *n;
1975         struct tracer *t, **last;
1976         int ret;
1977
1978         selftests_can_run = true;
1979
1980         mutex_lock(&trace_types_lock);
1981
1982         if (list_empty(&postponed_selftests))
1983                 goto out;
1984
1985         pr_info("Running postponed tracer tests:\n");
1986
1987         tracing_selftest_running = true;
1988         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1989                 /* This loop can take minutes when sanitizers are enabled, so
1990                  * let's make sure we allow RCU processing.
1991                  */
1992                 cond_resched();
1993                 ret = run_tracer_selftest(p->type);
1994                 /* If the test fails, then warn and remove from available_tracers */
1995                 if (ret < 0) {
1996                         WARN(1, "tracer: %s failed selftest, disabling\n",
1997                              p->type->name);
1998                         last = &trace_types;
1999                         for (t = trace_types; t; t = t->next) {
2000                                 if (t == p->type) {
2001                                         *last = t->next;
2002                                         break;
2003                                 }
2004                                 last = &t->next;
2005                         }
2006                 }
2007                 list_del(&p->list);
2008                 kfree(p);
2009         }
2010         tracing_selftest_running = false;
2011
2012  out:
2013         mutex_unlock(&trace_types_lock);
2014
2015         return 0;
2016 }
2017 core_initcall(init_trace_selftests);
2018 #else
2019 static inline int run_tracer_selftest(struct tracer *type)
2020 {
2021         return 0;
2022 }
2023 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2024
2025 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2026
2027 static void __init apply_trace_boot_options(void);
2028
2029 /**
2030  * register_tracer - register a tracer with the ftrace system.
2031  * @type: the plugin for the tracer
2032  *
2033  * Register a new plugin tracer.
2034  */
2035 int __init register_tracer(struct tracer *type)
2036 {
2037         struct tracer *t;
2038         int ret = 0;
2039
2040         if (!type->name) {
2041                 pr_info("Tracer must have a name\n");
2042                 return -1;
2043         }
2044
2045         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2046                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2047                 return -1;
2048         }
2049
2050         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2051                 pr_warn("Can not register tracer %s due to lockdown\n",
2052                            type->name);
2053                 return -EPERM;
2054         }
2055
2056         mutex_lock(&trace_types_lock);
2057
2058         tracing_selftest_running = true;
2059
2060         for (t = trace_types; t; t = t->next) {
2061                 if (strcmp(type->name, t->name) == 0) {
2062                         /* already found */
2063                         pr_info("Tracer %s already registered\n",
2064                                 type->name);
2065                         ret = -1;
2066                         goto out;
2067                 }
2068         }
2069
2070         if (!type->set_flag)
2071                 type->set_flag = &dummy_set_flag;
2072         if (!type->flags) {
2073                 /* allocate a dummy tracer_flags */
2074                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2075                 if (!type->flags) {
2076                         ret = -ENOMEM;
2077                         goto out;
2078                 }
2079                 type->flags->val = 0;
2080                 type->flags->opts = dummy_tracer_opt;
2081         } else
2082                 if (!type->flags->opts)
2083                         type->flags->opts = dummy_tracer_opt;
2084
2085         /* store the tracer for __set_tracer_option */
2086         type->flags->trace = type;
2087
2088         ret = run_tracer_selftest(type);
2089         if (ret < 0)
2090                 goto out;
2091
2092         type->next = trace_types;
2093         trace_types = type;
2094         add_tracer_options(&global_trace, type);
2095
2096  out:
2097         tracing_selftest_running = false;
2098         mutex_unlock(&trace_types_lock);
2099
2100         if (ret || !default_bootup_tracer)
2101                 goto out_unlock;
2102
2103         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2104                 goto out_unlock;
2105
2106         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2107         /* Do we want this tracer to start on bootup? */
2108         tracing_set_tracer(&global_trace, type->name);
2109         default_bootup_tracer = NULL;
2110
2111         apply_trace_boot_options();
2112
2113         /* disable other selftests, since this will break them. */
2114         disable_tracing_selftest("running a tracer");
2115
2116  out_unlock:
2117         return ret;
2118 }
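/*
 * Illustrative sketch (not part of the original file): the minimum a plugin
 * tracer needs in order to call register_tracer(). The names below are
 * hypothetical; real tracers register the same way from an initcall, since
 * register_tracer() is __init.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        /* called when this tracer is selected via current_tracer */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* called when another tracer replaces this one */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int example_tracer_register(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);
#endif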
2119
2120 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2121 {
2122         struct trace_buffer *buffer = buf->buffer;
2123
2124         if (!buffer)
2125                 return;
2126
2127         ring_buffer_record_disable(buffer);
2128
2129         /* Make sure all commits have finished */
2130         synchronize_rcu();
2131         ring_buffer_reset_cpu(buffer, cpu);
2132
2133         ring_buffer_record_enable(buffer);
2134 }
2135
2136 void tracing_reset_online_cpus(struct array_buffer *buf)
2137 {
2138         struct trace_buffer *buffer = buf->buffer;
2139
2140         if (!buffer)
2141                 return;
2142
2143         ring_buffer_record_disable(buffer);
2144
2145         /* Make sure all commits have finished */
2146         synchronize_rcu();
2147
2148         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2149
2150         ring_buffer_reset_online_cpus(buffer);
2151
2152         ring_buffer_record_enable(buffer);
2153 }
2154
2155 /* Must have trace_types_lock held */
2156 void tracing_reset_all_online_cpus(void)
2157 {
2158         struct trace_array *tr;
2159
2160         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2161                 if (!tr->clear_trace)
2162                         continue;
2163                 tr->clear_trace = false;
2164                 tracing_reset_online_cpus(&tr->array_buffer);
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166                 tracing_reset_online_cpus(&tr->max_buffer);
2167 #endif
2168         }
2169 }
2170
2171 /*
2172  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2173  * is the tgid last observed corresponding to pid=i.
2174  */
2175 static int *tgid_map;
2176
2177 /* The maximum valid index into tgid_map. */
2178 static size_t tgid_map_max;
2179
2180 #define SAVED_CMDLINES_DEFAULT 128
2181 #define NO_CMDLINE_MAP UINT_MAX
2182 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2183 struct saved_cmdlines_buffer {
2184         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2185         unsigned *map_cmdline_to_pid;
2186         unsigned cmdline_num;
2187         int cmdline_idx;
2188         char *saved_cmdlines;
2189 };
2190 static struct saved_cmdlines_buffer *savedcmd;
2191
2192 static inline char *get_saved_cmdlines(int idx)
2193 {
2194         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2195 }
2196
2197 static inline void set_cmdline(int idx, const char *cmdline)
2198 {
2199         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2200 }
2201
2202 static int allocate_cmdlines_buffer(unsigned int val,
2203                                     struct saved_cmdlines_buffer *s)
2204 {
2205         s->map_cmdline_to_pid = kmalloc_array(val,
2206                                               sizeof(*s->map_cmdline_to_pid),
2207                                               GFP_KERNEL);
2208         if (!s->map_cmdline_to_pid)
2209                 return -ENOMEM;
2210
2211         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2212         if (!s->saved_cmdlines) {
2213                 kfree(s->map_cmdline_to_pid);
2214                 return -ENOMEM;
2215         }
2216
2217         s->cmdline_idx = 0;
2218         s->cmdline_num = val;
2219         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2220                sizeof(s->map_pid_to_cmdline));
2221         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2222                val * sizeof(*s->map_cmdline_to_pid));
2223
2224         return 0;
2225 }
2226
2227 static int trace_create_savedcmd(void)
2228 {
2229         int ret;
2230
2231         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2232         if (!savedcmd)
2233                 return -ENOMEM;
2234
2235         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2236         if (ret < 0) {
2237                 kfree(savedcmd);
2238                 savedcmd = NULL;
2239                 return -ENOMEM;
2240         }
2241
2242         return 0;
2243 }
2244
2245 int is_tracing_stopped(void)
2246 {
2247         return global_trace.stop_count;
2248 }
2249
2250 /**
2251  * tracing_start - quick start of the tracer
2252  *
2253  * If tracing is enabled but was stopped by tracing_stop,
2254  * this will start the tracer back up.
2255  */
2256 void tracing_start(void)
2257 {
2258         struct trace_buffer *buffer;
2259         unsigned long flags;
2260
2261         if (tracing_disabled)
2262                 return;
2263
2264         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2265         if (--global_trace.stop_count) {
2266                 if (global_trace.stop_count < 0) {
2267                         /* Someone screwed up their debugging */
2268                         WARN_ON_ONCE(1);
2269                         global_trace.stop_count = 0;
2270                 }
2271                 goto out;
2272         }
2273
2274         /* Prevent the buffers from switching */
2275         arch_spin_lock(&global_trace.max_lock);
2276
2277         buffer = global_trace.array_buffer.buffer;
2278         if (buffer)
2279                 ring_buffer_record_enable(buffer);
2280
2281 #ifdef CONFIG_TRACER_MAX_TRACE
2282         buffer = global_trace.max_buffer.buffer;
2283         if (buffer)
2284                 ring_buffer_record_enable(buffer);
2285 #endif
2286
2287         arch_spin_unlock(&global_trace.max_lock);
2288
2289  out:
2290         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2291 }
2292
2293 static void tracing_start_tr(struct trace_array *tr)
2294 {
2295         struct trace_buffer *buffer;
2296         unsigned long flags;
2297
2298         if (tracing_disabled)
2299                 return;
2300
2301         /* If global, we need to also start the max tracer */
2302         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2303                 return tracing_start();
2304
2305         raw_spin_lock_irqsave(&tr->start_lock, flags);
2306
2307         if (--tr->stop_count) {
2308                 if (tr->stop_count < 0) {
2309                         /* Someone screwed up their debugging */
2310                         WARN_ON_ONCE(1);
2311                         tr->stop_count = 0;
2312                 }
2313                 goto out;
2314         }
2315
2316         buffer = tr->array_buffer.buffer;
2317         if (buffer)
2318                 ring_buffer_record_enable(buffer);
2319
2320  out:
2321         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2322 }
2323
2324 /**
2325  * tracing_stop - quick stop of the tracer
2326  *
2327  * Light weight way to stop tracing. Use in conjunction with
2328  * tracing_start.
2329  */
2330 void tracing_stop(void)
2331 {
2332         struct trace_buffer *buffer;
2333         unsigned long flags;
2334
2335         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2336         if (global_trace.stop_count++)
2337                 goto out;
2338
2339         /* Prevent the buffers from switching */
2340         arch_spin_lock(&global_trace.max_lock);
2341
2342         buffer = global_trace.array_buffer.buffer;
2343         if (buffer)
2344                 ring_buffer_record_disable(buffer);
2345
2346 #ifdef CONFIG_TRACER_MAX_TRACE
2347         buffer = global_trace.max_buffer.buffer;
2348         if (buffer)
2349                 ring_buffer_record_disable(buffer);
2350 #endif
2351
2352         arch_spin_unlock(&global_trace.max_lock);
2353
2354  out:
2355         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2356 }
2357
2358 static void tracing_stop_tr(struct trace_array *tr)
2359 {
2360         struct trace_buffer *buffer;
2361         unsigned long flags;
2362
2363         /* If global, we need to also stop the max tracer */
2364         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2365                 return tracing_stop();
2366
2367         raw_spin_lock_irqsave(&tr->start_lock, flags);
2368         if (tr->stop_count++)
2369                 goto out;
2370
2371         buffer = tr->array_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374
2375  out:
2376         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2377 }
2378
2379 static int trace_save_cmdline(struct task_struct *tsk)
2380 {
2381         unsigned tpid, idx;
2382
2383         /* treat recording of idle task as a success */
2384         if (!tsk->pid)
2385                 return 1;
2386
2387         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2388
2389         /*
2390          * It's not the end of the world if we don't get
2391          * the lock, but we also don't want to spin
2392          * nor do we want to disable interrupts,
2393          * so if we miss here, then better luck next time.
2394          */
2395         if (!arch_spin_trylock(&trace_cmdline_lock))
2396                 return 0;
2397
2398         idx = savedcmd->map_pid_to_cmdline[tpid];
2399         if (idx == NO_CMDLINE_MAP) {
2400                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2401
2402                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2403                 savedcmd->cmdline_idx = idx;
2404         }
2405
2406         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2407         set_cmdline(idx, tsk->comm);
2408
2409         arch_spin_unlock(&trace_cmdline_lock);
2410
2411         return 1;
2412 }
2413
2414 static void __trace_find_cmdline(int pid, char comm[])
2415 {
2416         unsigned map;
2417         int tpid;
2418
2419         if (!pid) {
2420                 strcpy(comm, "<idle>");
2421                 return;
2422         }
2423
2424         if (WARN_ON_ONCE(pid < 0)) {
2425                 strcpy(comm, "<XXX>");
2426                 return;
2427         }
2428
2429         tpid = pid & (PID_MAX_DEFAULT - 1);
2430         map = savedcmd->map_pid_to_cmdline[tpid];
2431         if (map != NO_CMDLINE_MAP) {
2432                 tpid = savedcmd->map_cmdline_to_pid[map];
2433                 if (tpid == pid) {
2434                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2435                         return;
2436                 }
2437         }
2438         strcpy(comm, "<...>");
2439 }
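/*
 * Worked example (illustrative, not part of the original file): with the
 * default PID_MAX_DEFAULT of 0x8000, pid 40000 masks to slot
 * 40000 & 0x7fff == 7232 in map_pid_to_cmdline, the same slot used by
 * pid 7232. If pid 7232 saves its comm after pid 40000 did, the slot now
 * points at a saved_cmdlines entry whose map_cmdline_to_pid records 7232,
 * so a later lookup for pid 40000 sees the mismatch and returns "<...>".
 */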
2440
2441 void trace_find_cmdline(int pid, char comm[])
2442 {
2443         preempt_disable();
2444         arch_spin_lock(&trace_cmdline_lock);
2445
2446         __trace_find_cmdline(pid, comm);
2447
2448         arch_spin_unlock(&trace_cmdline_lock);
2449         preempt_enable();
2450 }
2451
2452 static int *trace_find_tgid_ptr(int pid)
2453 {
2454         /*
2455          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2456          * if we observe a non-NULL tgid_map then we also observe the correct
2457          * tgid_map_max.
2458          */
2459         int *map = smp_load_acquire(&tgid_map);
2460
2461         if (unlikely(!map || pid > tgid_map_max))
2462                 return NULL;
2463
2464         return &map[pid];
2465 }
2466
2467 int trace_find_tgid(int pid)
2468 {
2469         int *ptr = trace_find_tgid_ptr(pid);
2470
2471         return ptr ? *ptr : 0;
2472 }
2473
2474 static int trace_save_tgid(struct task_struct *tsk)
2475 {
2476         int *ptr;
2477
2478         /* treat recording of idle task as a success */
2479         if (!tsk->pid)
2480                 return 1;
2481
2482         ptr = trace_find_tgid_ptr(tsk->pid);
2483         if (!ptr)
2484                 return 0;
2485
2486         *ptr = tsk->tgid;
2487         return 1;
2488 }
2489
2490 static bool tracing_record_taskinfo_skip(int flags)
2491 {
2492         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493                 return true;
2494         if (!__this_cpu_read(trace_taskinfo_save))
2495                 return true;
2496         return false;
2497 }
2498
2499 /**
2500  * tracing_record_taskinfo - record the task info of a task
2501  *
2502  * @task:  task to record
2503  * @flags: TRACE_RECORD_CMDLINE for recording comm
2504  *         TRACE_RECORD_TGID for recording tgid
2505  */
2506 void tracing_record_taskinfo(struct task_struct *task, int flags)
2507 {
2508         bool done;
2509
2510         if (tracing_record_taskinfo_skip(flags))
2511                 return;
2512
2513         /*
2514          * Record as much task information as possible. If some fail, continue
2515          * to try to record the others.
2516          */
2517         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2518         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2519
2520         /* If recording any information failed, retry again soon. */
2521         if (!done)
2522                 return;
2523
2524         __this_cpu_write(trace_taskinfo_save, false);
2525 }
2526
2527 /**
2528  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2529  *
2530  * @prev: previous task during sched_switch
2531  * @next: next task during sched_switch
2532  * @flags: TRACE_RECORD_CMDLINE for recording comm
2533  *         TRACE_RECORD_TGID for recording tgid
2534  */
2535 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2536                                           struct task_struct *next, int flags)
2537 {
2538         bool done;
2539
2540         if (tracing_record_taskinfo_skip(flags))
2541                 return;
2542
2543         /*
2544          * Record as much task information as possible. If some fail, continue
2545          * to try to record the others.
2546          */
2547         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2548         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2549         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2550         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2551
2552         /* If recording any information failed, retry again soon. */
2553         if (!done)
2554                 return;
2555
2556         __this_cpu_write(trace_taskinfo_save, false);
2557 }
2558
2559 /* Helpers to record a specific task information */
2560 void tracing_record_cmdline(struct task_struct *task)
2561 {
2562         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2563 }
2564
2565 void tracing_record_tgid(struct task_struct *task)
2566 {
2567         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2568 }
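/*
 * Illustrative sketch (not part of the original file): a sched_switch probe
 * can record both comm and tgid for the outgoing and incoming tasks in one
 * call. The probe below is hypothetical; the real probe in
 * trace_sched_switch.c makes an equivalent call with the flags it needs.
 */
#if 0
static void example_sched_switch_probe(void *ignore, bool preempt,
                                       struct task_struct *prev,
                                       struct task_struct *next)
{
        tracing_record_taskinfo_sched_switch(prev, next,
                        TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
}
#endif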
2569
2570 /*
2571  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2572  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2573  * simplifies those functions and keeps them in sync.
2574  */
2575 enum print_line_t trace_handle_return(struct trace_seq *s)
2576 {
2577         return trace_seq_has_overflowed(s) ?
2578                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2579 }
2580 EXPORT_SYMBOL_GPL(trace_handle_return);
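/*
 * Illustrative sketch (not part of the original file): an event's output
 * callback typically ends with trace_handle_return(), so an overflowing
 * trace_seq is reported as TRACE_TYPE_PARTIAL_LINE. The event below is
 * hypothetical.
 */
#if 0
static enum print_line_t example_event_print(struct trace_iterator *iter,
                                             int flags,
                                             struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_puts(s, "example event\n");

        return trace_handle_return(s);
}
#endif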
2581
2582 static unsigned short migration_disable_value(void)
2583 {
2584 #if defined(CONFIG_SMP)
2585         return current->migration_disabled;
2586 #else
2587         return 0;
2588 #endif
2589 }
2590
2591 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2592 {
2593         unsigned int trace_flags = irqs_status;
2594         unsigned int pc;
2595
2596         pc = preempt_count();
2597
2598         if (pc & NMI_MASK)
2599                 trace_flags |= TRACE_FLAG_NMI;
2600         if (pc & HARDIRQ_MASK)
2601                 trace_flags |= TRACE_FLAG_HARDIRQ;
2602         if (in_serving_softirq())
2603                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2604
2605         if (tif_need_resched())
2606                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2607         if (test_preempt_need_resched())
2608                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2609         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2610                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2611 }
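/*
 * Worked example (illustrative, not part of the original file): the returned
 * word packs the context as
 *
 *   bits  0- 3: preempt_count() & 0xff, clamped to 0xf
 *   bits  4- 7: migration-disable depth, clamped to 0xf
 *   bits 16-31: TRACE_FLAG_* bits (irqs_status plus the flags set above)
 *
 * Assuming the caller passed TRACE_FLAG_IRQS_OFF as irqs_status, an event
 * from hard interrupt context with preempt_count() == HARDIRQ_OFFSET + 1
 * yields ((TRACE_FLAG_IRQS_OFF | TRACE_FLAG_HARDIRQ) << 16) | 0x1.
 */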
2612
2613 struct ring_buffer_event *
2614 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2615                           int type,
2616                           unsigned long len,
2617                           unsigned int trace_ctx)
2618 {
2619         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2620 }
2621
2622 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2623 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2624 static int trace_buffered_event_ref;
2625
2626 /**
2627  * trace_buffered_event_enable - enable buffering events
2628  *
2629  * When events are being filtered, it is quicker to write the event data
2630  * into a temporary buffer if there is a likely chance that the event
2631  * will not be committed. Discarding an event from the ring buffer is
2632  * slower than committing it, and much slower than copying the data and
2633  * then committing the copy.
2634  *
2635  * When events are being filtered, allocate per-cpu buffers to write the
2636  * event data into. If the event is filtered and discarded, the data is
2637  * simply dropped; otherwise, the entire event is committed to the ring
2638  * buffer in one shot.
2639  */
2640 void trace_buffered_event_enable(void)
2641 {
2642         struct ring_buffer_event *event;
2643         struct page *page;
2644         int cpu;
2645
2646         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647
2648         if (trace_buffered_event_ref++)
2649                 return;
2650
2651         for_each_tracing_cpu(cpu) {
2652                 page = alloc_pages_node(cpu_to_node(cpu),
2653                                         GFP_KERNEL | __GFP_NORETRY, 0);
2654                 if (!page)
2655                         goto failed;
2656
2657                 event = page_address(page);
2658                 memset(event, 0, sizeof(*event));
2659
2660                 per_cpu(trace_buffered_event, cpu) = event;
2661
2662                 preempt_disable();
2663                 if (cpu == smp_processor_id() &&
2664                     __this_cpu_read(trace_buffered_event) !=
2665                     per_cpu(trace_buffered_event, cpu))
2666                         WARN_ON_ONCE(1);
2667                 preempt_enable();
2668         }
2669
2670         return;
2671  failed:
2672         trace_buffered_event_disable();
2673 }
2674
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677         /* Probably not needed, but do it anyway */
2678         smp_rmb();
2679         this_cpu_dec(trace_buffered_event_cnt);
2680 }
2681
2682 static void disable_trace_buffered_event(void *data)
2683 {
2684         this_cpu_inc(trace_buffered_event_cnt);
2685 }
2686
2687 /**
2688  * trace_buffered_event_disable - disable buffering events
2689  *
2690  * When a filter is removed, it is faster to not use the buffered
2691  * events, and to commit directly into the ring buffer. Free up
2692  * the temp buffers when there are no more users. This requires
2693  * special synchronization with current events.
2694  */
2695 void trace_buffered_event_disable(void)
2696 {
2697         int cpu;
2698
2699         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2700
2701         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2702                 return;
2703
2704         if (--trace_buffered_event_ref)
2705                 return;
2706
2707         preempt_disable();
2708         /* For each CPU, set the buffer as used. */
2709         smp_call_function_many(tracing_buffer_mask,
2710                                disable_trace_buffered_event, NULL, 1);
2711         preempt_enable();
2712
2713         /* Wait for all current users to finish */
2714         synchronize_rcu();
2715
2716         for_each_tracing_cpu(cpu) {
2717                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2718                 per_cpu(trace_buffered_event, cpu) = NULL;
2719         }
2720         /*
2721          * Make sure trace_buffered_event is NULL before clearing
2722          * trace_buffered_event_cnt.
2723          */
2724         smp_wmb();
2725
2726         preempt_disable();
2727         /* Do the work on each cpu */
2728         smp_call_function_many(tracing_buffer_mask,
2729                                enable_trace_buffered_event, NULL, 1);
2730         preempt_enable();
2731 }
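/*
 * Illustrative sketch (not part of the original file): the enable/disable
 * pair is reference counted and must run under event_mutex, typically when
 * a filter is attached to or removed from an event file. The helpers below
 * are hypothetical.
 */
#if 0
static void example_filter_attach(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();  /* take a ref on the per-cpu buffers */
        /* ... install the filter ... */
        mutex_unlock(&event_mutex);
}

static void example_filter_detach(void)
{
        mutex_lock(&event_mutex);
        /* ... remove the filter ... */
        trace_buffered_event_disable(); /* drop the ref; may free the buffers */
        mutex_unlock(&event_mutex);
}
#endif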
2732
2733 static struct trace_buffer *temp_buffer;
2734
2735 struct ring_buffer_event *
2736 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2737                           struct trace_event_file *trace_file,
2738                           int type, unsigned long len,
2739                           unsigned int trace_ctx)
2740 {
2741         struct ring_buffer_event *entry;
2742         struct trace_array *tr = trace_file->tr;
2743         int val;
2744
2745         *current_rb = tr->array_buffer.buffer;
2746
2747         if (!tr->no_filter_buffering_ref &&
2748             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2749             (entry = this_cpu_read(trace_buffered_event))) {
2750                 /*
2751                  * Filtering is on, so try to use the per cpu buffer first.
2752                  * This buffer will simulate a ring_buffer_event,
2753                  * where the type_len is zero and the array[0] will
2754                  * hold the full length.
2755                  * (see include/linux/ring_buffer.h for details on
2756                  *  how the ring_buffer_event is structured).
2757                  *
2758                  * Using a temp buffer during filtering and copying it
2759                  * on a matched filter is quicker than writing directly
2760                  * into the ring buffer and then discarding it when
2761                  * it doesn't match. That is because the discard
2762                  * requires several atomic operations to get right.
2763                  * Copying on match and doing nothing on a failed match
2764                  * is still quicker than writing directly and then having
2765                  * to discard out of the ring buffer on a failed match.
2766                  */
2767                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2768
2769                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2770
2771                 /*
2772                  * Preemption is disabled, but interrupts and NMIs
2773                  * can still come in now. If that happens after
2774                  * the above increment, then it will have to go
2775                  * back to the old method of allocating the event
2776                  * on the ring buffer, and if the filter fails, it
2777                  * will have to call ring_buffer_discard_commit()
2778                  * to remove it.
2779                  *
2780                  * Need to also check the unlikely case that the
2781                  * length is bigger than the temp buffer size.
2782                  * If that happens, then the reserve is pretty much
2783                  * guaranteed to fail, as the ring buffer currently
2784                  * only allows events less than a page. But that may
2785                  * change in the future, so let the ring buffer reserve
2786                  * handle the failure in that case.
2787                  */
2788                 if (val == 1 && likely(len <= max_len)) {
2789                         trace_event_setup(entry, type, trace_ctx);
2790                         entry->array[0] = len;
2791                         return entry;
2792                 }
2793                 this_cpu_dec(trace_buffered_event_cnt);
2794         }
2795
2796         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2797                                             trace_ctx);
2798         /*
2799          * If tracing is off, but we have triggers enabled
2800          * we still need to look at the event data. Use the temp_buffer
2801          * to store the trace event for the trigger to use. It's recursion
2802          * safe and will not be recorded anywhere.
2803          */
2804         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2805                 *current_rb = temp_buffer;
2806                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2807                                                     trace_ctx);
2808         }
2809         return entry;
2810 }
2811 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2812
2813 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2814 static DEFINE_MUTEX(tracepoint_printk_mutex);
2815
2816 static void output_printk(struct trace_event_buffer *fbuffer)
2817 {
2818         struct trace_event_call *event_call;
2819         struct trace_event_file *file;
2820         struct trace_event *event;
2821         unsigned long flags;
2822         struct trace_iterator *iter = tracepoint_print_iter;
2823
2824         /* We should never get here if iter is NULL */
2825         if (WARN_ON_ONCE(!iter))
2826                 return;
2827
2828         event_call = fbuffer->trace_file->event_call;
2829         if (!event_call || !event_call->event.funcs ||
2830             !event_call->event.funcs->trace)
2831                 return;
2832
2833         file = fbuffer->trace_file;
2834         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2835             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2836              !filter_match_preds(file->filter, fbuffer->entry)))
2837                 return;
2838
2839         event = &fbuffer->trace_file->event_call->event;
2840
2841         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2842         trace_seq_init(&iter->seq);
2843         iter->ent = fbuffer->entry;
2844         event_call->event.funcs->trace(iter, 0, event);
2845         trace_seq_putc(&iter->seq, 0);
2846         printk("%s", iter->seq.buffer);
2847
2848         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2849 }
2850
2851 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2852                              void *buffer, size_t *lenp,
2853                              loff_t *ppos)
2854 {
2855         int save_tracepoint_printk;
2856         int ret;
2857
2858         mutex_lock(&tracepoint_printk_mutex);
2859         save_tracepoint_printk = tracepoint_printk;
2860
2861         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2862
2863         /*
2864          * This will force exiting early, as tracepoint_printk
2865          * is always zero when tracepoint_print_iter is not allocated.
2866          */
2867         if (!tracepoint_print_iter)
2868                 tracepoint_printk = 0;
2869
2870         if (save_tracepoint_printk == tracepoint_printk)
2871                 goto out;
2872
2873         if (tracepoint_printk)
2874                 static_key_enable(&tracepoint_printk_key.key);
2875         else
2876                 static_key_disable(&tracepoint_printk_key.key);
2877
2878  out:
2879         mutex_unlock(&tracepoint_printk_mutex);
2880
2881         return ret;
2882 }
2883
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886         enum event_trigger_type tt = ETT_NONE;
2887         struct trace_event_file *file = fbuffer->trace_file;
2888
2889         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890                         fbuffer->entry, &tt))
2891                 goto discard;
2892
2893         if (static_key_false(&tracepoint_printk_key.key))
2894                 output_printk(fbuffer);
2895
2896         if (static_branch_unlikely(&trace_event_exports_enabled))
2897                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898
2899         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901
2902 discard:
2903         if (tt)
2904                 event_triggers_post_call(file, tt);
2905
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
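/*
 * Illustrative sketch (not part of the original file): the usual consumer of
 * the reserve/commit pair is the generated trace_event_raw_event_<call>()
 * probe, which roughly does the following. The entry layout below is
 * hypothetical.
 */
#if 0
static void example_raw_event_probe(struct trace_event_file *trace_file,
                                    int value)
{
        struct trace_event_buffer fbuffer;
        struct example_entry {          /* hypothetical event layout */
                struct trace_entry ent;
                int value;
        } *entry;

        entry = trace_event_buffer_reserve(&fbuffer, trace_file,
                                           sizeof(*entry));
        if (!entry)
                return;

        entry->value = value;

        trace_event_buffer_commit(&fbuffer);
}
#endif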
2908
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917
2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919                                      struct trace_buffer *buffer,
2920                                      struct ring_buffer_event *event,
2921                                      unsigned int trace_ctx,
2922                                      struct pt_regs *regs)
2923 {
2924         __buffer_unlock_commit(buffer, event);
2925
2926         /*
2927          * If regs is not set, then skip the necessary functions.
2928          * Note, we can still get here via blktrace, wakeup tracer
2929          * and mmiotrace, but that's ok if they lose a function or
2930          * two. They are not that meaningful.
2931          */
2932         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933         ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941                                    struct ring_buffer_event *event)
2942 {
2943         __buffer_unlock_commit(buffer, event);
2944 }
2945
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948                parent_ip, unsigned int trace_ctx)
2949 {
2950         struct trace_event_call *call = &event_function;
2951         struct trace_buffer *buffer = tr->array_buffer.buffer;
2952         struct ring_buffer_event *event;
2953         struct ftrace_entry *entry;
2954
2955         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2956                                             trace_ctx);
2957         if (!event)
2958                 return;
2959         entry   = ring_buffer_event_data(event);
2960         entry->ip                       = ip;
2961         entry->parent_ip                = parent_ip;
2962
2963         if (!call_filter_check_discard(call, entry, buffer, event)) {
2964                 if (static_branch_unlikely(&trace_function_exports_enabled))
2965                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2966                 __buffer_unlock_commit(buffer, event);
2967         }
2968 }
2969
2970 #ifdef CONFIG_STACKTRACE
2971
2972 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2973 #define FTRACE_KSTACK_NESTING   4
2974
2975 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2976
2977 struct ftrace_stack {
2978         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2979 };
2980
2981
2982 struct ftrace_stacks {
2983         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2984 };
2985
2986 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2987 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2988
2989 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2990                                  unsigned int trace_ctx,
2991                                  int skip, struct pt_regs *regs)
2992 {
2993         struct trace_event_call *call = &event_kernel_stack;
2994         struct ring_buffer_event *event;
2995         unsigned int size, nr_entries;
2996         struct ftrace_stack *fstack;
2997         struct stack_entry *entry;
2998         int stackidx;
2999
3000         /*
3001          * Add one, for this function and the call to stack_trace_save().
3002          * If regs is set, then these functions will not be in the way.
3003          */
3004 #ifndef CONFIG_UNWINDER_ORC
3005         if (!regs)
3006                 skip++;
3007 #endif
3008
3009         preempt_disable_notrace();
3010
3011         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3012
3013         /* This should never happen. If it does, yell once and skip */
3014         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3015                 goto out;
3016
3017         /*
3018          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3019          * interrupt will either see the value pre increment or post
3020          * increment. If the interrupt happens pre increment it will have
3021          * restored the counter when it returns.  We just need a barrier to
3022          * keep gcc from moving things around.
3023          */
3024         barrier();
3025
3026         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3027         size = ARRAY_SIZE(fstack->calls);
3028
3029         if (regs) {
3030                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3031                                                    size, skip);
3032         } else {
3033                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3034         }
3035
3036         size = nr_entries * sizeof(unsigned long);
3037         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3038                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3039                                     trace_ctx);
3040         if (!event)
3041                 goto out;
3042         entry = ring_buffer_event_data(event);
3043
3044         memcpy(&entry->caller, fstack->calls, size);
3045         entry->size = nr_entries;
3046
3047         if (!call_filter_check_discard(call, entry, buffer, event))
3048                 __buffer_unlock_commit(buffer, event);
3049
3050  out:
3051         /* Again, don't let gcc optimize things here */
3052         barrier();
3053         __this_cpu_dec(ftrace_stack_reserve);
3054         preempt_enable_notrace();
3055
3056 }
3057
3058 static inline void ftrace_trace_stack(struct trace_array *tr,
3059                                       struct trace_buffer *buffer,
3060                                       unsigned int trace_ctx,
3061                                       int skip, struct pt_regs *regs)
3062 {
3063         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3064                 return;
3065
3066         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3067 }
3068
3069 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3070                    int skip)
3071 {
3072         struct trace_buffer *buffer = tr->array_buffer.buffer;
3073
3074         if (rcu_is_watching()) {
3075                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3076                 return;
3077         }
3078
3079         /*
3080          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3081          * but if the above rcu_is_watching() failed, then the NMI
3082          * triggered someplace critical, and rcu_irq_enter() should
3083          * not be called from NMI.
3084          */
3085         if (unlikely(in_nmi()))
3086                 return;
3087
3088         rcu_irq_enter_irqson();
3089         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090         rcu_irq_exit_irqson();
3091 }
3092
3093 /**
3094  * trace_dump_stack - record a stack back trace in the trace buffer
3095  * @skip: Number of functions to skip (helper handlers)
3096  */
3097 void trace_dump_stack(int skip)
3098 {
3099         if (tracing_disabled || tracing_selftest_running)
3100                 return;
3101
3102 #ifndef CONFIG_UNWINDER_ORC
3103         /* Skip 1 to skip this function. */
3104         skip++;
3105 #endif
3106         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3107                              tracing_gen_ctx(), skip, NULL);
3108 }
3109 EXPORT_SYMBOL_GPL(trace_dump_stack);
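/*
 * Illustrative usage (not part of the original file): a debugging site can
 * drop a kernel stack trace into the ring buffer with a single call:
 *
 *      trace_dump_stack(0);
 *
 * where a skip of 0 records from the immediate caller down.
 */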
3110
3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3112 static DEFINE_PER_CPU(int, user_stack_count);
3113
3114 static void
3115 ftrace_trace_userstack(struct trace_array *tr,
3116                        struct trace_buffer *buffer, unsigned int trace_ctx)
3117 {
3118         struct trace_event_call *call = &event_user_stack;
3119         struct ring_buffer_event *event;
3120         struct userstack_entry *entry;
3121
3122         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3123                 return;
3124
3125         /*
3126          * NMIs cannot handle page faults, even with fixups.
3127          * Saving the user stack can (and often does) fault.
3128          */
3129         if (unlikely(in_nmi()))
3130                 return;
3131
3132         /*
3133          * prevent recursion, since the user stack tracing may
3134          * trigger other kernel events.
3135          */
3136         preempt_disable();
3137         if (__this_cpu_read(user_stack_count))
3138                 goto out;
3139
3140         __this_cpu_inc(user_stack_count);
3141
3142         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3143                                             sizeof(*entry), trace_ctx);
3144         if (!event)
3145                 goto out_drop_count;
3146         entry   = ring_buffer_event_data(event);
3147
3148         entry->tgid             = current->tgid;
3149         memset(&entry->caller, 0, sizeof(entry->caller));
3150
3151         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3152         if (!call_filter_check_discard(call, entry, buffer, event))
3153                 __buffer_unlock_commit(buffer, event);
3154
3155  out_drop_count:
3156         __this_cpu_dec(user_stack_count);
3157  out:
3158         preempt_enable();
3159 }
3160 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3161 static void ftrace_trace_userstack(struct trace_array *tr,
3162                                    struct trace_buffer *buffer,
3163                                    unsigned int trace_ctx)
3164 {
3165 }
3166 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3167
3168 #endif /* CONFIG_STACKTRACE */
3169
3170 static inline void
3171 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3172                           unsigned long long delta)
3173 {
3174         entry->bottom_delta_ts = delta & U32_MAX;
3175         entry->top_delta_ts = (delta >> 32);
3176 }
3177
3178 void trace_last_func_repeats(struct trace_array *tr,
3179                              struct trace_func_repeats *last_info,
3180                              unsigned int trace_ctx)
3181 {
3182         struct trace_buffer *buffer = tr->array_buffer.buffer;
3183         struct func_repeats_entry *entry;
3184         struct ring_buffer_event *event;
3185         u64 delta;
3186
3187         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3188                                             sizeof(*entry), trace_ctx);
3189         if (!event)
3190                 return;
3191
3192         delta = ring_buffer_event_time_stamp(buffer, event) -
3193                 last_info->ts_last_call;
3194
3195         entry = ring_buffer_event_data(event);
3196         entry->ip = last_info->ip;
3197         entry->parent_ip = last_info->parent_ip;
3198         entry->count = last_info->count;
3199         func_repeats_set_delta_ts(entry, delta);
3200
3201         __buffer_unlock_commit(buffer, event);
3202 }
3203
3204 /* created for use with alloc_percpu */
3205 struct trace_buffer_struct {
3206         int nesting;
3207         char buffer[4][TRACE_BUF_SIZE];
3208 };
3209
3210 static struct trace_buffer_struct *trace_percpu_buffer;
3211
3212 /*
3213  * This allows for lockless recording.  If we're nested too deeply, then
3214  * this returns NULL.
3215  */
3216 static char *get_trace_buf(void)
3217 {
3218         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3219
3220         if (!buffer || buffer->nesting >= 4)
3221                 return NULL;
3222
3223         buffer->nesting++;
3224
3225         /* Interrupts must see nesting incremented before we use the buffer */
3226         barrier();
3227         return &buffer->buffer[buffer->nesting - 1][0];
3228 }
3229
3230 static void put_trace_buf(void)
3231 {
3232         /* Don't let the decrement of nesting leak before this */
3233         barrier();
3234         this_cpu_dec(trace_percpu_buffer->nesting);
3235 }
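/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): callers are expected to pair the two helpers above and to fall
 * back gracefully when nested too deep, e.g.
 *
 *	char *buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *
 * The four per-CPU slots presumably cover the contexts that can interrupt
 * one another (normal, softirq, hardirq and NMI), which is why a nesting
 * depth of four is treated as "too deep" and NULL is returned instead.
 */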
3236
3237 static int alloc_percpu_trace_buffer(void)
3238 {
3239         struct trace_buffer_struct *buffers;
3240
3241         if (trace_percpu_buffer)
3242                 return 0;
3243
3244         buffers = alloc_percpu(struct trace_buffer_struct);
3245         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3246                 return -ENOMEM;
3247
3248         trace_percpu_buffer = buffers;
3249         return 0;
3250 }
3251
3252 static int buffers_allocated;
3253
3254 void trace_printk_init_buffers(void)
3255 {
3256         if (buffers_allocated)
3257                 return;
3258
3259         if (alloc_percpu_trace_buffer())
3260                 return;
3261
3262         /* trace_printk() is for debug use only. Don't use it in production. */
3263
3264         pr_warn("\n");
3265         pr_warn("**********************************************************\n");
3266         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3267         pr_warn("**                                                      **\n");
3268         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3269         pr_warn("**                                                      **\n");
3270         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3271         pr_warn("** unsafe for production use.                           **\n");
3272         pr_warn("**                                                      **\n");
3273         pr_warn("** If you see this message and you are not debugging    **\n");
3274         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3275         pr_warn("**                                                      **\n");
3276         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3277         pr_warn("**********************************************************\n");
3278
3279         /* Expand the buffers to set size */
3280         tracing_update_buffers();
3281
3282         buffers_allocated = 1;
3283
3284         /*
3285          * trace_printk_init_buffers() can be called by modules.
3286          * If that happens, we need to start cmdline recording
3287          * directly here. If the global_trace buffer is already
3288          * allocated at this point, then this was called by module code.
3289          */
3290         if (global_trace.array_buffer.buffer)
3291                 tracing_start_cmdline_record();
3292 }
3293 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3294
3295 void trace_printk_start_comm(void)
3296 {
3297         /* Start tracing comms if trace printk is set */
3298         if (!buffers_allocated)
3299                 return;
3300         tracing_start_cmdline_record();
3301 }
3302
3303 static void trace_printk_start_stop_comm(int enabled)
3304 {
3305         if (!buffers_allocated)
3306                 return;
3307
3308         if (enabled)
3309                 tracing_start_cmdline_record();
3310         else
3311                 tracing_stop_cmdline_record();
3312 }
3313
3314 /**
3315  * trace_vbprintk - write binary msg to tracing buffer
3316  * @ip:    The address of the caller
3317  * @fmt:   The string format to write to the buffer
3318  * @args:  Arguments for @fmt
3319  */
3320 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3321 {
3322         struct trace_event_call *call = &event_bprint;
3323         struct ring_buffer_event *event;
3324         struct trace_buffer *buffer;
3325         struct trace_array *tr = &global_trace;
3326         struct bprint_entry *entry;
3327         unsigned int trace_ctx;
3328         char *tbuffer;
3329         int len = 0, size;
3330
3331         if (unlikely(tracing_selftest_running || tracing_disabled))
3332                 return 0;
3333
3334         /* Don't pollute graph traces with trace_vprintk internals */
3335         pause_graph_tracing();
3336
3337         trace_ctx = tracing_gen_ctx();
3338         preempt_disable_notrace();
3339
3340         tbuffer = get_trace_buf();
3341         if (!tbuffer) {
3342                 len = 0;
3343                 goto out_nobuffer;
3344         }
3345
3346         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3347
3348         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3349                 goto out_put;
3350
3351         size = sizeof(*entry) + sizeof(u32) * len;
3352         buffer = tr->array_buffer.buffer;
3353         ring_buffer_nest_start(buffer);
3354         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3355                                             trace_ctx);
3356         if (!event)
3357                 goto out;
3358         entry = ring_buffer_event_data(event);
3359         entry->ip                       = ip;
3360         entry->fmt                      = fmt;
3361
3362         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3363         if (!call_filter_check_discard(call, entry, buffer, event)) {
3364                 __buffer_unlock_commit(buffer, event);
3365                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3366         }
3367
3368 out:
3369         ring_buffer_nest_end(buffer);
3370 out_put:
3371         put_trace_buf();
3372
3373 out_nobuffer:
3374         preempt_enable_notrace();
3375         unpause_graph_tracing();
3376
3377         return len;
3378 }
3379 EXPORT_SYMBOL_GPL(trace_vbprintk);
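/*
 * Editor's note: trace_vbprintk() does not format the string at record
 * time. It stores the format pointer itself (entry->fmt) plus the argument
 * words produced by vbin_printf(), which is why the event size is
 * sizeof(*entry) + sizeof(u32) * len. The actual string is only built when
 * the buffer is read, presumably via the bstr_printf()-style decoding used
 * by the bprint output path, which keeps the record-time cost low.
 */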
3380
3381 __printf(3, 0)
3382 static int
3383 __trace_array_vprintk(struct trace_buffer *buffer,
3384                       unsigned long ip, const char *fmt, va_list args)
3385 {
3386         struct trace_event_call *call = &event_print;
3387         struct ring_buffer_event *event;
3388         int len = 0, size;
3389         struct print_entry *entry;
3390         unsigned int trace_ctx;
3391         char *tbuffer;
3392
3393         if (tracing_disabled || tracing_selftest_running)
3394                 return 0;
3395
3396         /* Don't pollute graph traces with trace_vprintk internals */
3397         pause_graph_tracing();
3398
3399         trace_ctx = tracing_gen_ctx();
3400         preempt_disable_notrace();
3401
3402
3403         tbuffer = get_trace_buf();
3404         if (!tbuffer) {
3405                 len = 0;
3406                 goto out_nobuffer;
3407         }
3408
3409         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3410
3411         size = sizeof(*entry) + len + 1;
3412         ring_buffer_nest_start(buffer);
3413         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3414                                             trace_ctx);
3415         if (!event)
3416                 goto out;
3417         entry = ring_buffer_event_data(event);
3418         entry->ip = ip;
3419
3420         memcpy(&entry->buf, tbuffer, len + 1);
3421         if (!call_filter_check_discard(call, entry, buffer, event)) {
3422                 __buffer_unlock_commit(buffer, event);
3423                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3424         }
3425
3426 out:
3427         ring_buffer_nest_end(buffer);
3428         put_trace_buf();
3429
3430 out_nobuffer:
3431         preempt_enable_notrace();
3432         unpause_graph_tracing();
3433
3434         return len;
3435 }
3436
3437 __printf(3, 0)
3438 int trace_array_vprintk(struct trace_array *tr,
3439                         unsigned long ip, const char *fmt, va_list args)
3440 {
3441         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442 }
3443
3444 /**
3445  * trace_array_printk - Print a message to a specific instance
3446  * @tr: The instance trace_array descriptor
3447  * @ip: The instruction pointer that this is called from.
3448  * @fmt: The format to print (printf format)
3449  *
3450  * If a subsystem sets up its own instance, it has the right to
3451  * printk strings into its tracing instance buffer using this
3452  * function. Note, this function will not write into the top level
3453  * buffer (use trace_printk() for that), as the top level buffer
3454  * should only contain events that can be individually disabled.
3455  * trace_printk() is only meant for debugging a kernel, and should
3456  * never be incorporated into normal use.
3457  *
3458  * trace_array_printk() can be used, as it will not add noise to the
3459  * top level tracing buffer.
3460  *
3461  * Note, trace_array_init_printk() must be called on @tr before this
3462  * can be used.
3463  */
3464 __printf(3, 0)
3465 int trace_array_printk(struct trace_array *tr,
3466                        unsigned long ip, const char *fmt, ...)
3467 {
3468         int ret;
3469         va_list ap;
3470
3471         if (!tr)
3472                 return -ENOENT;
3473
3474         /* This is only allowed for created instances */
3475         if (tr == &global_trace)
3476                 return 0;
3477
3478         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3479                 return 0;
3480
3481         va_start(ap, fmt);
3482         ret = trace_array_vprintk(tr, ip, fmt, ap);
3483         va_end(ap);
3484         return ret;
3485 }
3486 EXPORT_SYMBOL_GPL(trace_array_printk);
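/*
 * Illustrative usage sketch (editor's addition): a subsystem with its own
 * instance might do something like
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "stage %d done\n", 3);
 *
 * "my_subsys" is a made-up instance name and error handling is omitted;
 * this is only meant to show the required init-before-use ordering.
 */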
3487
3488 /**
3489  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3490  * @tr: The trace array to initialize the buffers for
3491  *
3492  * As trace_array_printk() only writes into instances, such calls are OK
3493  * to have in the kernel (unlike trace_printk()). This needs to be called
3494  * before trace_array_printk() can be used on a trace_array.
3495  */
3496 int trace_array_init_printk(struct trace_array *tr)
3497 {
3498         if (!tr)
3499                 return -ENOENT;
3500
3501         /* This is only allowed for created instances */
3502         if (tr == &global_trace)
3503                 return -EINVAL;
3504
3505         return alloc_percpu_trace_buffer();
3506 }
3507 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3508
3509 __printf(3, 4)
3510 int trace_array_printk_buf(struct trace_buffer *buffer,
3511                            unsigned long ip, const char *fmt, ...)
3512 {
3513         int ret;
3514         va_list ap;
3515
3516         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3517                 return 0;
3518
3519         va_start(ap, fmt);
3520         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3521         va_end(ap);
3522         return ret;
3523 }
3524
3525 __printf(2, 0)
3526 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3527 {
3528         return trace_array_vprintk(&global_trace, ip, fmt, args);
3529 }
3530 EXPORT_SYMBOL_GPL(trace_vprintk);
3531
3532 static void trace_iterator_increment(struct trace_iterator *iter)
3533 {
3534         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3535
3536         iter->idx++;
3537         if (buf_iter)
3538                 ring_buffer_iter_advance(buf_iter);
3539 }
3540
3541 static struct trace_entry *
3542 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3543                 unsigned long *lost_events)
3544 {
3545         struct ring_buffer_event *event;
3546         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3547
3548         if (buf_iter) {
3549                 event = ring_buffer_iter_peek(buf_iter, ts);
3550                 if (lost_events)
3551                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3552                                 (unsigned long)-1 : 0;
3553         } else {
3554                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3555                                          lost_events);
3556         }
3557
3558         if (event) {
3559                 iter->ent_size = ring_buffer_event_length(event);
3560                 return ring_buffer_event_data(event);
3561         }
3562         iter->ent_size = 0;
3563         return NULL;
3564 }
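/*
 * Editor's note: when a dedicated ring buffer iterator exists, dropped
 * events cannot be counted exactly, so lost_events is reported with the
 * sentinel value (unsigned long)-1. print_trace_line() later turns that
 * sentinel into a "[LOST EVENTS]" message without a count, whereas the
 * consuming ring_buffer_peek() path reports an actual number.
 */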
3565
3566 static struct trace_entry *
3567 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3568                   unsigned long *missing_events, u64 *ent_ts)
3569 {
3570         struct trace_buffer *buffer = iter->array_buffer->buffer;
3571         struct trace_entry *ent, *next = NULL;
3572         unsigned long lost_events = 0, next_lost = 0;
3573         int cpu_file = iter->cpu_file;
3574         u64 next_ts = 0, ts;
3575         int next_cpu = -1;
3576         int next_size = 0;
3577         int cpu;
3578
3579         /*
3580          * If we are in a per_cpu trace file, don't bother iterating over
3581          * all CPUs; peek at that CPU directly.
3582          */
3583         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3584                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3585                         return NULL;
3586                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3587                 if (ent_cpu)
3588                         *ent_cpu = cpu_file;
3589
3590                 return ent;
3591         }
3592
3593         for_each_tracing_cpu(cpu) {
3594
3595                 if (ring_buffer_empty_cpu(buffer, cpu))
3596                         continue;
3597
3598                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3599
3600                 /*
3601                  * Pick the entry with the smallest timestamp:
3602                  */
3603                 if (ent && (!next || ts < next_ts)) {
3604                         next = ent;
3605                         next_cpu = cpu;
3606                         next_ts = ts;
3607                         next_lost = lost_events;
3608                         next_size = iter->ent_size;
3609                 }
3610         }
3611
3612         iter->ent_size = next_size;
3613
3614         if (ent_cpu)
3615                 *ent_cpu = next_cpu;
3616
3617         if (ent_ts)
3618                 *ent_ts = next_ts;
3619
3620         if (missing_events)
3621                 *missing_events = next_lost;
3622
3623         return next;
3624 }
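/*
 * Editor's note: when reading all CPUs, __find_next_entry() effectively
 * performs one step of a k-way merge. It peeks at the head of every
 * non-empty per-CPU buffer and returns the entry with the smallest
 * timestamp, so the combined output stays globally time ordered even
 * though each CPU records into its own buffer.
 */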
3625
3626 #define STATIC_FMT_BUF_SIZE     128
3627 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3628
3629 static char *trace_iter_expand_format(struct trace_iterator *iter)
3630 {
3631         char *tmp;
3632
3633         /*
3634          * iter->tr is NULL when used with tp_printk, in which case this
3635          * can be called from a context where it is not safe to call krealloc().
3636          */
3637         if (!iter->tr || iter->fmt == static_fmt_buf)
3638                 return NULL;
3639
3640         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3641                        GFP_KERNEL);
3642         if (tmp) {
3643                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3644                 iter->fmt = tmp;
3645         }
3646
3647         return tmp;
3648 }
3649
3650 /* Returns true if the string is safe to dereference from an event */
3651 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3652 {
3653         unsigned long addr = (unsigned long)str;
3654         struct trace_event *trace_event;
3655         struct trace_event_call *event;
3656
3657         /* OK if part of the event data */
3658         if ((addr >= (unsigned long)iter->ent) &&
3659             (addr < (unsigned long)iter->ent + iter->ent_size))
3660                 return true;
3661
3662         /* OK if part of the temp seq buffer */
3663         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3664             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3665                 return true;
3666
3667         /* Core rodata cannot be freed */
3668         if (is_kernel_rodata(addr))
3669                 return true;
3670
3671         if (trace_is_tracepoint_string(str))
3672                 return true;
3673
3674         /*
3675          * Now this could be a module event, referencing core module
3676          * data, which is OK.
3677          */
3678         if (!iter->ent)
3679                 return false;
3680
3681         trace_event = ftrace_find_event(iter->ent->type);
3682         if (!trace_event)
3683                 return false;
3684
3685         event = container_of(trace_event, struct trace_event_call, event);
3686         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3687                 return false;
3688
3689         /* Would rather have rodata, but this will suffice */
3690         if (within_module_core(addr, event->module))
3691                 return true;
3692
3693         return false;
3694 }
3695
3696 static const char *show_buffer(struct trace_seq *s)
3697 {
3698         struct seq_buf *seq = &s->seq;
3699
3700         seq_buf_terminate(seq);
3701
3702         return seq->buffer;
3703 }
3704
3705 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3706
3707 static int test_can_verify_check(const char *fmt, ...)
3708 {
3709         char buf[16];
3710         va_list ap;
3711         int ret;
3712
3713         /*
3714          * The verifier depends on vsnprintf() modifying the va_list that is
3715          * passed to it, which only happens when the va_list is passed by
3716          * reference. Some architectures (like x86_32) pass it by value,
3717          * which means that vsnprintf() does not modify the caller's va_list,
3718          * and the verifier would then need to understand every value that
3719          * vsnprintf() can consume. If the va_list is passed by value, the
3720          * verifier is disabled.
3721          */
3722         va_start(ap, fmt);
3723         vsnprintf(buf, 16, "%d", ap);
3724         ret = va_arg(ap, int);
3725         va_end(ap);
3726
3727         return ret;
3728 }
3729
3730 static void test_can_verify(void)
3731 {
3732         if (!test_can_verify_check("%d %d", 0, 1)) {
3733                 pr_info("trace event string verifier disabled\n");
3734                 static_branch_inc(&trace_no_verify);
3735         }
3736 }
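/*
 * Editor's note on the check above: test_can_verify_check("%d %d", 0, 1)
 * lets vsnprintf() consume the first argument (0) through the shared
 * va_list. If the architecture passes va_list by reference, the following
 * va_arg() yields the second argument (1) and the verifier stays enabled;
 * if va_list is passed by value, va_arg() re-reads the first argument (0)
 * and the verifier is disabled.
 */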
3737
3738 /**
3739  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3740  * @iter: The iterator that holds the seq buffer and the event being printed
3741  * @fmt: The format used to print the event
3742  * @ap: The va_list holding the data to print from @fmt.
3743  *
3744  * This writes the data into the @iter->seq buffer using the data from
3745  * @fmt and @ap. If the format has a %s, then the source of the string
3746  * is examined to make sure it is safe to print, otherwise it will
3747  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3748  * pointer.
3749  */
3750 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3751                          va_list ap)
3752 {
3753         const char *p = fmt;
3754         const char *str;
3755         int i, j;
3756
3757         if (WARN_ON_ONCE(!fmt))
3758                 return;
3759
3760         if (static_branch_unlikely(&trace_no_verify))
3761                 goto print;
3762
3763         /* Don't bother checking when doing a ftrace_dump() */
3764         if (iter->fmt == static_fmt_buf)
3765                 goto print;
3766
3767         while (*p) {
3768                 bool star = false;
3769                 int len = 0;
3770
3771                 j = 0;
3772
3773                 /* We only care about %s and variants */
3774                 for (i = 0; p[i]; i++) {
3775                         if (i + 1 >= iter->fmt_size) {
3776                                 /*
3777                                  * If we can't expand the copy buffer,
3778                                  * just print it.
3779                                  */
3780                                 if (!trace_iter_expand_format(iter))
3781                                         goto print;
3782                         }
3783
3784                         if (p[i] == '\\' && p[i+1]) {
3785                                 i++;
3786                                 continue;
3787                         }
3788                         if (p[i] == '%') {
3789                                 /* Need to test cases like %08.*s */
3790                                 for (j = 1; p[i+j]; j++) {
3791                                         if (isdigit(p[i+j]) ||
3792                                             p[i+j] == '.')
3793                                                 continue;
3794                                         if (p[i+j] == '*') {
3795                                                 star = true;
3796                                                 continue;
3797                                         }
3798                                         break;
3799                                 }
3800                                 if (p[i+j] == 's')
3801                                         break;
3802                                 star = false;
3803                         }
3804                         j = 0;
3805                 }
3806                 /* If no %s found then just print normally */
3807                 if (!p[i])
3808                         break;
3809
3810                 /* Copy up to the %s, and print that */
3811                 strncpy(iter->fmt, p, i);
3812                 iter->fmt[i] = '\0';
3813                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3814
3815                 if (star)
3816                         len = va_arg(ap, int);
3817
3818                 /* The ap now points to the string data of the %s */
3819                 str = va_arg(ap, const char *);
3820
3821                 /*
3822                  * If you hit this warning, it is likely that the
3823                  * trace event in question used %s on a string that
3824                  * was saved at the time of the event, but may not be
3825                  * around when the trace is read. Use __string(),
3826                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3827                  * instead. See samples/trace_events/trace-events-sample.h
3828                  * for reference.
3829                  */
3830                 if (WARN_ONCE(!trace_safe_str(iter, str),
3831                               "fmt: '%s' current_buffer: '%s'",
3832                               fmt, show_buffer(&iter->seq))) {
3833                         int ret;
3834
3835                         /* Try to safely read the string */
3836                         if (star) {
3837                                 if (len + 1 > iter->fmt_size)
3838                                         len = iter->fmt_size - 1;
3839                                 if (len < 0)
3840                                         len = 0;
3841                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3842                                 iter->fmt[len] = 0;
3843                                 star = false;
3844                         } else {
3845                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3846                                                                   iter->fmt_size);
3847                         }
3848                         if (ret < 0)
3849                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3850                         else
3851                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3852                                                  str, iter->fmt);
3853                         str = "[UNSAFE-MEMORY]";
3854                         strcpy(iter->fmt, "%s");
3855                 } else {
3856                         strncpy(iter->fmt, p + i, j + 1);
3857                         iter->fmt[j+1] = '\0';
3858                 }
3859                 if (star)
3860                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3861                 else
3862                         trace_seq_printf(&iter->seq, iter->fmt, str);
3863
3864                 p += i + j + 1;
3865         }
3866  print:
3867         if (*p)
3868                 trace_seq_vprintf(&iter->seq, p, ap);
3869 }
3870
3871 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3872 {
3873         const char *p, *new_fmt;
3874         char *q;
3875
3876         if (WARN_ON_ONCE(!fmt))
3877                 return fmt;
3878
3879         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3880                 return fmt;
3881
3882         p = fmt;
3883         new_fmt = q = iter->fmt;
3884         while (*p) {
3885                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3886                         if (!trace_iter_expand_format(iter))
3887                                 return fmt;
3888
3889                         q += iter->fmt - new_fmt;
3890                         new_fmt = iter->fmt;
3891                 }
3892
3893                 *q++ = *p++;
3894
3895                 /* Replace %p with %px */
3896                 if (p[-1] == '%') {
3897                         if (p[0] == '%') {
3898                                 *q++ = *p++;
3899                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3900                                 *q++ = *p++;
3901                                 *q++ = 'x';
3902                         }
3903                 }
3904         }
3905         *q = '\0';
3906
3907         return new_fmt;
3908 }
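/*
 * Editor's note, using a hypothetical format as an example: with the
 * "hash-ptr" trace option cleared, a format such as
 *
 *	"ptr=%p flags=%%p caller=%pS"
 *
 * is rewritten in iter->fmt to
 *
 *	"ptr=%px flags=%%p caller=%pS"
 *
 * i.e. only a bare %p becomes %px (so the real address is printed), while
 * a literal "%%p" and pointer extensions such as %pS are left untouched.
 */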
3909
3910 #define STATIC_TEMP_BUF_SIZE    128
3911 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3912
3913 /* Find the next real entry, without updating the iterator itself */
3914 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3915                                           int *ent_cpu, u64 *ent_ts)
3916 {
3917         /* __find_next_entry will reset ent_size */
3918         int ent_size = iter->ent_size;
3919         struct trace_entry *entry;
3920
3921         /*
3922          * If called from ftrace_dump(), then the iter->temp buffer
3923          * will be the static_temp_buf and not created from kmalloc.
3924          * If the entry size is greater than the buffer, we cannot
3925          * save it. Just return NULL in that case. This is only
3926          * used to add markers when two consecutive events' time
3927          * stamps have a large delta. See trace_print_lat_context().
3928          */
3929         if (iter->temp == static_temp_buf &&
3930             STATIC_TEMP_BUF_SIZE < ent_size)
3931                 return NULL;
3932
3933         /*
3934          * __find_next_entry() may call peek_next_entry(), which may in
3935          * turn call ring_buffer_peek(), and that can make the contents
3936          * of iter->ent undefined. Copy iter->ent now, before that happens.
3937          */
3938         if (iter->ent && iter->ent != iter->temp) {
3939                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3940                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3941                         void *temp;
3942                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3943                         if (!temp)
3944                                 return NULL;
3945                         kfree(iter->temp);
3946                         iter->temp = temp;
3947                         iter->temp_size = iter->ent_size;
3948                 }
3949                 memcpy(iter->temp, iter->ent, iter->ent_size);
3950                 iter->ent = iter->temp;
3951         }
3952         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3953         /* Put back the original ent_size */
3954         iter->ent_size = ent_size;
3955
3956         return entry;
3957 }
3958
3959 /* Find the next real entry, and increment the iterator to the next entry */
3960 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3961 {
3962         iter->ent = __find_next_entry(iter, &iter->cpu,
3963                                       &iter->lost_events, &iter->ts);
3964
3965         if (iter->ent)
3966                 trace_iterator_increment(iter);
3967
3968         return iter->ent ? iter : NULL;
3969 }
3970
3971 static void trace_consume(struct trace_iterator *iter)
3972 {
3973         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3974                             &iter->lost_events);
3975 }
3976
3977 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3978 {
3979         struct trace_iterator *iter = m->private;
3980         int i = (int)*pos;
3981         void *ent;
3982
3983         WARN_ON_ONCE(iter->leftover);
3984
3985         (*pos)++;
3986
3987         /* can't go backwards */
3988         if (iter->idx > i)
3989                 return NULL;
3990
3991         if (iter->idx < 0)
3992                 ent = trace_find_next_entry_inc(iter);
3993         else
3994                 ent = iter;
3995
3996         while (ent && iter->idx < i)
3997                 ent = trace_find_next_entry_inc(iter);
3998
3999         iter->pos = *pos;
4000
4001         return ent;
4002 }
4003
4004 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4005 {
4006         struct ring_buffer_iter *buf_iter;
4007         unsigned long entries = 0;
4008         u64 ts;
4009
4010         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4011
4012         buf_iter = trace_buffer_iter(iter, cpu);
4013         if (!buf_iter)
4014                 return;
4015
4016         ring_buffer_iter_reset(buf_iter);
4017
4018         /*
4019          * With the max latency tracers, it is possible that a reset
4020          * never took place on a CPU. This is evident when the
4021          * timestamp is before the start of the buffer.
4022          */
4023         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4024                 if (ts >= iter->array_buffer->time_start)
4025                         break;
4026                 entries++;
4027                 ring_buffer_iter_advance(buf_iter);
4028         }
4029
4030         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4031 }
4032
4033 /*
4034  * The current tracer is copied to avoid taking a global lock
4035  * all around.
4036  */
4037 static void *s_start(struct seq_file *m, loff_t *pos)
4038 {
4039         struct trace_iterator *iter = m->private;
4040         struct trace_array *tr = iter->tr;
4041         int cpu_file = iter->cpu_file;
4042         void *p = NULL;
4043         loff_t l = 0;
4044         int cpu;
4045
4046         /*
4047          * Copy the tracer to avoid using a global lock all around.
4048          * iter->trace is a copy of current_trace; the name pointer
4049          * may be compared instead of using strcmp(), as iter->trace->name
4050          * will point to the same string as current_trace->name.
4051          */
4052         mutex_lock(&trace_types_lock);
4053         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4054                 *iter->trace = *tr->current_trace;
4055         mutex_unlock(&trace_types_lock);
4056
4057 #ifdef CONFIG_TRACER_MAX_TRACE
4058         if (iter->snapshot && iter->trace->use_max_tr)
4059                 return ERR_PTR(-EBUSY);
4060 #endif
4061
4062         if (*pos != iter->pos) {
4063                 iter->ent = NULL;
4064                 iter->cpu = 0;
4065                 iter->idx = -1;
4066
4067                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4068                         for_each_tracing_cpu(cpu)
4069                                 tracing_iter_reset(iter, cpu);
4070                 } else
4071                         tracing_iter_reset(iter, cpu_file);
4072
4073                 iter->leftover = 0;
4074                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4075                         ;
4076
4077         } else {
4078                 /*
4079                  * If we overflowed the seq_file before, then we want
4080                  * to just reuse the trace_seq buffer again.
4081                  */
4082                 if (iter->leftover)
4083                         p = iter;
4084                 else {
4085                         l = *pos - 1;
4086                         p = s_next(m, p, &l);
4087                 }
4088         }
4089
4090         trace_event_read_lock();
4091         trace_access_lock(cpu_file);
4092         return p;
4093 }
4094
4095 static void s_stop(struct seq_file *m, void *p)
4096 {
4097         struct trace_iterator *iter = m->private;
4098
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         if (iter->snapshot && iter->trace->use_max_tr)
4101                 return;
4102 #endif
4103
4104         trace_access_unlock(iter->cpu_file);
4105         trace_event_read_unlock();
4106 }
4107
4108 static void
4109 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4110                       unsigned long *entries, int cpu)
4111 {
4112         unsigned long count;
4113
4114         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4115         /*
4116          * If this buffer has skipped entries, then we hold all
4117          * entries for the trace and we need to ignore the
4118          * ones before the time stamp.
4119          */
4120         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4121                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4122                 /* total is the same as the entries */
4123                 *total = count;
4124         } else
4125                 *total = count +
4126                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4127         *entries = count;
4128 }
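/*
 * Editor's note: "entries" is what currently remains in this CPU's buffer,
 * while "total" additionally counts events that were overwritten
 * (ring_buffer_overrun_cpu()). When skipped_entries is in use after a
 * mid-trace reset, the two are reported as equal instead.
 */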
4129
4130 static void
4131 get_total_entries(struct array_buffer *buf,
4132                   unsigned long *total, unsigned long *entries)
4133 {
4134         unsigned long t, e;
4135         int cpu;
4136
4137         *total = 0;
4138         *entries = 0;
4139
4140         for_each_tracing_cpu(cpu) {
4141                 get_total_entries_cpu(buf, &t, &e, cpu);
4142                 *total += t;
4143                 *entries += e;
4144         }
4145 }
4146
4147 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4148 {
4149         unsigned long total, entries;
4150
4151         if (!tr)
4152                 tr = &global_trace;
4153
4154         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4155
4156         return entries;
4157 }
4158
4159 unsigned long trace_total_entries(struct trace_array *tr)
4160 {
4161         unsigned long total, entries;
4162
4163         if (!tr)
4164                 tr = &global_trace;
4165
4166         get_total_entries(&tr->array_buffer, &total, &entries);
4167
4168         return entries;
4169 }
4170
4171 static void print_lat_help_header(struct seq_file *m)
4172 {
4173         seq_puts(m, "#                    _------=> CPU#            \n"
4174                     "#                   / _-----=> irqs-off        \n"
4175                     "#                  | / _----=> need-resched    \n"
4176                     "#                  || / _---=> hardirq/softirq \n"
4177                     "#                  ||| / _--=> preempt-depth   \n"
4178                     "#                  |||| / _-=> migrate-disable \n"
4179                     "#                  ||||| /     delay           \n"
4180                     "#  cmd     pid     |||||| time  |   caller     \n"
4181                     "#     \\   /        ||||||  \\    |    /       \n");
4182 }
4183
4184 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4185 {
4186         unsigned long total;
4187         unsigned long entries;
4188
4189         get_total_entries(buf, &total, &entries);
4190         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4191                    entries, total, num_online_cpus());
4192         seq_puts(m, "#\n");
4193 }
4194
4195 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4196                                    unsigned int flags)
4197 {
4198         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4199
4200         print_event_info(buf, m);
4201
4202         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4203         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4204 }
4205
4206 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4207                                        unsigned int flags)
4208 {
4209         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4210         const char *space = "            ";
4211         int prec = tgid ? 12 : 2;
4212
4213         print_event_info(buf, m);
4214
4215         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4216         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4217         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4218         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4219         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4220         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4221         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4223 }
4224
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229         struct array_buffer *buf = iter->array_buffer;
4230         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231         struct tracer *type = iter->trace;
4232         unsigned long entries;
4233         unsigned long total;
4234         const char *name = "preemption";
4235
4236         name = type->name;
4237
4238         get_total_entries(buf, &total, &entries);
4239
4240         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241                    name, UTS_RELEASE);
4242         seq_puts(m, "# -----------------------------------"
4243                  "---------------------------------\n");
4244         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246                    nsecs_to_usecs(data->saved_latency),
4247                    entries,
4248                    total,
4249                    buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251                    "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253                    "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255                    "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257                    "preempt_rt",
4258 #else
4259                    "unknown",
4260 #endif
4261                    /* These are reserved for later use */
4262                    0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264         seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266         seq_puts(m, ")\n");
4267 #endif
4268         seq_puts(m, "#    -----------------\n");
4269         seq_printf(m, "#    | task: %.16s-%d "
4270                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271                    data->comm, data->pid,
4272                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273                    data->policy, data->rt_priority);
4274         seq_puts(m, "#    -----------------\n");
4275
4276         if (data->critical_start) {
4277                 seq_puts(m, "#  => started at: ");
4278                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279                 trace_print_seq(m, &iter->seq);
4280                 seq_puts(m, "\n#  => ended at:   ");
4281                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282                 trace_print_seq(m, &iter->seq);
4283                 seq_puts(m, "\n#\n");
4284         }
4285
4286         seq_puts(m, "#\n");
4287 }
4288
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291         struct trace_seq *s = &iter->seq;
4292         struct trace_array *tr = iter->tr;
4293
4294         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295                 return;
4296
4297         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298                 return;
4299
4300         if (cpumask_available(iter->started) &&
4301             cpumask_test_cpu(iter->cpu, iter->started))
4302                 return;
4303
4304         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305                 return;
4306
4307         if (cpumask_available(iter->started))
4308                 cpumask_set_cpu(iter->cpu, iter->started);
4309
4310         /* Don't print a "buffer started" line for the first entry of the trace */
4311         if (iter->idx > 1)
4312                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313                                 iter->cpu);
4314 }
4315
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318         struct trace_array *tr = iter->tr;
4319         struct trace_seq *s = &iter->seq;
4320         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321         struct trace_entry *entry;
4322         struct trace_event *event;
4323
4324         entry = iter->ent;
4325
4326         test_cpu_buff_start(iter);
4327
4328         event = ftrace_find_event(entry->type);
4329
4330         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332                         trace_print_lat_context(iter);
4333                 else
4334                         trace_print_context(iter);
4335         }
4336
4337         if (trace_seq_has_overflowed(s))
4338                 return TRACE_TYPE_PARTIAL_LINE;
4339
4340         if (event)
4341                 return event->funcs->trace(iter, sym_flags, event);
4342
4343         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344
4345         return trace_handle_return(s);
4346 }
4347
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350         struct trace_array *tr = iter->tr;
4351         struct trace_seq *s = &iter->seq;
4352         struct trace_entry *entry;
4353         struct trace_event *event;
4354
4355         entry = iter->ent;
4356
4357         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358                 trace_seq_printf(s, "%d %d %llu ",
4359                                  entry->pid, iter->cpu, iter->ts);
4360
4361         if (trace_seq_has_overflowed(s))
4362                 return TRACE_TYPE_PARTIAL_LINE;
4363
4364         event = ftrace_find_event(entry->type);
4365         if (event)
4366                 return event->funcs->raw(iter, 0, event);
4367
4368         trace_seq_printf(s, "%d ?\n", entry->type);
4369
4370         return trace_handle_return(s);
4371 }
4372
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375         struct trace_array *tr = iter->tr;
4376         struct trace_seq *s = &iter->seq;
4377         unsigned char newline = '\n';
4378         struct trace_entry *entry;
4379         struct trace_event *event;
4380
4381         entry = iter->ent;
4382
4383         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4385                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4387                 if (trace_seq_has_overflowed(s))
4388                         return TRACE_TYPE_PARTIAL_LINE;
4389         }
4390
4391         event = ftrace_find_event(entry->type);
4392         if (event) {
4393                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394                 if (ret != TRACE_TYPE_HANDLED)
4395                         return ret;
4396         }
4397
4398         SEQ_PUT_FIELD(s, newline);
4399
4400         return trace_handle_return(s);
4401 }
4402
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405         struct trace_array *tr = iter->tr;
4406         struct trace_seq *s = &iter->seq;
4407         struct trace_entry *entry;
4408         struct trace_event *event;
4409
4410         entry = iter->ent;
4411
4412         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413                 SEQ_PUT_FIELD(s, entry->pid);
4414                 SEQ_PUT_FIELD(s, iter->cpu);
4415                 SEQ_PUT_FIELD(s, iter->ts);
4416                 if (trace_seq_has_overflowed(s))
4417                         return TRACE_TYPE_PARTIAL_LINE;
4418         }
4419
4420         event = ftrace_find_event(entry->type);
4421         return event ? event->funcs->binary(iter, 0, event) :
4422                 TRACE_TYPE_HANDLED;
4423 }
4424
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427         struct ring_buffer_iter *buf_iter;
4428         int cpu;
4429
4430         /* If we are looking at one CPU buffer, only check that one */
4431         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432                 cpu = iter->cpu_file;
4433                 buf_iter = trace_buffer_iter(iter, cpu);
4434                 if (buf_iter) {
4435                         if (!ring_buffer_iter_empty(buf_iter))
4436                                 return 0;
4437                 } else {
4438                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439                                 return 0;
4440                 }
4441                 return 1;
4442         }
4443
4444         for_each_tracing_cpu(cpu) {
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453         }
4454
4455         return 1;
4456 }
4457
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461         struct trace_array *tr = iter->tr;
4462         unsigned long trace_flags = tr->trace_flags;
4463         enum print_line_t ret;
4464
4465         if (iter->lost_events) {
4466                 if (iter->lost_events == (unsigned long)-1)
4467                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468                                          iter->cpu);
4469                 else
4470                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471                                          iter->cpu, iter->lost_events);
4472                 if (trace_seq_has_overflowed(&iter->seq))
4473                         return TRACE_TYPE_PARTIAL_LINE;
4474         }
4475
4476         if (iter->trace && iter->trace->print_line) {
4477                 ret = iter->trace->print_line(iter);
4478                 if (ret != TRACE_TYPE_UNHANDLED)
4479                         return ret;
4480         }
4481
4482         if (iter->ent->type == TRACE_BPUTS &&
4483                         trace_flags & TRACE_ITER_PRINTK &&
4484                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485                 return trace_print_bputs_msg_only(iter);
4486
4487         if (iter->ent->type == TRACE_BPRINT &&
4488                         trace_flags & TRACE_ITER_PRINTK &&
4489                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490                 return trace_print_bprintk_msg_only(iter);
4491
4492         if (iter->ent->type == TRACE_PRINT &&
4493                         trace_flags & TRACE_ITER_PRINTK &&
4494                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495                 return trace_print_printk_msg_only(iter);
4496
4497         if (trace_flags & TRACE_ITER_BIN)
4498                 return print_bin_fmt(iter);
4499
4500         if (trace_flags & TRACE_ITER_HEX)
4501                 return print_hex_fmt(iter);
4502
4503         if (trace_flags & TRACE_ITER_RAW)
4504                 return print_raw_fmt(iter);
4505
4506         return print_trace_fmt(iter);
4507 }
4508
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511         struct trace_iterator *iter = m->private;
4512         struct trace_array *tr = iter->tr;
4513
4514         /* print nothing if the buffers are empty */
4515         if (trace_empty(iter))
4516                 return;
4517
4518         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519                 print_trace_header(m, iter);
4520
4521         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522                 print_lat_help_header(m);
4523 }
4524
4525 void trace_default_header(struct seq_file *m)
4526 {
4527         struct trace_iterator *iter = m->private;
4528         struct trace_array *tr = iter->tr;
4529         unsigned long trace_flags = tr->trace_flags;
4530
4531         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532                 return;
4533
4534         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535                 /* print nothing if the buffers are empty */
4536                 if (trace_empty(iter))
4537                         return;
4538                 print_trace_header(m, iter);
4539                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4540                         print_lat_help_header(m);
4541         } else {
4542                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4544                                 print_func_help_header_irq(iter->array_buffer,
4545                                                            m, trace_flags);
4546                         else
4547                                 print_func_help_header(iter->array_buffer, m,
4548                                                        trace_flags);
4549                 }
4550         }
4551 }
4552
4553 static void test_ftrace_alive(struct seq_file *m)
4554 {
4555         if (!ftrace_is_dead())
4556                 return;
4557         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4558                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4559 }
4560
4561 #ifdef CONFIG_TRACER_MAX_TRACE
4562 static void show_snapshot_main_help(struct seq_file *m)
4563 {
4564         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4565                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4566                     "#                      Takes a snapshot of the main buffer.\n"
4567                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4568                     "#                      (Doesn't have to be '2' works with any number that\n"
4569                     "#                       is not a '0' or '1')\n");
4570 }
4571
4572 static void show_snapshot_percpu_help(struct seq_file *m)
4573 {
4574         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4575 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4576         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4577                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4578 #else
4579         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4580                     "#                     Must use main snapshot file to allocate.\n");
4581 #endif
4582         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4583                     "#                      (Doesn't have to be '2' works with any number that\n"
4584                     "#                       is not a '0' or '1')\n");
4585 }
4586
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589         if (iter->tr->allocated_snapshot)
4590                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591         else
4592                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593
4594         seq_puts(m, "# Snapshot commands:\n");
4595         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596                 show_snapshot_main_help(m);
4597         else
4598                 show_snapshot_percpu_help(m);
4599 }
4600 #else
4601 /* Should never be called */
4602 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4603 #endif
4604
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607         struct trace_iterator *iter = v;
4608         int ret;
4609
4610         if (iter->ent == NULL) {
4611                 if (iter->tr) {
4612                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613                         seq_puts(m, "#\n");
4614                         test_ftrace_alive(m);
4615                 }
4616                 if (iter->snapshot && trace_empty(iter))
4617                         print_snapshot_help(m, iter);
4618                 else if (iter->trace && iter->trace->print_header)
4619                         iter->trace->print_header(m);
4620                 else
4621                         trace_default_header(m);
4622
4623         } else if (iter->leftover) {
4624                 /*
4625                  * If we filled the seq_file buffer earlier, we
4626                  * want to just show it now.
4627                  */
4628                 ret = trace_print_seq(m, &iter->seq);
4629
4630                 /* ret should this time be zero, but you never know */
4631                 iter->leftover = ret;
4632
4633         } else {
4634                 print_trace_line(iter);
4635                 ret = trace_print_seq(m, &iter->seq);
4636                 /*
4637                  * If we overflow the seq_file buffer, then it will
4638                  * ask us for this data again at start up.
4639                  * Use that instead.
4640                  *  ret is 0 if seq_file write succeeded.
4641                  *        -1 otherwise.
4642                  */
4643                 iter->leftover = ret;
4644         }
4645
4646         return 0;
4647 }
4648
4649 /*
4650  * Should be used after trace_array_get(); trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655         if (inode->i_cdev) /* See trace_create_cpu_file() */
4656                 return (long)inode->i_cdev - 1;
4657         return RING_BUFFER_ALL_CPUS;
4658 }
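/*
 * Editor's note: trace_create_cpu_file() presumably stores "cpu + 1" in
 * i_cdev so that a NULL i_cdev (the non per-CPU files) can be told apart
 * from CPU 0; subtracting one here recovers the CPU number, and
 * RING_BUFFER_ALL_CPUS is returned for the non per-CPU case.
 */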
4659
4660 static const struct seq_operations tracer_seq_ops = {
4661         .start          = s_start,
4662         .next           = s_next,
4663         .stop           = s_stop,
4664         .show           = s_show,
4665 };
4666
4667 static struct trace_iterator *
4668 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4669 {
4670         struct trace_array *tr = inode->i_private;
4671         struct trace_iterator *iter;
4672         int cpu;
4673
4674         if (tracing_disabled)
4675                 return ERR_PTR(-ENODEV);
4676
4677         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4678         if (!iter)
4679                 return ERR_PTR(-ENOMEM);
4680
4681         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4682                                     GFP_KERNEL);
4683         if (!iter->buffer_iter)
4684                 goto release;
4685
4686         /*
4687          * trace_find_next_entry() may need to save off iter->ent.
4688          * It will place it into the iter->temp buffer. As most
4689          * events are less than 128 bytes, allocate a buffer of that size.
4690          * If one is bigger, then trace_find_next_entry() will
4691          * allocate a new buffer to fit the bigger iter->ent.
4692          * It's not critical if the allocation fails here.
4693          */
4694         iter->temp = kmalloc(128, GFP_KERNEL);
4695         if (iter->temp)
4696                 iter->temp_size = 128;
4697
4698         /*
4699          * trace_event_printf() may need to modify the given format
4700          * string to replace %p with %px so that it shows the real address
4701          * instead of a hashed value. However, that is only needed for
4702          * event tracing; other tracers may not need it. Defer the
4703          * allocation until it is needed.
4704          */
4705         iter->fmt = NULL;
4706         iter->fmt_size = 0;
4707
4708         /*
4709          * We make a copy of the current tracer to avoid concurrent
4710          * changes to it while we are reading.
4711          */
4712         mutex_lock(&trace_types_lock);
4713         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4714         if (!iter->trace)
4715                 goto fail;
4716
4717         *iter->trace = *tr->current_trace;
4718
4719         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4720                 goto fail;
4721
4722         iter->tr = tr;
4723
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725         /* Currently only the top directory has a snapshot */
4726         if (tr->current_trace->print_max || snapshot)
4727                 iter->array_buffer = &tr->max_buffer;
4728         else
4729 #endif
4730                 iter->array_buffer = &tr->array_buffer;
4731         iter->snapshot = snapshot;
4732         iter->pos = -1;
4733         iter->cpu_file = tracing_get_cpu(inode);
4734         mutex_init(&iter->mutex);
4735
4736         /* Notify the tracer early; before we stop tracing. */
4737         if (iter->trace->open)
4738                 iter->trace->open(iter);
4739
4740         /* Annotate start of buffers if we had overruns */
4741         if (ring_buffer_overruns(iter->array_buffer->buffer))
4742                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4743
4744         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4745         if (trace_clocks[tr->clock_id].in_ns)
4746                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4747
4748         /*
4749          * If pause-on-trace is enabled, then stop the trace while
4750          * dumping, unless this is the "snapshot" file
4751          */
4752         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4753                 tracing_stop_tr(tr);
4754
4755         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4756                 for_each_tracing_cpu(cpu) {
4757                         iter->buffer_iter[cpu] =
4758                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4759                                                          cpu, GFP_KERNEL);
4760                 }
4761                 ring_buffer_read_prepare_sync();
4762                 for_each_tracing_cpu(cpu) {
4763                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4764                         tracing_iter_reset(iter, cpu);
4765                 }
4766         } else {
4767                 cpu = iter->cpu_file;
4768                 iter->buffer_iter[cpu] =
4769                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4770                                                  cpu, GFP_KERNEL);
4771                 ring_buffer_read_prepare_sync();
4772                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4773                 tracing_iter_reset(iter, cpu);
4774         }
4775
4776         mutex_unlock(&trace_types_lock);
4777
4778         return iter;
4779
4780  fail:
4781         mutex_unlock(&trace_types_lock);
4782         kfree(iter->trace);
4783         kfree(iter->temp);
4784         kfree(iter->buffer_iter);
4785 release:
4786         seq_release_private(inode, file);
4787         return ERR_PTR(-ENOMEM);
4788 }
4789
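/*
 * Generic open for tracing files that only need the tracing system to
 * be usable: fail if it is not, otherwise stash the inode's private
 * data for the other file operations.
 */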
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792         int ret;
4793
4794         ret = tracing_check_open_get_tr(NULL);
4795         if (ret)
4796                 return ret;
4797
4798         filp->private_data = inode->i_private;
4799         return 0;
4800 }
4801
4802 bool tracing_is_disabled(void)
4803 {
4804         return tracing_disabled;
4805 }
4806
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813         struct trace_array *tr = inode->i_private;
4814         int ret;
4815
4816         ret = tracing_check_open_get_tr(tr);
4817         if (ret)
4818                 return ret;
4819
4820         filp->private_data = inode->i_private;
4821
4822         return 0;
4823 }
4824
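/*
 * Release a file opened by tracing_open(): tear down the per-cpu ring
 * buffer iterators, let the tracer clean up, restart tracing if it was
 * stopped for the dump, and free the iterator.
 */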
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct seq_file *m = file->private_data;
4829         struct trace_iterator *iter;
4830         int cpu;
4831
4832         if (!(file->f_mode & FMODE_READ)) {
4833                 trace_array_put(tr);
4834                 return 0;
4835         }
4836
4837         /* Writes do not use seq_file */
4838         iter = m->private;
4839         mutex_lock(&trace_types_lock);
4840
4841         for_each_tracing_cpu(cpu) {
4842                 if (iter->buffer_iter[cpu])
4843                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844         }
4845
4846         if (iter->trace && iter->trace->close)
4847                 iter->trace->close(iter);
4848
4849         if (!iter->snapshot && tr->stop_count)
4850                 /* reenable tracing if it was previously enabled */
4851                 tracing_start_tr(tr);
4852
4853         __trace_array_put(tr);
4854
4855         mutex_unlock(&trace_types_lock);
4856
4857         mutex_destroy(&iter->mutex);
4858         free_cpumask_var(iter->started);
4859         kfree(iter->fmt);
4860         kfree(iter->temp);
4861         kfree(iter->trace);
4862         kfree(iter->buffer_iter);
4863         seq_release_private(inode, file);
4864
4865         return 0;
4866 }
4867
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871
4872         trace_array_put(tr);
4873         return 0;
4874 }
4875
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878         struct trace_array *tr = inode->i_private;
4879
4880         trace_array_put(tr);
4881
4882         return single_release(inode, file);
4883 }
4884
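/*
 * Open the trace file. An open for write with O_TRUNC clears the
 * buffer (for the selected CPU or all CPUs); an open for read sets up
 * the seq_file iterator via __tracing_open().
 */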
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887         struct trace_array *tr = inode->i_private;
4888         struct trace_iterator *iter;
4889         int ret;
4890
4891         ret = tracing_check_open_get_tr(tr);
4892         if (ret)
4893                 return ret;
4894
4895         /* If this file was open for write, then erase contents */
4896         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897                 int cpu = tracing_get_cpu(inode);
4898                 struct array_buffer *trace_buf = &tr->array_buffer;
4899
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901                 if (tr->current_trace->print_max)
4902                         trace_buf = &tr->max_buffer;
4903 #endif
4904
4905                 if (cpu == RING_BUFFER_ALL_CPUS)
4906                         tracing_reset_online_cpus(trace_buf);
4907                 else
4908                         tracing_reset_cpu(trace_buf, cpu);
4909         }
4910
4911         if (file->f_mode & FMODE_READ) {
4912                 iter = __tracing_open(inode, file, false);
4913                 if (IS_ERR(iter))
4914                         ret = PTR_ERR(iter);
4915                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917         }
4918
4919         if (ret < 0)
4920                 trace_array_put(tr);
4921
4922         return ret;
4923 }
4924
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940         while (t && !trace_ok_for_array(t, tr))
4941                 t = t->next;
4942
4943         return t;
4944 }
4945
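/*
 * seq_file iteration over the registered tracers, limited to the ones
 * this trace array is allowed to use (see trace_ok_for_array()).
 * t_show() prints each tracer name, space separated.
 */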
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949         struct trace_array *tr = m->private;
4950         struct tracer *t = v;
4951
4952         (*pos)++;
4953
4954         if (t)
4955                 t = get_tracer_for_array(tr, t->next);
4956
4957         return t;
4958 }
4959
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962         struct trace_array *tr = m->private;
4963         struct tracer *t;
4964         loff_t l = 0;
4965
4966         mutex_lock(&trace_types_lock);
4967
4968         t = get_tracer_for_array(tr, trace_types);
4969         for (; t && l < *pos; t = t_next(m, t, &l))
4970                 ;
4971
4972         return t;
4973 }
4974
4975 static void t_stop(struct seq_file *m, void *p)
4976 {
4977         mutex_unlock(&trace_types_lock);
4978 }
4979
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982         struct tracer *t = v;
4983
4984         if (!t)
4985                 return 0;
4986
4987         seq_puts(m, t->name);
4988         if (t->next)
4989                 seq_putc(m, ' ');
4990         else
4991                 seq_putc(m, '\n');
4992
4993         return 0;
4994 }
4995
4996 static const struct seq_operations show_traces_seq_ops = {
4997         .start          = t_start,
4998         .next           = t_next,
4999         .stop           = t_stop,
5000         .show           = t_show,
5001 };
5002
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005         struct trace_array *tr = inode->i_private;
5006         struct seq_file *m;
5007         int ret;
5008
5009         ret = tracing_check_open_get_tr(tr);
5010         if (ret)
5011                 return ret;
5012
5013         ret = seq_open(file, &show_traces_seq_ops);
5014         if (ret) {
5015                 trace_array_put(tr);
5016                 return ret;
5017         }
5018
5019         m = file->private_data;
5020         m->private = tr;
5021
5022         return 0;
5023 }
5024
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027         struct trace_array *tr = inode->i_private;
5028
5029         trace_array_put(tr);
5030         return seq_release(inode, file);
5031 }
5032
5033 static ssize_t
5034 tracing_write_stub(struct file *filp, const char __user *ubuf,
5035                    size_t count, loff_t *ppos)
5036 {
5037         return count;
5038 }
5039
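/*
 * lseek for tracing files: readers go through seq_lseek(), while
 * write-only opens simply have their position reset to zero.
 */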
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042         int ret;
5043
5044         if (file->f_mode & FMODE_READ)
5045                 ret = seq_lseek(file, offset, whence);
5046         else
5047                 file->f_pos = ret = 0;
5048
5049         return ret;
5050 }
5051
5052 static const struct file_operations tracing_fops = {
5053         .open           = tracing_open,
5054         .read           = seq_read,
5055         .write          = tracing_write_stub,
5056         .llseek         = tracing_lseek,
5057         .release        = tracing_release,
5058 };
5059
5060 static const struct file_operations show_traces_fops = {
5061         .open           = show_traces_open,
5062         .read           = seq_read,
5063         .llseek         = seq_lseek,
5064         .release        = show_traces_release,
5065 };
5066
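/*
 * Read the tracing cpumask: size the output with a NULL snprintf()
 * first, format the mask as a printable bitmap, and copy it to
 * user space.
 */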
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069                      size_t count, loff_t *ppos)
5070 {
5071         struct trace_array *tr = file_inode(filp)->i_private;
5072         char *mask_str;
5073         int len;
5074
5075         len = snprintf(NULL, 0, "%*pb\n",
5076                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077         mask_str = kmalloc(len, GFP_KERNEL);
5078         if (!mask_str)
5079                 return -ENOMEM;
5080
5081         len = snprintf(mask_str, len, "%*pb\n",
5082                        cpumask_pr_args(tr->tracing_cpumask));
5083         if (len >= count) {
5084                 count = -EINVAL;
5085                 goto out_err;
5086         }
5087         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088
5089 out_err:
5090         kfree(mask_str);
5091
5092         return count;
5093 }
5094
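/*
 * Install a new tracing cpumask on @tr. CPUs being removed from the
 * mask get per-cpu recording disabled and their disabled counter
 * bumped; CPUs being added are re-enabled. The update is done with
 * interrupts off under tr->max_lock.
 */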
5095 int tracing_set_cpumask(struct trace_array *tr,
5096                         cpumask_var_t tracing_cpumask_new)
5097 {
5098         int cpu;
5099
5100         if (!tr)
5101                 return -EINVAL;
5102
5103         local_irq_disable();
5104         arch_spin_lock(&tr->max_lock);
5105         for_each_tracing_cpu(cpu) {
5106                 /*
5107                  * Increase/decrease the disabled counter if we are
5108                  * about to flip a bit in the cpumask:
5109                  */
5110                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114                 }
5115                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119                 }
5120         }
5121         arch_spin_unlock(&tr->max_lock);
5122         local_irq_enable();
5123
5124         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125
5126         return 0;
5127 }
5128
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131                       size_t count, loff_t *ppos)
5132 {
5133         struct trace_array *tr = file_inode(filp)->i_private;
5134         cpumask_var_t tracing_cpumask_new;
5135         int err;
5136
5137         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138                 return -ENOMEM;
5139
5140         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141         if (err)
5142                 goto err_free;
5143
5144         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145         if (err)
5146                 goto err_free;
5147
5148         free_cpumask_var(tracing_cpumask_new);
5149
5150         return count;
5151
5152 err_free:
5153         free_cpumask_var(tracing_cpumask_new);
5154
5155         return err;
5156 }
5157
5158 static const struct file_operations tracing_cpumask_fops = {
5159         .open           = tracing_open_generic_tr,
5160         .read           = tracing_cpumask_read,
5161         .write          = tracing_cpumask_write,
5162         .release        = tracing_release_generic_tr,
5163         .llseek         = generic_file_llseek,
5164 };
5165
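/*
 * Show every general trace option and every option of the current
 * tracer, one per line, prefixed with "no" when the option is clear.
 */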
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168         struct tracer_opt *trace_opts;
5169         struct trace_array *tr = m->private;
5170         u32 tracer_flags;
5171         int i;
5172
5173         mutex_lock(&trace_types_lock);
5174         tracer_flags = tr->current_trace->flags->val;
5175         trace_opts = tr->current_trace->flags->opts;
5176
5177         for (i = 0; trace_options[i]; i++) {
5178                 if (tr->trace_flags & (1 << i))
5179                         seq_printf(m, "%s\n", trace_options[i]);
5180                 else
5181                         seq_printf(m, "no%s\n", trace_options[i]);
5182         }
5183
5184         for (i = 0; trace_opts[i].name; i++) {
5185                 if (tracer_flags & trace_opts[i].bit)
5186                         seq_printf(m, "%s\n", trace_opts[i].name);
5187                 else
5188                         seq_printf(m, "no%s\n", trace_opts[i].name);
5189         }
5190         mutex_unlock(&trace_types_lock);
5191
5192         return 0;
5193 }
5194
5195 static int __set_tracer_option(struct trace_array *tr,
5196                                struct tracer_flags *tracer_flags,
5197                                struct tracer_opt *opts, int neg)
5198 {
5199         struct tracer *trace = tracer_flags->trace;
5200         int ret;
5201
5202         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203         if (ret)
5204                 return ret;
5205
5206         if (neg)
5207                 tracer_flags->val &= ~opts->bit;
5208         else
5209                 tracer_flags->val |= opts->bit;
5210         return 0;
5211 }
5212
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216         struct tracer *trace = tr->current_trace;
5217         struct tracer_flags *tracer_flags = trace->flags;
5218         struct tracer_opt *opts = NULL;
5219         int i;
5220
5221         for (i = 0; tracer_flags->opts[i].name; i++) {
5222                 opts = &tracer_flags->opts[i];
5223
5224                 if (strcmp(cmp, opts->name) == 0)
5225                         return __set_tracer_option(tr, trace->flags, opts, neg);
5226         }
5227
5228         return -EINVAL;
5229 }
5230
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235                 return -1;
5236
5237         return 0;
5238 }
5239
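/*
 * Set or clear a single TRACE_ITER_* flag on @tr and perform its side
 * effects (cmdline/tgid recording, fork following, ring buffer
 * overwrite mode, trace_printk), after giving the current tracer a
 * chance to refuse the change.
 */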
5240 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5241 {
5242         int *map;
5243
5244         if ((mask == TRACE_ITER_RECORD_TGID) ||
5245             (mask == TRACE_ITER_RECORD_CMD))
5246                 lockdep_assert_held(&event_mutex);
5247
5248         /* do nothing if flag is already set */
5249         if (!!(tr->trace_flags & mask) == !!enabled)
5250                 return 0;
5251
5252         /* Give the tracer a chance to approve the change */
5253         if (tr->current_trace->flag_changed)
5254                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5255                         return -EINVAL;
5256
5257         if (enabled)
5258                 tr->trace_flags |= mask;
5259         else
5260                 tr->trace_flags &= ~mask;
5261
5262         if (mask == TRACE_ITER_RECORD_CMD)
5263                 trace_event_enable_cmd_record(enabled);
5264
5265         if (mask == TRACE_ITER_RECORD_TGID) {
5266                 if (!tgid_map) {
5267                         tgid_map_max = pid_max;
5268                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5269                                        GFP_KERNEL);
5270
5271                         /*
5272                          * Pairs with smp_load_acquire() in
5273                          * trace_find_tgid_ptr() to ensure that if it observes
5274                          * the tgid_map we just allocated then it also observes
5275                          * the corresponding tgid_map_max value.
5276                          */
5277                         smp_store_release(&tgid_map, map);
5278                 }
5279                 if (!tgid_map) {
5280                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5281                         return -ENOMEM;
5282                 }
5283
5284                 trace_event_enable_tgid_record(enabled);
5285         }
5286
5287         if (mask == TRACE_ITER_EVENT_FORK)
5288                 trace_event_follow_fork(tr, enabled);
5289
5290         if (mask == TRACE_ITER_FUNC_FORK)
5291                 ftrace_pid_follow_fork(tr, enabled);
5292
5293         if (mask == TRACE_ITER_OVERWRITE) {
5294                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5297 #endif
5298         }
5299
5300         if (mask == TRACE_ITER_PRINTK) {
5301                 trace_printk_start_stop_comm(enabled);
5302                 trace_printk_control(enabled);
5303         }
5304
5305         return 0;
5306 }
5307
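/*
 * Apply a single option string: a leading "no" clears the option, and
 * the name is matched first against the general trace options, then
 * against the current tracer's own options.
 */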
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310         char *cmp;
5311         int neg = 0;
5312         int ret;
5313         size_t orig_len = strlen(option);
5314         int len;
5315
5316         cmp = strstrip(option);
5317
5318         len = str_has_prefix(cmp, "no");
5319         if (len)
5320                 neg = 1;
5321
5322         cmp += len;
5323
5324         mutex_lock(&event_mutex);
5325         mutex_lock(&trace_types_lock);
5326
5327         ret = match_string(trace_options, -1, cmp);
5328         /* If it is not a general trace option, test the tracer-specific options */
5329         if (ret < 0)
5330                 ret = set_tracer_option(tr, cmp, neg);
5331         else
5332                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5333
5334         mutex_unlock(&trace_types_lock);
5335         mutex_unlock(&event_mutex);
5336
5337         /*
5338          * If the first trailing whitespace is replaced with '\0' by strstrip,
5339          * turn it back into a space.
5340          */
5341         if (orig_len > strlen(option))
5342                 option[strlen(option)] = ' ';
5343
5344         return ret;
5345 }
5346
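/*
 * Apply the options saved from the kernel command line, one
 * comma-separated entry at a time.
 */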
5347 static void __init apply_trace_boot_options(void)
5348 {
5349         char *buf = trace_boot_options_buf;
5350         char *option;
5351
5352         while (true) {
5353                 option = strsep(&buf, ",");
5354
5355                 if (!option)
5356                         break;
5357
5358                 if (*option)
5359                         trace_set_options(&global_trace, option);
5360
5361                 /* Put back the comma to allow this to be called again */
5362                 if (buf)
5363                         *(buf - 1) = ',';
5364         }
5365 }
5366
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369                         size_t cnt, loff_t *ppos)
5370 {
5371         struct seq_file *m = filp->private_data;
5372         struct trace_array *tr = m->private;
5373         char buf[64];
5374         int ret;
5375
5376         if (cnt >= sizeof(buf))
5377                 return -EINVAL;
5378
5379         if (copy_from_user(buf, ubuf, cnt))
5380                 return -EFAULT;
5381
5382         buf[cnt] = 0;
5383
5384         ret = trace_set_options(tr, buf);
5385         if (ret < 0)
5386                 return ret;
5387
5388         *ppos += cnt;
5389
5390         return cnt;
5391 }
5392
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395         struct trace_array *tr = inode->i_private;
5396         int ret;
5397
5398         ret = tracing_check_open_get_tr(tr);
5399         if (ret)
5400                 return ret;
5401
5402         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403         if (ret < 0)
5404                 trace_array_put(tr);
5405
5406         return ret;
5407 }
5408
5409 static const struct file_operations tracing_iter_fops = {
5410         .open           = tracing_trace_options_open,
5411         .read           = seq_read,
5412         .llseek         = seq_lseek,
5413         .release        = tracing_single_release_tr,
5414         .write          = tracing_trace_options_write,
5415 };
5416
5417 static const char readme_msg[] =
5418         "tracing mini-HOWTO:\n\n"
5419         "# echo 0 > tracing_on : quick way to disable tracing\n"
5420         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5421         " Important files:\n"
5422         "  trace\t\t\t- The static contents of the buffer\n"
5423         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5424         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5425         "  current_tracer\t- function and latency tracers\n"
5426         "  available_tracers\t- list of configured tracers for current_tracer\n"
5427         "  error_log\t- error log for failed commands (that support it)\n"
5428         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5429         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5430         "  trace_clock\t\t- change the clock used to order events\n"
5431         "       local:   Per cpu clock but may not be synced across CPUs\n"
5432         "      global:   Synced across CPUs but slows tracing down.\n"
5433         "     counter:   Not a clock, but just an increment\n"
5434         "      uptime:   Jiffy counter from time of boot\n"
5435         "        perf:   Same clock that perf events use\n"
5436 #ifdef CONFIG_X86_64
5437         "     x86-tsc:   TSC cycle counter\n"
5438 #endif
5439         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5440         "       delta:   Delta difference against a buffer-wide timestamp\n"
5441         "    absolute:   Absolute (standalone) timestamp\n"
5442         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5443         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5444         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5445         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5446         "\t\t\t  Remove sub-buffer with rmdir\n"
5447         "  trace_options\t\t- Set format or modify how tracing happens\n"
5448         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5449         "\t\t\t  option name\n"
5450         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5451 #ifdef CONFIG_DYNAMIC_FTRACE
5452         "\n  available_filter_functions - list of functions that can be filtered on\n"
5453         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5454         "\t\t\t  functions\n"
5455         "\t     accepts: func_full_name or glob-matching-pattern\n"
5456         "\t     modules: Can select a group via module\n"
5457         "\t      Format: :mod:<module-name>\n"
5458         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5459         "\t    triggers: a command to perform when function is hit\n"
5460         "\t      Format: <function>:<trigger>[:count]\n"
5461         "\t     trigger: traceon, traceoff\n"
5462         "\t\t      enable_event:<system>:<event>\n"
5463         "\t\t      disable_event:<system>:<event>\n"
5464 #ifdef CONFIG_STACKTRACE
5465         "\t\t      stacktrace\n"
5466 #endif
5467 #ifdef CONFIG_TRACER_SNAPSHOT
5468         "\t\t      snapshot\n"
5469 #endif
5470         "\t\t      dump\n"
5471         "\t\t      cpudump\n"
5472         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5473         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5474         "\t     The first one will disable tracing every time do_fault is hit\n"
5475         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5476         "\t       The first time do_trap is hit and it disables tracing, the\n"
5477         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5478         "\t       the counter will not decrement. It only decrements when the\n"
5479         "\t       trigger did work\n"
5480         "\t     To remove a trigger without a count:\n"
5481         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5482         "\t     To remove a trigger with a count:\n"
5483         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5484         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5485         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5486         "\t    modules: Can select a group via module command :mod:\n"
5487         "\t    Does not accept triggers\n"
5488 #endif /* CONFIG_DYNAMIC_FTRACE */
5489 #ifdef CONFIG_FUNCTION_TRACER
5490         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5491         "\t\t    (function)\n"
5492         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5493         "\t\t    (function)\n"
5494 #endif
5495 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5496         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5497         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5498         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5499 #endif
5500 #ifdef CONFIG_TRACER_SNAPSHOT
5501         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5502         "\t\t\t  snapshot buffer. Read the contents for more\n"
5503         "\t\t\t  information\n"
5504 #endif
5505 #ifdef CONFIG_STACK_TRACER
5506         "  stack_trace\t\t- Shows the max stack trace when active\n"
5507         "  stack_max_size\t- Shows current max stack size that was traced\n"
5508         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5509         "\t\t\t  new trace)\n"
5510 #ifdef CONFIG_DYNAMIC_FTRACE
5511         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5512         "\t\t\t  traces\n"
5513 #endif
5514 #endif /* CONFIG_STACK_TRACER */
5515 #ifdef CONFIG_DYNAMIC_EVENTS
5516         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5517         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #ifdef CONFIG_KPROBE_EVENTS
5520         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5521         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5522 #endif
5523 #ifdef CONFIG_UPROBE_EVENTS
5524         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5525         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5526 #endif
5527 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5528         "\t  accepts: event-definitions (one definition per line)\n"
5529         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5530         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5531 #ifdef CONFIG_HIST_TRIGGERS
5532         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5533 #endif
5534         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5535         "\t           -:[<group>/]<event>\n"
5536 #ifdef CONFIG_KPROBE_EVENTS
5537         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5538   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5539 #endif
5540 #ifdef CONFIG_UPROBE_EVENTS
5541   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5542 #endif
5543         "\t     args: <name>=fetcharg[:type]\n"
5544         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5545 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5546         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5547 #else
5548         "\t           $stack<index>, $stack, $retval, $comm,\n"
5549 #endif
5550         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5551         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5552         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5553         "\t           <type>\\[<array-size>\\]\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555         "\t    field: <stype> <name>;\n"
5556         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5557         "\t           [unsigned] char/int/long\n"
5558 #endif
5559         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5560         "\t            of the <attached-group>/<attached-event>.\n"
5561 #endif
5562         "  events/\t\t- Directory containing all trace event subsystems:\n"
5563         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5564         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5565         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5566         "\t\t\t  events\n"
5567         "      filter\t\t- If set, only events passing filter are traced\n"
5568         "  events/<system>/<event>/\t- Directory containing control files for\n"
5569         "\t\t\t  <event>:\n"
5570         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5571         "      filter\t\t- If set, only events passing filter are traced\n"
5572         "      trigger\t\t- If set, a command to perform when event is hit\n"
5573         "\t    Format: <trigger>[:count][if <filter>]\n"
5574         "\t   trigger: traceon, traceoff\n"
5575         "\t            enable_event:<system>:<event>\n"
5576         "\t            disable_event:<system>:<event>\n"
5577 #ifdef CONFIG_HIST_TRIGGERS
5578         "\t            enable_hist:<system>:<event>\n"
5579         "\t            disable_hist:<system>:<event>\n"
5580 #endif
5581 #ifdef CONFIG_STACKTRACE
5582         "\t\t    stacktrace\n"
5583 #endif
5584 #ifdef CONFIG_TRACER_SNAPSHOT
5585         "\t\t    snapshot\n"
5586 #endif
5587 #ifdef CONFIG_HIST_TRIGGERS
5588         "\t\t    hist (see below)\n"
5589 #endif
5590         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5591         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5592         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5593         "\t                  events/block/block_unplug/trigger\n"
5594         "\t   The first disables tracing every time block_unplug is hit.\n"
5595         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5596         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5597         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5598         "\t   Like function triggers, the counter is only decremented if it\n"
5599         "\t    enabled or disabled tracing.\n"
5600         "\t   To remove a trigger without a count:\n"
5601         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5602         "\t   To remove a trigger with a count:\n"
5603         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5604         "\t   Filters can be ignored when removing a trigger.\n"
5605 #ifdef CONFIG_HIST_TRIGGERS
5606         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5607         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5608         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5609         "\t            [:values=<field1[,field2,...]>]\n"
5610         "\t            [:sort=<field1[,field2,...]>]\n"
5611         "\t            [:size=#entries]\n"
5612         "\t            [:pause][:continue][:clear]\n"
5613         "\t            [:name=histname1]\n"
5614         "\t            [:<handler>.<action>]\n"
5615         "\t            [if <filter>]\n\n"
5616         "\t    Note, special fields can be used as well:\n"
5617         "\t            common_timestamp - to record current timestamp\n"
5618         "\t            common_cpu - to record the CPU the event happened on\n"
5619         "\n"
5620         "\t    A hist trigger variable can be:\n"
5621         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5622         "\t        - a reference to another variable e.g. y=$x,\n"
5623         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5624         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5625         "\n"
5626         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5627         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5628         "\t    variable reference, field or numeric literal.\n"
5629         "\n"
5630         "\t    When a matching event is hit, an entry is added to a hash\n"
5631         "\t    table using the key(s) and value(s) named, and the value of a\n"
5632         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5633         "\t    correspond to fields in the event's format description.  Keys\n"
5634         "\t    can be any field, or the special string 'stacktrace'.\n"
5635         "\t    Compound keys consisting of up to two fields can be specified\n"
5636         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5637         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5638         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5639         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5640         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5641         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5642         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5643         "\t    its histogram data will be shared with other triggers of the\n"
5644         "\t    same name, and trigger hits will update this common data.\n\n"
5645         "\t    Reading the 'hist' file for the event will dump the hash\n"
5646         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5647         "\t    triggers attached to an event, there will be a table for each\n"
5648         "\t    trigger in the output.  The table displayed for a named\n"
5649         "\t    trigger will be the same as any other instance having the\n"
5650         "\t    same name.  The default format used to display a given field\n"
5651         "\t    can be modified by appending any of the following modifiers\n"
5652         "\t    to the field name, as applicable:\n\n"
5653         "\t            .hex        display a number as a hex value\n"
5654         "\t            .sym        display an address as a symbol\n"
5655         "\t            .sym-offset display an address as a symbol and offset\n"
5656         "\t            .execname   display a common_pid as a program name\n"
5657         "\t            .syscall    display a syscall id as a syscall name\n"
5658         "\t            .log2       display log2 value rather than raw number\n"
5659         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5660         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5661         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5662         "\t    trigger or to start a hist trigger but not log any events\n"
5663         "\t    until told to do so.  'continue' can be used to start or\n"
5664         "\t    restart a paused hist trigger.\n\n"
5665         "\t    The 'clear' parameter will clear the contents of a running\n"
5666         "\t    hist trigger and leave its current paused/active state\n"
5667         "\t    unchanged.\n\n"
5668         "\t    The enable_hist and disable_hist triggers can be used to\n"
5669         "\t    have one event conditionally start and stop another event's\n"
5670         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5671         "\t    the enable_event and disable_event triggers.\n\n"
5672         "\t    Hist trigger handlers and actions are executed whenever a\n"
5673         "\t    histogram entry is added or updated.  They take the form:\n\n"
5674         "\t        <handler>.<action>\n\n"
5675         "\t    The available handlers are:\n\n"
5676         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5677         "\t        onmax(var)               - invoke if var exceeds current max\n"
5678         "\t        onchange(var)            - invoke action if var changes\n\n"
5679         "\t    The available actions are:\n\n"
5680         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5681         "\t        save(field,...)                      - save current event fields\n"
5682 #ifdef CONFIG_TRACER_SNAPSHOT
5683         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5684 #endif
5685 #ifdef CONFIG_SYNTH_EVENTS
5686         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5687         "\t  Write into this file to define/undefine new synthetic events.\n"
5688         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5689 #endif
5690 #endif
5691 ;
5692
5693 static ssize_t
5694 tracing_readme_read(struct file *filp, char __user *ubuf,
5695                        size_t cnt, loff_t *ppos)
5696 {
5697         return simple_read_from_buffer(ubuf, cnt, ppos,
5698                                         readme_msg, strlen(readme_msg));
5699 }
5700
5701 static const struct file_operations tracing_readme_fops = {
5702         .open           = tracing_open_generic,
5703         .read           = tracing_readme_read,
5704         .llseek         = generic_file_llseek,
5705 };
5706
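/*
 * seq_file iteration over the tgid_map: the position is a pid, and
 * saved_tgids_show() prints "pid tgid" pairs, skipping pids that have
 * no tgid recorded.
 */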
5707 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5708 {
5709         int pid = ++(*pos);
5710
5711         return trace_find_tgid_ptr(pid);
5712 }
5713
5714 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5715 {
5716         int pid = *pos;
5717
5718         return trace_find_tgid_ptr(pid);
5719 }
5720
5721 static void saved_tgids_stop(struct seq_file *m, void *v)
5722 {
5723 }
5724
5725 static int saved_tgids_show(struct seq_file *m, void *v)
5726 {
5727         int *entry = (int *)v;
5728         int pid = entry - tgid_map;
5729         int tgid = *entry;
5730
5731         if (tgid == 0)
5732                 return SEQ_SKIP;
5733
5734         seq_printf(m, "%d %d\n", pid, tgid);
5735         return 0;
5736 }
5737
5738 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5739         .start          = saved_tgids_start,
5740         .stop           = saved_tgids_stop,
5741         .next           = saved_tgids_next,
5742         .show           = saved_tgids_show,
5743 };
5744
5745 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5746 {
5747         int ret;
5748
5749         ret = tracing_check_open_get_tr(NULL);
5750         if (ret)
5751                 return ret;
5752
5753         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5754 }
5755
5756
5757 static const struct file_operations tracing_saved_tgids_fops = {
5758         .open           = tracing_saved_tgids_open,
5759         .read           = seq_read,
5760         .llseek         = seq_lseek,
5761         .release        = seq_release,
5762 };
5763
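/*
 * seq_file iteration over the saved cmdlines: walk the
 * map_cmdline_to_pid array under trace_cmdline_lock, skipping unused
 * slots, and print "pid comm" pairs.
 */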
5764 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5765 {
5766         unsigned int *ptr = v;
5767
5768         if (*pos || m->count)
5769                 ptr++;
5770
5771         (*pos)++;
5772
5773         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5774              ptr++) {
5775                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5776                         continue;
5777
5778                 return ptr;
5779         }
5780
5781         return NULL;
5782 }
5783
5784 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5785 {
5786         void *v;
5787         loff_t l = 0;
5788
5789         preempt_disable();
5790         arch_spin_lock(&trace_cmdline_lock);
5791
5792         v = &savedcmd->map_cmdline_to_pid[0];
5793         while (l <= *pos) {
5794                 v = saved_cmdlines_next(m, v, &l);
5795                 if (!v)
5796                         return NULL;
5797         }
5798
5799         return v;
5800 }
5801
5802 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5803 {
5804         arch_spin_unlock(&trace_cmdline_lock);
5805         preempt_enable();
5806 }
5807
5808 static int saved_cmdlines_show(struct seq_file *m, void *v)
5809 {
5810         char buf[TASK_COMM_LEN];
5811         unsigned int *pid = v;
5812
5813         __trace_find_cmdline(*pid, buf);
5814         seq_printf(m, "%d %s\n", *pid, buf);
5815         return 0;
5816 }
5817
5818 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5819         .start          = saved_cmdlines_start,
5820         .next           = saved_cmdlines_next,
5821         .stop           = saved_cmdlines_stop,
5822         .show           = saved_cmdlines_show,
5823 };
5824
5825 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5826 {
5827         int ret;
5828
5829         ret = tracing_check_open_get_tr(NULL);
5830         if (ret)
5831                 return ret;
5832
5833         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5834 }
5835
5836 static const struct file_operations tracing_saved_cmdlines_fops = {
5837         .open           = tracing_saved_cmdlines_open,
5838         .read           = seq_read,
5839         .llseek         = seq_lseek,
5840         .release        = seq_release,
5841 };
5842
5843 static ssize_t
5844 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5845                                  size_t cnt, loff_t *ppos)
5846 {
5847         char buf[64];
5848         int r;
5849
5850         arch_spin_lock(&trace_cmdline_lock);
5851         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5852         arch_spin_unlock(&trace_cmdline_lock);
5853
5854         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5855 }
5856
5857 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5858 {
5859         kfree(s->saved_cmdlines);
5860         kfree(s->map_cmdline_to_pid);
5861         kfree(s);
5862 }
5863
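/*
 * Replace the saved_cmdlines buffer with a freshly allocated one of
 * @val entries and free the old buffer.
 */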
5864 static int tracing_resize_saved_cmdlines(unsigned int val)
5865 {
5866         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5867
5868         s = kmalloc(sizeof(*s), GFP_KERNEL);
5869         if (!s)
5870                 return -ENOMEM;
5871
5872         if (allocate_cmdlines_buffer(val, s) < 0) {
5873                 kfree(s);
5874                 return -ENOMEM;
5875         }
5876
5877         arch_spin_lock(&trace_cmdline_lock);
5878         savedcmd_temp = savedcmd;
5879         savedcmd = s;
5880         arch_spin_unlock(&trace_cmdline_lock);
5881         free_saved_cmdlines_buffer(savedcmd_temp);
5882
5883         return 0;
5884 }
5885
5886 static ssize_t
5887 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5888                                   size_t cnt, loff_t *ppos)
5889 {
5890         unsigned long val;
5891         int ret;
5892
5893         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5894         if (ret)
5895                 return ret;
5896
5897         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5898         if (!val || val > PID_MAX_DEFAULT)
5899                 return -EINVAL;
5900
5901         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5902         if (ret < 0)
5903                 return ret;
5904
5905         *ppos += cnt;
5906
5907         return cnt;
5908 }
5909
5910 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5911         .open           = tracing_open_generic,
5912         .read           = tracing_saved_cmdlines_size_read,
5913         .write          = tracing_saved_cmdlines_size_write,
5914 };
5915
5916 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
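/*
 * seq_file helpers for the "eval_map" file. The maps are stored in
 * arrays of trace_eval_map_item with a head entry (module and length)
 * and a tail entry pointing to the next array; update_eval_map()
 * skips over that bookkeeping when walking the entries.
 */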
5917 static union trace_eval_map_item *
5918 update_eval_map(union trace_eval_map_item *ptr)
5919 {
5920         if (!ptr->map.eval_string) {
5921                 if (ptr->tail.next) {
5922                         ptr = ptr->tail.next;
5923                         /* Set ptr to the next real item (skip head) */
5924                         ptr++;
5925                 } else
5926                         return NULL;
5927         }
5928         return ptr;
5929 }
5930
5931 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5932 {
5933         union trace_eval_map_item *ptr = v;
5934
5935         /*
5936          * Paranoid! If ptr points to end, we don't want to increment past it.
5937          * This really should never happen.
5938          */
5939         (*pos)++;
5940         ptr = update_eval_map(ptr);
5941         if (WARN_ON_ONCE(!ptr))
5942                 return NULL;
5943
5944         ptr++;
5945         ptr = update_eval_map(ptr);
5946
5947         return ptr;
5948 }
5949
5950 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5951 {
5952         union trace_eval_map_item *v;
5953         loff_t l = 0;
5954
5955         mutex_lock(&trace_eval_mutex);
5956
5957         v = trace_eval_maps;
5958         if (v)
5959                 v++;
5960
5961         while (v && l < *pos) {
5962                 v = eval_map_next(m, v, &l);
5963         }
5964
5965         return v;
5966 }
5967
5968 static void eval_map_stop(struct seq_file *m, void *v)
5969 {
5970         mutex_unlock(&trace_eval_mutex);
5971 }
5972
5973 static int eval_map_show(struct seq_file *m, void *v)
5974 {
5975         union trace_eval_map_item *ptr = v;
5976
5977         seq_printf(m, "%s %ld (%s)\n",
5978                    ptr->map.eval_string, ptr->map.eval_value,
5979                    ptr->map.system);
5980
5981         return 0;
5982 }
5983
5984 static const struct seq_operations tracing_eval_map_seq_ops = {
5985         .start          = eval_map_start,
5986         .next           = eval_map_next,
5987         .stop           = eval_map_stop,
5988         .show           = eval_map_show,
5989 };
5990
5991 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5992 {
5993         int ret;
5994
5995         ret = tracing_check_open_get_tr(NULL);
5996         if (ret)
5997                 return ret;
5998
5999         return seq_open(filp, &tracing_eval_map_seq_ops);
6000 }
6001
6002 static const struct file_operations tracing_eval_map_fops = {
6003         .open           = tracing_eval_map_open,
6004         .read           = seq_read,
6005         .llseek         = seq_lseek,
6006         .release        = seq_release,
6007 };
6008
6009 static inline union trace_eval_map_item *
6010 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6011 {
6012         /* Return tail of array given the head */
6013         return ptr + ptr->head.length + 1;
6014 }
6015
6016 static void
6017 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6018                            int len)
6019 {
6020         struct trace_eval_map **stop;
6021         struct trace_eval_map **map;
6022         union trace_eval_map_item *map_array;
6023         union trace_eval_map_item *ptr;
6024
6025         stop = start + len;
6026
6027         /*
6028          * The trace_eval_maps contains the map plus a head and tail item,
6029          * where the head holds the module and length of array, and the
6030          * tail holds a pointer to the next list.
6031          */
6032         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6033         if (!map_array) {
6034                 pr_warn("Unable to allocate trace eval mapping\n");
6035                 return;
6036         }
6037
6038         mutex_lock(&trace_eval_mutex);
6039
6040         if (!trace_eval_maps)
6041                 trace_eval_maps = map_array;
6042         else {
6043                 ptr = trace_eval_maps;
6044                 for (;;) {
6045                         ptr = trace_eval_jmp_to_tail(ptr);
6046                         if (!ptr->tail.next)
6047                                 break;
6048                         ptr = ptr->tail.next;
6049
6050                 }
6051                 ptr->tail.next = map_array;
6052         }
6053         map_array->head.mod = mod;
6054         map_array->head.length = len;
6055         map_array++;
6056
6057         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6058                 map_array->map = **map;
6059                 map_array++;
6060         }
6061         memset(map_array, 0, sizeof(*map_array));
6062
6063         mutex_unlock(&trace_eval_mutex);
6064 }
6065
6066 static void trace_create_eval_file(struct dentry *d_tracer)
6067 {
6068         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6069                           NULL, &tracing_eval_map_fops);
6070 }
6071
6072 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6073 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6074 static inline void trace_insert_eval_map_file(struct module *mod,
6075                               struct trace_eval_map **start, int len) { }
6076 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6077
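/*
 * Hand a module's (or the kernel's) trace eval maps to the event
 * subsystem and, when CONFIG_TRACE_EVAL_MAP_FILE is enabled, save
 * them for the "eval_map" file.
 */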
6078 static void trace_insert_eval_map(struct module *mod,
6079                                   struct trace_eval_map **start, int len)
6080 {
6081         struct trace_eval_map **map;
6082
6083         if (len <= 0)
6084                 return;
6085
6086         map = start;
6087
6088         trace_event_eval_update(map, len);
6089
6090         trace_insert_eval_map_file(mod, start, len);
6091 }
6092
6093 static ssize_t
6094 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6095                        size_t cnt, loff_t *ppos)
6096 {
6097         struct trace_array *tr = filp->private_data;
6098         char buf[MAX_TRACER_SIZE+2];
6099         int r;
6100
6101         mutex_lock(&trace_types_lock);
6102         r = sprintf(buf, "%s\n", tr->current_trace->name);
6103         mutex_unlock(&trace_types_lock);
6104
6105         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6106 }
6107
6108 int tracer_init(struct tracer *t, struct trace_array *tr)
6109 {
6110         tracing_reset_online_cpus(&tr->array_buffer);
6111         return t->init(tr);
6112 }
6113
6114 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6115 {
6116         int cpu;
6117
6118         for_each_tracing_cpu(cpu)
6119                 per_cpu_ptr(buf->data, cpu)->entries = val;
6120 }
6121
6122 #ifdef CONFIG_TRACER_MAX_TRACE
6123 /* resize @tr's buffer to the size of @size_tr's entries */
6124 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6125                                         struct array_buffer *size_buf, int cpu_id)
6126 {
6127         int cpu, ret = 0;
6128
6129         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6130                 for_each_tracing_cpu(cpu) {
6131                         ret = ring_buffer_resize(trace_buf->buffer,
6132                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6133                         if (ret < 0)
6134                                 break;
6135                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6136                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6137                 }
6138         } else {
6139                 ret = ring_buffer_resize(trace_buf->buffer,
6140                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6141                 if (ret == 0)
6142                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6143                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6144         }
6145
6146         return ret;
6147 }
6148 #endif /* CONFIG_TRACER_MAX_TRACE */
6149
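/*
 * Resize the ring buffer of @tr to @size for @cpu (or all CPUs).
 * When this is the global array and the current tracer uses the max
 * (snapshot) buffer, that buffer is kept the same size; should its
 * resize fail, the main buffer is put back to its previous size.
 */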
6150 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6151                                         unsigned long size, int cpu)
6152 {
6153         int ret;
6154
6155         /*
6156          * If the kernel or user changes the size of the ring buffer,
6157          * we use the size that was given and can forget about
6158          * expanding it later.
6159          */
6160         ring_buffer_expanded = true;
6161
6162         /* May be called before buffers are initialized */
6163         if (!tr->array_buffer.buffer)
6164                 return 0;
6165
6166         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6167         if (ret < 0)
6168                 return ret;
6169
6170 #ifdef CONFIG_TRACER_MAX_TRACE
6171         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6172             !tr->current_trace->use_max_tr)
6173                 goto out;
6174
6175         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6176         if (ret < 0) {
6177                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6178                                                      &tr->array_buffer, cpu);
6179                 if (r < 0) {
6180                         /*
6181                          * AARGH! We are left with a max buffer of a
6182                          * different size!
6183                          * The max buffer is our "snapshot" buffer.
6184                          * When a tracer needs a snapshot (one of the
6185                          * latency tracers), it swaps the max buffer
6186                          * with the saved snapshot. We succeeded in updating
6187                          * the size of the main buffer, but failed to
6188                          * update the size of the max buffer. But when we tried
6189                          * to reset the main buffer to the original size, we
6190                          * failed there too. This is very unlikely to
6191                          * happen, but if it does, warn and kill all
6192                          * tracing.
6193                          */
6194                         WARN_ON(1);
6195                         tracing_disabled = 1;
6196                 }
6197                 return ret;
6198         }
6199
6200         if (cpu == RING_BUFFER_ALL_CPUS)
6201                 set_buffer_entries(&tr->max_buffer, size);
6202         else
6203                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6204
6205  out:
6206 #endif /* CONFIG_TRACER_MAX_TRACE */
6207
6208         if (cpu == RING_BUFFER_ALL_CPUS)
6209                 set_buffer_entries(&tr->array_buffer, size);
6210         else
6211                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6212
6213         return ret;
6214 }
6215
6216 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6217                                   unsigned long size, int cpu_id)
6218 {
6219         int ret;
6220
6221         mutex_lock(&trace_types_lock);
6222
6223         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6224                 /* make sure this cpu is enabled in the mask */
6225                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6226                         ret = -EINVAL;
6227                         goto out;
6228                 }
6229         }
6230
6231         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6232         if (ret < 0)
6233                 ret = -ENOMEM;
6234
6235 out:
6236         mutex_unlock(&trace_types_lock);
6237
6238         return ret;
6239 }
6240
6241
6242 /**
6243  * tracing_update_buffers - used by tracing facility to expand ring buffers
6244  *
6245  * To save memory when tracing is never used on a system that has it
6246  * configured in, the ring buffers start out at a minimum size. Once a
6247  * user starts to use the tracing facility, the buffers need to grow
6248  * to their default size.
6249  *
6250  * This function is to be called when a tracer is about to be used.
6251  */
6252 int tracing_update_buffers(void)
6253 {
6254         int ret = 0;
6255
6256         mutex_lock(&trace_types_lock);
6257         if (!ring_buffer_expanded)
6258                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6259                                                 RING_BUFFER_ALL_CPUS);
6260         mutex_unlock(&trace_types_lock);
6261
6262         return ret;
6263 }
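
/*
 * Typical call pattern (an illustrative sketch, not a definitive list of
 * callers): code that is about to start producing trace data, such as the
 * event enable paths, does
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * so that the cost of expanding the buffers is only paid on first use.
 */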
6264
6265 struct trace_option_dentry;
6266
6267 static void
6268 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6269
6270 /*
6271  * Used to clear out the tracer before deletion of an instance.
6272  * Must have trace_types_lock held.
6273  */
6274 static void tracing_set_nop(struct trace_array *tr)
6275 {
6276         if (tr->current_trace == &nop_trace)
6277                 return;
6278
6279         tr->current_trace->enabled--;
6280
6281         if (tr->current_trace->reset)
6282                 tr->current_trace->reset(tr);
6283
6284         tr->current_trace = &nop_trace;
6285 }
6286
6287 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6288 {
6289         /* Only enable if the directory has been created already. */
6290         if (!tr->dir)
6291                 return;
6292
6293         create_trace_option_files(tr, t);
6294 }
6295
6296 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6297 {
6298         struct tracer *t;
6299 #ifdef CONFIG_TRACER_MAX_TRACE
6300         bool had_max_tr;
6301 #endif
6302         int ret = 0;
6303
6304         mutex_lock(&trace_types_lock);
6305
6306         if (!ring_buffer_expanded) {
6307                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6308                                                 RING_BUFFER_ALL_CPUS);
6309                 if (ret < 0)
6310                         goto out;
6311                 ret = 0;
6312         }
6313
6314         for (t = trace_types; t; t = t->next) {
6315                 if (strcmp(t->name, buf) == 0)
6316                         break;
6317         }
6318         if (!t) {
6319                 ret = -EINVAL;
6320                 goto out;
6321         }
6322         if (t == tr->current_trace)
6323                 goto out;
6324
6325 #ifdef CONFIG_TRACER_SNAPSHOT
6326         if (t->use_max_tr) {
6327                 arch_spin_lock(&tr->max_lock);
6328                 if (tr->cond_snapshot)
6329                         ret = -EBUSY;
6330                 arch_spin_unlock(&tr->max_lock);
6331                 if (ret)
6332                         goto out;
6333         }
6334 #endif
6335         /* Some tracers won't work on kernel command line */
6336         if (system_state < SYSTEM_RUNNING && t->noboot) {
6337                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6338                         t->name);
6339                 goto out;
6340         }
6341
6342         /* Some tracers are only allowed for the top level buffer */
6343         if (!trace_ok_for_array(t, tr)) {
6344                 ret = -EINVAL;
6345                 goto out;
6346         }
6347
6348         /* If trace pipe files are being read, we can't change the tracer */
6349         if (tr->trace_ref) {
6350                 ret = -EBUSY;
6351                 goto out;
6352         }
6353
6354         trace_branch_disable();
6355
6356         tr->current_trace->enabled--;
6357
6358         if (tr->current_trace->reset)
6359                 tr->current_trace->reset(tr);
6360
6361         /* Current trace needs to be nop_trace before synchronize_rcu */
6362         tr->current_trace = &nop_trace;
6363
6364 #ifdef CONFIG_TRACER_MAX_TRACE
6365         had_max_tr = tr->allocated_snapshot;
6366
6367         if (had_max_tr && !t->use_max_tr) {
6368                 /*
6369                  * We need to make sure that the update_max_tr sees that
6370                  * current_trace changed to nop_trace to keep it from
6371                  * swapping the buffers after we resize it.
6372                  * The update_max_tr() is called with interrupts disabled,
6373                  * so a synchronize_rcu() is sufficient.
6374                  */
6375                 synchronize_rcu();
6376                 free_snapshot(tr);
6377         }
6378 #endif
6379
6380 #ifdef CONFIG_TRACER_MAX_TRACE
6381         if (t->use_max_tr && !had_max_tr) {
6382                 ret = tracing_alloc_snapshot_instance(tr);
6383                 if (ret < 0)
6384                         goto out;
6385         }
6386 #endif
6387
6388         if (t->init) {
6389                 ret = tracer_init(t, tr);
6390                 if (ret)
6391                         goto out;
6392         }
6393
6394         tr->current_trace = t;
6395         tr->current_trace->enabled++;
6396         trace_branch_enable(tr);
6397  out:
6398         mutex_unlock(&trace_types_lock);
6399
6400         return ret;
6401 }
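
/*
 * User space reaches tracing_set_tracer() by writing a tracer name to the
 * "current_tracer" tracefs file, handled by tracing_set_trace_write() below.
 * For example (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 */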
6402
6403 static ssize_t
6404 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6405                         size_t cnt, loff_t *ppos)
6406 {
6407         struct trace_array *tr = filp->private_data;
6408         char buf[MAX_TRACER_SIZE+1];
6409         int i;
6410         size_t ret;
6411         int err;
6412
6413         ret = cnt;
6414
6415         if (cnt > MAX_TRACER_SIZE)
6416                 cnt = MAX_TRACER_SIZE;
6417
6418         if (copy_from_user(buf, ubuf, cnt))
6419                 return -EFAULT;
6420
6421         buf[cnt] = 0;
6422
6423         /* strip trailing whitespace. */
6424         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6425                 buf[i] = 0;
6426
6427         err = tracing_set_tracer(tr, buf);
6428         if (err)
6429                 return err;
6430
6431         *ppos += ret;
6432
6433         return ret;
6434 }
6435
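/*
 * The tracing_nsecs_read()/tracing_nsecs_write() helpers below keep the
 * value in nanoseconds internally but present it to user space in
 * microseconds: reads convert with nsecs_to_usecs() (printing -1 for the
 * "unset" value of (unsigned long)-1), writes multiply the user value by
 * 1000.
 */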
6436 static ssize_t
6437 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6438                    size_t cnt, loff_t *ppos)
6439 {
6440         char buf[64];
6441         int r;
6442
6443         r = snprintf(buf, sizeof(buf), "%ld\n",
6444                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6445         if (r > sizeof(buf))
6446                 r = sizeof(buf);
6447         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6448 }
6449
6450 static ssize_t
6451 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6452                     size_t cnt, loff_t *ppos)
6453 {
6454         unsigned long val;
6455         int ret;
6456
6457         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6458         if (ret)
6459                 return ret;
6460
6461         *ptr = val * 1000;
6462
6463         return cnt;
6464 }
6465
6466 static ssize_t
6467 tracing_thresh_read(struct file *filp, char __user *ubuf,
6468                     size_t cnt, loff_t *ppos)
6469 {
6470         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6471 }
6472
6473 static ssize_t
6474 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6475                      size_t cnt, loff_t *ppos)
6476 {
6477         struct trace_array *tr = filp->private_data;
6478         int ret;
6479
6480         mutex_lock(&trace_types_lock);
6481         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6482         if (ret < 0)
6483                 goto out;
6484
6485         if (tr->current_trace->update_thresh) {
6486                 ret = tr->current_trace->update_thresh(tr);
6487                 if (ret < 0)
6488                         goto out;
6489         }
6490
6491         ret = cnt;
6492 out:
6493         mutex_unlock(&trace_types_lock);
6494
6495         return ret;
6496 }
6497
6498 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6499
6500 static ssize_t
6501 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6502                      size_t cnt, loff_t *ppos)
6503 {
6504         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6505 }
6506
6507 static ssize_t
6508 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6509                       size_t cnt, loff_t *ppos)
6510 {
6511         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6512 }
6513
6514 #endif
6515
6516 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6517 {
6518         struct trace_array *tr = inode->i_private;
6519         struct trace_iterator *iter;
6520         int ret;
6521
6522         ret = tracing_check_open_get_tr(tr);
6523         if (ret)
6524                 return ret;
6525
6526         mutex_lock(&trace_types_lock);
6527
6528         /* create a buffer to store the information to pass to userspace */
6529         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6530         if (!iter) {
6531                 ret = -ENOMEM;
6532                 __trace_array_put(tr);
6533                 goto out;
6534         }
6535
6536         trace_seq_init(&iter->seq);
6537         iter->trace = tr->current_trace;
6538
6539         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6540                 ret = -ENOMEM;
6541                 goto fail;
6542         }
6543
6544         /* trace pipe does not show start of buffer */
6545         cpumask_setall(iter->started);
6546
6547         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6548                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6549
6550         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6551         if (trace_clocks[tr->clock_id].in_ns)
6552                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6553
6554         iter->tr = tr;
6555         iter->array_buffer = &tr->array_buffer;
6556         iter->cpu_file = tracing_get_cpu(inode);
6557         mutex_init(&iter->mutex);
6558         filp->private_data = iter;
6559
6560         if (iter->trace->pipe_open)
6561                 iter->trace->pipe_open(iter);
6562
6563         nonseekable_open(inode, filp);
6564
6565         tr->trace_ref++;
6566 out:
6567         mutex_unlock(&trace_types_lock);
6568         return ret;
6569
6570 fail:
6571         kfree(iter);
6572         __trace_array_put(tr);
6573         mutex_unlock(&trace_types_lock);
6574         return ret;
6575 }
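
/*
 * Note: the tr->trace_ref taken in tracing_open_pipe() is what makes
 * tracing_set_tracer() refuse with -EBUSY to switch tracers while a
 * trace_pipe reader is open; it is dropped again in tracing_release_pipe()
 * below.
 */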
6576
6577 static int tracing_release_pipe(struct inode *inode, struct file *file)
6578 {
6579         struct trace_iterator *iter = file->private_data;
6580         struct trace_array *tr = inode->i_private;
6581
6582         mutex_lock(&trace_types_lock);
6583
6584         tr->trace_ref--;
6585
6586         if (iter->trace->pipe_close)
6587                 iter->trace->pipe_close(iter);
6588
6589         mutex_unlock(&trace_types_lock);
6590
6591         free_cpumask_var(iter->started);
6592         mutex_destroy(&iter->mutex);
6593         kfree(iter);
6594
6595         trace_array_put(tr);
6596
6597         return 0;
6598 }
6599
6600 static __poll_t
6601 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6602 {
6603         struct trace_array *tr = iter->tr;
6604
6605         /* Iterators are static, they should be filled or empty */
6606         if (trace_buffer_iter(iter, iter->cpu_file))
6607                 return EPOLLIN | EPOLLRDNORM;
6608
6609         if (tr->trace_flags & TRACE_ITER_BLOCK)
6610                 /*
6611                  * Always select as readable when in blocking mode
6612                  */
6613                 return EPOLLIN | EPOLLRDNORM;
6614         else
6615                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6616                                              filp, poll_table);
6617 }
6618
6619 static __poll_t
6620 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6621 {
6622         struct trace_iterator *iter = filp->private_data;
6623
6624         return trace_poll(iter, filp, poll_table);
6625 }
6626
6627 /* Must be called with iter->mutex held. */
6628 static int tracing_wait_pipe(struct file *filp)
6629 {
6630         struct trace_iterator *iter = filp->private_data;
6631         int ret;
6632
6633         while (trace_empty(iter)) {
6634
6635                 if (filp->f_flags & O_NONBLOCK) {
6636                         return -EAGAIN;
6637                 }
6638
6639                 /*
6640                  * We block until we read something and tracing is disabled.
6641                  * We still block if tracing is disabled, but we have never
6642                  * read anything. This allows a user to cat this file, and
6643                  * then enable tracing. But after we have read something,
6644                  * we give an EOF when tracing is again disabled.
6645                  *
6646                  * iter->pos will be 0 if we haven't read anything.
6647                  */
6648                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6649                         break;
6650
6651                 mutex_unlock(&iter->mutex);
6652
6653                 ret = wait_on_pipe(iter, 0);
6654
6655                 mutex_lock(&iter->mutex);
6656
6657                 if (ret)
6658                         return ret;
6659         }
6660
6661         return 1;
6662 }
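
/*
 * tracing_wait_pipe() returns 1 when the caller should attempt a read
 * (either data arrived, or tracing was turned off after something had
 * already been read, which yields EOF), -EAGAIN for non-blocking readers,
 * or a negative error propagated from wait_on_pipe(). iter->mutex is
 * dropped only around the actual wait.
 */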
6663
6664 /*
6665  * Consumer reader.
6666  */
6667 static ssize_t
6668 tracing_read_pipe(struct file *filp, char __user *ubuf,
6669                   size_t cnt, loff_t *ppos)
6670 {
6671         struct trace_iterator *iter = filp->private_data;
6672         ssize_t sret;
6673
6674         /*
6675          * Avoid more than one consumer on a single file descriptor.
6676          * This is just a matter of trace coherency: the ring buffer itself
6677          * is protected.
6678          */
6679         mutex_lock(&iter->mutex);
6680
6681         /* return any leftover data */
6682         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6683         if (sret != -EBUSY)
6684                 goto out;
6685
6686         trace_seq_init(&iter->seq);
6687
6688         if (iter->trace->read) {
6689                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6690                 if (sret)
6691                         goto out;
6692         }
6693
6694 waitagain:
6695         sret = tracing_wait_pipe(filp);
6696         if (sret <= 0)
6697                 goto out;
6698
6699         /* stop when tracing is finished */
6700         if (trace_empty(iter)) {
6701                 sret = 0;
6702                 goto out;
6703         }
6704
6705         if (cnt >= PAGE_SIZE)
6706                 cnt = PAGE_SIZE - 1;
6707
6708         /* reset all but tr, trace, and overruns */
6709         memset(&iter->seq, 0,
6710                sizeof(struct trace_iterator) -
6711                offsetof(struct trace_iterator, seq));
6712         cpumask_clear(iter->started);
6713         trace_seq_init(&iter->seq);
6714         iter->pos = -1;
6715
6716         trace_event_read_lock();
6717         trace_access_lock(iter->cpu_file);
6718         while (trace_find_next_entry_inc(iter) != NULL) {
6719                 enum print_line_t ret;
6720                 int save_len = iter->seq.seq.len;
6721
6722                 ret = print_trace_line(iter);
6723                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6724                         /* don't print partial lines */
6725                         iter->seq.seq.len = save_len;
6726                         break;
6727                 }
6728                 if (ret != TRACE_TYPE_NO_CONSUME)
6729                         trace_consume(iter);
6730
6731                 if (trace_seq_used(&iter->seq) >= cnt)
6732                         break;
6733
6734                 /*
6735                  * Setting the full flag means we reached the trace_seq buffer
6736                  * size and we should have left via the partial output condition above.
6737                  * One of the trace_seq_* functions is not used properly.
6738                  */
6739                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6740                           iter->ent->type);
6741         }
6742         trace_access_unlock(iter->cpu_file);
6743         trace_event_read_unlock();
6744
6745         /* Now copy what we have to the user */
6746         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6747         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6748                 trace_seq_init(&iter->seq);
6749
6750         /*
6751          * If there was nothing to send to user, in spite of consuming trace
6752          * entries, go back to wait for more entries.
6753          */
6754         if (sret == -EBUSY)
6755                 goto waitagain;
6756
6757 out:
6758         mutex_unlock(&iter->mutex);
6759
6760         return sret;
6761 }
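
/*
 * tracing_read_pipe() backs the consuming "trace_pipe" interface (see
 * tracing_pipe_fops below). A common way to exercise it, assuming tracefs
 * is mounted at /sys/kernel/tracing, is:
 *
 *	cat /sys/kernel/tracing/trace_pipe
 *
 * Unlike the "trace" file, entries read here are consumed from the ring
 * buffer (see trace_consume() above).
 */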
6762
6763 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6764                                      unsigned int idx)
6765 {
6766         __free_page(spd->pages[idx]);
6767 }
6768
6769 static size_t
6770 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6771 {
6772         size_t count;
6773         int save_len;
6774         int ret;
6775
6776         /* Seq buffer is page-sized, exactly what we need. */
6777         for (;;) {
6778                 save_len = iter->seq.seq.len;
6779                 ret = print_trace_line(iter);
6780
6781                 if (trace_seq_has_overflowed(&iter->seq)) {
6782                         iter->seq.seq.len = save_len;
6783                         break;
6784                 }
6785
6786                 /*
6787                  * This should not be hit, because it should only
6788                  * be set if the iter->seq overflowed. But check it
6789                  * anyway to be safe.
6790                  */
6791                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6792                         iter->seq.seq.len = save_len;
6793                         break;
6794                 }
6795
6796                 count = trace_seq_used(&iter->seq) - save_len;
6797                 if (rem < count) {
6798                         rem = 0;
6799                         iter->seq.seq.len = save_len;
6800                         break;
6801                 }
6802
6803                 if (ret != TRACE_TYPE_NO_CONSUME)
6804                         trace_consume(iter);
6805                 rem -= count;
6806                 if (!trace_find_next_entry_inc(iter)) {
6807                         rem = 0;
6808                         iter->ent = NULL;
6809                         break;
6810                 }
6811         }
6812
6813         return rem;
6814 }
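
/*
 * tracing_fill_pipe_page() returns how much of @rem is still unfilled after
 * packing as many complete trace lines as fit into iter->seq (one page
 * worth); a partially printed line is rolled back by restoring seq.seq.len.
 */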
6815
6816 static ssize_t tracing_splice_read_pipe(struct file *filp,
6817                                         loff_t *ppos,
6818                                         struct pipe_inode_info *pipe,
6819                                         size_t len,
6820                                         unsigned int flags)
6821 {
6822         struct page *pages_def[PIPE_DEF_BUFFERS];
6823         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6824         struct trace_iterator *iter = filp->private_data;
6825         struct splice_pipe_desc spd = {
6826                 .pages          = pages_def,
6827                 .partial        = partial_def,
6828                 .nr_pages       = 0, /* This gets updated below. */
6829                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6830                 .ops            = &default_pipe_buf_ops,
6831                 .spd_release    = tracing_spd_release_pipe,
6832         };
6833         ssize_t ret;
6834         size_t rem;
6835         unsigned int i;
6836
6837         if (splice_grow_spd(pipe, &spd))
6838                 return -ENOMEM;
6839
6840         mutex_lock(&iter->mutex);
6841
6842         if (iter->trace->splice_read) {
6843                 ret = iter->trace->splice_read(iter, filp,
6844                                                ppos, pipe, len, flags);
6845                 if (ret)
6846                         goto out_err;
6847         }
6848
6849         ret = tracing_wait_pipe(filp);
6850         if (ret <= 0)
6851                 goto out_err;
6852
6853         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6854                 ret = -EFAULT;
6855                 goto out_err;
6856         }
6857
6858         trace_event_read_lock();
6859         trace_access_lock(iter->cpu_file);
6860
6861         /* Fill as many pages as possible. */
6862         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6863                 spd.pages[i] = alloc_page(GFP_KERNEL);
6864                 if (!spd.pages[i])
6865                         break;
6866
6867                 rem = tracing_fill_pipe_page(rem, iter);
6868
6869                 /* Copy the data into the page, so we can start over. */
6870                 ret = trace_seq_to_buffer(&iter->seq,
6871                                           page_address(spd.pages[i]),
6872                                           trace_seq_used(&iter->seq));
6873                 if (ret < 0) {
6874                         __free_page(spd.pages[i]);
6875                         break;
6876                 }
6877                 spd.partial[i].offset = 0;
6878                 spd.partial[i].len = trace_seq_used(&iter->seq);
6879
6880                 trace_seq_init(&iter->seq);
6881         }
6882
6883         trace_access_unlock(iter->cpu_file);
6884         trace_event_read_unlock();
6885         mutex_unlock(&iter->mutex);
6886
6887         spd.nr_pages = i;
6888
6889         if (i)
6890                 ret = splice_to_pipe(pipe, &spd);
6891         else
6892                 ret = 0;
6893 out:
6894         splice_shrink_spd(&spd);
6895         return ret;
6896
6897 out_err:
6898         mutex_unlock(&iter->mutex);
6899         goto out;
6900 }
6901
6902 static ssize_t
6903 tracing_entries_read(struct file *filp, char __user *ubuf,
6904                      size_t cnt, loff_t *ppos)
6905 {
6906         struct inode *inode = file_inode(filp);
6907         struct trace_array *tr = inode->i_private;
6908         int cpu = tracing_get_cpu(inode);
6909         char buf[64];
6910         int r = 0;
6911         ssize_t ret;
6912
6913         mutex_lock(&trace_types_lock);
6914
6915         if (cpu == RING_BUFFER_ALL_CPUS) {
6916                 int cpu, buf_size_same;
6917                 unsigned long size;
6918
6919                 size = 0;
6920                 buf_size_same = 1;
6921                 /* check if all cpu sizes are same */
6922                 for_each_tracing_cpu(cpu) {
6923                         /* fill in the size from first enabled cpu */
6924                         if (size == 0)
6925                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6926                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6927                                 buf_size_same = 0;
6928                                 break;
6929                         }
6930                 }
6931
6932                 if (buf_size_same) {
6933                         if (!ring_buffer_expanded)
6934                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6935                                             size >> 10,
6936                                             trace_buf_size >> 10);
6937                         else
6938                                 r = sprintf(buf, "%lu\n", size >> 10);
6939                 } else
6940                         r = sprintf(buf, "X\n");
6941         } else
6942                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6943
6944         mutex_unlock(&trace_types_lock);
6945
6946         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6947         return ret;
6948 }
6949
6950 static ssize_t
6951 tracing_entries_write(struct file *filp, const char __user *ubuf,
6952                       size_t cnt, loff_t *ppos)
6953 {
6954         struct inode *inode = file_inode(filp);
6955         struct trace_array *tr = inode->i_private;
6956         unsigned long val;
6957         int ret;
6958
6959         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6960         if (ret)
6961                 return ret;
6962
6963         /* must have at least 1 entry */
6964         if (!val)
6965                 return -EINVAL;
6966
6967         /* value is in KB */
6968         val <<= 10;
6969         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6970         if (ret < 0)
6971                 return ret;
6972
6973         *ppos += cnt;
6974
6975         return cnt;
6976 }
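
/*
 * Illustrative use (assuming tracefs at /sys/kernel/tracing): writes to the
 * "buffer_size_kb" file end up here, and the value is the per-CPU buffer
 * size in kilobytes (hence the "val <<= 10" above):
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 */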
6977
6978 static ssize_t
6979 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6980                                 size_t cnt, loff_t *ppos)
6981 {
6982         struct trace_array *tr = filp->private_data;
6983         char buf[64];
6984         int r, cpu;
6985         unsigned long size = 0, expanded_size = 0;
6986
6987         mutex_lock(&trace_types_lock);
6988         for_each_tracing_cpu(cpu) {
6989                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6990                 if (!ring_buffer_expanded)
6991                         expanded_size += trace_buf_size >> 10;
6992         }
6993         if (ring_buffer_expanded)
6994                 r = sprintf(buf, "%lu\n", size);
6995         else
6996                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6997         mutex_unlock(&trace_types_lock);
6998
6999         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7000 }
7001
7002 static ssize_t
7003 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7004                           size_t cnt, loff_t *ppos)
7005 {
7006         /*
7007          * There is no need to read what the user has written; this function
7008          * exists just so that an "echo" into this file does not return an error.
7009          */
7010
7011         *ppos += cnt;
7012
7013         return cnt;
7014 }
7015
7016 static int
7017 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7018 {
7019         struct trace_array *tr = inode->i_private;
7020
7021         /* disable tracing? */
7022         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7023                 tracer_tracing_off(tr);
7024         /* resize the ring buffer to 0 */
7025         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7026
7027         trace_array_put(tr);
7028
7029         return 0;
7030 }
7031
7032 static ssize_t
7033 tracing_mark_write(struct file *filp, const char __user *ubuf,
7034                                         size_t cnt, loff_t *fpos)
7035 {
7036         struct trace_array *tr = filp->private_data;
7037         struct ring_buffer_event *event;
7038         enum event_trigger_type tt = ETT_NONE;
7039         struct trace_buffer *buffer;
7040         struct print_entry *entry;
7041         ssize_t written;
7042         int size;
7043         int len;
7044
7045 /* Used in tracing_mark_raw_write() as well */
7046 #define FAULTED_STR "<faulted>"
7047 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7048
7049         if (tracing_disabled)
7050                 return -EINVAL;
7051
7052         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7053                 return -EINVAL;
7054
7055         if (cnt > TRACE_BUF_SIZE)
7056                 cnt = TRACE_BUF_SIZE;
7057
7058         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7059
7060         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7061
7062         /* If less than "<faulted>", then make sure we can still add that */
7063         if (cnt < FAULTED_SIZE)
7064                 size += FAULTED_SIZE - cnt;
7065
7066         buffer = tr->array_buffer.buffer;
7067         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7068                                             tracing_gen_ctx());
7069         if (unlikely(!event))
7070                 /* Ring buffer disabled, return as if not open for write */
7071                 return -EBADF;
7072
7073         entry = ring_buffer_event_data(event);
7074         entry->ip = _THIS_IP_;
7075
7076         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7077         if (len) {
7078                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7079                 cnt = FAULTED_SIZE;
7080                 written = -EFAULT;
7081         } else
7082                 written = cnt;
7083
7084         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7085                 /* do not add \n before testing triggers, but add \0 */
7086                 entry->buf[cnt] = '\0';
7087                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7088         }
7089
7090         if (entry->buf[cnt - 1] != '\n') {
7091                 entry->buf[cnt] = '\n';
7092                 entry->buf[cnt + 1] = '\0';
7093         } else
7094                 entry->buf[cnt] = '\0';
7095
7096         if (static_branch_unlikely(&trace_marker_exports_enabled))
7097                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7098         __buffer_unlock_commit(buffer, event);
7099
7100         if (tt)
7101                 event_triggers_post_call(tr->trace_marker_file, tt);
7102
7103         if (written > 0)
7104                 *fpos += written;
7105
7106         return written;
7107 }
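
/*
 * Illustrative use of the marker interface handled above (assuming tracefs
 * at /sys/kernel/tracing): user space can inject a line of text into the
 * trace as a TRACE_PRINT event with
 *
 *	echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * A trailing newline is appended if the write did not include one.
 */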
7108
7109 /* Limit it for now to 3K (including tag) */
7110 #define RAW_DATA_MAX_SIZE (1024*3)
7111
7112 static ssize_t
7113 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7114                                         size_t cnt, loff_t *fpos)
7115 {
7116         struct trace_array *tr = filp->private_data;
7117         struct ring_buffer_event *event;
7118         struct trace_buffer *buffer;
7119         struct raw_data_entry *entry;
7120         ssize_t written;
7121         int size;
7122         int len;
7123
7124 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7125
7126         if (tracing_disabled)
7127                 return -EINVAL;
7128
7129         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7130                 return -EINVAL;
7131
7132         /* The marker must at least have a tag id */
7133         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7134                 return -EINVAL;
7135
7136         if (cnt > TRACE_BUF_SIZE)
7137                 cnt = TRACE_BUF_SIZE;
7138
7139         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7140
7141         size = sizeof(*entry) + cnt;
7142         if (cnt < FAULT_SIZE_ID)
7143                 size += FAULT_SIZE_ID - cnt;
7144
7145         buffer = tr->array_buffer.buffer;
7146         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7147                                             tracing_gen_ctx());
7148         if (!event)
7149                 /* Ring buffer disabled, return as if not open for write */
7150                 return -EBADF;
7151
7152         entry = ring_buffer_event_data(event);
7153
7154         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7155         if (len) {
7156                 entry->id = -1;
7157                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7158                 written = -EFAULT;
7159         } else
7160                 written = cnt;
7161
7162         __buffer_unlock_commit(buffer, event);
7163
7164         if (written > 0)
7165                 *fpos += written;
7166
7167         return written;
7168 }
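
/*
 * The raw marker write above expects the payload to start with a tag id of
 * sizeof(unsigned int) bytes (hence the minimum size check); the id and the
 * bytes that follow it are stored verbatim in a TRACE_RAW_DATA event, with
 * the id forced to -1 if the copy from user space faults.
 */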
7169
7170 static int tracing_clock_show(struct seq_file *m, void *v)
7171 {
7172         struct trace_array *tr = m->private;
7173         int i;
7174
7175         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7176                 seq_printf(m,
7177                         "%s%s%s%s", i ? " " : "",
7178                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7179                         i == tr->clock_id ? "]" : "");
7180         seq_putc(m, '\n');
7181
7182         return 0;
7183 }
7184
7185 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7186 {
7187         int i;
7188
7189         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7190                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7191                         break;
7192         }
7193         if (i == ARRAY_SIZE(trace_clocks))
7194                 return -EINVAL;
7195
7196         mutex_lock(&trace_types_lock);
7197
7198         tr->clock_id = i;
7199
7200         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7201
7202         /*
7203          * New clock may not be consistent with the previous clock.
7204          * Reset the buffer so that it doesn't have incomparable timestamps.
7205          */
7206         tracing_reset_online_cpus(&tr->array_buffer);
7207
7208 #ifdef CONFIG_TRACER_MAX_TRACE
7209         if (tr->max_buffer.buffer)
7210                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7211         tracing_reset_online_cpus(&tr->max_buffer);
7212 #endif
7213
7214         mutex_unlock(&trace_types_lock);
7215
7216         return 0;
7217 }
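
/*
 * The available clocks and the currently selected one are reported by
 * tracing_clock_show() above, which puts brackets around the active entry
 * (the exact list comes from trace_clocks[]). Because switching clocks
 * resets the buffers, old and new timestamps are never mixed.
 */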
7218
7219 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7220                                    size_t cnt, loff_t *fpos)
7221 {
7222         struct seq_file *m = filp->private_data;
7223         struct trace_array *tr = m->private;
7224         char buf[64];
7225         const char *clockstr;
7226         int ret;
7227
7228         if (cnt >= sizeof(buf))
7229                 return -EINVAL;
7230
7231         if (copy_from_user(buf, ubuf, cnt))
7232                 return -EFAULT;
7233
7234         buf[cnt] = 0;
7235
7236         clockstr = strstrip(buf);
7237
7238         ret = tracing_set_clock(tr, clockstr);
7239         if (ret)
7240                 return ret;
7241
7242         *fpos += cnt;
7243
7244         return cnt;
7245 }
7246
7247 static int tracing_clock_open(struct inode *inode, struct file *file)
7248 {
7249         struct trace_array *tr = inode->i_private;
7250         int ret;
7251
7252         ret = tracing_check_open_get_tr(tr);
7253         if (ret)
7254                 return ret;
7255
7256         ret = single_open(file, tracing_clock_show, inode->i_private);
7257         if (ret < 0)
7258                 trace_array_put(tr);
7259
7260         return ret;
7261 }
7262
7263 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7264 {
7265         struct trace_array *tr = m->private;
7266
7267         mutex_lock(&trace_types_lock);
7268
7269         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7270                 seq_puts(m, "delta [absolute]\n");
7271         else
7272                 seq_puts(m, "[delta] absolute\n");
7273
7274         mutex_unlock(&trace_types_lock);
7275
7276         return 0;
7277 }
7278
7279 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7280 {
7281         struct trace_array *tr = inode->i_private;
7282         int ret;
7283
7284         ret = tracing_check_open_get_tr(tr);
7285         if (ret)
7286                 return ret;
7287
7288         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7289         if (ret < 0)
7290                 trace_array_put(tr);
7291
7292         return ret;
7293 }
7294
7295 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7296 {
7297         if (rbe == this_cpu_read(trace_buffered_event))
7298                 return ring_buffer_time_stamp(buffer);
7299
7300         return ring_buffer_event_time_stamp(buffer, rbe);
7301 }
7302
7303 /*
7304  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7305  */
7306 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7307 {
7308         int ret = 0;
7309
7310         mutex_lock(&trace_types_lock);
7311
7312         if (set && tr->no_filter_buffering_ref++)
7313                 goto out;
7314
7315         if (!set) {
7316                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7317                         ret = -EINVAL;
7318                         goto out;
7319                 }
7320
7321                 --tr->no_filter_buffering_ref;
7322         }
7323  out:
7324         mutex_unlock(&trace_types_lock);
7325
7326         return ret;
7327 }
7328
7329 struct ftrace_buffer_info {
7330         struct trace_iterator   iter;
7331         void                    *spare;
7332         unsigned int            spare_cpu;
7333         unsigned int            read;
7334 };
7335
7336 #ifdef CONFIG_TRACER_SNAPSHOT
7337 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7338 {
7339         struct trace_array *tr = inode->i_private;
7340         struct trace_iterator *iter;
7341         struct seq_file *m;
7342         int ret;
7343
7344         ret = tracing_check_open_get_tr(tr);
7345         if (ret)
7346                 return ret;
7347
7348         if (file->f_mode & FMODE_READ) {
7349                 iter = __tracing_open(inode, file, true);
7350                 if (IS_ERR(iter))
7351                         ret = PTR_ERR(iter);
7352         } else {
7353                 /* Writes still need the seq_file to hold the private data */
7354                 ret = -ENOMEM;
7355                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7356                 if (!m)
7357                         goto out;
7358                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7359                 if (!iter) {
7360                         kfree(m);
7361                         goto out;
7362                 }
7363                 ret = 0;
7364
7365                 iter->tr = tr;
7366                 iter->array_buffer = &tr->max_buffer;
7367                 iter->cpu_file = tracing_get_cpu(inode);
7368                 m->private = iter;
7369                 file->private_data = m;
7370         }
7371 out:
7372         if (ret < 0)
7373                 trace_array_put(tr);
7374
7375         return ret;
7376 }
7377
7378 static ssize_t
7379 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7380                        loff_t *ppos)
7381 {
7382         struct seq_file *m = filp->private_data;
7383         struct trace_iterator *iter = m->private;
7384         struct trace_array *tr = iter->tr;
7385         unsigned long val;
7386         int ret;
7387
7388         ret = tracing_update_buffers();
7389         if (ret < 0)
7390                 return ret;
7391
7392         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7393         if (ret)
7394                 return ret;
7395
7396         mutex_lock(&trace_types_lock);
7397
7398         if (tr->current_trace->use_max_tr) {
7399                 ret = -EBUSY;
7400                 goto out;
7401         }
7402
7403         arch_spin_lock(&tr->max_lock);
7404         if (tr->cond_snapshot)
7405                 ret = -EBUSY;
7406         arch_spin_unlock(&tr->max_lock);
7407         if (ret)
7408                 goto out;
7409
7410         switch (val) {
7411         case 0:
7412                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7413                         ret = -EINVAL;
7414                         break;
7415                 }
7416                 if (tr->allocated_snapshot)
7417                         free_snapshot(tr);
7418                 break;
7419         case 1:
7420 /* Only allow per-cpu swap if the ring buffer supports it */
7421 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7422                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7423                         ret = -EINVAL;
7424                         break;
7425                 }
7426 #endif
7427                 if (tr->allocated_snapshot)
7428                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7429                                         &tr->array_buffer, iter->cpu_file);
7430                 else
7431                         ret = tracing_alloc_snapshot_instance(tr);
7432                 if (ret < 0)
7433                         break;
7434                 local_irq_disable();
7435                 /* Now, we're going to swap */
7436                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7437                         update_max_tr(tr, current, smp_processor_id(), NULL);
7438                 else
7439                         update_max_tr_single(tr, current, iter->cpu_file);
7440                 local_irq_enable();
7441                 break;
7442         default:
7443                 if (tr->allocated_snapshot) {
7444                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7445                                 tracing_reset_online_cpus(&tr->max_buffer);
7446                         else
7447                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7448                 }
7449                 break;
7450         }
7451
7452         if (ret >= 0) {
7453                 *ppos += cnt;
7454                 ret = cnt;
7455         }
7456 out:
7457         mutex_unlock(&trace_types_lock);
7458         return ret;
7459 }
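
/*
 * Summary of the values accepted by the snapshot write handler above, taken
 * from the switch statement: 0 frees the snapshot buffer (only valid for
 * the all-CPU file), 1 allocates the snapshot buffer if needed and swaps it
 * with the live buffer, and any other value simply clears the snapshot
 * buffer.
 */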
7460
7461 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7462 {
7463         struct seq_file *m = file->private_data;
7464         int ret;
7465
7466         ret = tracing_release(inode, file);
7467
7468         if (file->f_mode & FMODE_READ)
7469                 return ret;
7470
7471         /* If write only, the seq_file is just a stub */
7472         if (m)
7473                 kfree(m->private);
7474         kfree(m);
7475
7476         return 0;
7477 }
7478
7479 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7480 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7481                                     size_t count, loff_t *ppos);
7482 static int tracing_buffers_release(struct inode *inode, struct file *file);
7483 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7484                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7485
7486 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7487 {
7488         struct ftrace_buffer_info *info;
7489         int ret;
7490
7491         /* The following checks for tracefs lockdown */
7492         ret = tracing_buffers_open(inode, filp);
7493         if (ret < 0)
7494                 return ret;
7495
7496         info = filp->private_data;
7497
7498         if (info->iter.trace->use_max_tr) {
7499                 tracing_buffers_release(inode, filp);
7500                 return -EBUSY;
7501         }
7502
7503         info->iter.snapshot = true;
7504         info->iter.array_buffer = &info->iter.tr->max_buffer;
7505
7506         return ret;
7507 }
7508
7509 #endif /* CONFIG_TRACER_SNAPSHOT */
7510
7511
7512 static const struct file_operations tracing_thresh_fops = {
7513         .open           = tracing_open_generic,
7514         .read           = tracing_thresh_read,
7515         .write          = tracing_thresh_write,
7516         .llseek         = generic_file_llseek,
7517 };
7518
7519 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7520 static const struct file_operations tracing_max_lat_fops = {
7521         .open           = tracing_open_generic,
7522         .read           = tracing_max_lat_read,
7523         .write          = tracing_max_lat_write,
7524         .llseek         = generic_file_llseek,
7525 };
7526 #endif
7527
7528 static const struct file_operations set_tracer_fops = {
7529         .open           = tracing_open_generic,
7530         .read           = tracing_set_trace_read,
7531         .write          = tracing_set_trace_write,
7532         .llseek         = generic_file_llseek,
7533 };
7534
7535 static const struct file_operations tracing_pipe_fops = {
7536         .open           = tracing_open_pipe,
7537         .poll           = tracing_poll_pipe,
7538         .read           = tracing_read_pipe,
7539         .splice_read    = tracing_splice_read_pipe,
7540         .release        = tracing_release_pipe,
7541         .llseek         = no_llseek,
7542 };
7543
7544 static const struct file_operations tracing_entries_fops = {
7545         .open           = tracing_open_generic_tr,
7546         .read           = tracing_entries_read,
7547         .write          = tracing_entries_write,
7548         .llseek         = generic_file_llseek,
7549         .release        = tracing_release_generic_tr,
7550 };
7551
7552 static const struct file_operations tracing_total_entries_fops = {
7553         .open           = tracing_open_generic_tr,
7554         .read           = tracing_total_entries_read,
7555         .llseek         = generic_file_llseek,
7556         .release        = tracing_release_generic_tr,
7557 };
7558
7559 static const struct file_operations tracing_free_buffer_fops = {
7560         .open           = tracing_open_generic_tr,
7561         .write          = tracing_free_buffer_write,
7562         .release        = tracing_free_buffer_release,
7563 };
7564
7565 static const struct file_operations tracing_mark_fops = {
7566         .open           = tracing_open_generic_tr,
7567         .write          = tracing_mark_write,
7568         .llseek         = generic_file_llseek,
7569         .release        = tracing_release_generic_tr,
7570 };
7571
7572 static const struct file_operations tracing_mark_raw_fops = {
7573         .open           = tracing_open_generic_tr,
7574         .write          = tracing_mark_raw_write,
7575         .llseek         = generic_file_llseek,
7576         .release        = tracing_release_generic_tr,
7577 };
7578
7579 static const struct file_operations trace_clock_fops = {
7580         .open           = tracing_clock_open,
7581         .read           = seq_read,
7582         .llseek         = seq_lseek,
7583         .release        = tracing_single_release_tr,
7584         .write          = tracing_clock_write,
7585 };
7586
7587 static const struct file_operations trace_time_stamp_mode_fops = {
7588         .open           = tracing_time_stamp_mode_open,
7589         .read           = seq_read,
7590         .llseek         = seq_lseek,
7591         .release        = tracing_single_release_tr,
7592 };
7593
7594 #ifdef CONFIG_TRACER_SNAPSHOT
7595 static const struct file_operations snapshot_fops = {
7596         .open           = tracing_snapshot_open,
7597         .read           = seq_read,
7598         .write          = tracing_snapshot_write,
7599         .llseek         = tracing_lseek,
7600         .release        = tracing_snapshot_release,
7601 };
7602
7603 static const struct file_operations snapshot_raw_fops = {
7604         .open           = snapshot_raw_open,
7605         .read           = tracing_buffers_read,
7606         .release        = tracing_buffers_release,
7607         .splice_read    = tracing_buffers_splice_read,
7608         .llseek         = no_llseek,
7609 };
7610
7611 #endif /* CONFIG_TRACER_SNAPSHOT */
7612
7613 /*
7614  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7615  * @filp: The active open file structure
7616  * @ubuf: The userspace provided buffer to read the value from
7617  * @cnt: The maximum number of bytes to read
7618  * @ppos: The current "file" position
7619  *
7620  * This function implements the write interface for a struct trace_min_max_param.
7621  * The filp->private_data must point to a trace_min_max_param structure that
7622  * defines where to write the value, the min and the max acceptable values,
7623  * and a lock to protect the write.
7624  */
7625 static ssize_t
7626 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7627 {
7628         struct trace_min_max_param *param = filp->private_data;
7629         u64 val;
7630         int err;
7631
7632         if (!param)
7633                 return -EFAULT;
7634
7635         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7636         if (err)
7637                 return err;
7638
7639         if (param->lock)
7640                 mutex_lock(param->lock);
7641
7642         if (param->min && val < *param->min)
7643                 err = -EINVAL;
7644
7645         if (param->max && val > *param->max)
7646                 err = -EINVAL;
7647
7648         if (!err)
7649                 *param->val = val;
7650
7651         if (param->lock)
7652                 mutex_unlock(param->lock);
7653
7654         if (err)
7655                 return err;
7656
7657         return cnt;
7658 }
7659
7660 /*
7661  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7662  * @filp: The active open file structure
7663  * @ubuf: The userspace provided buffer to copy the value into
7664  * @cnt: The maximum number of bytes to read
7665  * @ppos: The current "file" position
7666  *
7667  * This function implements the read interface for a struct trace_min_max_param.
7668  * The filp->private_data must point to a trace_min_max_param struct with valid
7669  * data.
7670  */
7671 static ssize_t
7672 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7673 {
7674         struct trace_min_max_param *param = filp->private_data;
7675         char buf[U64_STR_SIZE];
7676         int len;
7677         u64 val;
7678
7679         if (!param)
7680                 return -EFAULT;
7681
7682         val = *param->val;
7683
7684         if (cnt > sizeof(buf))
7685                 cnt = sizeof(buf);
7686
7687         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7688
7689         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7690 }
7691
7692 const struct file_operations trace_min_max_fops = {
7693         .open           = tracing_open_generic,
7694         .read           = trace_min_max_read,
7695         .write          = trace_min_max_write,
7696 };
7697
7698 #define TRACING_LOG_ERRS_MAX    8
7699 #define TRACING_LOG_LOC_MAX     128
7700
7701 #define CMD_PREFIX "  Command: "
7702
7703 struct err_info {
7704         const char      **errs; /* ptr to loc-specific array of err strings */
7705         u8              type;   /* index into errs -> specific err string */
7706         u8              pos;    /* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7707         u64             ts;
7708 };
7709
7710 struct tracing_log_err {
7711         struct list_head        list;
7712         struct err_info         info;
7713         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7714         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7715 };
7716
7717 static DEFINE_MUTEX(tracing_err_log_lock);
7718
7719 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7720 {
7721         struct tracing_log_err *err;
7722
7723         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7724                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7725                 if (!err)
7726                         err = ERR_PTR(-ENOMEM);
7727                 tr->n_err_log_entries++;
7728
7729                 return err;
7730         }
7731
7732         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7733         list_del(&err->list);
7734
7735         return err;
7736 }
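
/*
 * Once TRACING_LOG_ERRS_MAX entries have been allocated,
 * get_tracing_log_err() recycles the oldest entry instead of allocating a
 * new one, so the error log never grows beyond that bound.
 */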
7737
7738 /**
7739  * err_pos - find the position of a string within a command for error careting
7740  * @cmd: The tracing command that caused the error
7741  * @str: The string to position the caret at within @cmd
7742  *
7743  * Finds the position of the first occurrence of @str within @cmd.  The
7744  * return value can be passed to tracing_log_err() for caret placement
7745  * within @cmd.
7746  *
7747  * Returns the index within @cmd of the first occurrence of @str or 0
7748  * if @str was not found.
7749  */
7750 unsigned int err_pos(char *cmd, const char *str)
7751 {
7752         char *found;
7753
7754         if (WARN_ON(!strlen(cmd)))
7755                 return 0;
7756
7757         found = strstr(cmd, str);
7758         if (found)
7759                 return found - cmd;
7760
7761         return 0;
7762 }
7763
7764 /**
7765  * tracing_log_err - write an error to the tracing error log
7766  * @tr: The associated trace array for the error (NULL for top level array)
7767  * @loc: A string describing where the error occurred
7768  * @cmd: The tracing command that caused the error
7769  * @errs: The array of loc-specific static error strings
7770  * @type: The index into errs[], which produces the specific static err string
7771  * @pos: The position the caret should be placed in the cmd
7772  *
7773  * Writes an error into tracing/error_log of the form:
7774  *
7775  * <loc>: error: <text>
7776  *   Command: <cmd>
7777  *              ^
7778  *
7779  * tracing/error_log is a small log file containing the last
7780  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7781  * unless there has been a tracing error, and the error log can be
7782  * cleared and have its memory freed by writing the empty string in
7783  * truncation mode to it, i.e. echo > tracing/error_log.
7784  *
7785  * NOTE: the @errs array along with the @type param are used to
7786  * produce a static error string - this string is not copied and saved
7787  * when the error is logged - only a pointer to it is saved.  See
7788  * existing callers for examples of how static strings are typically
7789  * defined for use with tracing_log_err().
7790  */
7791 void tracing_log_err(struct trace_array *tr,
7792                      const char *loc, const char *cmd,
7793                      const char **errs, u8 type, u8 pos)
7794 {
7795         struct tracing_log_err *err;
7796
7797         if (!tr)
7798                 tr = &global_trace;
7799
7800         mutex_lock(&tracing_err_log_lock);
7801         err = get_tracing_log_err(tr);
7802         if (PTR_ERR(err) == -ENOMEM) {
7803                 mutex_unlock(&tracing_err_log_lock);
7804                 return;
7805         }
7806
7807         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7808         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7809
7810         err->info.errs = errs;
7811         err->info.type = type;
7812         err->info.pos = pos;
7813         err->info.ts = local_clock();
7814
7815         list_add_tail(&err->list, &tr->err_log);
7816         mutex_unlock(&tracing_err_log_lock);
7817 }
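
/*
 * Illustrative sketch (not taken from the original file) of the usual way
 * callers define the static error strings; all names here (FOO_ERRORS,
 * foo_errs, FOO_ERR_BAD_ARG, "bad_field") are hypothetical:
 *
 *      #define FOO_ERRORS                              \
 *              C(BAD_ARG,      "Invalid argument"),    \
 *              C(NO_MEM,       "Out of memory"),
 *
 *      #undef C
 *      #define C(a, b)         FOO_ERR_##a
 *      enum { FOO_ERRORS };
 *
 *      #undef C
 *      #define C(a, b)         b
 *      static const char *foo_errs[] = { FOO_ERRORS };
 *
 *      tracing_log_err(tr, "foo", cmd, foo_errs,
 *                      FOO_ERR_BAD_ARG, err_pos(cmd, "bad_field"));
 *
 * Only the foo_errs pointer and the type index are saved, so the strings
 * must remain valid for the lifetime of the log entry.
 */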
7818
7819 static void clear_tracing_err_log(struct trace_array *tr)
7820 {
7821         struct tracing_log_err *err, *next;
7822
7823         mutex_lock(&tracing_err_log_lock);
7824         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7825                 list_del(&err->list);
7826                 kfree(err);
7827         }
7828
7829         tr->n_err_log_entries = 0;
7830         mutex_unlock(&tracing_err_log_lock);
7831 }
7832
7833 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7834 {
7835         struct trace_array *tr = m->private;
7836
7837         mutex_lock(&tracing_err_log_lock);
7838
7839         return seq_list_start(&tr->err_log, *pos);
7840 }
7841
7842 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7843 {
7844         struct trace_array *tr = m->private;
7845
7846         return seq_list_next(v, &tr->err_log, pos);
7847 }
7848
7849 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7850 {
7851         mutex_unlock(&tracing_err_log_lock);
7852 }
7853
7854 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7855 {
7856         u8 i;
7857
7858         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7859                 seq_putc(m, ' ');
7860         for (i = 0; i < pos; i++)
7861                 seq_putc(m, ' ');
7862         seq_puts(m, "^\n");
7863 }
7864
7865 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7866 {
7867         struct tracing_log_err *err = v;
7868
7869         if (err) {
7870                 const char *err_text = err->info.errs[err->info.type];
7871                 u64 sec = err->info.ts;
7872                 u32 nsec;
7873
7874                 nsec = do_div(sec, NSEC_PER_SEC);
7875                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7876                            err->loc, err_text);
7877                 seq_printf(m, "%s", err->cmd);
7878                 tracing_err_log_show_pos(m, err->info.pos);
7879         }
7880
7881         return 0;
7882 }
7883
7884 static const struct seq_operations tracing_err_log_seq_ops = {
7885         .start  = tracing_err_log_seq_start,
7886         .next   = tracing_err_log_seq_next,
7887         .stop   = tracing_err_log_seq_stop,
7888         .show   = tracing_err_log_seq_show
7889 };
7890
7891 static int tracing_err_log_open(struct inode *inode, struct file *file)
7892 {
7893         struct trace_array *tr = inode->i_private;
7894         int ret = 0;
7895
7896         ret = tracing_check_open_get_tr(tr);
7897         if (ret)
7898                 return ret;
7899
7900         /* If this file was opened for write, then erase contents */
7901         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7902                 clear_tracing_err_log(tr);
7903
7904         if (file->f_mode & FMODE_READ) {
7905                 ret = seq_open(file, &tracing_err_log_seq_ops);
7906                 if (!ret) {
7907                         struct seq_file *m = file->private_data;
7908                         m->private = tr;
7909                 } else {
7910                         trace_array_put(tr);
7911                 }
7912         }
7913         return ret;
7914 }
7915
7916 static ssize_t tracing_err_log_write(struct file *file,
7917                                      const char __user *buffer,
7918                                      size_t count, loff_t *ppos)
7919 {
7920         return count;
7921 }
7922
7923 static int tracing_err_log_release(struct inode *inode, struct file *file)
7924 {
7925         struct trace_array *tr = inode->i_private;
7926
7927         trace_array_put(tr);
7928
7929         if (file->f_mode & FMODE_READ)
7930                 seq_release(inode, file);
7931
7932         return 0;
7933 }
7934
7935 static const struct file_operations tracing_err_log_fops = {
7936         .open           = tracing_err_log_open,
7937         .write          = tracing_err_log_write,
7938         .read           = seq_read,
7939         .llseek         = seq_lseek,
7940         .release        = tracing_err_log_release,
7941 };
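
/*
 * Example user-space interaction with the file backed by these fops (the
 * mount point /sys/kernel/tracing is an assumption):
 *
 *      # cat /sys/kernel/tracing/error_log     # dump the logged errors
 *      # echo > /sys/kernel/tracing/error_log  # truncate: clear and free
 */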
7942
7943 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7944 {
7945         struct trace_array *tr = inode->i_private;
7946         struct ftrace_buffer_info *info;
7947         int ret;
7948
7949         ret = tracing_check_open_get_tr(tr);
7950         if (ret)
7951                 return ret;
7952
7953         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7954         if (!info) {
7955                 trace_array_put(tr);
7956                 return -ENOMEM;
7957         }
7958
7959         mutex_lock(&trace_types_lock);
7960
7961         info->iter.tr           = tr;
7962         info->iter.cpu_file     = tracing_get_cpu(inode);
7963         info->iter.trace        = tr->current_trace;
7964         info->iter.array_buffer = &tr->array_buffer;
7965         info->spare             = NULL;
7966         /* Force reading ring buffer for first read */
7967         info->read              = (unsigned int)-1;
7968
7969         filp->private_data = info;
7970
7971         tr->trace_ref++;
7972
7973         mutex_unlock(&trace_types_lock);
7974
7975         ret = nonseekable_open(inode, filp);
7976         if (ret < 0)
7977                 trace_array_put(tr);
7978
7979         return ret;
7980 }
7981
7982 static __poll_t
7983 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7984 {
7985         struct ftrace_buffer_info *info = filp->private_data;
7986         struct trace_iterator *iter = &info->iter;
7987
7988         return trace_poll(iter, filp, poll_table);
7989 }
7990
7991 static ssize_t
7992 tracing_buffers_read(struct file *filp, char __user *ubuf,
7993                      size_t count, loff_t *ppos)
7994 {
7995         struct ftrace_buffer_info *info = filp->private_data;
7996         struct trace_iterator *iter = &info->iter;
7997         ssize_t ret = 0;
7998         ssize_t size;
7999
8000         if (!count)
8001                 return 0;
8002
8003 #ifdef CONFIG_TRACER_MAX_TRACE
8004         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8005                 return -EBUSY;
8006 #endif
8007
8008         if (!info->spare) {
8009                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8010                                                           iter->cpu_file);
8011                 if (IS_ERR(info->spare)) {
8012                         ret = PTR_ERR(info->spare);
8013                         info->spare = NULL;
8014                 } else {
8015                         info->spare_cpu = iter->cpu_file;
8016                 }
8017         }
8018         if (!info->spare)
8019                 return ret;
8020
8021         /* Do we have previous read data to read? */
8022         if (info->read < PAGE_SIZE)
8023                 goto read;
8024
8025  again:
8026         trace_access_lock(iter->cpu_file);
8027         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8028                                     &info->spare,
8029                                     count,
8030                                     iter->cpu_file, 0);
8031         trace_access_unlock(iter->cpu_file);
8032
8033         if (ret < 0) {
8034                 if (trace_empty(iter)) {
8035                         if ((filp->f_flags & O_NONBLOCK))
8036                                 return -EAGAIN;
8037
8038                         ret = wait_on_pipe(iter, 0);
8039                         if (ret)
8040                                 return ret;
8041
8042                         goto again;
8043                 }
8044                 return 0;
8045         }
8046
8047         info->read = 0;
8048  read:
8049         size = PAGE_SIZE - info->read;
8050         if (size > count)
8051                 size = count;
8052
8053         ret = copy_to_user(ubuf, info->spare + info->read, size);
8054         if (ret == size)
8055                 return -EFAULT;
8056
8057         size -= ret;
8058
8059         *ppos += size;
8060         info->read += size;
8061
8062         return size;
8063 }
8064
8065 static int tracing_buffers_release(struct inode *inode, struct file *file)
8066 {
8067         struct ftrace_buffer_info *info = file->private_data;
8068         struct trace_iterator *iter = &info->iter;
8069
8070         mutex_lock(&trace_types_lock);
8071
8072         iter->tr->trace_ref--;
8073
8074         __trace_array_put(iter->tr);
8075
8076         if (info->spare)
8077                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8078                                            info->spare_cpu, info->spare);
8079         kvfree(info);
8080
8081         mutex_unlock(&trace_types_lock);
8082
8083         return 0;
8084 }
8085
8086 struct buffer_ref {
8087         struct trace_buffer     *buffer;
8088         void                    *page;
8089         int                     cpu;
8090         refcount_t              refcount;
8091 };
8092
8093 static void buffer_ref_release(struct buffer_ref *ref)
8094 {
8095         if (!refcount_dec_and_test(&ref->refcount))
8096                 return;
8097         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8098         kfree(ref);
8099 }
8100
8101 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8102                                     struct pipe_buffer *buf)
8103 {
8104         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8105
8106         buffer_ref_release(ref);
8107         buf->private = 0;
8108 }
8109
8110 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8111                                 struct pipe_buffer *buf)
8112 {
8113         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8114
8115         if (refcount_read(&ref->refcount) > INT_MAX/2)
8116                 return false;
8117
8118         refcount_inc(&ref->refcount);
8119         return true;
8120 }
8121
8122 /* Pipe buffer operations for a buffer. */
8123 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8124         .release                = buffer_pipe_buf_release,
8125         .get                    = buffer_pipe_buf_get,
8126 };
8127
8128 /*
8129  * Callback from splice_to_pipe(): release any pages still referenced
8130  * in the spd if we errored out while filling the pipe.
8131  */
8132 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8133 {
8134         struct buffer_ref *ref =
8135                 (struct buffer_ref *)spd->partial[i].private;
8136
8137         buffer_ref_release(ref);
8138         spd->partial[i].private = 0;
8139 }
8140
8141 static ssize_t
8142 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8143                             struct pipe_inode_info *pipe, size_t len,
8144                             unsigned int flags)
8145 {
8146         struct ftrace_buffer_info *info = file->private_data;
8147         struct trace_iterator *iter = &info->iter;
8148         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8149         struct page *pages_def[PIPE_DEF_BUFFERS];
8150         struct splice_pipe_desc spd = {
8151                 .pages          = pages_def,
8152                 .partial        = partial_def,
8153                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8154                 .ops            = &buffer_pipe_buf_ops,
8155                 .spd_release    = buffer_spd_release,
8156         };
8157         struct buffer_ref *ref;
8158         int entries, i;
8159         ssize_t ret = 0;
8160
8161 #ifdef CONFIG_TRACER_MAX_TRACE
8162         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8163                 return -EBUSY;
8164 #endif
8165
8166         if (*ppos & (PAGE_SIZE - 1))
8167                 return -EINVAL;
8168
8169         if (len & (PAGE_SIZE - 1)) {
8170                 if (len < PAGE_SIZE)
8171                         return -EINVAL;
8172                 len &= PAGE_MASK;
8173         }
8174
8175         if (splice_grow_spd(pipe, &spd))
8176                 return -ENOMEM;
8177
8178  again:
8179         trace_access_lock(iter->cpu_file);
8180         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8181
8182         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8183                 struct page *page;
8184                 int r;
8185
8186                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8187                 if (!ref) {
8188                         ret = -ENOMEM;
8189                         break;
8190                 }
8191
8192                 refcount_set(&ref->refcount, 1);
8193                 ref->buffer = iter->array_buffer->buffer;
8194                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8195                 if (IS_ERR(ref->page)) {
8196                         ret = PTR_ERR(ref->page);
8197                         ref->page = NULL;
8198                         kfree(ref);
8199                         break;
8200                 }
8201                 ref->cpu = iter->cpu_file;
8202
8203                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8204                                           len, iter->cpu_file, 1);
8205                 if (r < 0) {
8206                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8207                                                    ref->page);
8208                         kfree(ref);
8209                         break;
8210                 }
8211
8212                 page = virt_to_page(ref->page);
8213
8214                 spd.pages[i] = page;
8215                 spd.partial[i].len = PAGE_SIZE;
8216                 spd.partial[i].offset = 0;
8217                 spd.partial[i].private = (unsigned long)ref;
8218                 spd.nr_pages++;
8219                 *ppos += PAGE_SIZE;
8220
8221                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8222         }
8223
8224         trace_access_unlock(iter->cpu_file);
8225         spd.nr_pages = i;
8226
8227         /* did we read anything? */
8228         if (!spd.nr_pages) {
8229                 if (ret)
8230                         goto out;
8231
8232                 ret = -EAGAIN;
8233                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8234                         goto out;
8235
8236                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8237                 if (ret)
8238                         goto out;
8239
8240                 goto again;
8241         }
8242
8243         ret = splice_to_pipe(pipe, &spd);
8244 out:
8245         splice_shrink_spd(&spd);
8246
8247         return ret;
8248 }
8249
8250 static const struct file_operations tracing_buffers_fops = {
8251         .open           = tracing_buffers_open,
8252         .read           = tracing_buffers_read,
8253         .poll           = tracing_buffers_poll,
8254         .release        = tracing_buffers_release,
8255         .splice_read    = tracing_buffers_splice_read,
8256         .llseek         = no_llseek,
8257 };
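
/*
 * Minimal user-space sketch (not part of the original file) of reading raw
 * ring-buffer pages through these fops, via the per-CPU trace_pipe_raw file
 * created in tracing_init_tracefs_percpu() below; the mount point and the
 * consume_page() helper are assumptions:
 *
 *      int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                    O_RDONLY);
 *      char page[4096];                // expects PAGE_SIZE-sized chunks
 *      ssize_t r;
 *
 *      while ((r = read(fd, page, sizeof(page))) > 0)
 *              consume_page(page, r);  // hypothetical binary decoder
 *
 * tracing_buffers_splice_read() additionally allows splicing whole pages
 * into a pipe without copying.
 */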
8258
8259 static ssize_t
8260 tracing_stats_read(struct file *filp, char __user *ubuf,
8261                    size_t count, loff_t *ppos)
8262 {
8263         struct inode *inode = file_inode(filp);
8264         struct trace_array *tr = inode->i_private;
8265         struct array_buffer *trace_buf = &tr->array_buffer;
8266         int cpu = tracing_get_cpu(inode);
8267         struct trace_seq *s;
8268         unsigned long cnt;
8269         unsigned long long t;
8270         unsigned long usec_rem;
8271
8272         s = kmalloc(sizeof(*s), GFP_KERNEL);
8273         if (!s)
8274                 return -ENOMEM;
8275
8276         trace_seq_init(s);
8277
8278         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8279         trace_seq_printf(s, "entries: %ld\n", cnt);
8280
8281         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8282         trace_seq_printf(s, "overrun: %ld\n", cnt);
8283
8284         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8285         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8286
8287         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8288         trace_seq_printf(s, "bytes: %ld\n", cnt);
8289
8290         if (trace_clocks[tr->clock_id].in_ns) {
8291                 /* local or global for trace_clock */
8292                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8293                 usec_rem = do_div(t, USEC_PER_SEC);
8294                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8295                                                                 t, usec_rem);
8296
8297                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8298                 usec_rem = do_div(t, USEC_PER_SEC);
8299                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8300         } else {
8301                 /* counter or tsc mode for trace_clock */
8302                 trace_seq_printf(s, "oldest event ts: %llu\n",
8303                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8304
8305                 trace_seq_printf(s, "now ts: %llu\n",
8306                                 ring_buffer_time_stamp(trace_buf->buffer));
8307         }
8308
8309         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8310         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8311
8312         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8313         trace_seq_printf(s, "read events: %ld\n", cnt);
8314
8315         count = simple_read_from_buffer(ubuf, count, ppos,
8316                                         s->buffer, trace_seq_used(s));
8317
8318         kfree(s);
8319
8320         return count;
8321 }
8322
8323 static const struct file_operations tracing_stats_fops = {
8324         .open           = tracing_open_generic_tr,
8325         .read           = tracing_stats_read,
8326         .llseek         = generic_file_llseek,
8327         .release        = tracing_release_generic_tr,
8328 };
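
/*
 * Reading the per-CPU "stats" file backed by these fops produces output of
 * roughly this shape (the numbers below are made up; the field names come
 * from tracing_stats_read() above):
 *
 *      entries: 1024
 *      overrun: 0
 *      commit overrun: 0
 *      bytes: 65536
 *      oldest event ts:  5123.456789
 *      now ts:  5124.123456
 *      dropped events: 0
 *      read events: 256
 */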
8329
8330 #ifdef CONFIG_DYNAMIC_FTRACE
8331
8332 static ssize_t
8333 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8334                   size_t cnt, loff_t *ppos)
8335 {
8336         ssize_t ret;
8337         char *buf;
8338         int r;
8339
8340         /* 256 should be plenty to hold the amount needed */
8341         buf = kmalloc(256, GFP_KERNEL);
8342         if (!buf)
8343                 return -ENOMEM;
8344
8345         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8346                       ftrace_update_tot_cnt,
8347                       ftrace_number_of_pages,
8348                       ftrace_number_of_groups);
8349
8350         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8351         kfree(buf);
8352         return ret;
8353 }
8354
8355 static const struct file_operations tracing_dyn_info_fops = {
8356         .open           = tracing_open_generic,
8357         .read           = tracing_read_dyn_info,
8358         .llseek         = generic_file_llseek,
8359 };
8360 #endif /* CONFIG_DYNAMIC_FTRACE */
8361
8362 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8363 static void
8364 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8365                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8366                 void *data)
8367 {
8368         tracing_snapshot_instance(tr);
8369 }
8370
8371 static void
8372 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8373                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8374                       void *data)
8375 {
8376         struct ftrace_func_mapper *mapper = data;
8377         long *count = NULL;
8378
8379         if (mapper)
8380                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8381
8382         if (count) {
8383
8384                 if (*count <= 0)
8385                         return;
8386
8387                 (*count)--;
8388         }
8389
8390         tracing_snapshot_instance(tr);
8391 }
8392
8393 static int
8394 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8395                       struct ftrace_probe_ops *ops, void *data)
8396 {
8397         struct ftrace_func_mapper *mapper = data;
8398         long *count = NULL;
8399
8400         seq_printf(m, "%ps:", (void *)ip);
8401
8402         seq_puts(m, "snapshot");
8403
8404         if (mapper)
8405                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8406
8407         if (count)
8408                 seq_printf(m, ":count=%ld\n", *count);
8409         else
8410                 seq_puts(m, ":unlimited\n");
8411
8412         return 0;
8413 }
8414
8415 static int
8416 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8417                      unsigned long ip, void *init_data, void **data)
8418 {
8419         struct ftrace_func_mapper *mapper = *data;
8420
8421         if (!mapper) {
8422                 mapper = allocate_ftrace_func_mapper();
8423                 if (!mapper)
8424                         return -ENOMEM;
8425                 *data = mapper;
8426         }
8427
8428         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8429 }
8430
8431 static void
8432 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8433                      unsigned long ip, void *data)
8434 {
8435         struct ftrace_func_mapper *mapper = data;
8436
8437         if (!ip) {
8438                 if (!mapper)
8439                         return;
8440                 free_ftrace_func_mapper(mapper, NULL);
8441                 return;
8442         }
8443
8444         ftrace_func_mapper_remove_ip(mapper, ip);
8445 }
8446
8447 static struct ftrace_probe_ops snapshot_probe_ops = {
8448         .func                   = ftrace_snapshot,
8449         .print                  = ftrace_snapshot_print,
8450 };
8451
8452 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8453         .func                   = ftrace_count_snapshot,
8454         .print                  = ftrace_snapshot_print,
8455         .init                   = ftrace_snapshot_init,
8456         .free                   = ftrace_snapshot_free,
8457 };
8458
8459 static int
8460 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8461                                char *glob, char *cmd, char *param, int enable)
8462 {
8463         struct ftrace_probe_ops *ops;
8464         void *count = (void *)-1;
8465         char *number;
8466         int ret;
8467
8468         if (!tr)
8469                 return -ENODEV;
8470
8471         /* hash funcs only work with set_ftrace_filter */
8472         if (!enable)
8473                 return -EINVAL;
8474
8475         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8476
8477         if (glob[0] == '!')
8478                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8479
8480         if (!param)
8481                 goto out_reg;
8482
8483         number = strsep(&param, ":");
8484
8485         if (!strlen(number))
8486                 goto out_reg;
8487
8488         /*
8489          * We use the callback data field (which is a pointer)
8490          * as our counter.
8491          */
8492         ret = kstrtoul(number, 0, (unsigned long *)&count);
8493         if (ret)
8494                 return ret;
8495
8496  out_reg:
8497         ret = tracing_alloc_snapshot_instance(tr);
8498         if (ret < 0)
8499                 goto out;
8500
8501         ret = register_ftrace_function_probe(glob, tr, ops, count);
8502
8503  out:
8504         return ret < 0 ? ret : 0;
8505 }
8506
8507 static struct ftrace_func_command ftrace_snapshot_cmd = {
8508         .name                   = "snapshot",
8509         .func                   = ftrace_trace_snapshot_callback,
8510 };
8511
8512 static __init int register_snapshot_cmd(void)
8513 {
8514         return register_ftrace_command(&ftrace_snapshot_cmd);
8515 }
8516 #else
8517 static inline __init int register_snapshot_cmd(void) { return 0; }
8518 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
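
/*
 * Illustrative usage of the "snapshot" function command registered above
 * (tracefs mount point assumed; "schedule" is just an example function):
 *
 *      # echo 'schedule:snapshot' > set_ftrace_filter      # every hit
 *      # echo 'schedule:snapshot:3' > set_ftrace_filter    # first 3 hits
 *      # echo '!schedule:snapshot' > set_ftrace_filter     # remove probe
 *
 * The optional ":<count>" is parsed by ftrace_trace_snapshot_callback() and
 * stored in the probe's data as a countdown used by ftrace_count_snapshot().
 */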
8519
8520 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8521 {
8522         if (WARN_ON(!tr->dir))
8523                 return ERR_PTR(-ENODEV);
8524
8525         /* Top directory uses NULL as the parent */
8526         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8527                 return NULL;
8528
8529         /* All sub buffers have a descriptor */
8530         return tr->dir;
8531 }
8532
8533 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8534 {
8535         struct dentry *d_tracer;
8536
8537         if (tr->percpu_dir)
8538                 return tr->percpu_dir;
8539
8540         d_tracer = tracing_get_dentry(tr);
8541         if (IS_ERR(d_tracer))
8542                 return NULL;
8543
8544         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8545
8546         MEM_FAIL(!tr->percpu_dir,
8547                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8548
8549         return tr->percpu_dir;
8550 }
8551
8552 static struct dentry *
8553 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8554                       void *data, long cpu, const struct file_operations *fops)
8555 {
8556         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8557
8558         if (ret) /* See tracing_get_cpu() */
8559                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8560         return ret;
8561 }
8562
8563 static void
8564 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8565 {
8566         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8567         struct dentry *d_cpu;
8568         char cpu_dir[30]; /* 30 characters should be more than enough */
8569
8570         if (!d_percpu)
8571                 return;
8572
8573         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8574         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8575         if (!d_cpu) {
8576                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8577                 return;
8578         }
8579
8580         /* per cpu trace_pipe */
8581         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8582                                 tr, cpu, &tracing_pipe_fops);
8583
8584         /* per cpu trace */
8585         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8586                                 tr, cpu, &tracing_fops);
8587
8588         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8589                                 tr, cpu, &tracing_buffers_fops);
8590
8591         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8592                                 tr, cpu, &tracing_stats_fops);
8593
8594         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8595                                 tr, cpu, &tracing_entries_fops);
8596
8597 #ifdef CONFIG_TRACER_SNAPSHOT
8598         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8599                                 tr, cpu, &snapshot_fops);
8600
8601         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8602                                 tr, cpu, &snapshot_raw_fops);
8603 #endif
8604 }
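
/*
 * For each tracing CPU this results in a directory of roughly this shape
 * (the snapshot files only exist with CONFIG_TRACER_SNAPSHOT):
 *
 *      per_cpu/cpu0/trace_pipe
 *      per_cpu/cpu0/trace
 *      per_cpu/cpu0/trace_pipe_raw
 *      per_cpu/cpu0/stats
 *      per_cpu/cpu0/buffer_size_kb
 *      per_cpu/cpu0/snapshot
 *      per_cpu/cpu0/snapshot_raw
 */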
8605
8606 #ifdef CONFIG_FTRACE_SELFTEST
8607 /* Let selftest have access to static functions in this file */
8608 #include "trace_selftest.c"
8609 #endif
8610
8611 static ssize_t
8612 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8613                         loff_t *ppos)
8614 {
8615         struct trace_option_dentry *topt = filp->private_data;
8616         char *buf;
8617
8618         if (topt->flags->val & topt->opt->bit)
8619                 buf = "1\n";
8620         else
8621                 buf = "0\n";
8622
8623         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8624 }
8625
8626 static ssize_t
8627 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8628                          loff_t *ppos)
8629 {
8630         struct trace_option_dentry *topt = filp->private_data;
8631         unsigned long val;
8632         int ret;
8633
8634         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8635         if (ret)
8636                 return ret;
8637
8638         if (val != 0 && val != 1)
8639                 return -EINVAL;
8640
8641         if (!!(topt->flags->val & topt->opt->bit) != val) {
8642                 mutex_lock(&trace_types_lock);
8643                 ret = __set_tracer_option(topt->tr, topt->flags,
8644                                           topt->opt, !val);
8645                 mutex_unlock(&trace_types_lock);
8646                 if (ret)
8647                         return ret;
8648         }
8649
8650         *ppos += cnt;
8651
8652         return cnt;
8653 }
8654
8655
8656 static const struct file_operations trace_options_fops = {
8657         .open = tracing_open_generic,
8658         .read = trace_options_read,
8659         .write = trace_options_write,
8660         .llseek = generic_file_llseek,
8661 };
8662
8663 /*
8664  * In order to pass in both the trace_array descriptor and the index
8665  * of the flag that the trace option file represents, the trace_array
8666  * has a character array of trace_flags_index[], which holds the index
8667  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8668  * The address of this character array is passed to the flag option file
8669  * read/write callbacks.
8670  *
8671  * In order to extract both the index and the trace_array descriptor,
8672  * get_tr_index() uses the following algorithm.
8673  *
8674  *   idx = *ptr;
8675  *
8676  * As the pointer itself contains the address of the index (remember
8677  * index[1] == 1).
8678  *
8679  * Then to get the trace_array descriptor, by subtracting that index
8680  * from the ptr, we get to the start of the index itself.
8681  *
8682  *   ptr - idx == &index[0]
8683  *
8684  * Then a simple container_of() from that pointer gets us to the
8685  * trace_array descriptor.
8686  */
8687 static void get_tr_index(void *data, struct trace_array **ptr,
8688                          unsigned int *pindex)
8689 {
8690         *pindex = *(unsigned char *)data;
8691
8692         *ptr = container_of(data - *pindex, struct trace_array,
8693                             trace_flags_index);
8694 }
8695
8696 static ssize_t
8697 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8698                         loff_t *ppos)
8699 {
8700         void *tr_index = filp->private_data;
8701         struct trace_array *tr;
8702         unsigned int index;
8703         char *buf;
8704
8705         get_tr_index(tr_index, &tr, &index);
8706
8707         if (tr->trace_flags & (1 << index))
8708                 buf = "1\n";
8709         else
8710                 buf = "0\n";
8711
8712         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8713 }
8714
8715 static ssize_t
8716 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8717                          loff_t *ppos)
8718 {
8719         void *tr_index = filp->private_data;
8720         struct trace_array *tr;
8721         unsigned int index;
8722         unsigned long val;
8723         int ret;
8724
8725         get_tr_index(tr_index, &tr, &index);
8726
8727         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8728         if (ret)
8729                 return ret;
8730
8731         if (val != 0 && val != 1)
8732                 return -EINVAL;
8733
8734         mutex_lock(&event_mutex);
8735         mutex_lock(&trace_types_lock);
8736         ret = set_tracer_flag(tr, 1 << index, val);
8737         mutex_unlock(&trace_types_lock);
8738         mutex_unlock(&event_mutex);
8739
8740         if (ret < 0)
8741                 return ret;
8742
8743         *ppos += cnt;
8744
8745         return cnt;
8746 }
8747
8748 static const struct file_operations trace_options_core_fops = {
8749         .open = tracing_open_generic,
8750         .read = trace_options_core_read,
8751         .write = trace_options_core_write,
8752         .llseek = generic_file_llseek,
8753 };
8754
8755 struct dentry *trace_create_file(const char *name,
8756                                  umode_t mode,
8757                                  struct dentry *parent,
8758                                  void *data,
8759                                  const struct file_operations *fops)
8760 {
8761         struct dentry *ret;
8762
8763         ret = tracefs_create_file(name, mode, parent, data, fops);
8764         if (!ret)
8765                 pr_warn("Could not create tracefs '%s' entry\n", name);
8766
8767         return ret;
8768 }
8769
8770
8771 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8772 {
8773         struct dentry *d_tracer;
8774
8775         if (tr->options)
8776                 return tr->options;
8777
8778         d_tracer = tracing_get_dentry(tr);
8779         if (IS_ERR(d_tracer))
8780                 return NULL;
8781
8782         tr->options = tracefs_create_dir("options", d_tracer);
8783         if (!tr->options) {
8784                 pr_warn("Could not create tracefs directory 'options'\n");
8785                 return NULL;
8786         }
8787
8788         return tr->options;
8789 }
8790
8791 static void
8792 create_trace_option_file(struct trace_array *tr,
8793                          struct trace_option_dentry *topt,
8794                          struct tracer_flags *flags,
8795                          struct tracer_opt *opt)
8796 {
8797         struct dentry *t_options;
8798
8799         t_options = trace_options_init_dentry(tr);
8800         if (!t_options)
8801                 return;
8802
8803         topt->flags = flags;
8804         topt->opt = opt;
8805         topt->tr = tr;
8806
8807         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8808                                         t_options, topt, &trace_options_fops);
8809
8810 }
8811
8812 static void
8813 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8814 {
8815         struct trace_option_dentry *topts;
8816         struct trace_options *tr_topts;
8817         struct tracer_flags *flags;
8818         struct tracer_opt *opts;
8819         int cnt;
8820         int i;
8821
8822         if (!tracer)
8823                 return;
8824
8825         flags = tracer->flags;
8826
8827         if (!flags || !flags->opts)
8828                 return;
8829
8830         /*
8831          * If this is an instance, only create flags for tracers
8832          * the instance may have.
8833          */
8834         if (!trace_ok_for_array(tracer, tr))
8835                 return;
8836
8837         for (i = 0; i < tr->nr_topts; i++) {
8838                 /* Make sure there are no duplicate flags. */
8839                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8840                         return;
8841         }
8842
8843         opts = flags->opts;
8844
8845         for (cnt = 0; opts[cnt].name; cnt++)
8846                 ;
8847
8848         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8849         if (!topts)
8850                 return;
8851
8852         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8853                             GFP_KERNEL);
8854         if (!tr_topts) {
8855                 kfree(topts);
8856                 return;
8857         }
8858
8859         tr->topts = tr_topts;
8860         tr->topts[tr->nr_topts].tracer = tracer;
8861         tr->topts[tr->nr_topts].topts = topts;
8862         tr->nr_topts++;
8863
8864         for (cnt = 0; opts[cnt].name; cnt++) {
8865                 create_trace_option_file(tr, &topts[cnt], flags,
8866                                          &opts[cnt]);
8867                 MEM_FAIL(topts[cnt].entry == NULL,
8868                           "Failed to create trace option: %s",
8869                           opts[cnt].name);
8870         }
8871 }
8872
8873 static struct dentry *
8874 create_trace_option_core_file(struct trace_array *tr,
8875                               const char *option, long index)
8876 {
8877         struct dentry *t_options;
8878
8879         t_options = trace_options_init_dentry(tr);
8880         if (!t_options)
8881                 return NULL;
8882
8883         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8884                                  (void *)&tr->trace_flags_index[index],
8885                                  &trace_options_core_fops);
8886 }
8887
8888 static void create_trace_options_dir(struct trace_array *tr)
8889 {
8890         struct dentry *t_options;
8891         bool top_level = tr == &global_trace;
8892         int i;
8893
8894         t_options = trace_options_init_dentry(tr);
8895         if (!t_options)
8896                 return;
8897
8898         for (i = 0; trace_options[i]; i++) {
8899                 if (top_level ||
8900                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8901                         create_trace_option_core_file(tr, trace_options[i], i);
8902         }
8903 }
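
/*
 * Each trace flag then shows up as a boolean file under options/, e.g.
 * (tracefs mount point assumed; "sym-offset" is just one of the core
 * trace_options[] names):
 *
 *      # cat options/sym-offset        -> 0
 *      # echo 1 > options/sym-offset
 */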
8904
8905 static ssize_t
8906 rb_simple_read(struct file *filp, char __user *ubuf,
8907                size_t cnt, loff_t *ppos)
8908 {
8909         struct trace_array *tr = filp->private_data;
8910         char buf[64];
8911         int r;
8912
8913         r = tracer_tracing_is_on(tr);
8914         r = sprintf(buf, "%d\n", r);
8915
8916         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8917 }
8918
8919 static ssize_t
8920 rb_simple_write(struct file *filp, const char __user *ubuf,
8921                 size_t cnt, loff_t *ppos)
8922 {
8923         struct trace_array *tr = filp->private_data;
8924         struct trace_buffer *buffer = tr->array_buffer.buffer;
8925         unsigned long val;
8926         int ret;
8927
8928         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8929         if (ret)
8930                 return ret;
8931
8932         if (buffer) {
8933                 mutex_lock(&trace_types_lock);
8934                 if (!!val == tracer_tracing_is_on(tr)) {
8935                         val = 0; /* do nothing */
8936                 } else if (val) {
8937                         tracer_tracing_on(tr);
8938                         if (tr->current_trace->start)
8939                                 tr->current_trace->start(tr);
8940                 } else {
8941                         tracer_tracing_off(tr);
8942                         if (tr->current_trace->stop)
8943                                 tr->current_trace->stop(tr);
8944                 }
8945                 mutex_unlock(&trace_types_lock);
8946         }
8947
8948         (*ppos)++;
8949
8950         return cnt;
8951 }
8952
8953 static const struct file_operations rb_simple_fops = {
8954         .open           = tracing_open_generic_tr,
8955         .read           = rb_simple_read,
8956         .write          = rb_simple_write,
8957         .release        = tracing_release_generic_tr,
8958         .llseek         = default_llseek,
8959 };
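
/*
 * These fops back the "tracing_on" file created in init_tracer_tracefs()
 * below, e.g. (tracefs mount point assumed):
 *
 *      # echo 0 > tracing_on   # stop recording into the ring buffer
 *      # echo 1 > tracing_on   # resume recording
 *      # cat tracing_on        # prints 0 or 1
 */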
8960
8961 static ssize_t
8962 buffer_percent_read(struct file *filp, char __user *ubuf,
8963                     size_t cnt, loff_t *ppos)
8964 {
8965         struct trace_array *tr = filp->private_data;
8966         char buf[64];
8967         int r;
8968
8969         r = tr->buffer_percent;
8970         r = sprintf(buf, "%d\n", r);
8971
8972         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8973 }
8974
8975 static ssize_t
8976 buffer_percent_write(struct file *filp, const char __user *ubuf,
8977                      size_t cnt, loff_t *ppos)
8978 {
8979         struct trace_array *tr = filp->private_data;
8980         unsigned long val;
8981         int ret;
8982
8983         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8984         if (ret)
8985                 return ret;
8986
8987         if (val > 100)
8988                 return -EINVAL;
8989
8990         if (!val)
8991                 val = 1;
8992
8993         tr->buffer_percent = val;
8994
8995         (*ppos)++;
8996
8997         return cnt;
8998 }
8999
9000 static const struct file_operations buffer_percent_fops = {
9001         .open           = tracing_open_generic_tr,
9002         .read           = buffer_percent_read,
9003         .write          = buffer_percent_write,
9004         .release        = tracing_release_generic_tr,
9005         .llseek         = default_llseek,
9006 };
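
/*
 * "buffer_percent" (created in init_tracer_tracefs() below, default 50)
 * sets how full the ring buffer must be before blocked readers of
 * trace_pipe_raw are woken; see the wait_on_pipe() call in
 * tracing_buffers_splice_read().  Example (tracefs mount point assumed):
 *
 *      # echo 10 > buffer_percent      # wake readers earlier
 *      # echo 100 > buffer_percent     # wake only when the buffer is full
 */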
9007
9008 static struct dentry *trace_instance_dir;
9009
9010 static void
9011 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9012
9013 static int
9014 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9015 {
9016         enum ring_buffer_flags rb_flags;
9017
9018         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9019
9020         buf->tr = tr;
9021
9022         buf->buffer = ring_buffer_alloc(size, rb_flags);
9023         if (!buf->buffer)
9024                 return -ENOMEM;
9025
9026         buf->data = alloc_percpu(struct trace_array_cpu);
9027         if (!buf->data) {
9028                 ring_buffer_free(buf->buffer);
9029                 buf->buffer = NULL;
9030                 return -ENOMEM;
9031         }
9032
9033         /* Allocate the first page for all buffers */
9034         set_buffer_entries(&tr->array_buffer,
9035                            ring_buffer_size(tr->array_buffer.buffer, 0));
9036
9037         return 0;
9038 }
9039
9040 static int allocate_trace_buffers(struct trace_array *tr, int size)
9041 {
9042         int ret;
9043
9044         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9045         if (ret)
9046                 return ret;
9047
9048 #ifdef CONFIG_TRACER_MAX_TRACE
9049         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9050                                     allocate_snapshot ? size : 1);
9051         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9052                 ring_buffer_free(tr->array_buffer.buffer);
9053                 tr->array_buffer.buffer = NULL;
9054                 free_percpu(tr->array_buffer.data);
9055                 tr->array_buffer.data = NULL;
9056                 return -ENOMEM;
9057         }
9058         tr->allocated_snapshot = allocate_snapshot;
9059
9060         /*
9061          * Only the top level trace array gets its snapshot allocated
9062          * from the kernel command line.
9063          */
9064         allocate_snapshot = false;
9065 #endif
9066
9067         return 0;
9068 }
9069
9070 static void free_trace_buffer(struct array_buffer *buf)
9071 {
9072         if (buf->buffer) {
9073                 ring_buffer_free(buf->buffer);
9074                 buf->buffer = NULL;
9075                 free_percpu(buf->data);
9076                 buf->data = NULL;
9077         }
9078 }
9079
9080 static void free_trace_buffers(struct trace_array *tr)
9081 {
9082         if (!tr)
9083                 return;
9084
9085         free_trace_buffer(&tr->array_buffer);
9086
9087 #ifdef CONFIG_TRACER_MAX_TRACE
9088         free_trace_buffer(&tr->max_buffer);
9089 #endif
9090 }
9091
9092 static void init_trace_flags_index(struct trace_array *tr)
9093 {
9094         int i;
9095
9096         /* Used by the trace options files */
9097         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9098                 tr->trace_flags_index[i] = i;
9099 }
9100
9101 static void __update_tracer_options(struct trace_array *tr)
9102 {
9103         struct tracer *t;
9104
9105         for (t = trace_types; t; t = t->next)
9106                 add_tracer_options(tr, t);
9107 }
9108
9109 static void update_tracer_options(struct trace_array *tr)
9110 {
9111         mutex_lock(&trace_types_lock);
9112         __update_tracer_options(tr);
9113         mutex_unlock(&trace_types_lock);
9114 }
9115
9116 /* Must have trace_types_lock held */
9117 struct trace_array *trace_array_find(const char *instance)
9118 {
9119         struct trace_array *tr, *found = NULL;
9120
9121         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9122                 if (tr->name && strcmp(tr->name, instance) == 0) {
9123                         found = tr;
9124                         break;
9125                 }
9126         }
9127
9128         return found;
9129 }
9130
9131 struct trace_array *trace_array_find_get(const char *instance)
9132 {
9133         struct trace_array *tr;
9134
9135         mutex_lock(&trace_types_lock);
9136         tr = trace_array_find(instance);
9137         if (tr)
9138                 tr->ref++;
9139         mutex_unlock(&trace_types_lock);
9140
9141         return tr;
9142 }
9143
9144 static int trace_array_create_dir(struct trace_array *tr)
9145 {
9146         int ret;
9147
9148         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9149         if (!tr->dir)
9150                 return -EINVAL;
9151
9152         ret = event_trace_add_tracer(tr->dir, tr);
9153         if (ret) {
9154                 tracefs_remove(tr->dir);
9155                 return ret;
9156         }
9157
9158         init_tracer_tracefs(tr, tr->dir);
9159         __update_tracer_options(tr);
9160
9161         return ret;
9162 }
9163
9164 static struct trace_array *trace_array_create(const char *name)
9165 {
9166         struct trace_array *tr;
9167         int ret;
9168
9169         ret = -ENOMEM;
9170         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9171         if (!tr)
9172                 return ERR_PTR(ret);
9173
9174         tr->name = kstrdup(name, GFP_KERNEL);
9175         if (!tr->name)
9176                 goto out_free_tr;
9177
9178         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9179                 goto out_free_tr;
9180
9181         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9182
9183         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9184
9185         raw_spin_lock_init(&tr->start_lock);
9186
9187         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9188
9189         tr->current_trace = &nop_trace;
9190
9191         INIT_LIST_HEAD(&tr->systems);
9192         INIT_LIST_HEAD(&tr->events);
9193         INIT_LIST_HEAD(&tr->hist_vars);
9194         INIT_LIST_HEAD(&tr->err_log);
9195
9196         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9197                 goto out_free_tr;
9198
9199         if (ftrace_allocate_ftrace_ops(tr) < 0)
9200                 goto out_free_tr;
9201
9202         ftrace_init_trace_array(tr);
9203
9204         init_trace_flags_index(tr);
9205
9206         if (trace_instance_dir) {
9207                 ret = trace_array_create_dir(tr);
9208                 if (ret)
9209                         goto out_free_tr;
9210         } else
9211                 __trace_early_add_events(tr);
9212
9213         list_add(&tr->list, &ftrace_trace_arrays);
9214
9215         tr->ref++;
9216
9217         return tr;
9218
9219  out_free_tr:
9220         ftrace_free_ftrace_ops(tr);
9221         free_trace_buffers(tr);
9222         free_cpumask_var(tr->tracing_cpumask);
9223         kfree(tr->name);
9224         kfree(tr);
9225
9226         return ERR_PTR(ret);
9227 }
9228
9229 static int instance_mkdir(const char *name)
9230 {
9231         struct trace_array *tr;
9232         int ret;
9233
9234         mutex_lock(&event_mutex);
9235         mutex_lock(&trace_types_lock);
9236
9237         ret = -EEXIST;
9238         if (trace_array_find(name))
9239                 goto out_unlock;
9240
9241         tr = trace_array_create(name);
9242
9243         ret = PTR_ERR_OR_ZERO(tr);
9244
9245 out_unlock:
9246         mutex_unlock(&trace_types_lock);
9247         mutex_unlock(&event_mutex);
9248         return ret;
9249 }
9250
9251 /**
9252  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9253  * @name: The name of the trace array to be looked up/created.
9254  *
9255  * Returns a pointer to the trace array with the given name, or
9256  * NULL if it cannot be created.
9257  *
9258  * NOTE: This function increments the reference counter associated with the
9259  * trace array returned. This makes sure it cannot be freed while in use.
9260  * Use trace_array_put() once the trace array is no longer needed.
9261  * If the trace_array is to be freed, trace_array_destroy() needs to
9262  * be called after the trace_array_put(), or simply let user space delete
9263  * it from the tracefs instances directory. But until the
9264  * trace_array_put() is called, user space cannot delete it.
9265  *
9266  */
9267 struct trace_array *trace_array_get_by_name(const char *name)
9268 {
9269         struct trace_array *tr;
9270
9271         mutex_lock(&event_mutex);
9272         mutex_lock(&trace_types_lock);
9273
9274         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9275                 if (tr->name && strcmp(tr->name, name) == 0)
9276                         goto out_unlock;
9277         }
9278
9279         tr = trace_array_create(name);
9280
9281         if (IS_ERR(tr))
9282                 tr = NULL;
9283 out_unlock:
9284         if (tr)
9285                 tr->ref++;
9286
9287         mutex_unlock(&trace_types_lock);
9288         mutex_unlock(&event_mutex);
9289         return tr;
9290 }
9291 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
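
/*
 * Minimal in-kernel usage sketch (error handling elided; the instance name
 * "my_instance" is arbitrary):
 *
 *      struct trace_array *tr;
 *
 *      tr = trace_array_get_by_name("my_instance");
 *      if (tr) {
 *              ...                             // use the instance
 *              trace_array_put(tr);
 *              trace_array_destroy(tr);        // only to remove it entirely
 *      }
 */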
9292
9293 static int __remove_instance(struct trace_array *tr)
9294 {
9295         int i;
9296
9297         /* Reference counter for a newly created trace array = 1. */
9298         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9299                 return -EBUSY;
9300
9301         list_del(&tr->list);
9302
9303         /* Disable all the flags that were enabled coming in */
9304         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9305                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9306                         set_tracer_flag(tr, 1 << i, 0);
9307         }
9308
9309         tracing_set_nop(tr);
9310         clear_ftrace_function_probes(tr);
9311         event_trace_del_tracer(tr);
9312         ftrace_clear_pids(tr);
9313         ftrace_destroy_function_files(tr);
9314         tracefs_remove(tr->dir);
9315         free_percpu(tr->last_func_repeats);
9316         free_trace_buffers(tr);
9317
9318         for (i = 0; i < tr->nr_topts; i++) {
9319                 kfree(tr->topts[i].topts);
9320         }
9321         kfree(tr->topts);
9322
9323         free_cpumask_var(tr->tracing_cpumask);
9324         kfree(tr->name);
9325         kfree(tr);
9326
9327         return 0;
9328 }
9329
9330 int trace_array_destroy(struct trace_array *this_tr)
9331 {
9332         struct trace_array *tr;
9333         int ret;
9334
9335         if (!this_tr)
9336                 return -EINVAL;
9337
9338         mutex_lock(&event_mutex);
9339         mutex_lock(&trace_types_lock);
9340
9341         ret = -ENODEV;
9342
9343         /* Make sure the trace array exists before destroying it. */
9344         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9345                 if (tr == this_tr) {
9346                         ret = __remove_instance(tr);
9347                         break;
9348                 }
9349         }
9350
9351         mutex_unlock(&trace_types_lock);
9352         mutex_unlock(&event_mutex);
9353
9354         return ret;
9355 }
9356 EXPORT_SYMBOL_GPL(trace_array_destroy);
9357
9358 static int instance_rmdir(const char *name)
9359 {
9360         struct trace_array *tr;
9361         int ret;
9362
9363         mutex_lock(&event_mutex);
9364         mutex_lock(&trace_types_lock);
9365
9366         ret = -ENODEV;
9367         tr = trace_array_find(name);
9368         if (tr)
9369                 ret = __remove_instance(tr);
9370
9371         mutex_unlock(&trace_types_lock);
9372         mutex_unlock(&event_mutex);
9373
9374         return ret;
9375 }
9376
9377 static __init void create_trace_instances(struct dentry *d_tracer)
9378 {
9379         struct trace_array *tr;
9380
9381         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9382                                                          instance_mkdir,
9383                                                          instance_rmdir);
9384         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9385                 return;
9386
9387         mutex_lock(&event_mutex);
9388         mutex_lock(&trace_types_lock);
9389
9390         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9391                 if (!tr->name)
9392                         continue;
9393                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9394                              "Failed to create instance directory\n"))
9395                         break;
9396         }
9397
9398         mutex_unlock(&trace_types_lock);
9399         mutex_unlock(&event_mutex);
9400 }
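
/*
 * Once the "instances" directory exists, user space can create and remove
 * ring-buffer instances directly, e.g. (tracefs mount point assumed):
 *
 *      # mkdir /sys/kernel/tracing/instances/foo
 *      # rmdir /sys/kernel/tracing/instances/foo
 *
 * which ends up in instance_mkdir() and instance_rmdir() above.
 */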
9401
9402 static void
9403 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9404 {
9405         struct trace_event_file *file;
9406         int cpu;
9407
9408         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9409                         tr, &show_traces_fops);
9410
9411         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9412                         tr, &set_tracer_fops);
9413
9414         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9415                           tr, &tracing_cpumask_fops);
9416
9417         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9418                           tr, &tracing_iter_fops);
9419
9420         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9421                           tr, &tracing_fops);
9422
9423         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9424                           tr, &tracing_pipe_fops);
9425
9426         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9427                           tr, &tracing_entries_fops);
9428
9429         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9430                           tr, &tracing_total_entries_fops);
9431
9432         trace_create_file("free_buffer", 0200, d_tracer,
9433                           tr, &tracing_free_buffer_fops);
9434
9435         trace_create_file("trace_marker", 0220, d_tracer,
9436                           tr, &tracing_mark_fops);
9437
9438         file = __find_event_file(tr, "ftrace", "print");
9439         if (file && file->dir)
9440                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9441                                   file, &event_trigger_fops);
9442         tr->trace_marker_file = file;
9443
9444         trace_create_file("trace_marker_raw", 0220, d_tracer,
9445                           tr, &tracing_mark_raw_fops);
9446
9447         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9448                           &trace_clock_fops);
9449
9450         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9451                           tr, &rb_simple_fops);
9452
9453         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9454                           &trace_time_stamp_mode_fops);
9455
9456         tr->buffer_percent = 50;
9457
9458         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9459                         tr, &buffer_percent_fops);
9460
9461         create_trace_options_dir(tr);
9462
9463         trace_create_maxlat_file(tr, d_tracer);
9464
9465         if (ftrace_create_function_files(tr, d_tracer))
9466                 MEM_FAIL(1, "Could not allocate function filter files");
9467
9468 #ifdef CONFIG_TRACER_SNAPSHOT
9469         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9470                           tr, &snapshot_fops);
9471 #endif
9472
9473         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9474                           tr, &tracing_err_log_fops);
9475
9476         for_each_tracing_cpu(cpu)
9477                 tracing_init_tracefs_percpu(tr, cpu);
9478
9479         ftrace_init_tracefs(tr, d_tracer);
9480 }
9481
9482 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9483 {
9484         struct vfsmount *mnt;
9485         struct file_system_type *type;
9486
9487         /*
9488          * To maintain backward compatibility for tools that mount
9489          * debugfs to get to the tracing facility, tracefs is automatically
9490          * mounted to the debugfs/tracing directory.
9491          */
9492         type = get_fs_type("tracefs");
9493         if (!type)
9494                 return NULL;
9495         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9496         put_filesystem(type);
9497         if (IS_ERR(mnt))
9498                 return NULL;
9499         mntget(mnt);
9500
9501         return mnt;
9502 }
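
/*
 * Example (sketch): with the automount callback above in place, legacy tools
 * that still look under debugfs keep working; both of these end up on
 * tracefs:
 *
 *   cat /sys/kernel/debug/tracing/trace    # triggers the automount
 *   cat /sys/kernel/tracing/trace          # native tracefs mount
 */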
9503
9504 /**
9505  * tracing_init_dentry - initialize top level trace array
9506  *
9507  * This is called when creating files or directories in the tracing
9508  * directory. It is called via fs_initcall() by any of the boot up code.
9509  * Returns 0 on success, or a negative error code on failure.
9510  */
9511 int tracing_init_dentry(void)
9512 {
9513         struct trace_array *tr = &global_trace;
9514
9515         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9516                 pr_warn("Tracing disabled due to lockdown\n");
9517                 return -EPERM;
9518         }
9519
9520         /* The top level trace array uses NULL as parent */
9521         if (tr->dir)
9522                 return 0;
9523
9524         if (WARN_ON(!tracefs_initialized()))
9525                 return -ENODEV;
9526
9527         /*
9528          * As there may still be users that expect the tracing
9529          * files to exist in debugfs/tracing, we must automount
9530          * the tracefs file system there, so older tools still
9531          * work with the newer kernel.
9532          */
9533         tr->dir = debugfs_create_automount("tracing", NULL,
9534                                            trace_automount, NULL);
9535
9536         return 0;
9537 }
9538
9539 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9540 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9541
9542 static struct workqueue_struct *eval_map_wq __initdata;
9543 static struct work_struct eval_map_work __initdata;
9544
9545 static void __init eval_map_work_func(struct work_struct *work)
9546 {
9547         int len;
9548
9549         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9550         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9551 }
9552
9553 static int __init trace_eval_init(void)
9554 {
9555         INIT_WORK(&eval_map_work, eval_map_work_func);
9556
9557         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9558         if (!eval_map_wq) {
9559                 pr_err("Unable to allocate eval_map_wq\n");
9560                 /* No workqueue; fall back to doing the work synchronously */
9561                 eval_map_work_func(&eval_map_work);
9562                 return -ENOMEM;
9563         }
9564
9565         queue_work(eval_map_wq, &eval_map_work);
9566         return 0;
9567 }
9568
9569 static int __init trace_eval_sync(void)
9570 {
9571         /* Make sure the eval map updates are finished */
9572         if (eval_map_wq)
9573                 destroy_workqueue(eval_map_wq);
9574         return 0;
9575 }
9576
9577 late_initcall_sync(trace_eval_sync);
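
/*
 * Example (sketch): the deferred work above fills in the eval maps that
 * translate enum/sizeof symbols used in event print formats. With
 * CONFIG_TRACE_EVAL_MAP_FILE they are also visible to user space:
 *
 *   cat /sys/kernel/tracing/eval_map
 */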
9578
9579
9580 #ifdef CONFIG_MODULES
9581 static void trace_module_add_evals(struct module *mod)
9582 {
9583         if (!mod->num_trace_evals)
9584                 return;
9585
9586         /*
9587          * Modules with bad taint do not have events created; do
9588          * not bother with their enums (eval maps) either.
9589          */
9590         if (trace_module_has_bad_taint(mod))
9591                 return;
9592
9593         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9594 }
9595
9596 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9597 static void trace_module_remove_evals(struct module *mod)
9598 {
9599         union trace_eval_map_item *map;
9600         union trace_eval_map_item **last = &trace_eval_maps;
9601
9602         if (!mod->num_trace_evals)
9603                 return;
9604
9605         mutex_lock(&trace_eval_mutex);
9606
9607         map = trace_eval_maps;
9608
9609         while (map) {
9610                 if (map->head.mod == mod)
9611                         break;
9612                 map = trace_eval_jmp_to_tail(map);
9613                 last = &map->tail.next;
9614                 map = map->tail.next;
9615         }
9616         if (!map)
9617                 goto out;
9618
9619         *last = trace_eval_jmp_to_tail(map)->tail.next;
9620         kfree(map);
9621  out:
9622         mutex_unlock(&trace_eval_mutex);
9623 }
9624 #else
9625 static inline void trace_module_remove_evals(struct module *mod) { }
9626 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9627
9628 static int trace_module_notify(struct notifier_block *self,
9629                                unsigned long val, void *data)
9630 {
9631         struct module *mod = data;
9632
9633         switch (val) {
9634         case MODULE_STATE_COMING:
9635                 trace_module_add_evals(mod);
9636                 break;
9637         case MODULE_STATE_GOING:
9638                 trace_module_remove_evals(mod);
9639                 break;
9640         }
9641
9642         return NOTIFY_OK;
9643 }
9644
9645 static struct notifier_block trace_module_nb = {
9646         .notifier_call = trace_module_notify,
9647         .priority = 0,
9648 };
9649 #endif /* CONFIG_MODULES */
9650
9651 static __init int tracer_init_tracefs(void)
9652 {
9653         int ret;
9654
9655         trace_access_lock_init();
9656
9657         ret = tracing_init_dentry();
9658         if (ret)
9659                 return 0;
9660
9661         event_trace_init();
9662
9663         init_tracer_tracefs(&global_trace, NULL);
9664         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9665
9666         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9667                         &global_trace, &tracing_thresh_fops);
9668
9669         trace_create_file("README", TRACE_MODE_READ, NULL,
9670                         NULL, &tracing_readme_fops);
9671
9672         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9673                         NULL, &tracing_saved_cmdlines_fops);
9674
9675         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9676                           NULL, &tracing_saved_cmdlines_size_fops);
9677
9678         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9679                         NULL, &tracing_saved_tgids_fops);
9680
9681         trace_eval_init();
9682
9683         trace_create_eval_file(NULL);
9684
9685 #ifdef CONFIG_MODULES
9686         register_module_notifier(&trace_module_nb);
9687 #endif
9688
9689 #ifdef CONFIG_DYNAMIC_FTRACE
9690         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9691                         NULL, &tracing_dyn_info_fops);
9692 #endif
9693
9694         create_trace_instances(NULL);
9695
9696         update_tracer_options(&global_trace);
9697
9698         return 0;
9699 }
9700
9701 fs_initcall(tracer_init_tracefs);
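
/*
 * Example (sketch): the top-level files created above are global (not
 * per-instance) controls, e.g.
 *
 *   cat /sys/kernel/tracing/README
 *   cat /sys/kernel/tracing/saved_cmdlines
 *   echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 */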
9702
9703 static int trace_panic_handler(struct notifier_block *this,
9704                                unsigned long event, void *unused)
9705 {
9706         if (ftrace_dump_on_oops)
9707                 ftrace_dump(ftrace_dump_on_oops);
9708         return NOTIFY_OK;
9709 }
9710
9711 static struct notifier_block trace_panic_notifier = {
9712         .notifier_call  = trace_panic_handler,
9713         .next           = NULL,
9714         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9715 };
9716
9717 static int trace_die_handler(struct notifier_block *self,
9718                              unsigned long val,
9719                              void *data)
9720 {
9721         switch (val) {
9722         case DIE_OOPS:
9723                 if (ftrace_dump_on_oops)
9724                         ftrace_dump(ftrace_dump_on_oops);
9725                 break;
9726         default:
9727                 break;
9728         }
9729         return NOTIFY_OK;
9730 }
9731
9732 static struct notifier_block trace_die_notifier = {
9733         .notifier_call = trace_die_handler,
9734         .priority = 200
9735 };
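
/*
 * Example (sketch): the two notifiers above are what make
 *
 *   ftrace_dump_on_oops              (kernel command line)
 *   sysctl kernel.ftrace_dump_on_oops=1
 *
 * dump the ring buffer to the console when the kernel panics or oopses.
 */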
9736
9737 /*
9738  * printk is limited to a maximum of 1024 characters; we really don't
9739  * need it that big. Nothing should be printing 1000 characters anyway.
9740  */
9741 #define TRACE_MAX_PRINT         1000
9742
9743 /*
9744  * Define here KERN_TRACE so that we have one place to modify
9745  * it if we decide to change what log level the ftrace dump
9746  * should be at.
9747  */
9748 #define KERN_TRACE              KERN_EMERG
9749
9750 void
9751 trace_printk_seq(struct trace_seq *s)
9752 {
9753         /* Probably should print a warning here. */
9754         if (s->seq.len >= TRACE_MAX_PRINT)
9755                 s->seq.len = TRACE_MAX_PRINT;
9756
9757         /*
9758          * More paranoid code. Although the buffer size is set to
9759          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9760          * an extra layer of protection.
9761          */
9762         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9763                 s->seq.len = s->seq.size - 1;
9764
9765         /* Should already be zero terminated, but we are paranoid. */
9766         s->buffer[s->seq.len] = 0;
9767
9768         printk(KERN_TRACE "%s", s->buffer);
9769
9770         trace_seq_init(s);
9771 }
9772
9773 void trace_init_global_iter(struct trace_iterator *iter)
9774 {
9775         iter->tr = &global_trace;
9776         iter->trace = iter->tr->current_trace;
9777         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9778         iter->array_buffer = &global_trace.array_buffer;
9779
9780         if (iter->trace && iter->trace->open)
9781                 iter->trace->open(iter);
9782
9783         /* Annotate start of buffers if we had overruns */
9784         if (ring_buffer_overruns(iter->array_buffer->buffer))
9785                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9786
9787         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9788         if (trace_clocks[iter->tr->clock_id].in_ns)
9789                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9790 }
9791
9792 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9793 {
9794         /* use static because iter can be a bit big for the stack */
9795         static struct trace_iterator iter;
9796         static atomic_t dump_running;
9797         struct trace_array *tr = &global_trace;
9798         unsigned int old_userobj;
9799         unsigned long flags;
9800         int cnt = 0, cpu;
9801
9802         /* Only allow one dump user at a time. */
9803         if (atomic_inc_return(&dump_running) != 1) {
9804                 atomic_dec(&dump_running);
9805                 return;
9806         }
9807
9808         /*
9809          * Always turn off tracing when we dump.
9810          * We don't need to show trace output of what happens
9811          * between multiple crashes.
9812          *
9813          * If the user does a sysrq-z, then they can re-enable
9814          * tracing with echo 1 > tracing_on.
9815          */
9816         tracing_off();
9817
9818         local_irq_save(flags);
9819
9820         /* Simulate the iterator */
9821         trace_init_global_iter(&iter);
9822         /* Cannot use kmalloc for iter.temp and iter.fmt */
9823         iter.temp = static_temp_buf;
9824         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9825         iter.fmt = static_fmt_buf;
9826         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9827
9828         for_each_tracing_cpu(cpu) {
9829                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9830         }
9831
9832         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9833
9834         /* don't look at user memory in panic mode */
9835         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9836
9837         switch (oops_dump_mode) {
9838         case DUMP_ALL:
9839                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9840                 break;
9841         case DUMP_ORIG:
9842                 iter.cpu_file = raw_smp_processor_id();
9843                 break;
9844         case DUMP_NONE:
9845                 goto out_enable;
9846         default:
9847                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9848                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9849         }
9850
9851         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9852
9853         /* Did function tracer already get disabled? */
9854         if (ftrace_is_dead()) {
9855                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9856                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9857         }
9858
9859         /*
9860          * We need to stop all tracing on all CPUs to read
9861          * the next buffer. This is a bit expensive, but it is
9862          * not done often. We read everything that we can,
9863          * and then release the locks again.
9864          */
9865
9866         while (!trace_empty(&iter)) {
9867
9868                 if (!cnt)
9869                         printk(KERN_TRACE "---------------------------------\n");
9870
9871                 cnt++;
9872
9873                 trace_iterator_reset(&iter);
9874                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9875
9876                 if (trace_find_next_entry_inc(&iter) != NULL) {
9877                         int ret;
9878
9879                         ret = print_trace_line(&iter);
9880                         if (ret != TRACE_TYPE_NO_CONSUME)
9881                                 trace_consume(&iter);
9882                 }
9883                 touch_nmi_watchdog();
9884
9885                 trace_printk_seq(&iter.seq);
9886         }
9887
9888         if (!cnt)
9889                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9890         else
9891                 printk(KERN_TRACE "---------------------------------\n");
9892
9893  out_enable:
9894         tr->trace_flags |= old_userobj;
9895
9896         for_each_tracing_cpu(cpu) {
9897                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9898         }
9899         atomic_dec(&dump_running);
9900         local_irq_restore(flags);
9901 }
9902 EXPORT_SYMBOL_GPL(ftrace_dump);
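
/*
 * Example (sketch): besides the panic/die notifiers, ftrace_dump() can be
 * triggered by hand through SysRq:
 *
 *   echo z > /proc/sysrq-trigger
 *
 * Tracing stays off afterwards and can be re-armed with
 *   echo 1 > /sys/kernel/tracing/tracing_on
 */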
9903
9904 #define WRITE_BUFSIZE  4096
9905
9906 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9907                                 size_t count, loff_t *ppos,
9908                                 int (*createfn)(const char *))
9909 {
9910         char *kbuf, *buf, *tmp;
9911         int ret = 0;
9912         size_t done = 0;
9913         size_t size;
9914
9915         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9916         if (!kbuf)
9917                 return -ENOMEM;
9918
9919         while (done < count) {
9920                 size = count - done;
9921
9922                 if (size >= WRITE_BUFSIZE)
9923                         size = WRITE_BUFSIZE - 1;
9924
9925                 if (copy_from_user(kbuf, buffer + done, size)) {
9926                         ret = -EFAULT;
9927                         goto out;
9928                 }
9929                 kbuf[size] = '\0';
9930                 buf = kbuf;
9931                 do {
9932                         tmp = strchr(buf, '\n');
9933                         if (tmp) {
9934                                 *tmp = '\0';
9935                                 size = tmp - buf + 1;
9936                         } else {
9937                                 size = strlen(buf);
9938                                 if (done + size < count) {
9939                                         if (buf != kbuf)
9940                                                 break;
9941                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9942                                         pr_warn("Line length is too long: Should be less than %d\n",
9943                                                 WRITE_BUFSIZE - 2);
9944                                         ret = -EINVAL;
9945                                         goto out;
9946                                 }
9947                         }
9948                         done += size;
9949
9950                         /* Remove comments */
9951                         tmp = strchr(buf, '#');
9952
9953                         if (tmp)
9954                                 *tmp = '\0';
9955
9956                         ret = createfn(buf);
9957                         if (ret)
9958                                 goto out;
9959                         buf += size;
9960
9961                 } while (done < count);
9962         }
9963         ret = done;
9964
9965 out:
9966         kfree(kbuf);
9967
9968         return ret;
9969 }
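
/*
 * Example (illustrative sketch): a tracefs write handler can feed the user
 * buffer through trace_parse_run_command() and get one callback per line,
 * with '#' comments already stripped. "example_create_cmd" and
 * "example_write" below are hypothetical, not part of this file:
 *
 *   static int example_create_cmd(const char *cmd)
 *   {
 *           if (!strlen(cmd))
 *                   return 0;       // blank line, nothing to do
 *           pr_info("parsed command: %s\n", cmd);
 *           return 0;
 *   }
 *
 *   static ssize_t example_write(struct file *file, const char __user *ubuf,
 *                                size_t cnt, loff_t *ppos)
 *   {
 *           return trace_parse_run_command(file, ubuf, cnt, ppos,
 *                                          example_create_cmd);
 *   }
 *
 * This is the same pattern used by e.g. the kprobe event file.
 */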
9970
9971 __init static int tracer_alloc_buffers(void)
9972 {
9973         int ring_buf_size;
9974         int ret = -ENOMEM;
9975
9976
9977         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9978                 pr_warn("Tracing disabled due to lockdown\n");
9979                 return -EPERM;
9980         }
9981
9982         /*
9983          * Make sure we don't accidentally add more trace options
9984          * than we have bits for.
9985          */
9986         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9987
9988         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9989                 goto out;
9990
9991         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9992                 goto out_free_buffer_mask;
9993
9994         /* Only allocate trace_printk buffers if a trace_printk exists */
9995         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9996                 /* Must be called before global_trace.buffer is allocated */
9997                 trace_printk_init_buffers();
9998
9999         /* To save memory, keep the ring buffer size to its minimum */
10000         if (ring_buffer_expanded)
10001                 ring_buf_size = trace_buf_size;
10002         else
10003                 ring_buf_size = 1;
10004
10005         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10006         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10007
10008         raw_spin_lock_init(&global_trace.start_lock);
10009
10010         /*
10011          * The prepare callback allocates some memory for the ring buffer. We
10012          * don't free the buffer if the CPU goes down. If we were to free
10013          * the buffer, then the user would lose any trace that was in the
10014          * buffer. The memory will be removed once the "instance" is removed.
10015          */
10016         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10017                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10018                                       NULL);
10019         if (ret < 0)
10020                 goto out_free_cpumask;
10021         /* Used for event triggers */
10022         ret = -ENOMEM;
10023         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10024         if (!temp_buffer)
10025                 goto out_rm_hp_state;
10026
10027         if (trace_create_savedcmd() < 0)
10028                 goto out_free_temp_buffer;
10029
10030         /* TODO: make the number of buffers hot pluggable with CPUs */
10031         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10032                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10033                 goto out_free_savedcmd;
10034         }
10035
10036         if (global_trace.buffer_disabled)
10037                 tracing_off();
10038
10039         if (trace_boot_clock) {
10040                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10041                 if (ret < 0)
10042                         pr_warn("Trace clock %s not defined, going back to default\n",
10043                                 trace_boot_clock);
10044         }
10045
10046         /*
10047          * register_tracer() might reference current_trace, so it
10048          * needs to be set before we register anything. This is
10049          * just a bootstrap of current_trace anyway.
10050          */
10051         global_trace.current_trace = &nop_trace;
10052
10053         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10054
10055         ftrace_init_global_array_ops(&global_trace);
10056
10057         init_trace_flags_index(&global_trace);
10058
10059         register_tracer(&nop_trace);
10060
10061         /* Function tracing may start here (via kernel command line) */
10062         init_function_trace();
10063
10064         /* All seems OK, enable tracing */
10065         tracing_disabled = 0;
10066
10067         atomic_notifier_chain_register(&panic_notifier_list,
10068                                        &trace_panic_notifier);
10069
10070         register_die_notifier(&trace_die_notifier);
10071
10072         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10073
10074         INIT_LIST_HEAD(&global_trace.systems);
10075         INIT_LIST_HEAD(&global_trace.events);
10076         INIT_LIST_HEAD(&global_trace.hist_vars);
10077         INIT_LIST_HEAD(&global_trace.err_log);
10078         list_add(&global_trace.list, &ftrace_trace_arrays);
10079
10080         apply_trace_boot_options();
10081
10082         register_snapshot_cmd();
10083
10084         test_can_verify();
10085
10086         return 0;
10087
10088 out_free_savedcmd:
10089         free_saved_cmdlines_buffer(savedcmd);
10090 out_free_temp_buffer:
10091         ring_buffer_free(temp_buffer);
10092 out_rm_hp_state:
10093         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10094 out_free_cpumask:
10095         free_cpumask_var(global_trace.tracing_cpumask);
10096 out_free_buffer_mask:
10097         free_cpumask_var(tracing_buffer_mask);
10098 out:
10099         return ret;
10100 }
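
/*
 * Example (sketch): several of the values consumed here come straight from
 * the kernel command line, e.g.
 *
 *   trace_buf_size=1M trace_clock=global ftrace=function
 *
 * which expand the ring buffer early, override the default trace clock,
 * and select a bootup tracer (see clear_boot_tracer() below for the case
 * where that tracer never registers).
 */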
10101
10102 void __init early_trace_init(void)
10103 {
10104         if (tracepoint_printk) {
10105                 tracepoint_print_iter =
10106                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10107                 if (MEM_FAIL(!tracepoint_print_iter,
10108                              "Failed to allocate trace iterator\n"))
10109                         tracepoint_printk = 0;
10110                 else
10111                         static_key_enable(&tracepoint_printk_key.key);
10112         }
10113         tracer_alloc_buffers();
10114 }
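
/*
 * Example (sketch): the tracepoint_print_iter allocated above backs the
 * "tp_printk" boot option, e.g.
 *
 *   tp_printk trace_event=sched:sched_switch
 *
 * which routes enabled events through printk; "tp_printk_stop_on_boot"
 * turns that back off once booting has finished (see late_trace_init()).
 */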
10115
10116 void __init trace_init(void)
10117 {
10118         trace_event_init();
10119 }
10120
10121 __init static void clear_boot_tracer(void)
10122 {
10123         /*
10124          * The default bootup tracer name points into an init section.
10125          * This function is called in a late initcall. If we did not
10126          * find and register the boot tracer by now, clear it out, to
10127          * prevent a later registration from accessing the memory that
10128          * is about to be freed.
10129          */
10130         if (!default_bootup_tracer)
10131                 return;
10132
10133         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10134                default_bootup_tracer);
10135         default_bootup_tracer = NULL;
10136 }
10137
10138 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10139 __init static void tracing_set_default_clock(void)
10140 {
10141         /* sched_clock_stable() is determined in late_initcall */
10142         if (!trace_boot_clock && !sched_clock_stable()) {
10143                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10144                         pr_warn("Can not set tracing clock due to lockdown\n");
10145                         return;
10146                 }
10147
10148                 printk(KERN_WARNING
10149                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10150                        "If you want to keep using the local clock, then add:\n"
10151                        "  \"trace_clock=local\"\n"
10152                        "on the kernel command line\n");
10153                 tracing_set_clock(&global_trace, "global");
10154         }
10155 }
10156 #else
10157 static inline void tracing_set_default_clock(void) { }
10158 #endif
10159
10160 __init static int late_trace_init(void)
10161 {
10162         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10163                 static_key_disable(&tracepoint_printk_key.key);
10164                 tracepoint_printk = 0;
10165         }
10166
10167         tracing_set_default_clock();
10168         clear_boot_tracer();
10169         return 0;
10170 }
10171
10172 late_initcall_sync(late_trace_init);